mirror of
https://github.com/kjanat/livedash-node.git
synced 2026-01-16 12:12:09 +01:00
291 lines
7.9 KiB
TypeScript
291 lines
7.9 KiB
TypeScript
// Fetches, parses, and returns chat session data for a company from a CSV URL
|
|
import fetch from "node-fetch";
|
|
import { parse } from "csv-parse/sync";
|
|
import ISO6391 from "iso-639-1";
|
|
import countries from "i18n-iso-countries";
|
|
|
|
// Register locales for i18n-iso-countries
|
|
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
|
|
countries.registerLocale(enLocale);
|
|
|
|
// This type is used internally for parsing the CSV records
|
|
interface CSVRecord {
|
|
session_id: string;
|
|
start_time: string;
|
|
end_time?: string;
|
|
ip_address?: string;
|
|
country?: string;
|
|
language?: string;
|
|
messages_sent?: string;
|
|
sentiment?: string;
|
|
escalated?: string;
|
|
forwarded_hr?: string;
|
|
full_transcript_url?: string;
|
|
avg_response_time?: string;
|
|
tokens?: string;
|
|
tokens_eur?: string;
|
|
category?: string;
|
|
initial_msg?: string;
|
|
[key: string]: string | undefined;
|
|
}
|
|
|
|
interface SessionData {
|
|
id: string;
|
|
sessionId: string;
|
|
startTime: Date;
|
|
endTime: Date | null;
|
|
ipAddress?: string;
|
|
country?: string | null;
|
|
language?: string | null;
|
|
messagesSent: number;
|
|
sentiment?: string | null;
|
|
escalated: boolean;
|
|
forwardedHr: boolean;
|
|
fullTranscriptUrl?: string | null;
|
|
avgResponseTime: number | null;
|
|
tokens: number;
|
|
tokensEur: number;
|
|
category?: string | null;
|
|
initialMsg?: string;
|
|
}
|
|
|
|
/**
|
|
* Passes through country data as-is (no mapping)
|
|
* @param countryStr Raw country string from CSV
|
|
* @returns The country string as-is or null if empty
|
|
*/
|
|
function getCountryCode(countryStr?: string): string | null | undefined {
|
|
if (countryStr === undefined) return undefined;
|
|
if (countryStr === null || countryStr === "") return null;
|
|
|
|
const normalized = countryStr.trim();
|
|
return normalized || null;
|
|
}
|
|
|
|
/**
|
|
* Converts language names to ISO 639-1 codes
|
|
* @param languageStr Raw language string from CSV
|
|
* @returns ISO 639-1 language code or null if not found
|
|
*/
|
|
function getLanguageCode(languageStr?: string): string | null | undefined {
|
|
if (languageStr === undefined) return undefined;
|
|
if (languageStr === null || languageStr === "") return null;
|
|
|
|
// Clean the input
|
|
const normalized = languageStr.trim();
|
|
if (!normalized) return null;
|
|
|
|
// Direct ISO code check (if already a 2-letter code)
|
|
if (normalized.length === 2 && normalized === normalized.toLowerCase()) {
|
|
return ISO6391.validate(normalized) ? normalized : null;
|
|
}
|
|
|
|
// Special case mappings
|
|
const languageMapping: Record<string, string> = {
|
|
english: "en",
|
|
English: "en",
|
|
dutch: "nl",
|
|
Dutch: "nl",
|
|
nederlands: "nl",
|
|
Nederlands: "nl",
|
|
nl: "nl",
|
|
bosnian: "bs",
|
|
Bosnian: "bs",
|
|
turkish: "tr",
|
|
Turkish: "tr",
|
|
german: "de",
|
|
German: "de",
|
|
deutsch: "de",
|
|
Deutsch: "de",
|
|
french: "fr",
|
|
French: "fr",
|
|
français: "fr",
|
|
Français: "fr",
|
|
spanish: "es",
|
|
Spanish: "es",
|
|
español: "es",
|
|
Español: "es",
|
|
italian: "it",
|
|
Italian: "it",
|
|
italiano: "it",
|
|
Italiano: "it",
|
|
nizozemski: "nl", // "Dutch" in some Slavic languages
|
|
};
|
|
|
|
// Check mapping
|
|
if (normalized in languageMapping) {
|
|
return languageMapping[normalized];
|
|
}
|
|
|
|
// Try to get code using the ISO6391 library
|
|
try {
|
|
const code = ISO6391.getCode(normalized);
|
|
if (code) return code;
|
|
} catch (error) {
|
|
process.stderr.write(
|
|
`[CSV] Error converting language name to code: ${normalized} - ${error}\n`
|
|
);
|
|
}
|
|
// If all else fails, return null
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Passes through category data as-is (no mapping)
|
|
* @param categoryStr The raw category string from CSV
|
|
* @returns The category string as-is or null if empty
|
|
*/
|
|
function normalizeCategory(categoryStr?: string): string | null {
|
|
if (!categoryStr) return null;
|
|
|
|
const normalized = categoryStr.trim();
|
|
return normalized || null;
|
|
}
|
|
|
|
/**
|
|
* Checks if a string value should be considered as boolean true
|
|
* @param value The string value to check
|
|
* @returns True if the string indicates a positive/true value
|
|
*/
|
|
function isTruthyValue(value?: string): boolean {
|
|
if (!value) return false;
|
|
|
|
const truthyValues = [
|
|
"1",
|
|
"true",
|
|
"yes",
|
|
"y",
|
|
"ja",
|
|
"si",
|
|
"oui",
|
|
"да",
|
|
"да",
|
|
"はい",
|
|
];
|
|
|
|
return truthyValues.includes(value.toLowerCase());
|
|
}
|
|
|
|
/**
|
|
* Safely parses a date string into a Date object.
|
|
* Handles potential errors and various formats, prioritizing D-M-YYYY HH:MM:SS.
|
|
* @param dateStr The date string to parse.
|
|
* @returns A Date object or null if parsing fails.
|
|
*/
|
|
function safeParseDate(dateStr?: string): Date | null {
|
|
if (!dateStr) return null;
|
|
|
|
// Try to parse D-M-YYYY HH:MM:SS format (with hyphens or dots)
|
|
const dateTimeRegex =
|
|
/^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
|
|
const match = dateStr.match(dateTimeRegex);
|
|
|
|
if (match) {
|
|
const day = match[1];
|
|
const month = match[2];
|
|
const year = match[3];
|
|
const hour = match[4];
|
|
const minute = match[5];
|
|
const second = match[6];
|
|
|
|
// Reformat to YYYY-MM-DDTHH:MM:SS (ISO-like, but local time)
|
|
// Ensure month and day are two digits
|
|
const formattedDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}T${hour.padStart(2, "0")}:${minute.padStart(2, "0")}:${second.padStart(2, "0")}`;
|
|
|
|
try {
|
|
const date = new Date(formattedDateStr);
|
|
// Basic validation: check if the constructed date is valid
|
|
if (!isNaN(date.getTime())) {
|
|
// console.log(`[safeParseDate] Parsed from D-M-YYYY: ${dateStr} -> ${formattedDateStr} -> ${date.toISOString()}`);
|
|
return date;
|
|
}
|
|
} catch (e) {
|
|
console.warn(
|
|
`[safeParseDate] Error parsing reformatted string ${formattedDateStr} from ${dateStr}:`,
|
|
e
|
|
);
|
|
}
|
|
}
|
|
|
|
// Fallback for other potential formats (e.g., direct ISO 8601) or if the primary parse failed
|
|
try {
|
|
const parsedDate = new Date(dateStr);
|
|
if (!isNaN(parsedDate.getTime())) {
|
|
// console.log(`[safeParseDate] Parsed with fallback: ${dateStr} -> ${parsedDate.toISOString()}`);
|
|
return parsedDate;
|
|
}
|
|
} catch (e) {
|
|
console.warn(`[safeParseDate] Error parsing with fallback ${dateStr}:`, e);
|
|
}
|
|
|
|
console.warn(`Failed to parse date string: ${dateStr}`);
|
|
return null;
|
|
}
|
|
|
|
export async function fetchAndParseCsv(
|
|
url: string,
|
|
username?: string,
|
|
password?: string
|
|
): Promise<Partial<SessionData>[]> {
|
|
const authHeader =
|
|
username && password
|
|
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
|
|
: undefined;
|
|
|
|
const res = await fetch(url, {
|
|
headers: authHeader ? { Authorization: authHeader } : {},
|
|
});
|
|
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
|
|
|
|
const text = await res.text();
|
|
|
|
// Parse without expecting headers, using known order
|
|
const records: CSVRecord[] = parse(text, {
|
|
delimiter: ",",
|
|
columns: [
|
|
"session_id",
|
|
"start_time",
|
|
"end_time",
|
|
"ip_address",
|
|
"country",
|
|
"language",
|
|
"messages_sent",
|
|
"sentiment",
|
|
"escalated",
|
|
"forwarded_hr",
|
|
"full_transcript_url",
|
|
"avg_response_time",
|
|
"tokens",
|
|
"tokens_eur",
|
|
"category",
|
|
"initial_msg",
|
|
],
|
|
from_line: 1,
|
|
relax_column_count: true,
|
|
skip_empty_lines: true,
|
|
trim: true,
|
|
});
|
|
|
|
// Coerce types for relevant columns
|
|
return records.map((r) => ({
|
|
id: r.session_id,
|
|
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
|
|
endTime: safeParseDate(r.end_time),
|
|
ipAddress: r.ip_address,
|
|
country: getCountryCode(r.country),
|
|
language: getLanguageCode(r.language),
|
|
messagesSent: Number(r.messages_sent) || 0,
|
|
sentiment: r.sentiment,
|
|
escalated: isTruthyValue(r.escalated),
|
|
forwardedHr: isTruthyValue(r.forwarded_hr),
|
|
fullTranscriptUrl: r.full_transcript_url,
|
|
avgResponseTime: r.avg_response_time
|
|
? parseFloat(r.avg_response_time)
|
|
: null,
|
|
tokens: Number(r.tokens) || 0,
|
|
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
|
|
category: normalizeCategory(r.category),
|
|
initialMsg: r.initial_msg,
|
|
}));
|
|
}
|