mirror of
https://github.com/kjanat/livedash-node.git
synced 2026-01-16 08:52:10 +01:00
Refactor transcript fetching and processing scripts
- Introduced a new function `fetchTranscriptContent` to handle fetching transcripts with optional authentication. - Enhanced error handling and logging for transcript fetching. - Updated the `parseTranscriptToMessages` function to improve message parsing logic. - Replaced the old session processing logic with a new approach that utilizes `SessionImport` records. - Removed obsolete scripts related to manual triggers and whitespace fixing. - Updated the server initialization to remove direct server handling, transitioning to a more modular approach. - Improved overall code structure and readability across various scripts.
This commit is contained in:
@ -1,440 +1,41 @@
|
||||
// Fetches, parses, and returns chat session data for a company from a CSV URL
|
||||
// Simplified CSV fetcher - fetches and parses CSV data without any processing
|
||||
// Maps directly to SessionImport table fields
|
||||
import fetch from "node-fetch";
|
||||
import { parse } from "csv-parse/sync";
|
||||
import ISO6391 from "iso-639-1";
|
||||
import countries from "i18n-iso-countries";
|
||||
|
||||
// Register locales for i18n-iso-countries
|
||||
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
|
||||
countries.registerLocale(enLocale);
|
||||
|
||||
// This type is used internally for parsing the CSV records
|
||||
/**
 * One CSV row keyed by column name, before any type coercion.
 *
 * Every value is the raw string read from the CSV. Only `session_id` and
 * `start_time` are required; all other columns may be absent.
 */
interface CSVRecord {
|
||||
session_id: string;
|
||||
start_time: string;
|
||||
end_time?: string;
|
||||
ip_address?: string;
|
||||
country?: string;
|
||||
language?: string;
|
||||
messages_sent?: string;
|
||||
sentiment?: string;
|
||||
escalated?: string;
|
||||
forwarded_hr?: string;
|
||||
full_transcript_url?: string;
|
||||
avg_response_time?: string;
|
||||
tokens?: string;
|
||||
tokens_eur?: string;
|
||||
category?: string;
|
||||
initial_msg?: string;
|
||||
// Index signature: any other column name is also readable as an optional string.
[key: string]: string | undefined;
|
||||
}
|
||||
|
||||
interface SessionData {
|
||||
id: string;
|
||||
sessionId: string;
|
||||
startTime: Date;
|
||||
endTime: Date | null;
|
||||
ipAddress?: string;
|
||||
country?: string | null; // Will store ISO 3166-1 alpha-2 country code or null/undefined
|
||||
language?: string | null; // Will store ISO 639-1 language code or null/undefined
|
||||
messagesSent: number;
|
||||
sentiment: number | null;
|
||||
escalated: boolean;
|
||||
forwardedHr: boolean;
|
||||
fullTranscriptUrl?: string | null;
|
||||
avgResponseTime: number | null;
|
||||
tokens: number;
|
||||
tokensEur: number;
|
||||
category?: string | null;
|
||||
initialMsg?: string;
|
||||
// Raw CSV data interface matching SessionImport schema
|
||||
/**
 * One unprocessed CSV row, shaped after the SessionImport fields.
 *
 * Fields ending in `Raw` carry the CSV text as-is (no date, sentiment or
 * boolean interpretation). Nullable fields are `null` when the cell is empty.
 *
 * NOTE(review): the mapper in `fetchAndParseCsv` builds the numeric fields with
 * `parse(...) || null`, so a parsed value of 0 also ends up as `null` — confirm
 * that is intended.
 */
interface RawSessionImport {
|
||||
externalSessionId: string;
|
||||
startTimeRaw: string;
|
||||
endTimeRaw: string;
|
||||
ipAddress: string | null;
|
||||
countryCode: string | null;
|
||||
language: string | null;
|
||||
messagesSent: number | null;
|
||||
sentimentRaw: string | null;
|
||||
escalatedRaw: string | null;
|
||||
forwardedHrRaw: string | null;
|
||||
fullTranscriptUrl: string | null;
|
||||
avgResponseTimeSeconds: number | null;
|
||||
tokens: number | null;
|
||||
tokensEur: number | null;
|
||||
category: string | null;
|
||||
initialMessage: string | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts country names to ISO 3166-1 alpha-2 codes
|
||||
* @param countryStr Raw country string from CSV
|
||||
* @returns ISO 3166-1 alpha-2 country code or null if not found
|
||||
* Fetches and parses CSV data from a URL without any processing
|
||||
* Maps CSV columns by position to SessionImport fields
|
||||
* @param url The CSV URL
|
||||
* @param username Optional username for authentication
|
||||
* @param password Optional password for authentication
|
||||
* @returns Array of raw session import data
|
||||
*/
|
||||
/**
 * Resolves a raw country value from the CSV to an ISO 3166-1 alpha-2 code.
 *
 * Resolution order:
 *  1. dataset-specific aliases (case-insensitive), e.g. "UK" -> "GB";
 *  2. two-letter values, validated as ISO codes regardless of case;
 *  3. English country names via `i18n-iso-countries`.
 *
 * Aliases are checked first on purpose: "UK" is two letters but is not a valid
 * ISO code, so validating before the alias lookup would discard it.
 *
 * @param countryStr Raw country string from the CSV
 * @returns The upper-case alpha-2 code, `null` when the value is empty or
 *          cannot be resolved, or `undefined` when no value was supplied
 */
function getCountryCode(countryStr?: string): string | null | undefined {
  if (countryStr === undefined) return undefined;
  if (countryStr === null) return null;

  const normalized = countryStr.trim();
  if (!normalized) return null;

  // Keys are lower-cased so the lookup is case-insensitive. A Map is used
  // instead of an object so inherited keys ("constructor", ...) never match.
  const countryAliases = new Map<string, string>([
    ["ba", "BA"], // Bosnia and Herzegovina
    ["nl", "NL"], // Netherlands
    ["usa", "US"], // United States
    ["uk", "GB"], // United Kingdom
    ["gb", "GB"], // Great Britain
    ["nederland", "NL"],
    ["netherlands", "NL"],
    ["netherland", "NL"],
    ["holland", "NL"],
    ["germany", "DE"],
    ["deutschland", "DE"],
    ["belgium", "BE"],
    ["belgië", "BE"],
    ["belgique", "BE"],
    ["france", "FR"],
    ["frankreich", "FR"],
    ["united states", "US"],
    ["united states of america", "US"],
    ["bosnia", "BA"],
    ["bosnia and herzegovina", "BA"],
    ["bosnia & herzegovina", "BA"],
  ]);

  const alias = countryAliases.get(normalized.toLowerCase());
  if (alias !== undefined) return alias;

  // A two-letter value can only be an alpha-2 code; accept any casing.
  if (normalized.length === 2) {
    const candidate = normalized.toUpperCase();
    return countries.isValid(candidate) ? candidate : null;
  }

  // Last resort: look the value up as an English country name.
  try {
    const code = countries.getAlpha2Code(normalized, "en");
    if (code) return code;
  } catch (error) {
    process.stderr.write(
      `[CSV] Error converting country name to code: ${normalized} - ${String(error)}\n`
    );
  }

  return null;
}
|
||||
|
||||
/**
 * Resolves a raw language value from the CSV to an ISO 639-1 code.
 *
 * Resolution order:
 *  1. known language-name aliases (case-insensitive), e.g. "Nederlands" -> "nl";
 *  2. two-letter values, validated as ISO 639-1 codes regardless of case;
 *  3. language names known to the `iso-639-1` package.
 *
 * @param languageStr Raw language string from the CSV
 * @returns The lower-case ISO 639-1 code, `null` when the value is empty or
 *          cannot be resolved, or `undefined` when no value was supplied
 */
function getLanguageCode(languageStr?: string): string | null | undefined {
  if (languageStr === undefined) return undefined;
  if (languageStr === null) return null;

  const normalized = languageStr.trim();
  if (!normalized) return null;

  // Keys are lower-cased so one entry covers every casing of a name. A Map is
  // used instead of an object so inherited keys ("constructor", ...) never match.
  const languageAliases = new Map<string, string>([
    ["english", "en"],
    ["dutch", "nl"],
    ["nederlands", "nl"],
    ["nl", "nl"],
    ["bosnian", "bs"],
    ["turkish", "tr"],
    ["german", "de"],
    ["deutsch", "de"],
    ["french", "fr"],
    ["français", "fr"],
    ["spanish", "es"],
    ["español", "es"],
    ["italian", "it"],
    ["italiano", "it"],
    ["nizozemski", "nl"], // "Dutch" in some Slavic languages
  ]);

  const alias = languageAliases.get(normalized.toLowerCase());
  if (alias !== undefined) return alias;

  // A two-letter value is treated as a candidate ISO code; accept any casing.
  if (normalized.length === 2) {
    const candidate = normalized.toLowerCase();
    return ISO6391.validate(candidate) ? candidate : null;
  }

  // Last resort: let the library resolve the value as a language name.
  try {
    const code = ISO6391.getCode(normalized);
    if (code) return code;
  } catch (error) {
    process.stderr.write(
      `[CSV] Error converting language name to code: ${normalized} - ${String(error)}\n`
    );
  }

  return null;
}
|
||||
|
||||
/**
 * Normalizes a free-form category value to one of the standard groups.
 *
 * Groups are tried in declaration order and the first one with a matching
 * keyword wins. Keywords longer than three characters match as substrings
 * (so plurals and compound words still hit); keywords of three characters or
 * fewer must appear as a whole word. Without that restriction "hi" matches
 * "shift" and "it" matches "visit", sending rows to the wrong group.
 *
 * @param categoryStr The raw category string from the CSV
 * @returns The group name, "Other" when nothing matches, or `null` when the
 *          value is missing or blank
 */
function normalizeCategory(categoryStr?: string): string | null {
  if (!categoryStr) return null;

  const normalized = categoryStr.toLowerCase().trim();
  if (!normalized) return null;

  // Keyword lists per group. Order matters: the first matching group wins, so
  // a keyword only needs to be listed in the earliest group that claims it.
  const categoryMapping: ReadonlyArray<readonly [string, readonly string[]]> = [
    [
      "Onboarding",
      [
        "onboarding",
        "start",
        "begin",
        "new",
        "orientation",
        "welcome",
        "intro",
        "getting started",
        "documents",
        "documenten",
        "first day",
        "eerste dag",
      ],
    ],
    [
      "General Information",
      [
        "general",
        "algemeen",
        "info",
        "information",
        "informatie",
        "question",
        "vraag",
        "inquiry",
        "chat",
        "conversation",
        "gesprek",
        "talk",
      ],
    ],
    [
      "Greeting",
      ["greeting", "greet", "hello", "hi", "hey", "hallo", "hoi", "greetings"],
    ],
    [
      "HR & Payroll",
      [
        "salary",
        "salaris",
        "pay",
        "payroll",
        "loon",
        "loonstrook",
        "hr",
        "human resources",
        "benefits",
        "vacation",
        "leave",
        "verlof",
        "maaltijdvergoeding",
        "vergoeding",
      ],
    ],
    [
      "Schedules & Hours",
      [
        "schedule",
        "hours",
        "tijd",
        "time",
        "roster",
        "rooster",
        "planning",
        "shift",
        "dienst",
        "working hours",
        "werktijden",
        "openingstijden",
      ],
    ],
    [
      "Role & Responsibilities",
      [
        "role",
        "job",
        "function",
        "functie",
        "task",
        "taak",
        "responsibilities",
        "leidinggevende",
        "manager",
        "teamleider",
        "supervisor",
        "team",
        "lead",
      ],
    ],
    [
      "Technical Support",
      [
        "technical",
        "tech",
        "support",
        "laptop",
        "computer",
        "system",
        "systeem",
        "it",
        "software",
        "hardware",
      ],
    ],
    [
      "Offboarding",
      [
        "offboarding",
        "exit",
        "quit",
        "resign",
        "resignation",
        "ontslag",
        "vertrek",
        "afsluiting",
      ],
    ],
  ];

  // Whole words of the input, split on anything that is not a letter or digit.
  const words = new Set(
    normalized.split(/[^\p{L}\p{N}]+/u).filter((word) => word.length > 0)
  );
  const maxWholeWordLength = 3;
  const matchesKeyword = (keyword: string): boolean =>
    keyword.length <= maxWholeWordLength
      ? words.has(keyword)
      : normalized.includes(keyword);

  for (const [category, keywords] of categoryMapping) {
    if (keywords.some(matchesKeyword)) {
      return category;
    }
  }

  return "Other";
}
|
||||
|
||||
/**
 * Converts a sentiment value from the CSV to a numeric score.
 *
 * Known labels (English, Dutch, Spanish/Italian, plus "yes"/"no") map to fixed
 * scores between -1.0 and 1.5. Any other value is accepted only if the whole
 * string is a plain decimal number; trailing garbage such as "1abc" is
 * rejected rather than silently truncated.
 *
 * @param sentimentStr The sentiment string from the CSV
 * @returns The score, or `null` when the value is missing, blank or unrecognised
 */
function mapSentimentToScore(sentimentStr?: string): number | null {
  if (!sentimentStr) return null;

  // Case- and whitespace-insensitive matching.
  const sentiment = sentimentStr.trim().toLowerCase();
  if (!sentiment) return null;

  // A Map (not an object literal) so inherited keys such as "constructor"
  // can never be mistaken for a known label.
  const sentimentScores = new Map<string, number>([
    ["happy", 1.0],
    ["excited", 1.5],
    ["positive", 0.8],
    ["neutral", 0.0],
    ["playful", 0.7],
    ["negative", -0.8],
    ["angry", -1.0],
    ["sad", -0.7],
    ["frustrated", -0.9],
    ["positief", 0.8], // Dutch
    ["neutraal", 0.0], // Dutch
    ["negatief", -0.8], // Dutch
    ["positivo", 0.8], // Spanish/Italian
    ["neutro", 0.0], // Spanish/Italian
    ["negativo", -0.8], // Spanish/Italian
    ["yes", 0.5], // For any "yes" sentiment
    ["no", -0.5], // For any "no" sentiment
  ]);

  const labelScore = sentimentScores.get(sentiment);
  if (labelScore !== undefined) return labelScore;

  // Numeric fallback: the entire value must be a decimal number.
  const decimalPattern = /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?$/;
  if (!decimalPattern.test(sentiment)) return null;

  const numericScore = Number.parseFloat(sentiment);
  return Number.isFinite(numericScore) ? numericScore : null;
}
|
||||
|
||||
/**
 * Decides whether a CSV cell should be read as boolean `true`.
 *
 * Matching ignores case and surrounding whitespace. Anything not in the
 * accepted list — including a missing or empty value — is `false`.
 *
 * @param value The string value to check
 * @returns `true` if the string is one of the recognised affirmative values
 */
function isTruthyValue(value?: string): boolean {
  if (!value) return false;

  // Affirmative spellings: numeric, English, Dutch/German, Spanish/Italian,
  // French, Russian and Japanese.
  const truthyValues = new Set([
    "1",
    "true",
    "yes",
    "y",
    "ja",
    "si",
    "oui",
    "да",
    "はい",
  ]);

  return truthyValues.has(value.trim().toLowerCase());
}
|
||||
|
||||
/**
 * Safely parses a date string into a Date object.
 *
 * The primary format is `D-M-YYYY HH:MM:SS` (hyphens or dots as date
 * separators), interpreted as local time. Its components are range-checked
 * before a Date is built, so an impossible date such as `31-02-2024` yields
 * `null` instead of whatever the engine's lenient parser would produce.
 * Any other input is handed to the built-in Date parser (e.g. ISO 8601).
 *
 * @param dateStr The date string to parse.
 * @returns A Date object, or `null` if the value is missing or unparseable.
 */
function safeParseDate(dateStr?: string): Date | null {
  if (!dateStr) return null;

  const input = dateStr.trim();
  const dateTimeRegex =
    /^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
  const match = dateTimeRegex.exec(input);

  if (match) {
    const [, day = "", month = "", year = "", hour = "", minute = "", second = ""] =
      match;
    const dayNum = Number(day);
    const monthNum = Number(month);
    const yearNum = Number(year);

    const isLeapYear =
      (yearNum % 4 === 0 && yearNum % 100 !== 0) || yearNum % 400 === 0;
    const daysPerMonth = [
      31,
      isLeapYear ? 29 : 28,
      31,
      30,
      31,
      30,
      31,
      31,
      30,
      31,
      30,
      31,
    ];
    const monthIsValid = monthNum >= 1 && monthNum <= 12;
    const dayLimit = monthIsValid ? (daysPerMonth[monthNum - 1] ?? 0) : 0;
    const componentsAreValid =
      monthIsValid &&
      dayNum >= 1 &&
      dayNum <= dayLimit &&
      Number(hour) <= 23 &&
      Number(minute) <= 59 &&
      Number(second) <= 59;

    if (componentsAreValid) {
      // Zero-padded ISO-like string without an offset, i.e. parsed as local time.
      const pad = (part: string): string => part.padStart(2, "0");
      const formattedDateStr = `${year}-${pad(month)}-${pad(day)}T${pad(hour)}:${pad(minute)}:${pad(second)}`;
      const date = new Date(formattedDateStr);
      if (!Number.isNaN(date.getTime())) {
        return date;
      }
    }

    // The input is clearly D-M-YYYY shaped but invalid. Do not pass it to the
    // generic parser, which could reinterpret it as M-D-YYYY.
    console.warn(`Failed to parse date string: ${dateStr}`);
    return null;
  }

  // Fallback for other formats (e.g. ISO 8601). `new Date` never throws; it
  // signals failure with an invalid date.
  const parsedDate = new Date(input);
  if (!Number.isNaN(parsedDate.getTime())) {
    return parsedDate;
  }

  console.warn(`Failed to parse date string: ${dateStr}`);
  return null;
}
|
||||
|
||||
export async function fetchAndParseCsv(
|
||||
url: string,
|
||||
username?: string,
|
||||
password?: string
|
||||
): Promise<Partial<SessionData>[]> {
|
||||
): Promise<RawSessionImport[]> {
|
||||
const authHeader =
|
||||
username && password
|
||||
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
|
||||
@ -443,56 +44,39 @@ export async function fetchAndParseCsv(
|
||||
const res = await fetch(url, {
|
||||
headers: authHeader ? { Authorization: authHeader } : {},
|
||||
});
|
||||
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Failed to fetch CSV: ${res.status} ${res.statusText}`);
|
||||
}
|
||||
|
||||
const text = await res.text();
|
||||
|
||||
// Parse without expecting headers, using known order
|
||||
const records: CSVRecord[] = parse(text, {
|
||||
// Parse CSV without headers, using positional column mapping
|
||||
const records: string[][] = parse(text, {
|
||||
delimiter: ",",
|
||||
columns: [
|
||||
"session_id",
|
||||
"start_time",
|
||||
"end_time",
|
||||
"ip_address",
|
||||
"country",
|
||||
"language",
|
||||
"messages_sent",
|
||||
"sentiment",
|
||||
"escalated",
|
||||
"forwarded_hr",
|
||||
"full_transcript_url",
|
||||
"avg_response_time",
|
||||
"tokens",
|
||||
"tokens_eur",
|
||||
"category",
|
||||
"initial_msg",
|
||||
],
|
||||
from_line: 1,
|
||||
from_line: 1, // Start from first line (no headers)
|
||||
relax_column_count: true,
|
||||
skip_empty_lines: true,
|
||||
trim: true,
|
||||
});
|
||||
|
||||
// Coerce types for relevant columns
|
||||
return records.map((r) => ({
|
||||
id: r.session_id,
|
||||
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
|
||||
endTime: safeParseDate(r.end_time),
|
||||
ipAddress: r.ip_address,
|
||||
country: getCountryCode(r.country),
|
||||
language: getLanguageCode(r.language),
|
||||
messagesSent: Number(r.messages_sent) || 0,
|
||||
sentiment: mapSentimentToScore(r.sentiment),
|
||||
escalated: isTruthyValue(r.escalated),
|
||||
forwardedHr: isTruthyValue(r.forwarded_hr),
|
||||
fullTranscriptUrl: r.full_transcript_url,
|
||||
avgResponseTime: r.avg_response_time
|
||||
? parseFloat(r.avg_response_time)
|
||||
: null,
|
||||
tokens: Number(r.tokens) || 0,
|
||||
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
|
||||
category: normalizeCategory(r.category),
|
||||
initialMsg: r.initial_msg,
|
||||
// Map CSV columns by position to SessionImport fields
|
||||
return records.map((row) => ({
|
||||
externalSessionId: row[0] || "",
|
||||
startTimeRaw: row[1] || "",
|
||||
endTimeRaw: row[2] || "",
|
||||
ipAddress: row[3] || null,
|
||||
countryCode: row[4] || null,
|
||||
language: row[5] || null,
|
||||
messagesSent: row[6] ? parseInt(row[6], 10) || null : null,
|
||||
sentimentRaw: row[7] || null,
|
||||
escalatedRaw: row[8] || null,
|
||||
forwardedHrRaw: row[9] || null,
|
||||
fullTranscriptUrl: row[10] || null,
|
||||
avgResponseTimeSeconds: row[11] ? parseFloat(row[11]) || null : null,
|
||||
tokens: row[12] ? parseInt(row[12], 10) || null : null,
|
||||
tokensEur: row[13] ? parseFloat(row[13]) || null : null,
|
||||
category: row[14] || null,
|
||||
initialMessage: row[15] || null,
|
||||
}));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user