// Mirror of https://github.com/kjanat/livedash-node.git (synced 2026-01-16 09:52:09 +01:00).
// Change note: adds functions to normalize language and category values from the CSV data,
// mapping variations to standard names and groups. This improves data consistency and
// enables more accurate analysis and reporting.
// Mirrored file: 362 lines, 8.2 KiB, TypeScript.
// Fetches, parses, and returns chat session data for a company from a CSV URL
import fetch from "node-fetch";
import { parse } from "csv-parse/sync";

/**
 * One raw CSV row, used internally while parsing the CSV records.
 *
 * Every column is still a string at this stage; type coercion happens when the
 * row is converted into session data. Only `session_id` and `start_time` are
 * declared as required; all other columns may be missing from a row.
 */
interface CSVRecord {
  session_id: string;
  start_time: string;
  end_time?: string;
  ip_address?: string;
  country?: string;
  language?: string;
  messages_sent?: string;
  sentiment?: string;
  escalated?: string;
  forwarded_hr?: string;
  full_transcript_url?: string;
  avg_response_time?: string;
  tokens?: string;
  tokens_eur?: string;
  category?: string;
  initial_msg?: string;
  // Any additional column is tolerated and exposed as an optional string.
  [key: string]: string | undefined;
}

/**
 * A chat session with typed fields, as opposed to the all-string CSV row.
 *
 * Fields typed `| null` use `null` for "value absent or not parseable".
 *
 * NOTE(review): `fetchAndParseCsv` in this file fills `id` from the CSV
 * `session_id` column and does not set `sessionId`; confirm which of the two
 * identifiers downstream code expects.
 */
interface SessionData {
  id: string;
  sessionId: string;
  startTime: Date;
  endTime: Date | null;
  ipAddress?: string;
  country?: string;
  language?: string | null;
  messagesSent: number;
  sentiment: number | null;
  escalated: boolean;
  forwardedHr: boolean;
  fullTranscriptUrl?: string | null;
  avgResponseTime: number | null;
  tokens: number;
  tokensEur: number;
  category?: string | null;
  initialMsg?: string;
}

/**
 * Lower-cased language spellings and codes mapped to their display name.
 *
 * Kept in a Map (not a plain object) so that inputs such as "constructor" or
 * "__proto__" cannot resolve to inherited Object.prototype members.
 */
const LANGUAGE_ALIASES: ReadonlyMap<string, string> = new Map<string, string>([
  // English variations
  ["english", "English"],
  ["en", "English"],
  ["eng", "English"],

  // Dutch variations
  ["dutch", "Dutch"],
  ["nederlands", "Dutch"],
  ["nl", "Dutch"],
  ["nederland", "Dutch"],
  ["netherland", "Dutch"],
  ["netherlands", "Dutch"],
  ["hollands", "Dutch"],
  ["niederländisch", "Dutch"],
  ["nizozemski", "Dutch"],

  // Other languages that might appear
  ["bosnian", "Bosnian"],
  ["bs", "Bosnian"],
  ["turkish", "Turkish"],
  ["tr", "Turkish"],
  ["turks", "Turkish"],
  ["german", "German"],
  ["de", "German"],
  ["duits", "German"],
  ["french", "French"],
  ["fr", "French"],
  ["frans", "French"],
  ["spanish", "Spanish"],
  ["es", "Spanish"],
  ["spaans", "Spanish"],
]);

/**
 * Normalizes language values to a standard set.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param languageStr The raw language string from CSV
 * @returns The standard language name, "Other" for an unrecognized value, or
 *   null when the value is missing, empty, or whitespace only
 */
function normalizeLanguage(languageStr?: string): string | null {
  if (!languageStr) return null;

  const key = languageStr.trim().toLowerCase();
  // A value consisting only of whitespace carries no information: treat it as missing.
  if (key === "") return null;

  return LANGUAGE_ALIASES.get(key) ?? "Other";
}

/**
 * Category groups and the lower-case keywords that select them.
 *
 * Order matters: groups are tried top to bottom and the first group with a
 * matching keyword wins. As a consequence a keyword listed in two groups
 * ("welcome", "leave") always resolves to the earlier group.
 */
const CATEGORY_KEYWORDS: ReadonlyArray<readonly [string, readonly string[]]> = [
  [
    "Onboarding",
    [
      "onboarding",
      "start",
      "begin",
      "new",
      "orientation",
      "welcome",
      "intro",
      "getting started",
      "documents",
      "documenten",
      "first day",
      "eerste dag",
    ],
  ],
  [
    "General Information",
    [
      "general",
      "algemeen",
      "info",
      "information",
      "informatie",
      "question",
      "vraag",
      "inquiry",
      "chat",
      "conversation",
      "gesprek",
      "talk",
    ],
  ],
  [
    "Greeting",
    [
      "greeting",
      "greet",
      "hello",
      "hi",
      "hey",
      "welcome",
      "hallo",
      "hoi",
      "greetings",
    ],
  ],
  [
    "HR & Payroll",
    [
      "salary",
      "salaris",
      "pay",
      "payroll",
      "loon",
      "loonstrook",
      "hr",
      "human resources",
      "benefits",
      "vacation",
      "leave",
      "verlof",
      "maaltijdvergoeding",
      "vergoeding",
    ],
  ],
  [
    "Schedules & Hours",
    [
      "schedule",
      "hours",
      "tijd",
      "time",
      "roster",
      "rooster",
      "planning",
      "shift",
      "dienst",
      "working hours",
      "werktijden",
      "openingstijden",
    ],
  ],
  [
    "Role & Responsibilities",
    [
      "role",
      "job",
      "function",
      "functie",
      "task",
      "taak",
      "responsibilities",
      "leidinggevende",
      "manager",
      "teamleider",
      "supervisor",
      "team",
      "lead",
    ],
  ],
  [
    "Technical Support",
    [
      "technical",
      "tech",
      "support",
      "laptop",
      "computer",
      "system",
      "systeem",
      "it",
      "software",
      "hardware",
    ],
  ],
  [
    "Offboarding",
    [
      "offboarding",
      "leave",
      "exit",
      "quit",
      "resign",
      "resignation",
      "ontslag",
      "vertrek",
      "afsluiting",
    ],
  ],
];

/**
 * Keywords of this length or shorter ("hi", "hr", "it") only match as complete
 * words. As substrings they occur inside unrelated words and hijack them:
 * "hi" in "shift", "it" in "exit" / "quit" / "afsluiting".
 */
const WHOLE_WORD_MAX_LENGTH = 2;

/**
 * Normalizes category values to standard groups.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Keywords
 * longer than two characters match anywhere in the text (so compound words and
 * inflections still match); shorter keywords must appear as a whole word.
 *
 * @param categoryStr The raw category string from CSV
 * @returns The name of the first matching group, "Other" when no keyword
 *   matches, or null when the value is missing, empty, or whitespace only
 */
function normalizeCategory(categoryStr?: string): string | null {
  if (!categoryStr) return null;

  const text = categoryStr.trim().toLowerCase();
  // A value consisting only of whitespace carries no information: treat it as missing.
  if (text === "") return null;

  // Words are maximal runs of letters/digits in any script.
  const words = new Set(text.split(/[^\p{L}\p{N}]+/u));

  const matches = (keyword: string): boolean =>
    keyword.length <= WHOLE_WORD_MAX_LENGTH
      ? words.has(keyword)
      : text.includes(keyword);

  for (const [category, keywords] of CATEGORY_KEYWORDS) {
    if (keywords.some((keyword) => matches(keyword))) {
      return category;
    }
  }

  // If no match, return "Other"
  return "Other";
}

/**
 * Sentiment labels (lower case) mapped to numeric scores.
 * The labels in this table span -1.0 (angry) to 1.5 (excited).
 *
 * Kept in a Map (not a plain object) so that inputs such as "constructor"
 * cannot resolve to inherited Object.prototype members.
 */
const SENTIMENT_SCORES: ReadonlyMap<string, number> = new Map<string, number>([
  ["happy", 1.0],
  ["excited", 1.5],
  ["positive", 0.8],
  ["neutral", 0.0],
  ["playful", 0.7],
  ["negative", -0.8],
  ["angry", -1.0],
  ["sad", -0.7],
  ["frustrated", -0.9],
  ["positief", 0.8], // Dutch
  ["neutraal", 0.0], // Dutch
  ["negatief", -0.8], // Dutch
  ["positivo", 0.8], // Spanish/Italian
  ["neutro", 0.0], // Spanish/Italian
  ["negativo", -0.8], // Spanish/Italian
  ["yes", 0.5], // For any "yes" sentiment
  ["no", -0.5], // For any "no" sentiment
]);

/**
 * Converts sentiment string values to numeric scores.
 *
 * Known labels are looked up case-insensitively, ignoring surrounding
 * whitespace. Anything else is read as a number with `parseFloat`, so a value
 * that already is a numeric score passes through unchanged.
 *
 * @param sentimentStr The sentiment string from the CSV
 * @returns The score for the sentiment, or null when the value is missing or
 *   is neither a known label nor a finite number
 */
function mapSentimentToScore(sentimentStr?: string): number | null {
  if (!sentimentStr) return null;

  const sentiment = sentimentStr.trim().toLowerCase();
  if (sentiment === "") return null;

  const knownScore = SENTIMENT_SCORES.get(sentiment);
  if (knownScore !== undefined) return knownScore;

  // Not a label: accept a numeric score, reject everything else.
  const numeric = parseFloat(sentiment);
  return Number.isFinite(numeric) ? numeric : null;
}

/** Lower-case spellings that count as boolean true. */
const TRUTHY_VALUES: ReadonlySet<string> = new Set<string>([
  "1",
  "true",
  "yes",
  "y",
  "ja",
  "si",
  "oui",
  "да",
  "はい",
]);

/**
 * Checks if a string value should be considered as boolean true.
 *
 * The comparison is case-insensitive and ignores surrounding whitespace.
 *
 * @param value The string value to check
 * @returns True if the string indicates a positive/true value; false for any
 *   other value, including a missing or empty one
 */
function isTruthyValue(value?: string): boolean {
  if (!value) return false;

  return TRUTHY_VALUES.has(value.trim().toLowerCase());
}

/** Column names assigned to CSV fields by position (the file is read without a header row). */
const CSV_COLUMNS: string[] = [
  "session_id",
  "start_time",
  "end_time",
  "ip_address",
  "country",
  "language",
  "messages_sent",
  "sentiment",
  "escalated",
  "forwarded_hr",
  "full_transcript_url",
  "avg_response_time",
  "tokens",
  "tokens_eur",
  "category",
  "initial_msg",
];

/** Parses a date string; returns null when it is missing or not a valid date. */
function parseCsvDate(dateStr?: string): Date | null {
  if (!dateStr) return null;
  const date = new Date(dateStr);
  return Number.isNaN(date.getTime()) ? null : date;
}

/** Reads a number with `parseFloat`; returns null when missing or not a finite number. */
function parseCsvFloat(value?: string): number | null {
  if (!value) return null;
  const parsed = parseFloat(value);
  return Number.isFinite(parsed) ? parsed : null;
}

/** Coerces one raw CSV row into typed session fields. */
function toSessionData(r: CSVRecord): Partial<SessionData> {
  return {
    id: r.session_id,
    // Fallback to current date if the start time is missing or invalid
    startTime: parseCsvDate(r.start_time) ?? new Date(),
    endTime: parseCsvDate(r.end_time),
    ipAddress: r.ip_address,
    country: r.country,
    language: normalizeLanguage(r.language),
    messagesSent: Number(r.messages_sent) || 0,
    sentiment: mapSentimentToScore(r.sentiment),
    escalated: isTruthyValue(r.escalated),
    forwardedHr: isTruthyValue(r.forwarded_hr),
    fullTranscriptUrl: r.full_transcript_url,
    // Unparseable numbers become null / 0 instead of leaking NaN.
    avgResponseTime: parseCsvFloat(r.avg_response_time),
    tokens: Number(r.tokens) || 0,
    tokensEur: parseCsvFloat(r.tokens_eur) ?? 0,
    category: normalizeCategory(r.category),
    initialMsg: r.initial_msg,
  };
}

/**
 * Downloads a CSV export of chat sessions and converts every row into typed
 * session fields.
 *
 * The CSV is read without a header row: parsing starts at line 1 and fields are
 * named by position. Rows may have fewer or more fields than expected, and
 * empty lines are skipped.
 *
 * @param url Location of the CSV file
 * @param username Basic-auth user name; credentials are sent only when both
 *   `username` and `password` are non-empty
 * @param password Basic-auth password
 * @returns One entry per CSV row
 * @throws Error when the HTTP response status is not OK. Failures raised by
 *   `fetch` or by the CSV parser are not caught here and propagate to the caller.
 */
export async function fetchAndParseCsv(
  url: string,
  username?: string,
  password?: string,
): Promise<Partial<SessionData>[]> {
  const headers: Record<string, string> = {};
  if (username && password) {
    headers.Authorization =
      "Basic " + Buffer.from(`${username}:${password}`).toString("base64");
  }

  const res = await fetch(url, { headers });
  if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);

  const text = await res.text();

  // Parse without expecting headers, using known order
  const records: CSVRecord[] = parse(text, {
    delimiter: ",",
    columns: CSV_COLUMNS,
    from_line: 1,
    relax_column_count: true,
    skip_empty_lines: true,
    trim: true,
  });

  // Coerce types for relevant columns
  return records.map((record) => toSessionData(record));
}