mirror of
https://github.com/kjanat/livedash-node.git
synced 2026-01-16 11:12:11 +01:00
- Updated formatting in SessionDetails component for better readability. - Enhanced documentation in scheduler-fixes.md to clarify issues and solutions. - Improved error handling and logging in csvFetcher.js and processingScheduler.js. - Standardized code formatting across various scripts and components for consistency. - Added validation checks for CSV URLs and transcript content to prevent processing errors. - Enhanced logging messages for better tracking of processing status and errors.
645 lines
17 KiB
JavaScript
645 lines
17 KiB
JavaScript
// JavaScript version of csvFetcher with session storage functionality
|
|
import fetch from "node-fetch";
|
|
import { parse } from "csv-parse/sync";
|
|
import ISO6391 from "iso-639-1";
|
|
import countries from "i18n-iso-countries";
|
|
import { PrismaClient } from "@prisma/client";
|
|
|
|
// Register locales for i18n-iso-countries
|
|
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
|
|
countries.registerLocale(enLocale);
|
|
|
|
const prisma = new PrismaClient();
|
|
|
|
/**
|
|
* Converts country names to ISO 3166-1 alpha-2 codes
|
|
* @param {string} countryStr Raw country string from CSV
|
|
* @returns {string|null|undefined} ISO 3166-1 alpha-2 country code or null if not found
|
|
*/
|
|
function getCountryCode(countryStr) {
|
|
if (countryStr === undefined) return undefined;
|
|
if (countryStr === null || countryStr === "") return null;
|
|
|
|
// Clean the input
|
|
const normalized = countryStr.trim();
|
|
if (!normalized) return null;
|
|
|
|
// Direct ISO code check (if already a 2-letter code)
|
|
if (normalized.length === 2 && normalized === normalized.toUpperCase()) {
|
|
return countries.isValid(normalized) ? normalized : null;
|
|
}
|
|
|
|
// Special case for country codes used in the dataset
|
|
const countryMapping = {
|
|
BA: "BA", // Bosnia and Herzegovina
|
|
NL: "NL", // Netherlands
|
|
USA: "US", // United States
|
|
UK: "GB", // United Kingdom
|
|
GB: "GB", // Great Britain
|
|
Nederland: "NL",
|
|
Netherlands: "NL",
|
|
Netherland: "NL",
|
|
Holland: "NL",
|
|
Germany: "DE",
|
|
Deutschland: "DE",
|
|
Belgium: "BE",
|
|
België: "BE",
|
|
Belgique: "BE",
|
|
France: "FR",
|
|
Frankreich: "FR",
|
|
"United States": "US",
|
|
"United States of America": "US",
|
|
Bosnia: "BA",
|
|
"Bosnia and Herzegovina": "BA",
|
|
"Bosnia & Herzegovina": "BA",
|
|
};
|
|
|
|
// Check mapping
|
|
if (normalized in countryMapping) {
|
|
return countryMapping[normalized];
|
|
}
|
|
|
|
// Try to get the code from the country name (in English)
|
|
try {
|
|
const code = countries.getAlpha2Code(normalized, "en");
|
|
if (code) return code;
|
|
} catch (error) {
|
|
process.stderr.write(
|
|
`[CSV] Error converting country name to code: ${normalized} - ${error}\n`
|
|
);
|
|
}
|
|
|
|
// If all else fails, return null
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Converts language names to ISO 639-1 codes
|
|
* @param {string} languageStr Raw language string from CSV
|
|
* @returns {string|null|undefined} ISO 639-1 language code or null if not found
|
|
*/
|
|
function getLanguageCode(languageStr) {
|
|
if (languageStr === undefined) return undefined;
|
|
if (languageStr === null || languageStr === "") return null;
|
|
|
|
// Clean the input
|
|
const normalized = languageStr.trim();
|
|
if (!normalized) return null;
|
|
|
|
// Direct ISO code check (if already a 2-letter code)
|
|
if (normalized.length === 2 && normalized === normalized.toLowerCase()) {
|
|
return ISO6391.validate(normalized) ? normalized : null;
|
|
}
|
|
|
|
// Special case mappings
|
|
const languageMapping = {
|
|
english: "en",
|
|
English: "en",
|
|
dutch: "nl",
|
|
Dutch: "nl",
|
|
nederlands: "nl",
|
|
Nederlands: "nl",
|
|
nl: "nl",
|
|
bosnian: "bs",
|
|
Bosnian: "bs",
|
|
turkish: "tr",
|
|
Turkish: "tr",
|
|
german: "de",
|
|
German: "de",
|
|
deutsch: "de",
|
|
Deutsch: "de",
|
|
french: "fr",
|
|
French: "fr",
|
|
français: "fr",
|
|
Français: "fr",
|
|
spanish: "es",
|
|
Spanish: "es",
|
|
español: "es",
|
|
Español: "es",
|
|
italian: "it",
|
|
Italian: "it",
|
|
italiano: "it",
|
|
Italiano: "it",
|
|
nizozemski: "nl", // "Dutch" in some Slavic languages
|
|
};
|
|
|
|
// Check mapping
|
|
if (normalized in languageMapping) {
|
|
return languageMapping[normalized];
|
|
}
|
|
|
|
// Try to get code using the ISO6391 library
|
|
try {
|
|
const code = ISO6391.getCode(normalized);
|
|
if (code) return code;
|
|
} catch (error) {
|
|
process.stderr.write(
|
|
`[CSV] Error converting language name to code: ${normalized} - ${error}\n`
|
|
);
|
|
}
|
|
// If all else fails, return null
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Normalizes category values to standard groups
|
|
* @param {string} categoryStr The raw category string from CSV
|
|
* @returns {string|null} A normalized category string
|
|
*/
|
|
function normalizeCategory(categoryStr) {
|
|
if (!categoryStr) return null;
|
|
|
|
const normalized = categoryStr.toLowerCase().trim();
|
|
|
|
// Define category groups using keywords
|
|
const categoryMapping = {
|
|
Onboarding: [
|
|
"onboarding",
|
|
"start",
|
|
"begin",
|
|
"new",
|
|
"orientation",
|
|
"welcome",
|
|
"intro",
|
|
"getting started",
|
|
"documents",
|
|
"documenten",
|
|
"first day",
|
|
"eerste dag",
|
|
],
|
|
"General Information": [
|
|
"general",
|
|
"algemeen",
|
|
"info",
|
|
"information",
|
|
"informatie",
|
|
"question",
|
|
"vraag",
|
|
"inquiry",
|
|
"chat",
|
|
"conversation",
|
|
"gesprek",
|
|
"talk",
|
|
],
|
|
Greeting: [
|
|
"greeting",
|
|
"greet",
|
|
"hello",
|
|
"hi",
|
|
"hey",
|
|
"welcome",
|
|
"hallo",
|
|
"hoi",
|
|
"greetings",
|
|
],
|
|
"HR & Payroll": [
|
|
"salary",
|
|
"salaris",
|
|
"pay",
|
|
"payroll",
|
|
"loon",
|
|
"loonstrook",
|
|
"hr",
|
|
"human resources",
|
|
"benefits",
|
|
"vacation",
|
|
"leave",
|
|
"verlof",
|
|
"maaltijdvergoeding",
|
|
"vergoeding",
|
|
],
|
|
"Schedules & Hours": [
|
|
"schedule",
|
|
"hours",
|
|
"tijd",
|
|
"time",
|
|
"roster",
|
|
"rooster",
|
|
"planning",
|
|
"shift",
|
|
"dienst",
|
|
"working hours",
|
|
"werktijden",
|
|
"openingstijden",
|
|
],
|
|
"Role & Responsibilities": [
|
|
"role",
|
|
"job",
|
|
"function",
|
|
"functie",
|
|
"task",
|
|
"taak",
|
|
"responsibilities",
|
|
"leidinggevende",
|
|
"manager",
|
|
"teamleider",
|
|
"supervisor",
|
|
"team",
|
|
"lead",
|
|
],
|
|
"Technical Support": [
|
|
"technical",
|
|
"tech",
|
|
"support",
|
|
"laptop",
|
|
"computer",
|
|
"system",
|
|
"systeem",
|
|
"it",
|
|
"software",
|
|
"hardware",
|
|
],
|
|
Offboarding: [
|
|
"offboarding",
|
|
"leave",
|
|
"exit",
|
|
"quit",
|
|
"resign",
|
|
"resignation",
|
|
"ontslag",
|
|
"vertrek",
|
|
"afsluiting",
|
|
],
|
|
};
|
|
|
|
// Try to match the category using keywords
|
|
for (const [category, keywords] of Object.entries(categoryMapping)) {
|
|
if (keywords.some((keyword) => normalized.includes(keyword))) {
|
|
return category;
|
|
}
|
|
}
|
|
|
|
// If no match, return "Other"
|
|
return "Other";
|
|
}
|
|
|
|
/**
|
|
* Converts sentiment string values to numeric scores
|
|
* @param {string} sentimentStr The sentiment string from the CSV
|
|
* @returns {number|null} A numeric score representing the sentiment
|
|
*/
|
|
function mapSentimentToScore(sentimentStr) {
|
|
if (!sentimentStr) return null;
|
|
|
|
// Convert to lowercase for case-insensitive matching
|
|
const sentiment = sentimentStr.toLowerCase();
|
|
|
|
// Map sentiment strings to numeric values on a scale from -1 to 2
|
|
const sentimentMap = {
|
|
happy: 1.0,
|
|
excited: 1.5,
|
|
positive: 0.8,
|
|
neutral: 0.0,
|
|
playful: 0.7,
|
|
negative: -0.8,
|
|
angry: -1.0,
|
|
sad: -0.7,
|
|
frustrated: -0.9,
|
|
positief: 0.8, // Dutch
|
|
neutraal: 0.0, // Dutch
|
|
negatief: -0.8, // Dutch
|
|
positivo: 0.8, // Spanish/Italian
|
|
neutro: 0.0, // Spanish/Italian
|
|
negativo: -0.8, // Spanish/Italian
|
|
yes: 0.5, // For any "yes" sentiment
|
|
no: -0.5, // For any "no" sentiment
|
|
};
|
|
|
|
return sentimentMap[sentiment] !== undefined
|
|
? sentimentMap[sentiment]
|
|
: isNaN(parseFloat(sentiment))
|
|
? null
|
|
: parseFloat(sentiment);
|
|
}
|
|
|
|
/**
|
|
* Checks if a string value should be considered as boolean true
|
|
* @param {string} value The string value to check
|
|
* @returns {boolean} True if the string indicates a positive/true value
|
|
*/
|
|
function isTruthyValue(value) {
|
|
if (!value) return false;
|
|
|
|
const truthyValues = [
|
|
"1",
|
|
"true",
|
|
"yes",
|
|
"y",
|
|
"ja",
|
|
"si",
|
|
"oui",
|
|
"да",
|
|
"да",
|
|
"はい",
|
|
];
|
|
|
|
return truthyValues.includes(value.toLowerCase());
|
|
}
|
|
|
|
/**
|
|
* Safely parses a date string into a Date object.
|
|
* @param {string} dateStr The date string to parse.
|
|
* @returns {Date|null} A Date object or null if parsing fails.
|
|
*/
|
|
function safeParseDate(dateStr) {
|
|
if (!dateStr) return null;
|
|
|
|
// Try to parse D-M-YYYY HH:MM:SS format (with hyphens or dots)
|
|
const dateTimeRegex =
|
|
/^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
|
|
const match = dateStr.match(dateTimeRegex);
|
|
|
|
if (match) {
|
|
const day = match[1];
|
|
const month = match[2];
|
|
const year = match[3];
|
|
const hour = match[4];
|
|
const minute = match[5];
|
|
const second = match[6];
|
|
|
|
// Reformat to YYYY-MM-DDTHH:MM:SS (ISO-like, but local time)
|
|
// Ensure month and day are two digits
|
|
const formattedDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}T${hour.padStart(2, "0")}:${minute.padStart(2, "0")}:${second.padStart(2, "0")}`;
|
|
|
|
try {
|
|
const date = new Date(formattedDateStr);
|
|
// Basic validation: check if the constructed date is valid
|
|
if (!isNaN(date.getTime())) {
|
|
return date;
|
|
}
|
|
} catch (e) {
|
|
console.warn(
|
|
`[safeParseDate] Error parsing reformatted string ${formattedDateStr} from ${dateStr}:`,
|
|
e
|
|
);
|
|
}
|
|
}
|
|
|
|
// Fallback for other potential formats (e.g., direct ISO 8601) or if the primary parse failed
|
|
try {
|
|
const parsedDate = new Date(dateStr);
|
|
if (!isNaN(parsedDate.getTime())) {
|
|
return parsedDate;
|
|
}
|
|
} catch (e) {
|
|
console.warn(`[safeParseDate] Error parsing with fallback ${dateStr}:`, e);
|
|
}
|
|
|
|
console.warn(`Failed to parse date string: ${dateStr}`);
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Fetches transcript content from a URL
|
|
* @param {string} url The URL to fetch the transcript from
|
|
* @param {string} username Optional username for authentication
|
|
* @param {string} password Optional password for authentication
|
|
* @returns {Promise<string|null>} The transcript content or null if fetching fails
|
|
*/
|
|
async function fetchTranscriptContent(url, username, password) {
|
|
try {
|
|
const authHeader =
|
|
username && password
|
|
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
|
|
: undefined;
|
|
|
|
const response = await fetch(url, {
|
|
headers: authHeader ? { Authorization: authHeader } : {},
|
|
timeout: 10000, // 10 second timeout
|
|
});
|
|
|
|
if (!response.ok) {
|
|
// Only log error once per batch, not for every transcript
|
|
if (Math.random() < 0.1) {
|
|
// Log ~10% of errors to avoid spam
|
|
console.warn(
|
|
`[CSV] Transcript fetch failed for ${url}: ${response.status} ${response.statusText}`
|
|
);
|
|
}
|
|
return null;
|
|
}
|
|
return await response.text();
|
|
} catch (error) {
|
|
// Only log error once per batch, not for every transcript
|
|
if (Math.random() < 0.1) {
|
|
// Log ~10% of errors to avoid spam
|
|
console.warn(`[CSV] Transcript fetch error for ${url}:`, error.message);
|
|
}
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Fetches and parses CSV data from a URL
|
|
* @param {string} url The CSV URL
|
|
* @param {string} username Optional username for authentication
|
|
* @param {string} password Optional password for authentication
|
|
* @returns {Promise<Object[]>} Array of parsed session data
|
|
*/
|
|
export async function fetchAndParseCsv(url, username, password) {
|
|
const authHeader =
|
|
username && password
|
|
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
|
|
: undefined;
|
|
|
|
const res = await fetch(url, {
|
|
headers: authHeader ? { Authorization: authHeader } : {},
|
|
});
|
|
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
|
|
|
|
const text = await res.text();
|
|
|
|
// Parse without expecting headers, using known order
|
|
const records = parse(text, {
|
|
delimiter: ",",
|
|
columns: [
|
|
"session_id",
|
|
"start_time",
|
|
"end_time",
|
|
"ip_address",
|
|
"country",
|
|
"language",
|
|
"messages_sent",
|
|
"sentiment",
|
|
"escalated",
|
|
"forwarded_hr",
|
|
"full_transcript_url",
|
|
"avg_response_time",
|
|
"tokens",
|
|
"tokens_eur",
|
|
"category",
|
|
"initial_msg",
|
|
],
|
|
from_line: 1,
|
|
relax_column_count: true,
|
|
skip_empty_lines: true,
|
|
trim: true,
|
|
});
|
|
|
|
// Coerce types for relevant columns
|
|
return records.map((r) => ({
|
|
id: r.session_id,
|
|
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
|
|
endTime: safeParseDate(r.end_time),
|
|
ipAddress: r.ip_address,
|
|
country: getCountryCode(r.country),
|
|
language: getLanguageCode(r.language),
|
|
messagesSent: Number(r.messages_sent) || 0,
|
|
sentiment: mapSentimentToScore(r.sentiment),
|
|
escalated: isTruthyValue(r.escalated),
|
|
forwardedHr: isTruthyValue(r.forwarded_hr),
|
|
fullTranscriptUrl: r.full_transcript_url,
|
|
avgResponseTime: r.avg_response_time
|
|
? parseFloat(r.avg_response_time)
|
|
: null,
|
|
tokens: Number(r.tokens) || 0,
|
|
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
|
|
category: normalizeCategory(r.category),
|
|
initialMsg: r.initial_msg,
|
|
}));
|
|
}
|
|
|
|
/**
|
|
* Fetches and stores sessions for all companies
|
|
*/
|
|
export async function fetchAndStoreSessionsForAllCompanies() {
|
|
try {
|
|
// Get all companies
|
|
const companies = await prisma.company.findMany();
|
|
|
|
for (const company of companies) {
|
|
if (!company.csvUrl) {
|
|
console.log(
|
|
`[Scheduler] Skipping company ${company.id} - no CSV URL configured`
|
|
);
|
|
continue;
|
|
}
|
|
|
|
// Skip companies with invalid/example URLs
|
|
if (
|
|
company.csvUrl.includes("example.com") ||
|
|
company.csvUrl === "https://example.com/data.csv"
|
|
) {
|
|
console.log(
|
|
`[Scheduler] Skipping company ${company.id} - invalid/example CSV URL: ${company.csvUrl}`
|
|
);
|
|
continue;
|
|
}
|
|
|
|
console.log(`[Scheduler] Processing sessions for company: ${company.id}`);
|
|
|
|
try {
|
|
const sessions = await fetchAndParseCsv(
|
|
company.csvUrl,
|
|
company.csvUsername,
|
|
company.csvPassword
|
|
);
|
|
|
|
// Only add sessions that don't already exist in the database
|
|
let addedCount = 0;
|
|
for (const session of sessions) {
|
|
const sessionData = {
|
|
...session,
|
|
companyId: company.id,
|
|
id:
|
|
session.id ||
|
|
session.sessionId ||
|
|
`sess_${Date.now()}_${Math.random().toString(36).substring(2, 7)}`,
|
|
// Ensure startTime is not undefined
|
|
startTime: session.startTime || new Date(),
|
|
};
|
|
|
|
// Validate dates to prevent "Invalid Date" errors
|
|
const startTime =
|
|
sessionData.startTime instanceof Date &&
|
|
!isNaN(sessionData.startTime.getTime())
|
|
? sessionData.startTime
|
|
: new Date();
|
|
|
|
const endTime =
|
|
session.endTime instanceof Date && !isNaN(session.endTime.getTime())
|
|
? session.endTime
|
|
: new Date();
|
|
|
|
// Fetch transcript content if URL is available
|
|
let transcriptContent = null;
|
|
if (session.fullTranscriptUrl) {
|
|
transcriptContent = await fetchTranscriptContent(
|
|
session.fullTranscriptUrl,
|
|
company.csvUsername,
|
|
company.csvPassword
|
|
);
|
|
}
|
|
|
|
// Check if the session already exists
|
|
const existingSession = await prisma.session.findUnique({
|
|
where: { id: sessionData.id },
|
|
});
|
|
|
|
if (existingSession) {
|
|
// Skip this session as it already exists
|
|
continue;
|
|
}
|
|
|
|
// Only include fields that are properly typed for Prisma
|
|
await prisma.session.create({
|
|
data: {
|
|
id: sessionData.id,
|
|
companyId: sessionData.companyId,
|
|
startTime: startTime,
|
|
endTime: endTime,
|
|
ipAddress: session.ipAddress || null,
|
|
country: session.country || null,
|
|
language: session.language || null,
|
|
messagesSent:
|
|
typeof session.messagesSent === "number"
|
|
? session.messagesSent
|
|
: 0,
|
|
sentiment:
|
|
typeof session.sentiment === "number"
|
|
? session.sentiment
|
|
: null,
|
|
escalated:
|
|
typeof session.escalated === "boolean"
|
|
? session.escalated
|
|
: null,
|
|
forwardedHr:
|
|
typeof session.forwardedHr === "boolean"
|
|
? session.forwardedHr
|
|
: null,
|
|
fullTranscriptUrl: session.fullTranscriptUrl || null,
|
|
transcriptContent: transcriptContent, // Add the transcript content
|
|
avgResponseTime:
|
|
typeof session.avgResponseTime === "number"
|
|
? session.avgResponseTime
|
|
: null,
|
|
tokens:
|
|
typeof session.tokens === "number" ? session.tokens : null,
|
|
tokensEur:
|
|
typeof session.tokensEur === "number"
|
|
? session.tokensEur
|
|
: null,
|
|
category: session.category || null,
|
|
initialMsg: session.initialMsg || null,
|
|
},
|
|
});
|
|
|
|
addedCount++;
|
|
}
|
|
|
|
console.log(
|
|
`[Scheduler] Added ${addedCount} new sessions for company ${company.id}`
|
|
);
|
|
} catch (error) {
|
|
console.error(
|
|
`[Scheduler] Error processing company ${company.id}:`,
|
|
error
|
|
);
|
|
}
|
|
}
|
|
} catch (error) {
|
|
console.error("[Scheduler] Error fetching companies:", error);
|
|
throw error;
|
|
}
|
|
}
|