From 4db0104e2cffd29cdcd49d97a89b3c09d9d0d51e Mon Sep 17 00:00:00 2001
From: Kaj Kowalski
Date: Thu, 22 May 2025 00:30:01 +0200
Subject: [PATCH] Improves CSV data parsing and normalization

Normalizes data from CSV files by mapping sentiment strings
to numeric scores and standardizing boolean values.

This change enhances data consistency and accuracy,
ensuring reliable data processing for sentiment analysis
and boolean evaluations.

It also handles multiple languages for sentiment strings.
---
 lib/csvFetcher.ts                   | 58 +++++++++++++++++++++++++++--
 pages/api/admin/refresh-sessions.ts | 21 ++++++++---
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/lib/csvFetcher.ts b/lib/csvFetcher.ts
index 95c9adb..8875126 100644
--- a/lib/csvFetcher.ts
+++ b/lib/csvFetcher.ts
@@ -43,6 +43,58 @@ interface SessionData {
   initialMsg?: string;
 }
 
+/**
+ * Converts sentiment string values to numeric scores
+ * @param sentimentStr The sentiment string from the CSV
+ * @returns The mapped or parsed score, or null if missing or unrecognized
+ */
+function mapSentimentToScore(sentimentStr?: string): number | null {
+  if (!sentimentStr) return null;
+
+  // Trim and lowercase for whitespace- and case-insensitive matching
+  const sentiment = sentimentStr.trim().toLowerCase();
+
+  // Map sentiment strings to numeric values on a scale from -1 to 1.5
+  const sentimentMap: Record<string, number> = {
+    'happy': 1.0,
+    'excited': 1.5,
+    'positive': 0.8,
+    'neutral': 0.0,
+    'playful': 0.7,
+    'negative': -0.8,
+    'angry': -1.0,
+    'sad': -0.7,
+    'frustrated': -0.9,
+    'positief': 0.8, // Dutch
+    'neutraal': 0.0, // Dutch
+    'negatief': -0.8, // Dutch
+    'positivo': 0.8, // Spanish/Italian
+    'neutro': 0.0, // Spanish/Italian
+    'negativo': -0.8, // Spanish/Italian
+    'yes': 0.5, // For any "yes" sentiment
+    'no': -0.5, // For any "no" sentiment
+  };
+
+  // Own-key lookup so inputs like "constructor" never resolve via Object.prototype
+  if (Object.prototype.hasOwnProperty.call(sentimentMap, sentiment)) {
+    return sentimentMap[sentiment];
+  }
+  // Otherwise accept only a finite numeric string
+  const parsed = parseFloat(sentiment);
+  return Number.isFinite(parsed) ? parsed : null;
+}
+
+/**
+ * Checks if a string value should be considered as boolean true
+ * @param value The string value to check
+ * @returns True if the string indicates a positive/true value
+ */
+function isTruthyValue(value?: string): boolean {
+  if (!value) return false;
+  const truthyValues = ['1', 'true', 'yes', 'y', 'ja', 'si', 'oui', 'да', 'はい'];
+  return truthyValues.includes(value.trim().toLowerCase());
+}
+
 export async function fetchAndParseCsv(
   url: string,
   username?: string,
@@ -103,9 +155,9 @@ export async function fetchAndParseCsv(
     country: r.country,
     language: r.language,
     messagesSent: Number(r.messages_sent) || 0,
-    sentiment: r.sentiment ? parseFloat(r.sentiment) : null,
-    escalated: r.escalated === "1" || r.escalated === "true",
-    forwardedHr: r.forwarded_hr === "1" || r.forwarded_hr === "true",
+    sentiment: mapSentimentToScore(r.sentiment),
+    escalated: isTruthyValue(r.escalated),
+    forwardedHr: isTruthyValue(r.forwarded_hr),
     fullTranscriptUrl: r.full_transcript_url,
     avgResponseTime: r.avg_response_time
       ? parseFloat(r.avg_response_time)
diff --git a/pages/api/admin/refresh-sessions.ts b/pages/api/admin/refresh-sessions.ts
index 9abe6a4..36e11a1 100644
--- a/pages/api/admin/refresh-sessions.ts
+++ b/pages/api/admin/refresh-sessions.ts
@@ -87,17 +87,28 @@ export default async function handler(
         data: {
           id: sessionData.id,
           companyId: sessionData.companyId,
-          startTime: startTime,
-          // endTime is required in the schema, so use valid startTime if not available
+          startTime: startTime,
           endTime: endTime,
           ipAddress: session.ipAddress || null,
           country: session.country || null,
-          language: session.language || null,
-          sentiment:
-            typeof session.sentiment === "number" ? session.sentiment : null,
+          language: session.language || null,
           messagesSent:
             typeof session.messagesSent === "number" ? session.messagesSent : 0,
+          sentiment:
+            typeof session.sentiment === "number" ? session.sentiment : null,
+          escalated:
+            typeof session.escalated === "boolean" ? session.escalated : null,
+          forwardedHr:
+            typeof session.forwardedHr === "boolean" ? session.forwardedHr : null,
+          fullTranscriptUrl: session.fullTranscriptUrl || null,
+          avgResponseTime:
+            typeof session.avgResponseTime === "number" ? session.avgResponseTime : null,
+          tokens:
+            typeof session.tokens === "number" ? session.tokens : null,
+          tokensEur:
+            typeof session.tokensEur === "number" ? session.tokensEur : null,
           category: session.category || null,
+          initialMsg: session.initialMsg || null,
         },
       });
     }