feat: Refactor sentiment handling and enhance processing logic for session data

This commit is contained in:
Max Kowalski
2025-06-26 21:14:24 +02:00
parent 653d70022b
commit 8774a1f155
11 changed files with 195 additions and 280 deletions

View File

@ -35,10 +35,10 @@ interface SessionData {
startTime: Date;
endTime: Date | null;
ipAddress?: string;
country?: string | null; // Will store ISO 3166-1 alpha-2 country code or null/undefined
language?: string | null; // Will store ISO 639-1 language code or null/undefined
country?: string | null;
language?: string | null;
messagesSent: number;
sentiment: number | null;
sentiment?: string | null;
escalated: boolean;
forwardedHr: boolean;
fullTranscriptUrl?: string | null;
@ -142,45 +142,6 @@ function normalizeCategory(categoryStr?: string): string | null {
return normalized || null;
}
/**
 * Converts a sentiment value from the CSV into a numeric score.
 *
 * Known labels (English, Dutch, Spanish/Italian, plus "yes"/"no") are matched
 * case-insensitively and ignoring surrounding whitespace; their scores range
 * from -1.0 to 1.5. Any other value is handed to `parseFloat`, so numeric
 * strings such as "0.25" pass through as numbers.
 *
 * @param sentimentStr The sentiment string from the CSV
 * @returns The numeric score, or `null` when the input is missing, blank, or
 *   neither a known label nor parseable as a number
 */
function mapSentimentToScore(sentimentStr?: string): number | null {
  if (!sentimentStr) return null;

  // Normalize so " Positive " and "positive" hit the same table entry.
  const sentiment = sentimentStr.trim().toLowerCase();
  if (!sentiment) return null;

  // A Map (rather than a plain object) so that inputs like "constructor" or
  // "__proto__" cannot resolve to inherited Object.prototype members.
  const sentimentScores = new Map<string, number>([
    ["happy", 1.0],
    ["excited", 1.5],
    ["positive", 0.8],
    ["neutral", 0.0],
    ["playful", 0.7],
    ["negative", -0.8],
    ["angry", -1.0],
    ["sad", -0.7],
    ["frustrated", -0.9],
    ["positief", 0.8], // Dutch
    ["neutraal", 0.0], // Dutch
    ["negatief", -0.8], // Dutch
    ["positivo", 0.8], // Spanish/Italian
    ["neutro", 0.0], // Spanish/Italian
    ["negativo", -0.8], // Spanish/Italian
    ["yes", 0.5], // For any "yes" sentiment
    ["no", -0.5], // For any "no" sentiment
  ]);

  const mapped = sentimentScores.get(sentiment);
  if (mapped !== undefined) return mapped;

  // Fall back to numeric input; parse once and reject non-numbers.
  const parsed = parseFloat(sentiment);
  return Number.isNaN(parsed) ? null : parsed;
}
/**
* Checks if a string value should be considered as boolean true
* @param value The string value to check
@ -314,7 +275,7 @@ export async function fetchAndParseCsv(
country: getCountryCode(r.country),
language: getLanguageCode(r.language),
messagesSent: Number(r.messages_sent) || 0,
sentiment: mapSentimentToScore(r.sentiment),
sentiment: r.sentiment,
escalated: isTruthyValue(r.escalated),
forwardedHr: isTruthyValue(r.forwarded_hr),
fullTranscriptUrl: r.full_transcript_url,

View File

@ -357,7 +357,7 @@ export function sessionMetrics(
let totalTokens = 0;
let totalTokensEur = 0;
const wordCounts: { [key: string]: number } = {};
let alerts = 0;
const alerts = 0;
// New metrics variables
const hourlySessionCounts: { [hour: string]: number } = {};
@ -463,22 +463,15 @@ export function sessionMetrics(
if (session.forwardedHr) forwardedHrCount++;
// Sentiment
if (session.sentiment !== undefined && session.sentiment !== null) {
// Example thresholds, adjust as needed
if (session.sentiment > 0.3) sentimentPositiveCount++;
else if (session.sentiment < -0.3) sentimentNegativeCount++;
else sentimentNeutralCount++;
if (session.sentiment === "positive") {
sentimentPositiveCount++;
} else if (session.sentiment === "neutral") {
sentimentNeutralCount++;
} else if (session.sentiment === "negative") {
sentimentNegativeCount++;
}
// Sentiment Alert Check
if (
companyConfig.sentimentAlert !== undefined &&
session.sentiment !== undefined &&
session.sentiment !== null &&
session.sentiment < companyConfig.sentimentAlert
) {
alerts++;
}
// Tokens
if (session.tokens !== undefined && session.tokens !== null) {

View File

@ -38,14 +38,14 @@ const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
interface ProcessedData {
language: string;
messages_sent: number;
sentiment: SentimentCategory;
sentiment: "positive" | "neutral" | "negative";
escalated: boolean;
forwarded_hr: boolean;
category: ValidCategory;
questions: string[];
questions: string | string[];
summary: string;
session_id: string;
tokens: number;
tokens_eur: number;
}
interface ProcessingResult {
@ -76,30 +76,31 @@ System: You are a JSON-generating assistant. Your task is to analyze raw chat tr
Here is the schema you must follow:
{
{{
"language": "ISO 639-1 code, e.g., 'en', 'nl'",
"messages_sent": "integer, number of messages from the user",
"sentiment": "'positive', 'neutral', or 'negative'",
"escalated": "bool: true if the assistant connected or referred to a human agent, otherwise false",
"forwarded_hr": "bool: true if HR contact info was given, otherwise false",
"category": "one of: 'Schedule & Hours', 'Leave & Vacation', 'Sick Leave & Recovery', 'Salary & Compensation', 'Contract & Hours', 'Onboarding', 'Offboarding', 'Workwear & Staff Pass', 'Team & Contacts', 'Personal Questions', 'Access & Login', 'Social questions', 'Unrecognized / Other'",
"questions": array of simplified questions asked by the user formulated in English, try to make a question out of messages,
"questions": "a single question or an array of simplified questions asked by the user formulated in English, try to make a question out of messages",
"summary": "Brief summary (12 sentences) of the conversation",
}
"tokens": "integer, number of tokens used for the API call",
"tokens_eur": "float, cost of the API call in EUR",
}}
You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
"JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
For example, the example "JSON Schema" instance {{"properties": {{"foo": {{"description": "a list of test words", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}}, "required": ["foo"]}}
would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
Thus, the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of this example "JSON Schema". The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
\`\`\`json
{"type":"object","properties":{"language":{"type":"string","pattern":"^[a-z]{2}$","description":"ISO 639-1 code for the user's primary language"},"messages_sent":{"type":"integer","minimum":0,"description":"Number of messages sent by the user"},"sentiment":{"type":"string","enum":["positive","neutral","negative"],"description":"Overall tone of the user during the conversation"},"escalated":{"type":"boolean","description":"Whether the assistant indicated it could not help"},"forwarded_hr":{"type":"boolean","description":"Whether HR contact was mentioned or provided"},"category":{"type":"string","enum":["Schedule & Hours","Leave & Vacation","Sick Leave & Recovery","Salary & Compensation","Contract & Hours","Onboarding","Offboarding","Workwear & Staff Pass","Team & Contacts","Personal Questions","Access & Login","Social questions","Unrecognized / Other"],"description":"Best-fitting topic category for the conversation"},"questions":{"type":"array","items":{"type":"string","minLength":5},"minItems":0,"maxItems":5,"description":"List of paraphrased questions asked by the user in English"},"summary":{"type":"string","minLength":10,"maxLength":300,"description":"Brief summary of the conversation"},"session_id":{"type":"string","pattern":"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$","minLength":36,"maxLength":36,"description":"Unique identifier for the conversation session"}},"required":["language","messages_sent","sentiment","escalated","forwarded_hr","category","questions","summary","session_id"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}
\`\`\`
{{"type":"object","properties":{"language":{"type":"string","pattern":"^[a-z]{2}$","description":"ISO 639-1 code for the user's primary language"},"sentiment":{"type":"string","enum":["positive","neutral","negative"],"description":"Overall tone of the user during the conversation"},"escalated":{"type":"boolean","description":"Whether the assistant indicated it could not help"},"forwarded_hr":{"type":"boolean","description":"Whether HR contact was mentioned or provided"},"category":{"type":"string","enum":["Schedule & Hours","Leave & Vacation","Sick Leave & Recovery","Salary & Compensation","Contract & Hours","Onboarding","Offboarding","Workwear & Staff Pass","Team & Contacts","Personal Questions","Access & Login","Social questions","Unrecognized / Other"],"description":"Best-fitting topic category for the conversation"},"questions":{"oneOf":[{"type":"string"},{"type":"array","items":{"type":"string"}}],"description":"A single question or a list of paraphrased questions asked by the user in English"},"summary":{"type":"string","minLength":10,"maxLength":300,"description":"Brief summary of the conversation"},"tokens":{"type":"integer","description":"Number of tokens used for the API call"},"tokens_eur":{"type":"number","description":"Cost of the API call in EUR"}},"required":["language","sentiment","escalated","forwarded_hr","category","questions","summary","tokens","tokens_eur"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}
`;
try {
@ -151,13 +152,14 @@ function validateOpenAIResponse(data: any): void {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"tokens",
"tokens_eur",
];
for (const field of requiredFields) {
@ -173,10 +175,6 @@ function validateOpenAIResponse(data: any): void {
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
@ -197,8 +195,8 @@ function validateOpenAIResponse(data: any): void {
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
if (typeof data.questions !== "string" && !Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected string or array of strings");
}
if (
@ -211,9 +209,12 @@ function validateOpenAIResponse(data: any): void {
);
}
// session_id is optional in the response, we'll use the one we passed in
if (data.session_id && typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
if (typeof data.tokens !== "number" || data.tokens < 0) {
throw new Error("Invalid tokens. Expected non-negative number");
}
if (typeof data.tokens_eur !== "number" || data.tokens_eur < 0) {
throw new Error("Invalid tokens_eur. Expected non-negative number");
}
}
@ -275,7 +276,11 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
);
// Check if the processed data indicates low quality (empty questions, very short summary, etc.)
const hasValidQuestions = processedData.questions && processedData.questions.length > 0;
const hasValidQuestions =
processedData.questions &&
(Array.isArray(processedData.questions)
? processedData.questions.length > 0
: typeof processedData.questions === "string");
const hasValidSummary = processedData.summary && processedData.summary.length >= 10;
const isValidData = hasValidQuestions && hasValidSummary;
@ -284,14 +289,18 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: null, // Remove numeric sentiment, use only sentimentCategory
sentimentCategory: processedData.sentiment,
sentiment: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
questions: processedData.questions,
summary: processedData.summary,
tokens: {
increment: processedData.tokens,
},
tokensEur: {
increment: processedData.tokens_eur,
},
processed: true,
},
});

View File

@ -36,14 +36,14 @@ const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
interface ProcessedData {
language: string;
messages_sent: number;
sentiment: SentimentCategory;
sentiment: "positive" | "neutral" | "negative";
escalated: boolean;
forwarded_hr: boolean;
category: ValidCategory;
questions: string[];
questions: string | string[];
summary: string;
session_id: string;
tokens: number;
tokens_eur: number;
}
interface ProcessingResult {
@ -76,14 +76,16 @@ Here is the schema you must follow:
{
"language": "ISO 639-1 code, e.g., 'en', 'nl'",
"messages_sent": "integer, number of messages from the user",
"sentiment": "'positive', 'neutral', or 'negative'",
"escalated": "bool: true if the assistant connected or referred to a human agent, otherwise false",
"forwarded_hr": "bool: true if HR contact info was given, otherwise false",
"category": "one of: 'Schedule & Hours', 'Leave & Vacation', 'Sick Leave & Recovery', 'Salary & Compensation', 'Contract & Hours', 'Onboarding', 'Offboarding', 'Workwear & Staff Pass', 'Team & Contacts', 'Personal Questions', 'Access & Login', 'Social questions', 'Unrecognized / Other'",
"questions": array of simplified questions asked by the user formulated in English, try to make a question out of messages,
"questions": "a single question or an array of simplified questions asked by the user formulated in English, try to make a question out of messages",
"summary": "Brief summary (12 sentences) of the conversation",
"tokens": "integer, number of tokens used for the API call",
"tokens_eur": "float, cost of the API call in EUR",
}
You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
"JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
@ -95,9 +97,9 @@ Thus, the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of this
Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
\`\`\`json
{"type":"object","properties":{"language":{"type":"string","pattern":"^[a-z]{2}$","description":"ISO 639-1 code for the user's primary language"},"messages_sent":{"type":"integer","minimum":0,"description":"Number of messages sent by the user"},"sentiment":{"type":"string","enum":["positive","neutral","negative"],"description":"Overall tone of the user during the conversation"},"escalated":{"type":"boolean","description":"Whether the assistant indicated it could not help"},"forwarded_hr":{"type":"boolean","description":"Whether HR contact was mentioned or provided"},"category":{"type":"string","enum":["Schedule & Hours","Leave & Vacation","Sick Leave & Recovery","Salary & Compensation","Contract & Hours","Onboarding","Offboarding","Workwear & Staff Pass","Team & Contacts","Personal Questions","Access & Login","Social questions","Unrecognized / Other"],"description":"Best-fitting topic category for the conversation"},"questions":{"type":"array","items":{"type":"string","minLength":5},"minItems":0,"maxItems":5,"description":"List of paraphrased questions asked by the user in English"},"summary":{"type":"string","minLength":10,"maxLength":300,"description":"Brief summary of the conversation"},"session_id":{"type":"string","pattern":"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$","minLength":36,"maxLength":36,"description":"Unique identifier for the conversation session"}},"required":["language","messages_sent","sentiment","escalated","forwarded_hr","category","questions","summary","session_id"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}
\`\`\`
{{"type":"object","properties":{"language":{"type":"string","pattern":"^[a-z]{2}$","description":"ISO 639-1 code for the user's primary language"},"sentiment":{"type":"string","enum":["positive","neutral","negative"],"description":"Overall tone of the user during the conversation"},"escalated":{"type":"boolean","description":"Whether the assistant indicated it could not help"},"forwarded_hr":{"type":"boolean","description":"Whether HR contact was mentioned or provided"},"category":{"type":"string","enum":["Schedule & Hours","Leave & Vacation","Sick Leave & Recovery","Salary & Compensation","Contract & Hours","Onboarding","Offboarding","Workwear & Staff Pass","Team & Contacts","Personal Questions","Access & Login","Social questions","Unrecognized / Other"],"description":"Best-fitting topic category for the conversation"},"questions":{"oneOf":[{"type":"string"},{"type":"array","items":{"type":"string"}}],"description":"A single question or a list of paraphrased questions asked by the user in English"},"summary":{"type":"string","minLength":10,"maxLength":300,"description":"Brief summary of the conversation"},"tokens":{"type":"integer","description":"Number of tokens used for the API call"},"tokens_eur":{"type":"number","description":"Cost of the API call in EUR"}},"required":["language","sentiment","escalated","forwarded_hr","category","questions","summary","tokens","tokens_eur"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}
`;
try {
@ -149,13 +151,14 @@ function validateOpenAIResponse(data: any): void {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"tokens",
"tokens_eur",
];
for (const field of requiredFields) {
@ -171,10 +174,6 @@ function validateOpenAIResponse(data: any): void {
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
@ -195,8 +194,8 @@ function validateOpenAIResponse(data: any): void {
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
if (typeof data.questions !== "string" && !Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected string or array of strings");
}
if (
@ -209,9 +208,12 @@ function validateOpenAIResponse(data: any): void {
);
}
// session_id is optional in the response, we'll use the one we passed in
if (data.session_id && typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
if (typeof data.tokens !== "number" || data.tokens < 0) {
throw new Error("Invalid tokens. Expected non-negative number");
}
if (typeof data.tokens_eur !== "number" || data.tokens_eur < 0) {
throw new Error("Invalid tokens_eur. Expected non-negative number");
}
}
@ -273,7 +275,11 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
);
// Check if the processed data indicates low quality (empty questions, very short summary, etc.)
const hasValidQuestions = processedData.questions && processedData.questions.length > 0;
const hasValidQuestions =
processedData.questions &&
(Array.isArray(processedData.questions)
? processedData.questions.length > 0
: typeof processedData.questions === "string");
const hasValidSummary = processedData.summary && processedData.summary.length >= 10;
const isValidData = hasValidQuestions && hasValidSummary;
@ -282,14 +288,18 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: null, // Remove numeric sentiment, use only sentimentCategory
sentimentCategory: processedData.sentiment,
sentiment: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
questions: processedData.questions,
summary: processedData.summary,
tokens: {
increment: processedData.tokens,
},
tokensEur: {
increment: processedData.tokens_eur,
},
processed: true,
},
});

View File

@ -75,8 +75,7 @@ export interface ChatSession {
language?: string | null;
country?: string | null;
ipAddress?: string | null;
sentiment?: number | null;
sentimentCategory?: string | null; // "positive", "neutral", "negative" from OpenAI
sentiment?: string | null;
messagesSent?: number;
startTime: Date;
endTime?: Date | null;