feat: Refactor sentiment handling and enhance processing logic for session data

2026-01-16 23:32:08 +01:00 · 2025-06-26 21:14:24 +02:00
parent 653d70022b
commit 8774a1f155
11 changed files with 195 additions and 280 deletions
--- a/lib/processingScheduler.ts
+++ b/lib/processingScheduler.ts
@ -38,14 +38,14 @@ const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

 interface ProcessedData {
  language: string;
-  messages_sent: number;
-  sentiment: SentimentCategory;
+  sentiment: "positive" | "neutral" | "negative";
  escalated: boolean;
  forwarded_hr: boolean;
  category: ValidCategory;
-  questions: string[];
+  questions: string | string[];
  summary: string;
-  session_id: string;
+  tokens: number;
+  tokens_eur: number;
 }

 interface ProcessingResult {
@ -76,30 +76,31 @@ System: You are a JSON-generating assistant. Your task is to analyze raw chat tr

 Here is the schema you must follow:

-{
+{{
 "language": "ISO 639-1 code, e.g., 'en', 'nl'",
-"messages_sent": "integer, number of messages from the user",
 "sentiment": "'positive', 'neutral', or 'negative'",
 "escalated": "bool: true if the assistant connected or referred to a human agent, otherwise false",
 "forwarded_hr": "bool: true if HR contact info was given, otherwise false",
 "category": "one of: 'Schedule & Hours', 'Leave & Vacation', 'Sick Leave & Recovery', 'Salary & Compensation', 'Contract & Hours', 'Onboarding', 'Offboarding', 'Workwear & Staff Pass', 'Team & Contacts', 'Personal Questions', 'Access & Login', 'Social questions', 'Unrecognized / Other'",
-"questions": array of simplified questions asked by the user formulated in English, try to make a question out of messages,
+"questions": "a single question or an array of simplified questions asked by the user formulated in English, try to make a question out of messages",
 "summary": "Brief summary (1–2 sentences) of the conversation",
-}
+"tokens": "integer, number of tokens used for the API call",
+"tokens_eur": "float, cost of the API call in EUR",
+}}
+
 You must format your output as a JSON value that adheres to a given "JSON Schema" instance.

 "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.

-For example, the example "JSON Schema" instance {{"properties": {{"foo": {{"description": "a list of test words", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
+For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}}, "required": ["foo"]}}
 would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
-Thus, the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of this example "JSON Schema". The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
+Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

 Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!

 Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
-\`\`\`json
-{"type":"object","properties":{"language":{"type":"string","pattern":"^[a-z]{2}$","description":"ISO 639-1 code for the user's primary language"},"messages_sent":{"type":"integer","minimum":0,"description":"Number of messages sent by the user"},"sentiment":{"type":"string","enum":["positive","neutral","negative"],"description":"Overall tone of the user during the conversation"},"escalated":{"type":"boolean","description":"Whether the assistant indicated it could not help"},"forwarded_hr":{"type":"boolean","description":"Whether HR contact was mentioned or provided"},"category":{"type":"string","enum":["Schedule & Hours","Leave & Vacation","Sick Leave & Recovery","Salary & Compensation","Contract & Hours","Onboarding","Offboarding","Workwear & Staff Pass","Team & Contacts","Personal Questions","Access & Login","Social questions","Unrecognized / Other"],"description":"Best-fitting topic category for the conversation"},"questions":{"type":"array","items":{"type":"string","minLength":5},"minItems":0,"maxItems":5,"description":"List of paraphrased questions asked by the user in English"},"summary":{"type":"string","minLength":10,"maxLength":300,"description":"Brief summary of the conversation"},"session_id":{"type":"string","pattern":"^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$","minLength":36,"maxLength":36,"description":"Unique identifier for the conversation session"}},"required":["language","messages_sent","sentiment","escalated","forwarded_hr","category","questions","summary","session_id"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}
-\`\`\`
+
+{{"type":"object","properties":{"language":{"type":"string","pattern":"^[a-z]{2}$","description":"ISO 639-1 code for the user's primary language"},"sentiment":{"type":"string","enum":["positive","neutral","negative"],"description":"Overall tone of the user during the conversation"},"escalated":{"type":"boolean","description":"Whether the assistant indicated it could not help"},"forwarded_hr":{"type":"boolean","description":"Whether HR contact was mentioned or provided"},"category":{"type":"string","enum":["Schedule & Hours","Leave & Vacation","Sick Leave & Recovery","Salary & Compensation","Contract & Hours","Onboarding","Offboarding","Workwear & Staff Pass","Team & Contacts","Personal Questions","Access & Login","Social questions","Unrecognized / Other"],"description":"Best-fitting topic category for the conversation"},"questions":{"oneOf":[{"type":"string"},{"type":"array","items":{"type":"string"}}],"description":"A single question or a list of paraphrased questions asked by the user in English"},"summary":{"type":"string","minLength":10,"maxLength":300,"description":"Brief summary of the conversation"},"tokens":{"type":"integer","description":"Number of tokens used for the API call"},"tokens_eur":{"type":"number","description":"Cost of the API call in EUR"}},"required":["language","sentiment","escalated","forwarded_hr","category","questions","summary","tokens","tokens_eur"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}
  `;

  try {
@ -151,13 +152,14 @@ function validateOpenAIResponse(data: any): void {
  // Check required fields
  const requiredFields = [
    "language",
-    "messages_sent",
    "sentiment",
    "escalated",
    "forwarded_hr",
    "category",
    "questions",
    "summary",
+    "tokens",
+    "tokens_eur",
  ];

  for (const field of requiredFields) {
@ -173,10 +175,6 @@ function validateOpenAIResponse(data: any): void {
    );
  }

-  if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
-    throw new Error("Invalid messages_sent. Expected non-negative number");
-  }
-
  if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
    throw new Error(
      "Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
@ -197,8 +195,8 @@ function validateOpenAIResponse(data: any): void {
    );
  }

-  if (!Array.isArray(data.questions)) {
-    throw new Error("Invalid questions. Expected array of strings");
+  if (typeof data.questions !== "string" && !Array.isArray(data.questions)) {
+    throw new Error("Invalid questions. Expected string or array of strings");
  }

  if (
@ -211,9 +209,12 @@ function validateOpenAIResponse(data: any): void {
    );
  }

-  // session_id is optional in the response, we'll use the one we passed in
-  if (data.session_id && typeof data.session_id !== "string") {
-    throw new Error("Invalid session_id. Expected string");
+  if (typeof data.tokens !== "number" || data.tokens < 0) {
+    throw new Error("Invalid tokens. Expected non-negative number");
+  }
+
+  if (typeof data.tokens_eur !== "number" || data.tokens_eur < 0) {
+    throw new Error("Invalid tokens_eur. Expected non-negative number");
  }
 }

@ -275,7 +276,11 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
    );

    // Check if the processed data indicates low quality (empty questions, very short summary, etc.)
-    const hasValidQuestions = processedData.questions && processedData.questions.length > 0;
+    const hasValidQuestions =
+      processedData.questions &&
+      (Array.isArray(processedData.questions)
+        ? processedData.questions.length > 0
+        : typeof processedData.questions === "string");
    const hasValidSummary = processedData.summary && processedData.summary.length >= 10;
    const isValidData = hasValidQuestions && hasValidSummary;

@ -284,14 +289,18 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
      where: { id: session.id },
      data: {
        language: processedData.language,
-        messagesSent: processedData.messages_sent,
-        sentiment: null, // Remove numeric sentiment, use only sentimentCategory
-        sentimentCategory: processedData.sentiment,
+        sentiment: processedData.sentiment,
        escalated: processedData.escalated,
        forwardedHr: processedData.forwarded_hr,
        category: processedData.category,
-        questions: JSON.stringify(processedData.questions),
+        questions: processedData.questions,
        summary: processedData.summary,
+        tokens: {
+          increment: processedData.tokens,
+        },
+        tokensEur: {
+          increment: processedData.tokens_eur,
+        },
        processed: true,
      },
    });