feat: Enhance session processing and metrics

- Updated session processing commands in documentation for clarity.
- Removed transcript content fetching from session processing, allowing on-demand retrieval.
- Improved session metrics calculations and added new metrics for dashboard.
- Refactored processing scheduler to handle sessions in parallel with concurrency limits.
- Added manual trigger API for processing unprocessed sessions with admin checks.
- Implemented scripts for fetching and parsing transcripts, checking transcript content, and testing processing status.
- Updated Prisma schema to enforce default values for processed sessions.
- Added error handling and logging improvements throughout the processing workflow.
2026-03-03 00:21:30 +01:00 · 2025-06-26 17:12:42 +02:00
parent 8f3c1e0f7c
commit 8c43a35632
20 changed files with 851 additions and 229 deletions
--- a/lib/csvFetcher.js
+++ b/lib/csvFetcher.js
@@ -561,15 +561,8 @@ export async function fetchAndStoreSessionsForAllCompanies() {
              ? session.endTime
              : new Date();

-          // Fetch transcript content if URL is available
-          let transcriptContent = null;
-          if (session.fullTranscriptUrl) {
-            transcriptContent = await fetchTranscriptContent(
-              session.fullTranscriptUrl,
-              company.csvUsername,
-              company.csvPassword
-            );
-          }
+          // Note: transcriptContent field was removed from schema
+          // Transcript content can be fetched on-demand from fullTranscriptUrl

          // Check if the session already exists
          const existingSession = await prisma.session.findUnique({
@@ -608,7 +601,6 @@ export async function fetchAndStoreSessionsForAllCompanies() {
                  ? session.forwardedHr
                  : null,
              fullTranscriptUrl: session.fullTranscriptUrl || null,
-              transcriptContent: transcriptContent, // Add the transcript content
              avgResponseTime:
                typeof session.avgResponseTime === "number"
                  ? session.avgResponseTime
--- a/lib/metrics.ts
+++ b/lib/metrics.ts
@@ -349,7 +349,7 @@ export function sessionMetrics(
  let totalTokensEur = 0;
  const wordCounts: { [key: string]: number } = {};
  let alerts = 0;
-  
+
    // New metrics variables
    const hourlySessionCounts: { [hour: string]: number } = {};
    let resolvedChatsCount = 0;
@@ -530,7 +530,7 @@ export function sessionMetrics(
          .forEach(msg => {
            const content = msg.content.trim();
            // Simple heuristic: if message ends with ? or contains question words, treat as question
-            if (content.endsWith('?') || 
+            if (content.endsWith('?') ||
                /\b(what|when|where|why|how|who|which|can|could|would|will|is|are|do|does|did)\b/i.test(content)) {
              questionCounts[content] = (questionCounts[content] || 0) + 1;
            }
@@ -540,7 +540,7 @@ export function sessionMetrics(
      // 3. Extract questions from initial message as fallback
      if (session.initialMsg) {
        const content = session.initialMsg.trim();
-        if (content.endsWith('?') || 
+        if (content.endsWith('?') ||
            /\b(what|when|where|why|how|who|which|can|could|would|will|is|are|do|does|did)\b/i.test(content)) {
          questionCounts[content] = (questionCounts[content] || 0) + 1;
        }
@@ -611,10 +611,10 @@ export function sessionMetrics(
  );

  // Calculate new metrics
-  
+
  // 1. Average Daily Costs (euros)
  const avgDailyCosts = numDaysWithSessions > 0 ? totalTokensEur / numDaysWithSessions : 0;
-  
+
  // 2. Peak Usage Time
  let peakUsageTime = "N/A";
  if (Object.keys(hourlySessionCounts).length > 0) {
@@ -624,7 +624,7 @@ export function sessionMetrics(
    const endHour = (peakHourNum + 1) % 24;
    peakUsageTime = `${peakHour}-${endHour.toString().padStart(2, '0')}:00`;
  }
-  
+
  // 3. Resolved Chats Percentage
  const resolvedChatsPercentage = totalSessions > 0 ? (resolvedChatsCount / totalSessions) * 100 : 0;

@@ -672,7 +672,7 @@ export function sessionMetrics(
    lastUpdated: Date.now(),
    totalSessionDuration,
    validSessionsForDuration,
-    
+
    // New metrics
    avgDailyCosts,
    peakUsageTime,
--- a/lib/processingScheduler.js
+++ b/lib/processingScheduler.js
@@ -14,7 +14,7 @@ const envPath = join(__dirname, '..', '.env.local');
 try {
  const envFile = readFileSync(envPath, 'utf8');
  const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
-  
+
  envVars.forEach(line => {
    const [key, ...valueParts] = line.split('=');
    if (key && valueParts.length > 0) {
@@ -216,24 +216,130 @@ function validateOpenAIResponse(data) {
 }

 /**
- * Process unprocessed sessions
+ * Process a single session
+ * @param {Object} session The session to process
+ * @returns {Promise<Object>} Result object with success/error info
 */
-export async function processUnprocessedSessions() {
+async function processSingleSession(session) {
+  // A session without a usable messages array is reported as a failed result
+  // rather than throwing, so one malformed record cannot reject a whole batch.
+  if (!Array.isArray(session.messages) || session.messages.length === 0) {
+    return {
+      sessionId: session.id,
+      success: false,
+      error: "Session has no messages",
+    };
+  }
+
+  try {
+    // Convert messages back to transcript format for OpenAI processing:
+    // one "[dd/mm/yyyy hh:mm:ss] role: content" line per message.
+    const transcript = session.messages
+      .map(
+        (msg) =>
+          `[${new Date(msg.timestamp)
+            .toLocaleString("en-GB", {
+              day: "2-digit",
+              month: "2-digit",
+              year: "numeric",
+              hour: "2-digit",
+              minute: "2-digit",
+              second: "2-digit",
+            })
+            .replace(",", "")}] ${msg.role}: ${msg.content}`
+      )
+      .join("\n");
+
+    const processedData = await processTranscriptWithOpenAI(
+      session.id,
+      transcript
+    );
+
+    // Map sentiment string to float value for compatibility with existing data
+    const sentimentMap = {
+      positive: 0.8,
+      neutral: 0.0,
+      negative: -0.8,
+    };
+
+    // Update the session with processed data; unknown sentiment labels fall
+    // back to a score of 0.
+    await prisma.session.update({
+      where: { id: session.id },
+      data: {
+        language: processedData.language,
+        messagesSent: processedData.messages_sent,
+        sentiment: sentimentMap[processedData.sentiment] || 0,
+        sentimentCategory: processedData.sentiment,
+        escalated: processedData.escalated,
+        forwardedHr: processedData.forwarded_hr,
+        category: processedData.category,
+        questions: JSON.stringify(processedData.questions),
+        summary: processedData.summary,
+        processed: true,
+      },
+    });
+
+    return {
+      sessionId: session.id,
+      success: true,
+    };
+  } catch (error) {
+    // Thrown values are not guaranteed to be Error instances; normalise them
+    // so the result always carries a readable message.
+    return {
+      sessionId: session.id,
+      success: false,
+      error: error instanceof Error ? error.message : String(error),
+    };
+  }
+}
+
+/**
+ * Process sessions in parallel with concurrency limit.
+ *
+ * At most `maxConcurrency` calls to processSingleSession are in flight at any
+ * time. Each task removes itself from the in-flight set when it settles, so a
+ * new session is only started once a slot has actually been freed.
+ *
+ * @param {Array} sessions Array of sessions to process
+ * @param {number} maxConcurrency Maximum number of concurrent processing tasks
+ *   (values below 1 or non-numeric values are treated as 1)
+ * @returns {Promise<Array>} Per-session result objects, in input order
+ */
+async function processSessionsInParallel(sessions, maxConcurrency = 5) {
+  const limit = Math.max(1, Math.floor(maxConcurrency) || 1);
+  const results = [];
+  const inFlight = new Set();
+
+  for (const session of sessions) {
+    const task = processSingleSession(session)
+      .then((result) => {
+        process.stdout.write(
+          result.success
+            ? `[ProcessingScheduler] ✓ Successfully processed session ${result.sessionId}\n`
+            : `[ProcessingScheduler] ✗ Failed to process session ${result.sessionId}: ${result.error}\n`
+        );
+        return result;
+      })
+      // Free the slot when this task settles (runs after `task` is assigned).
+      .finally(() => inFlight.delete(task));
+
+    results.push(task);
+    inFlight.add(task);
+
+    // All slots taken: wait until any running task settles before continuing.
+    if (inFlight.size >= limit) {
+      await Promise.race(inFlight);
+    }
+  }
+
+  return Promise.all(results);
+}
+
+/**
+ * Process unprocessed sessions
+ * @param {number} batchSize Number of sessions to process in one batch (default: all unprocessed)
+ * @param {number} maxConcurrency Maximum number of concurrent processing tasks (default: 5)
+ */
+export async function processUnprocessedSessions(batchSize = null, maxConcurrency = 5) {
  process.stdout.write(
    "[ProcessingScheduler] Starting to process unprocessed sessions...\n"
  );

  // Find sessions that have messages but haven't been processed
-  const sessionsToProcess = await prisma.session.findMany({
+  const queryOptions = {
    where: {
      AND: [
        { messages: { some: {} } }, // Must have messages
-        { 
-          OR: [
-            { processed: false },
-            { processed: null }
-          ]
-        }
+        { processed: false }, // Only unprocessed sessions (no longer checking for null)
      ],
    },
    include: {
@@ -241,8 +347,14 @@ export async function processUnprocessedSessions() {
        orderBy: { order: "asc" },
      },
    },
-    take: 10, // Process in batches to avoid overloading the system
-  });
+  };
+
+  // Add batch size limit if specified
+  if (batchSize && batchSize > 0) {
+    queryOptions.take = batchSize;
+  }
+
+  const sessionsToProcess = await prisma.session.findMany(queryOptions);

  // Filter to only sessions that have messages
  const sessionsWithMessages = sessionsToProcess.filter(
@@ -257,80 +369,15 @@ export async function processUnprocessedSessions() {
  }

  process.stdout.write(
-    `[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process.\n`
+    `[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process (max concurrency: ${maxConcurrency}).\n`
  );
-  let successCount = 0;
-  let errorCount = 0;

-  for (const session of sessionsWithMessages) {
-    if (session.messages.length === 0) {
-      process.stderr.write(
-        `[ProcessingScheduler] Session ${session.id} has no messages, skipping.\n`
-      );
-      continue;
-    }
+  const startTime = Date.now();
+  const results = await processSessionsInParallel(sessionsWithMessages, maxConcurrency);
+  const endTime = Date.now();

-    process.stdout.write(
-      `[ProcessingScheduler] Processing messages for session ${session.id}...\n`
-    );
-    try {
-      // Convert messages back to transcript format for OpenAI processing
-      const transcript = session.messages
-        .map(
-          (msg) =>
-            `[${new Date(msg.timestamp)
-              .toLocaleString("en-GB", {
-                day: "2-digit",
-                month: "2-digit",
-                year: "numeric",
-                hour: "2-digit",
-                minute: "2-digit",
-                second: "2-digit",
-              })
-              .replace(",", "")}] ${msg.role}: ${msg.content}`
-        )
-        .join("\n");
-
-      const processedData = await processTranscriptWithOpenAI(
-        session.id,
-        transcript
-      );
-
-      // Map sentiment string to float value for compatibility with existing data
-      const sentimentMap = {
-        positive: 0.8,
-        neutral: 0.0,
-        negative: -0.8,
-      };
-
-      // Update the session with processed data
-      await prisma.session.update({
-        where: { id: session.id },
-        data: {
-          language: processedData.language,
-          messagesSent: processedData.messages_sent,
-          sentiment: sentimentMap[processedData.sentiment] || 0,
-          sentimentCategory: processedData.sentiment,
-          escalated: processedData.escalated,
-          forwardedHr: processedData.forwarded_hr,
-          category: processedData.category,
-          questions: JSON.stringify(processedData.questions),
-          summary: processedData.summary,
-          processed: true,
-        },
-      });
-
-      process.stdout.write(
-        `[ProcessingScheduler] Successfully processed session ${session.id}.\n`
-      );
-      successCount++;
-    } catch (error) {
-      process.stderr.write(
-        `[ProcessingScheduler] Error processing session ${session.id}: ${error}\n`
-      );
-      errorCount++;
-    }
-  }
+  const successCount = results.filter((r) => r.success).length;
+  const errorCount = results.filter((r) => !r.success).length;

  process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
  process.stdout.write(
@@ -339,6 +386,9 @@ export async function processUnprocessedSessions() {
  process.stdout.write(
    `[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
  );
+  process.stdout.write(
+    `[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
+  );
 }

 /**
--- a/lib/processingScheduler.ts
+++ b/lib/processingScheduler.ts
@@ -1,14 +1,38 @@
-// node-cron job to process unprocessed sessions every hour
+// Session processing scheduler - TypeScript version
 import cron from "node-cron";
 import { PrismaClient } from "@prisma/client";
 import fetch from "node-fetch";
+import { readFileSync } from "fs";
+import { fileURLToPath } from "url";
+import { dirname, join } from "path";
+
+// Load environment variables from .env.local (one KEY=value pair per line).
+// Variables that are already set in process.env are never overwritten.
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const envPath = join(__dirname, '..', '.env.local');
+
+try {
+  const envFile = readFileSync(envPath, 'utf8');
+  // Split on LF or CRLF and trim each line before the '#' test, so indented
+  // comment lines are skipped instead of being parsed as variables.
+  const envVars = envFile
+    .split(/\r?\n/)
+    .map(line => line.trim())
+    .filter(line => line !== '' && !line.startsWith('#'));
+
+  for (const line of envVars) {
+    // Only the first '=' separates key from value; later ones belong to the value.
+    const [rawKey, ...valueParts] = line.split('=');
+    const key = (rawKey ?? '').trim();
+    if (key && valueParts.length > 0) {
+      const value = valueParts.join('=').trim();
+      if (!process.env[key]) {
+        process.env[key] = value;
+      }
+    }
+  }
+} catch {
+  // Deliberate best effort: silently continue if .env.local cannot be read
+  // (for example because it does not exist).
+}

 const prisma = new PrismaClient();
 const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
 const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

-// Define the expected response structure from OpenAI
-interface OpenAIProcessedData {
+interface ProcessedData {
  language: string;
  messages_sent: number;
  sentiment: "positive" | "neutral" | "negative";
@@ -20,16 +44,16 @@ interface OpenAIProcessedData {
  session_id: string;
 }

+/** Outcome of processing one session, as returned by processSingleSession. */
+interface ProcessingResult {
+  /** ID of the session this result belongs to. */
+  sessionId: string;
+  /** True when the session was processed and updated; false otherwise. */
+  success: boolean;
+  /** Failure description; set on the failure results built by processSingleSession. */
+  error?: string;
+}
+
 /**
 * Processes a session transcript using OpenAI API
- * @param sessionId The session ID
- * @param transcript The transcript content to process
- * @returns Processed data from OpenAI
 */
-async function processTranscriptWithOpenAI(
-  sessionId: string,
-  transcript: string
-): Promise<OpenAIProcessedData> {
+async function processTranscriptWithOpenAI(sessionId: string, transcript: string): Promise<ProcessedData> {
  if (!OPENAI_API_KEY) {
    throw new Error("OPENAI_API_KEY environment variable is not set");
  }
@@ -103,7 +127,7 @@ async function processTranscriptWithOpenAI(
      throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
    }

-    const data = (await response.json()) as any;
+    const data: any = await response.json();
    const processedData = JSON.parse(data.choices[0].message.content);

    // Validate the response against our expected schema
@@ -118,11 +142,8 @@ async function processTranscriptWithOpenAI(

 /**
 * Validates the OpenAI response against our expected schema
- * @param data The data to validate
 */
-function validateOpenAIResponse(
-  data: any
-): asserts data is OpenAIProcessedData {
+function validateOpenAIResponse(data: any): void {
  // Check required fields
  const requiredFields = [
    "language",
@@ -208,31 +229,146 @@ function validateOpenAIResponse(
  }
 }

+/**
+ * Analyse one session with OpenAI and persist the outcome.
+ *
+ * The session's messages are rendered as a timestamped transcript, sent to
+ * processTranscriptWithOpenAI, and the returned fields are written back to
+ * the session row together with `processed: true`. Errors raised while doing
+ * so are captured in the returned result instead of being rethrown.
+ */
+async function processSingleSession(session: any): Promise<ProcessingResult> {
+  if (session.messages.length === 0) {
+    return { sessionId: session.id, success: false, error: "Session has no messages" };
+  }
+
+  try {
+    // Rebuild the transcript: one "[date time] role: content" line per message.
+    const transcriptLines: string[] = [];
+    for (const message of session.messages) {
+      const stamp = new Date(message.timestamp)
+        .toLocaleString("en-GB", {
+          day: "2-digit",
+          month: "2-digit",
+          year: "numeric",
+          hour: "2-digit",
+          minute: "2-digit",
+          second: "2-digit",
+        })
+        .replace(",", "");
+      transcriptLines.push(`[${stamp}] ${message.role}: ${message.content}`);
+    }
+    const transcript = transcriptLines.join("\n");
+
+    const analysis = await processTranscriptWithOpenAI(session.id, transcript);
+
+    // Numeric sentiment kept alongside the label for compatibility with
+    // existing data; a missing or zero score is stored as 0.
+    const sentimentScores = {
+      positive: 0.8,
+      neutral: 0.0,
+      negative: -0.8,
+    };
+    const sentimentScore = sentimentScores[analysis.sentiment];
+
+    // Persist the analysis and flag the session as processed.
+    await prisma.session.update({
+      where: { id: session.id },
+      data: {
+        language: analysis.language,
+        messagesSent: analysis.messages_sent,
+        sentiment: sentimentScore ? sentimentScore : 0,
+        sentimentCategory: analysis.sentiment,
+        escalated: analysis.escalated,
+        forwardedHr: analysis.forwarded_hr,
+        category: analysis.category,
+        questions: JSON.stringify(analysis.questions),
+        summary: analysis.summary,
+        processed: true,
+      },
+    });
+
+    return { sessionId: session.id, success: true };
+  } catch (error) {
+    // Report the failure to the caller rather than throwing.
+    const reason = error instanceof Error ? error.message : String(error);
+    return { sessionId: session.id, success: false, error: reason };
+  }
+}
+
+/**
+ * Process sessions in parallel with concurrency limit.
+ *
+ * At most `maxConcurrency` calls to processSingleSession are in flight at any
+ * time. Each task removes itself from the in-flight set when it settles, so a
+ * new session is only started once a slot has actually been freed.
+ *
+ * @param sessions Sessions to process.
+ * @param maxConcurrency Maximum number of concurrent tasks; values below 1
+ *   (or NaN) are treated as 1.
+ * @returns One result per session, in input order.
+ */
+async function processSessionsInParallel(sessions: any[], maxConcurrency: number = 5): Promise<ProcessingResult[]> {
+  const limit = Math.max(1, Math.floor(maxConcurrency) || 1);
+  const results: Promise<ProcessingResult>[] = [];
+  const inFlight = new Set<Promise<ProcessingResult>>();
+
+  for (const session of sessions) {
+    const task: Promise<ProcessingResult> = processSingleSession(session)
+      .then((result) => {
+        process.stdout.write(
+          result.success
+            ? `[ProcessingScheduler] ✓ Successfully processed session ${result.sessionId}\n`
+            : `[ProcessingScheduler] ✗ Failed to process session ${result.sessionId}: ${result.error}\n`
+        );
+        return result;
+      })
+      // Free the slot when this task settles (runs after `task` is assigned).
+      .finally(() => {
+        inFlight.delete(task);
+      });
+
+    results.push(task);
+    inFlight.add(task);
+
+    // All slots taken: wait until any running task settles before continuing.
+    if (inFlight.size >= limit) {
+      await Promise.race(inFlight);
+    }
+  }
+
+  return Promise.all(results);
+}
+
 /**
 * Process unprocessed sessions
 */
-async function processUnprocessedSessions() {
+export async function processUnprocessedSessions(batchSize: number | null = null, maxConcurrency: number = 5): Promise<void> {
  process.stdout.write(
    "[ProcessingScheduler] Starting to process unprocessed sessions...\n"
  );

-  // Find sessions that have transcript content but haven't been processed
-  const sessionsToProcess = await prisma.session.findMany({
+  // Find sessions that have messages but haven't been processed
+  const queryOptions: any = {
    where: {
      AND: [
-        { transcriptContent: { not: null } },
-        { transcriptContent: { not: "" } },
-        { processed: { not: true } }, // Either false or null
+        { messages: { some: {} } }, // Must have messages
+        { processed: false }, // Only unprocessed sessions
      ],
    },
-    select: {
-      id: true,
-      transcriptContent: true,
+    include: {
+      messages: {
+        orderBy: { order: "asc" },
+      },
    },
-    take: 10, // Process in batches to avoid overloading the system
-  });
+  };

-  if (sessionsToProcess.length === 0) {
+  // Add batch size limit if specified
+  if (batchSize && batchSize > 0) {
+    queryOptions.take = batchSize;
+  }
+
+  const sessionsToProcess = await prisma.session.findMany(queryOptions);
+
+  // Filter to only sessions that have messages
+  const sessionsWithMessages = sessionsToProcess.filter(
+    (session: any) => session.messages && session.messages.length > 0
+  );
+
+  if (sessionsWithMessages.length === 0) {
    process.stdout.write(
      "[ProcessingScheduler] No sessions found requiring processing.\n"
    );
@@ -240,64 +376,15 @@ async function processUnprocessedSessions() {
  }

  process.stdout.write(
-    `[ProcessingScheduler] Found ${sessionsToProcess.length} sessions to process.\n`
+    `[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process (max concurrency: ${maxConcurrency}).\n`
  );
-  let successCount = 0;
-  let errorCount = 0;

-  for (const session of sessionsToProcess) {
-    if (!session.transcriptContent) {
-      // Should not happen due to query, but good for type safety
-      process.stderr.write(
-        `[ProcessingScheduler] Session ${session.id} has no transcript content, skipping.\n`
-      );
-      continue;
-    }
+  const startTime = Date.now();
+  const results = await processSessionsInParallel(sessionsWithMessages, maxConcurrency);
+  const endTime = Date.now();

-    process.stdout.write(
-      `[ProcessingScheduler] Processing transcript for session ${session.id}...\n`
-    );
-    try {
-      const processedData = await processTranscriptWithOpenAI(
-        session.id,
-        session.transcriptContent
-      );
-
-      // Map sentiment string to float value for compatibility with existing data
-      const sentimentMap: Record<string, number> = {
-        positive: 0.8,
-        neutral: 0.0,
-        negative: -0.8,
-      };
-
-      // Update the session with processed data
-      await prisma.session.update({
-        where: { id: session.id },
-        data: {
-          language: processedData.language,
-          messagesSent: processedData.messages_sent,
-          sentiment: sentimentMap[processedData.sentiment] || 0,
-          sentimentCategory: processedData.sentiment,
-          escalated: processedData.escalated,
-          forwardedHr: processedData.forwarded_hr,
-          category: processedData.category,
-          questions: JSON.stringify(processedData.questions),
-          summary: processedData.summary,
-          processed: true,
-        },
-      });
-
-      process.stdout.write(
-        `[ProcessingScheduler] Successfully processed session ${session.id}.\n`
-      );
-      successCount++;
-    } catch (error) {
-      process.stderr.write(
-        `[ProcessingScheduler] Error processing session ${session.id}: ${error}\n`
-      );
-      errorCount++;
-    }
-  }
+  const successCount = results.filter((r) => r.success).length;
+  const errorCount = results.filter((r) => !r.success).length;

  process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
  process.stdout.write(
@@ -306,12 +393,15 @@ async function processUnprocessedSessions() {
  process.stdout.write(
    `[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
  );
+  process.stdout.write(
+    `[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
+  );
 }

 /**
 * Start the processing scheduler
 */
-export function startProcessingScheduler() {
+export function startProcessingScheduler(): void {
  // Process unprocessed sessions every hour
  cron.schedule("0 * * * *", async () => {
    try {
--- a/lib/types.ts
+++ b/lib/types.ts
@@ -157,7 +157,7 @@ export interface MetricsResult {
  usersTrend?: number; // e.g., percentage change in uniqueUsers
  avgSessionTimeTrend?: number; // e.g., percentage change in avgSessionLength
  avgResponseTimeTrend?: number; // e.g., percentage change in avgResponseTime
-  
+
  // New metrics for enhanced dashboard
  avgDailyCosts?: number; // Average daily costs in euros
  peakUsageTime?: string; // Peak usage time (e.g., "14:00-15:00")