feat: Refactor data processing pipeline with AI cost tracking and enhanced session management

- Updated environment configuration to include Postgres database settings.
- Enhanced import processing to minimize field copying and rely on AI for analysis.
- Implemented detailed AI processing request tracking, including token usage and costs.
- Added new models for Question and SessionQuestion to manage user inquiries separately.
- Improved session processing scheduler with AI cost reporting functionality.
- Created a test script to validate the refactored pipeline and display processing statistics.
- Updated Prisma schema and migration files to reflect new database structure and relationships.
2026-03-03 10:21:29 +01:00 · 2025-06-27 21:15:44 +02:00
parent 601e2e4026
commit 6f9ac219c2
10 changed files with 747 additions and 198 deletions
--- a/lib/importProcessor.ts
+++ b/lib/importProcessor.ts
@@ -1,5 +1,5 @@
 // SessionImport to Session processor
-import { PrismaClient, ImportStatus, SentimentCategory } from "@prisma/client";
+import { PrismaClient, ImportStatus, SentimentCategory, SessionCategory } from "@prisma/client";
 import { getSchedulerConfig } from "./env";
 import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
 import cron from "node-cron";
@@ -38,8 +38,33 @@ function parseEuropeanDate(dateStr: string): Date {
  return date;
 }

+/**
+ * Fallback-only mapping of a raw sentiment label to a SentimentCategory: returns null for null/empty input, otherwise matches case-insensitively by substring ("positive" is tested before "negative") and treats every other non-empty value as NEUTRAL.
+ */
+function parseFallbackSentiment(sentimentRaw: string | null): SentimentCategory | null {
+  if (!sentimentRaw) return null; // null or "" -> no category
+  
+  const sentimentStr = sentimentRaw.toLowerCase(); // lowercase so matching is case-insensitive
+  if (sentimentStr.includes('positive')) { // substring match, not equality
+    return SentimentCategory.POSITIVE;
+  } else if (sentimentStr.includes('negative')) {
+    return SentimentCategory.NEGATIVE;
+  } else {
+    return SentimentCategory.NEUTRAL; // any other non-empty text falls through to NEUTRAL
+  }
+}
+
+/**
+ * Fallback-only parsing of a raw flag string: returns null for null/empty input, true when the lowercased value exactly equals one of 'true', '1', 'yes', 'escalated' or 'forwarded', and false for anything else.
+ */
+function parseFallbackBoolean(rawValue: string | null): boolean | null {
+  if (!rawValue) return null; // null or "" -> unknown rather than false
+  return ['true', '1', 'yes', 'escalated', 'forwarded'].includes(rawValue.toLowerCase()); // exact match after lowercasing; the value is not trimmed
+}
+
 /**
 * Process a single SessionImport record into a Session record
+ * NEW STRATEGY: Only copy minimal fields, let AI processing handle the rest
 */
 async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
  try {
@@ -49,34 +74,6 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea

    console.log(`[Import Processor] Parsed dates for ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);

-    // Process sentiment
-    let sentiment: number | null = null;
-    let sentimentCategory: SentimentCategory | null = null;
-    
-    if (importRecord.sentimentRaw) {
-      const sentimentStr = importRecord.sentimentRaw.toLowerCase();
-      if (sentimentStr.includes('positive')) {
-        sentiment = 0.8;
-        sentimentCategory = SentimentCategory.POSITIVE;
-      } else if (sentimentStr.includes('negative')) {
-        sentiment = -0.8;
-        sentimentCategory = SentimentCategory.NEGATIVE;
-      } else {
-        sentiment = 0.0;
-        sentimentCategory = SentimentCategory.NEUTRAL;
-      }
-    }
-
-    // Process boolean fields
-    const escalated = importRecord.escalatedRaw ? 
-      ['true', '1', 'yes', 'escalated'].includes(importRecord.escalatedRaw.toLowerCase()) : null;
-    
-    const forwardedHr = importRecord.forwardedHrRaw ? 
-      ['true', '1', 'yes', 'forwarded'].includes(importRecord.forwardedHrRaw.toLowerCase()) : null;
-
-    // Keep country code as-is, will be processed by OpenAI later
-    const country = importRecord.countryCode;
-
    // Fetch transcript content if URL is provided and not already fetched
    let transcriptContent = importRecord.rawTranscriptContent;
    if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
@@ -108,7 +105,8 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
      }
    }

-    // Create or update Session record
+    // Create or update Session record with MINIMAL processing
+    // Only copy fields that don't need AI analysis
    const session = await prisma.session.upsert({
      where: {
        importId: importRecord.id,
@@ -116,20 +114,22 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
      update: {
        startTime,
        endTime,
+        // Direct copies (minimal processing)
        ipAddress: importRecord.ipAddress,
-        country,
-        language: importRecord.language,
-        messagesSent: importRecord.messagesSent,
-        sentiment,
-        sentimentCategory,
-        escalated,
-        forwardedHr,
+        country: importRecord.countryCode, // Keep as country code
        fullTranscriptUrl: importRecord.fullTranscriptUrl,
        avgResponseTime: importRecord.avgResponseTimeSeconds,
-        tokens: importRecord.tokens,
-        tokensEur: importRecord.tokensEur,
-        category: importRecord.category,
        initialMsg: importRecord.initialMessage,
+        
+        // AI-processed fields: Leave empty, will be filled by AI processing
+        // language: null,        // AI will detect
+        // messagesSent: null,    // AI will count from Messages
+        // sentiment: null,       // AI will analyze
+        // escalated: null,       // AI will detect
+        // forwardedHr: null,     // AI will detect
+        // category: null,        // AI will categorize
+        // summary: null,         // AI will generate
+        
        processed: false, // Will be processed later by AI
      },
      create: {
@@ -137,20 +137,15 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
        importId: importRecord.id,
        startTime,
        endTime,
+        // Direct copies (minimal processing)
        ipAddress: importRecord.ipAddress,
-        country,
-        language: importRecord.language,
-        messagesSent: importRecord.messagesSent,
-        sentiment,
-        sentimentCategory,
-        escalated,
-        forwardedHr,
+        country: importRecord.countryCode, // Keep as country code
        fullTranscriptUrl: importRecord.fullTranscriptUrl,
        avgResponseTime: importRecord.avgResponseTimeSeconds,
-        tokens: importRecord.tokens,
-        tokensEur: importRecord.tokensEur,
-        category: importRecord.category,
        initialMsg: importRecord.initialMessage,
+        
+        // AI-processed fields: Leave empty, will be filled by AI processing
+        // All these will be null initially and filled by AI
        processed: false, // Will be processed later by AI
      },
    });