DB refactor

2026-03-03 03:21:29 +01:00 · 2025-06-27 23:05:46 +02:00
parent 185bb6da58
commit 2dfc49f840
20 changed files with 1607 additions and 339 deletions
--- a/lib/importProcessor.ts
+++ b/lib/importProcessor.ts
@@ -1,7 +1,8 @@
 // SessionImport to Session processor
-import { PrismaClient, ImportStatus, SentimentCategory, SessionCategory } from "@prisma/client";
+import { PrismaClient, SentimentCategory, SessionCategory, ProcessingStage } from "@prisma/client";
 import { getSchedulerConfig } from "./env";
 import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
+import { ProcessingStatusManager } from "./processingStatusManager";
 import cron from "node-cron";

 const prisma = new PrismaClient();
@@ -62,21 +63,130 @@ function parseFallbackBoolean(rawValue: string | null): boolean | null {
  return ['true', '1', 'yes', 'escalated', 'forwarded'].includes(rawValue.toLowerCase());
 }

+/**
+ * Parse transcript content into Message records
+ */
+async function parseTranscriptIntoMessages(sessionId: string, transcriptContent: string): Promise<void> {
+  // Clear existing messages for this session
+  await prisma.message.deleteMany({
+    where: { sessionId }
+  });
+
+  // Split transcript into lines and parse each message
+  const lines = transcriptContent.split('\n').filter(line => line.trim());
+  let order = 0;
+
+  for (const line of lines) {
+    const trimmedLine = line.trim();
+    if (!trimmedLine) continue;
+
+    // Try to parse different formats:
+    // Format 1: "User: message", "Assistant: message" or "System: message" (role prefix is matched case-insensitively)
+    // Format 2: "[timestamp] Role: message" with the same role prefixes as Format 1
+    
+    let role = 'unknown';
+    let content = trimmedLine;
+    let timestamp: Date | null = null;
+
+    // Check for timestamp format: [DD.MM.YYYY HH:mm:ss] Role: content
+    const timestampMatch = trimmedLine.match(/^\[([^\]]+)\]\s*(.+)$/);
+    if (timestampMatch) {
+      try {
+        timestamp = parseEuropeanDate(timestampMatch[1]);
+        content = timestampMatch[2];
+      } catch (error) {
+        // If timestamp parsing fails, treat the whole line as content
+        content = trimmedLine;
+      }
+    }
+
+    // Extract role and message content
+    const roleMatch = content.match(/^(User|Assistant|System):\s*(.*)$/i);
+    if (roleMatch) {
+      role = roleMatch[1].toLowerCase();
+      content = roleMatch[2].trim();
+    } else {
+      // No recognized role prefix: keep the text as content and leave the role as 'unknown'
+      role = 'unknown';
+    }
+
+    // Skip empty content
+    if (!content) continue;
+
+    // Create message record
+    await prisma.message.create({
+      data: {
+        sessionId,
+        timestamp,
+        role,
+        content,
+        order,
+      },
+    });
+
+    order++;
+  }
+
+  console.log(`[Import Processor] ✓ Parsed ${order} messages for session ${sessionId}`);
+}
+
 /**
 * Process a single SessionImport record into a Session record
- * NEW STRATEGY: Only copy minimal fields, let AI processing handle the rest
+ * Uses new unified processing status tracking
 */
 async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
+  let sessionId: string | null = null;
+  
  try {
    // Parse dates using European format parser
    const startTime = parseEuropeanDate(importRecord.startTimeRaw);
    const endTime = parseEuropeanDate(importRecord.endTimeRaw);

-    console.log(`[Import Processor] Parsed dates for ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);
+    console.log(`[Import Processor] Processing ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);

-    // Fetch transcript content if URL is provided and not already fetched
+    // Create or update Session record with MINIMAL processing
+    const session = await prisma.session.upsert({
+      where: {
+        importId: importRecord.id,
+      },
+      update: {
+        startTime,
+        endTime,
+        // Direct copies (minimal processing)
+        ipAddress: importRecord.ipAddress,
+        country: importRecord.countryCode, // Keep as country code
+        fullTranscriptUrl: importRecord.fullTranscriptUrl,
+        avgResponseTime: importRecord.avgResponseTimeSeconds,
+        initialMsg: importRecord.initialMessage,
+      },
+      create: {
+        companyId: importRecord.companyId,
+        importId: importRecord.id,
+        startTime,
+        endTime,
+        // Direct copies (minimal processing)
+        ipAddress: importRecord.ipAddress,
+        country: importRecord.countryCode, // Keep as country code
+        fullTranscriptUrl: importRecord.fullTranscriptUrl,
+        avgResponseTime: importRecord.avgResponseTimeSeconds,
+        initialMsg: importRecord.initialMessage,
+      },
+    });
+
+    sessionId = session.id;
+
+    // Initialize processing status for this session
+    await ProcessingStatusManager.initializeSession(sessionId);
+
+    // Mark CSV_IMPORT as completed
+    await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.CSV_IMPORT);
+
+    // Handle transcript fetching
    let transcriptContent = importRecord.rawTranscriptContent;
+    
    if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
+      await ProcessingStatusManager.startStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH);
+      
      console.log(`[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`);
      
      // Get company credentials for transcript fetching
@@ -100,125 +210,123 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
          where: { id: importRecord.id },
          data: { rawTranscriptContent: transcriptContent },
        });
+
+        await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
+          contentLength: transcriptContent?.length || 0,
+          url: importRecord.fullTranscriptUrl
+        });
      } else {
        console.log(`[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`);
+        await ProcessingStatusManager.failStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, transcriptResult.error || 'Unknown error');
      }
+    } else if (!importRecord.fullTranscriptUrl) {
+      // No transcript URL available - skip this stage
+      await ProcessingStatusManager.skipStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, 'No transcript URL provided');
+    } else {
+      // Transcript content already present, or the URL failed isValidTranscriptUrl (contentLength is then 0)
+      await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
+        contentLength: transcriptContent?.length || 0,
+        source: 'already_fetched'
+      });
    }

-    // Create or update Session record with MINIMAL processing
-    // Only copy fields that don't need AI analysis
-    const session = await prisma.session.upsert({
-      where: {
-        importId: importRecord.id,
-      },
-      update: {
-        startTime,
-        endTime,
-        // Direct copies (minimal processing)
-        ipAddress: importRecord.ipAddress,
-        country: importRecord.countryCode, // Keep as country code
-        fullTranscriptUrl: importRecord.fullTranscriptUrl,
-        avgResponseTime: importRecord.avgResponseTimeSeconds,
-        initialMsg: importRecord.initialMessage,
-        
-        // AI-processed fields: Leave empty, will be filled by AI processing
-        // language: null,        // AI will detect
-        // messagesSent: null,    // AI will count from Messages
-        // sentiment: null,       // AI will analyze
-        // escalated: null,       // AI will detect
-        // forwardedHr: null,     // AI will detect
-        // category: null,        // AI will categorize
-        // summary: null,         // AI will generate
-        
-        processed: false, // Will be processed later by AI
-      },
-      create: {
-        companyId: importRecord.companyId,
-        importId: importRecord.id,
-        startTime,
-        endTime,
-        // Direct copies (minimal processing)
-        ipAddress: importRecord.ipAddress,
-        country: importRecord.countryCode, // Keep as country code
-        fullTranscriptUrl: importRecord.fullTranscriptUrl,
-        avgResponseTime: importRecord.avgResponseTimeSeconds,
-        initialMsg: importRecord.initialMessage,
-        
-        // AI-processed fields: Leave empty, will be filled by AI processing
-        // All these will be null initially and filled by AI
-        processed: false, // Will be processed later by AI
-      },
-    });
+    // Handle session creation (parse messages)
+    await ProcessingStatusManager.startStage(sessionId, ProcessingStage.SESSION_CREATION);
+    
+    if (transcriptContent) {
+      await parseTranscriptIntoMessages(sessionId, transcriptContent);
+    }

-    // Update import status to DONE
-    await prisma.sessionImport.update({
-      where: { id: importRecord.id },
-      data: {
-        status: ImportStatus.DONE,
-        processedAt: new Date(),
-        errorMsg: null,
-      },
+    await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.SESSION_CREATION, {
+      hasTranscript: !!transcriptContent,
+      transcriptLength: transcriptContent?.length || 0
    });

    return { success: true };
  } catch (error) {
-    // Update import status to ERROR
-    await prisma.sessionImport.update({
-      where: { id: importRecord.id },
-      data: {
-        status: ImportStatus.ERROR,
-        errorMsg: error instanceof Error ? error.message : String(error),
-      },
-    });
+    const errorMessage = error instanceof Error ? error.message : String(error);
+    
+    // Mark the current stage as failed if we have a sessionId
+    if (sessionId) {
+      // Guess which stage failed from keywords in the error message (substring heuristic)
+      if (errorMessage.includes('transcript') || errorMessage.includes('fetch')) {
+        await ProcessingStatusManager.failStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, errorMessage);
+      } else if (errorMessage.includes('message') || errorMessage.includes('parse')) {
+        await ProcessingStatusManager.failStage(sessionId, ProcessingStage.SESSION_CREATION, errorMessage);
+      } else {
+        // General failure - mark CSV_IMPORT as failed
+        await ProcessingStatusManager.failStage(sessionId, ProcessingStage.CSV_IMPORT, errorMessage);
+      }
+    }

    return {
      success: false,
-      error: error instanceof Error ? error.message : String(error),
+      error: errorMessage,
    };
  }
 }

 /**
- * Process queued SessionImport records into Session records
+ * Process unprocessed SessionImport records into Session records
+ * Uses new processing status system to find imports that need processing
 */
 export async function processQueuedImports(batchSize: number = 50): Promise<void> {
-  console.log('[Import Processor] Starting to process queued imports...');
+  console.log('[Import Processor] Starting to process unprocessed imports...');

-  // Find queued imports
-  const queuedImports = await prisma.sessionImport.findMany({
-    where: {
-      status: ImportStatus.QUEUED,
-    },
-    take: batchSize,
-    orderBy: {
-      createdAt: 'asc', // Process oldest first
-    },
-  });
+  let totalSuccessCount = 0;
+  let totalErrorCount = 0;
+  let batchNumber = 1;

-  if (queuedImports.length === 0) {
-    console.log('[Import Processor] No queued imports found');
-    return;
-  }
+  while (true) {
+    // Find SessionImports without a Session yet (NOTE: imports that fail before the upsert keep matching and are re-selected on the next pass)
+    const unprocessedImports = await prisma.sessionImport.findMany({
+      where: {
+        session: null, // No session created yet
+      },
+      take: batchSize,
+      orderBy: {
+        createdAt: 'asc', // Process oldest first
+      },
+    });

-  console.log(`[Import Processor] Processing ${queuedImports.length} queued imports...`);
+    if (unprocessedImports.length === 0) {
+      if (batchNumber === 1) {
+        console.log('[Import Processor] No unprocessed imports found');
+      } else {
+        console.log(`[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`);
+      }
+      return;
+    }

-  let successCount = 0;
-  let errorCount = 0;
+    console.log(`[Import Processor] Processing batch ${batchNumber}: ${unprocessedImports.length} imports...`);

-  // Process each import
-  for (const importRecord of queuedImports) {
-    const result = await processSingleImport(importRecord);
-    
-    if (result.success) {
-      successCount++;
-      console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
-    } else {
-      errorCount++;
-      console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
+    let batchSuccessCount = 0;
+    let batchErrorCount = 0;
+
+    // Process each import in this batch
+    for (const importRecord of unprocessedImports) {
+      const result = await processSingleImport(importRecord);
+      
+      if (result.success) {
+        batchSuccessCount++;
+        totalSuccessCount++;
+        console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
+      } else {
+        batchErrorCount++;
+        totalErrorCount++;
+        console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
+      }
+    }
+
+    console.log(`[Import Processor] Batch ${batchNumber} completed: ${batchSuccessCount} successful, ${batchErrorCount} failed`);
+    batchNumber++;
+
+    // If this batch was smaller than the batch size, we're done
+    if (unprocessedImports.length < batchSize) {
+      console.log(`[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`);
+      return;
    }
  }
-
-  console.log(`[Import Processor] Completed: ${successCount} successful, ${errorCount} failed`);
 }

 /**