Refactor transcript fetching and processing scripts

- Introduced a new function `fetchTranscriptContent` to handle fetching transcripts with optional authentication.
- Enhanced error handling and logging for transcript fetching.
- Updated the `parseTranscriptToMessages` function to improve message parsing logic.
- Replaced the old session processing logic with a new approach that utilizes `SessionImport` records.
- Removed obsolete scripts related to manual triggers and whitespace fixing.
- Updated the server initialization to remove direct server handling, transitioning to a more modular approach.
- Improved overall code structure and readability across various scripts.
2026-06-14 14:35:43 +02:00 · 2025-06-27 16:38:16 +02:00
parent d7ac0ba208
commit 1dd618b666
35 changed files with 6536 additions and 12797 deletions
@@ -3,74 +3,82 @@ import cron from "node-cron";
 import { prisma } from "./prisma";
 import { fetchAndParseCsv } from "./csvFetcher";

-interface SessionCreateData {
-  id: string;
-  startTime: Date;
-  companyId: string;
-  [key: string]: unknown;
-}
-
 export function startScheduler() {
  cron.schedule("*/15 * * * *", async () => {
    const companies = await prisma.company.findMany();
    for (const company of companies) {
      try {
-        const sessions = await fetchAndParseCsv(
+        const rawSessionData = await fetchAndParseCsv(
          company.csvUrl,
          company.csvUsername as string | undefined,
          company.csvPassword as string | undefined
        );
-        // Only add sessions that don't already exist in the database
-        for (const session of sessions) {
-          const sessionData: SessionCreateData = {
-            ...session,
-            companyId: company.id,
-            id: session.id || session.sessionId || `sess_${Date.now()}`,
-            // Ensure startTime is not undefined
-            startTime: session.startTime || new Date(),
-          };

-          // Check if the session already exists
-          const existingSession = await prisma.session.findUnique({
-            where: { id: sessionData.id },
-          });
-
-          if (existingSession) {
-            // Skip this session as it already exists
-            continue;
+        // Create SessionImport records for new data
+        for (const rawSession of rawSessionData) {
+          try {
+            // Use upsert to handle duplicates gracefully
+            await prisma.sessionImport.upsert({
+              where: {
+                companyId_externalSessionId: {
+                  companyId: company.id,
+                  externalSessionId: rawSession.externalSessionId,
+                },
+              },
+              update: {
+                // Update existing record with latest data
+                startTimeRaw: rawSession.startTimeRaw,
+                endTimeRaw: rawSession.endTimeRaw,
+                ipAddress: rawSession.ipAddress,
+                countryCode: rawSession.countryCode,
+                language: rawSession.language,
+                messagesSent: rawSession.messagesSent,
+                sentimentRaw: rawSession.sentimentRaw,
+                escalatedRaw: rawSession.escalatedRaw,
+                forwardedHrRaw: rawSession.forwardedHrRaw,
+                fullTranscriptUrl: rawSession.fullTranscriptUrl,
+                avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
+                tokens: rawSession.tokens,
+                tokensEur: rawSession.tokensEur,
+                category: rawSession.category,
+                initialMessage: rawSession.initialMessage,
+                status: "QUEUED", // Reset status for reprocessing if needed
+              },
+              create: {
+                companyId: company.id,
+                externalSessionId: rawSession.externalSessionId,
+                startTimeRaw: rawSession.startTimeRaw,
+                endTimeRaw: rawSession.endTimeRaw,
+                ipAddress: rawSession.ipAddress,
+                countryCode: rawSession.countryCode,
+                language: rawSession.language,
+                messagesSent: rawSession.messagesSent,
+                sentimentRaw: rawSession.sentimentRaw,
+                escalatedRaw: rawSession.escalatedRaw,
+                forwardedHrRaw: rawSession.forwardedHrRaw,
+                fullTranscriptUrl: rawSession.fullTranscriptUrl,
+                avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
+                tokens: rawSession.tokens,
+                tokensEur: rawSession.tokensEur,
+                category: rawSession.category,
+                initialMessage: rawSession.initialMessage,
+                status: "QUEUED",
+              },
+            });
+          } catch (error) {
+            // Log individual session import errors but continue processing
+            process.stderr.write(
+              `[Scheduler] Failed to import session ${rawSession.externalSessionId} for company ${company.name}: ${error}\n`
+            );
          }
-
-          // Only include fields that are properly typed for Prisma
-          await prisma.session.create({
-            data: {
-              id: sessionData.id,
-              companyId: sessionData.companyId,
-              startTime: sessionData.startTime,
-              // endTime is required in the schema, so use startTime if not available
-              endTime: session.endTime || new Date(),
-              ipAddress: session.ipAddress || null,
-              country: session.country || null,
-              language: session.language || null,
-              sentiment:
-                typeof session.sentiment === "number"
-                  ? session.sentiment
-                  : null,
-              messagesSent:
-                typeof session.messagesSent === "number"
-                  ? session.messagesSent
-                  : 0,
-              category: session.category || null,
-            },
-          });
        }
-        // Using process.stdout.write instead of console.log to avoid ESLint warning
+
        process.stdout.write(
-          `[Scheduler] Refreshed sessions for company: ${company.name}\n`
+          `[Scheduler] Imported ${rawSessionData.length} session records for company: ${company.name}\n`
        );
      } catch (e) {
-        // Using process.stderr.write instead of console.error to avoid ESLint warning
        process.stderr.write(
-          `[Scheduler] Failed for company: ${company.name} - ${e}\n`
+          `[Scheduler] Failed to fetch CSV for company: ${company.name} - ${e}\n`
        );
      }
    }