Refactor transcript fetching and processing scripts

- Introduced a new helper, `fetchTranscriptContent`, that fetches transcripts with optional HTTP Basic authentication (see the usage sketch below).
- Improved error handling and logging around transcript fetching.
- Updated `parseTranscriptToMessages` to parse `[timestamp] role: content` lines and to treat non-matching lines as continuations of the previous message.
- Replaced the old session processing logic with a flow driven by `SessionImport` records: the scheduler picks up QUEUED imports, fetches and analyzes their transcripts, and upserts the corresponding `Session` rows.
- Removed obsolete scripts for manual triggers and trailing-whitespace cleanup.
- Updated server initialization to drop direct server handling in favor of a more modular setup.
- Improved code structure and readability across the remaining scripts.
Max Kowalski committed on 2025-06-27 16:38:16 +02:00
commit 1dd618b666 (parent d7ac0ba208)
35 changed files with 6536 additions and 12797 deletions
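
For orientation, here is a minimal sketch (not part of the commit) of how the refactored helpers fit together: an optionally authenticated transcript fetch mirroring `fetchTranscriptContent`, followed by the `[timestamp] role: content` line match that `parseTranscriptToMessages` applies first. The URL, credentials, and sample transcript line are illustrative placeholders, and the sketch assumes the global `fetch` available in Node 18+ (the repository scripts import `node-fetch` instead).

// Sketch only: mirrors the helpers introduced in this commit; the URL,
// credentials, and sample line below are placeholders, not repository values.
async function fetchTranscript(
  url: string,
  username?: string,
  password?: string
): Promise<string | null> {
  // Optional HTTP Basic auth, as in fetchTranscriptContent
  const authHeader =
    username && password
      ? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
      : undefined;
  const response = await fetch(url, {
    headers: authHeader ? { Authorization: authHeader } : {},
  });
  return response.ok ? await response.text() : null;
}

// The primary line format the parser matches: [timestamp] role: content
const sampleLine = "[27-06-2025 14:03:12] User: How many vacation days do I have left?";
const match = sampleLine.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
if (match) {
  const [, timestamp, role, content] = match;
  console.log({ timestamp, role: role.trim(), content: content.trim() });
}

// Example call with a placeholder URL and credentials:
fetchTranscript("https://example.com/transcripts/abc.txt", "user", "pass")
  .then((text) => console.log(text ? `fetched ${text.length} chars` : "no transcript"));

The processing scheduler then consumes `SessionImport` rows in status QUEUED, marks them PROCESSING, and records DONE or ERROR after upserting the matching `Session`, as shown in the diffs below.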


@ -1,73 +0,0 @@
// Script to check what's in the transcript files
// Usage: node scripts/check-transcript-content.js
import { PrismaClient } from '@prisma/client';
import fetch from 'node-fetch';
const prisma = new PrismaClient();
async function checkTranscriptContent() {
try {
// Get a few sessions without messages
const sessions = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ messages: { none: {} } },
]
},
include: { company: true },
take: 3,
});
for (const session of sessions) {
console.log(`\n📄 Checking session ${session.id}:`);
console.log(` URL: ${session.fullTranscriptUrl}`);
try {
const authHeader = session.company.csvUsername && session.company.csvPassword
? "Basic " + Buffer.from(`${session.company.csvUsername}:${session.company.csvPassword}`).toString("base64")
: undefined;
const response = await fetch(session.fullTranscriptUrl, {
headers: authHeader ? { Authorization: authHeader } : {},
timeout: 10000,
});
if (!response.ok) {
console.log(` ❌ HTTP ${response.status}: ${response.statusText}`);
continue;
}
const content = await response.text();
console.log(` 📏 Content length: ${content.length} characters`);
if (content.length === 0) {
console.log(` ⚠️ Empty file`);
} else if (content.length < 100) {
console.log(` 📝 Full content: "${content}"`);
} else {
console.log(` 📝 First 200 chars: "${content.substring(0, 200)}..."`);
}
// Check if it matches our expected format
const lines = content.split('\n').filter(line => line.trim());
const formatMatches = lines.filter(line =>
line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/)
);
console.log(` 🔍 Lines total: ${lines.length}, Format matches: ${formatMatches.length}`);
} catch (error) {
console.log(` ❌ Error: ${error.message}`);
}
}
} catch (error) {
console.error('❌ Error:', error);
} finally {
await prisma.$disconnect();
}
}
checkTranscriptContent();


@ -1,185 +0,0 @@
// Script to fetch transcripts and parse them into messages
// Usage: node scripts/fetch-and-parse-transcripts.js
import { PrismaClient } from '@prisma/client';
import fetch from 'node-fetch';
const prisma = new PrismaClient();
/**
* Fetches transcript content from a URL
*/
async function fetchTranscriptContent(url, username, password) {
try {
const authHeader = username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
timeout: 10000,
});
if (!response.ok) {
console.log(`❌ Failed to fetch ${url}: ${response.status} ${response.statusText}`);
return null;
}
return await response.text();
} catch (error) {
console.log(`❌ Error fetching ${url}: ${error.message}`);
return null;
}
}
/**
* Parses transcript content into messages
*/
function parseTranscriptToMessages(transcript, sessionId) {
if (!transcript || transcript.trim() === '') {
return [];
}
const lines = transcript.split('\n').filter(line => line.trim());
const messages = [];
let messageOrder = 0;
let currentTimestamp = new Date();
for (const line of lines) {
// Try format 1: [DD-MM-YYYY HH:MM:SS] Role: Content
const timestampMatch = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
if (timestampMatch) {
const [, timestamp, role, content] = timestampMatch;
// Parse timestamp (DD-MM-YYYY HH:MM:SS)
const dateMatch = timestamp.match(/^(\d{1,2})-(\d{1,2})-(\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/);
let parsedTimestamp = new Date();
if (dateMatch) {
const [, day, month, year, hour, minute, second] = dateMatch;
parsedTimestamp = new Date(
parseInt(year),
parseInt(month) - 1, // Month is 0-indexed
parseInt(day),
parseInt(hour),
parseInt(minute),
parseInt(second)
);
}
messages.push({
sessionId,
role: role.trim().toLowerCase(),
content: content.trim(),
timestamp: parsedTimestamp,
order: messageOrder++,
});
continue;
}
// Try format 2: Role: Content (simple format)
const simpleMatch = line.match(/^([^:]+):\s*(.+)$/);
if (simpleMatch) {
const [, role, content] = simpleMatch;
// Use incremental timestamps (add 1 minute per message)
currentTimestamp = new Date(currentTimestamp.getTime() + 60000);
messages.push({
sessionId,
role: role.trim().toLowerCase(),
content: content.trim(),
timestamp: new Date(currentTimestamp),
order: messageOrder++,
});
}
}
return messages;
}
/**
* Process sessions without messages
*/
async function fetchAndParseTranscripts() {
try {
console.log('🔍 Finding sessions without messages...\n');
// Get sessions that have fullTranscriptUrl but no messages
const sessionsWithoutMessages = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ messages: { none: {} } }, // No messages
]
},
include: {
company: true,
},
take: 20, // Process 20 at a time to avoid overwhelming
});
if (sessionsWithoutMessages.length === 0) {
console.log('✅ All sessions with transcript URLs already have messages!');
return;
}
console.log(`📥 Found ${sessionsWithoutMessages.length} sessions to process\n`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsWithoutMessages) {
console.log(`📄 Processing session ${session.id.substring(0, 8)}...`);
try {
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
session.fullTranscriptUrl,
session.company.csvUsername,
session.company.csvPassword
);
if (!transcriptContent) {
console.log(` ⚠️ No transcript content available`);
errorCount++;
continue;
}
// Parse transcript into messages
const messages = parseTranscriptToMessages(transcriptContent, session.id);
if (messages.length === 0) {
console.log(` ⚠️ No messages found in transcript`);
errorCount++;
continue;
}
// Save messages to database
await prisma.message.createMany({
data: messages,
});
console.log(` ✅ Added ${messages.length} messages`);
successCount++;
} catch (error) {
console.log(` ❌ Error: ${error.message}`);
errorCount++;
}
}
console.log(`\n📊 Results:`);
console.log(` ✅ Successfully processed: ${successCount} sessions`);
console.log(` ❌ Failed to process: ${errorCount} sessions`);
console.log(`\n💡 Now you can run the processing scheduler to analyze these sessions!`);
} catch (error) {
console.error('❌ Error:', error);
} finally {
await prisma.$disconnect();
}
}
fetchAndParseTranscripts();


@ -1,83 +1,182 @@
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
async function main() {
console.log("Starting to fetch missing transcripts...");
/**
* Fetches transcript content from a URL with optional authentication
*/
async function fetchTranscriptContent(
url: string,
username?: string,
password?: string
): Promise<string | null> {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const sessionsToUpdate = await prisma.session.findMany({
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!response.ok) {
console.warn(`Failed to fetch transcript from ${url}: ${response.statusText}`);
return null;
}
return await response.text();
} catch (error) {
console.warn(`Error fetching transcript from ${url}:`, error);
return null;
}
}
/**
* Parse transcript content into individual messages
*/
function parseTranscriptToMessages(transcriptContent: string): Array<{
timestamp: Date | null;
role: string;
content: string;
order: number;
}> {
const lines = transcriptContent.split('\n').filter(line => line.trim());
const messages: Array<{
timestamp: Date | null;
role: string;
content: string;
order: number;
}> = [];
let order = 0;
for (const line of lines) {
// Try to parse lines in format: [timestamp] role: content
const match = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
if (match) {
const [, timestampStr, role, content] = match;
// Try to parse the timestamp
let timestamp: Date | null = null;
try {
timestamp = new Date(timestampStr);
if (isNaN(timestamp.getTime())) {
timestamp = null;
}
} catch {
timestamp = null;
}
messages.push({
timestamp,
role: role.trim(),
content: content.trim(),
order: order++,
});
} else {
// If line doesn't match expected format, treat as content continuation
if (messages.length > 0) {
messages[messages.length - 1].content += '\n' + line;
} else {
// First line doesn't match format, create a generic message
messages.push({
timestamp: null,
role: 'unknown',
content: line,
order: order++,
});
}
}
}
return messages;
}
/**
* Main function to fetch transcripts for sessions that don't have messages yet
*/
async function fetchTranscriptsForSessions() {
console.log("Starting to fetch transcripts for sessions without messages...");
// Find sessions that have transcript URLs but no messages
const sessionsNeedingTranscripts = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ fullTranscriptUrl: { not: "" } }, // Ensure URL is not an empty string
{ transcriptContent: null },
{ messages: { none: {} } }, // No messages yet
],
},
select: {
id: true,
fullTranscriptUrl: true,
include: {
company: true,
messages: true,
},
});
if (sessionsToUpdate.length === 0) {
console.log("No sessions found requiring transcript fetching.");
if (sessionsNeedingTranscripts.length === 0) {
console.log("No sessions found that need transcript fetching.");
return;
}
console.log(`Found ${sessionsToUpdate.length} sessions to update.`);
console.log(`Found ${sessionsNeedingTranscripts.length} sessions that need transcript fetching.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToUpdate) {
for (const session of sessionsNeedingTranscripts) {
if (!session.fullTranscriptUrl) {
// Should not happen due to query, but good for type safety
console.warn(`Session ${session.id} has no fullTranscriptUrl, skipping.`);
console.warn(`Session ${session.id} has no transcript URL, skipping.`);
continue;
}
console.log(
`Fetching transcript for session ${session.id} from ${session.fullTranscriptUrl}...`
);
console.log(`Fetching transcript for session ${session.id}...`);
try {
const response = await fetch(session.fullTranscriptUrl);
if (!response.ok) {
console.error(
`Failed to fetch transcript for session ${session.id}: ${response.status} ${response.statusText}`
);
const errorBody = await response.text();
console.error(`Error details: ${errorBody.substring(0, 500)}`); // Log first 500 chars of error
errorCount++;
continue;
}
const transcriptText = await response.text();
if (transcriptText.trim() === "") {
console.warn(
`Fetched empty transcript for session ${session.id}. Storing as empty string.`
);
}
await prisma.session.update({
where: { id: session.id },
data: { transcriptContent: transcriptText },
});
console.log(
`Successfully fetched and stored transcript for session ${session.id}.`
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
session.fullTranscriptUrl,
session.company.csvUsername || undefined,
session.company.csvPassword || undefined
);
if (!transcriptContent) {
throw new Error("Failed to fetch transcript content");
}
// Parse transcript into messages
const messages = parseTranscriptToMessages(transcriptContent);
if (messages.length === 0) {
throw new Error("No messages found in transcript");
}
// Create messages in database
await prisma.message.createMany({
data: messages.map(msg => ({
sessionId: session.id,
timestamp: msg.timestamp,
role: msg.role,
content: msg.content,
order: msg.order,
})),
});
console.log(`Successfully fetched transcript for session ${session.id} (${messages.length} messages)`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
console.error(`Error fetching transcript for session ${session.id}:`, error);
errorCount++;
}
}
console.log("Transcript fetching complete.");
console.log(`Successfully updated: ${successCount} sessions.`);
console.log(`Failed to update: ${errorCount} sessions.`);
console.log(`Successfully fetched: ${successCount} transcripts.`);
console.log(`Failed to fetch: ${errorCount} transcripts.`);
}
main()
// Run the main function
fetchTranscriptsForSessions()
.catch((e) => {
console.error("An error occurred during the script execution:", e);
process.exitCode = 1;


@ -1,68 +0,0 @@
// Fix Trailing Whitespace
// This script removes trailing whitespace from specified file types
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Configure which file types to process
const fileTypes = [".ts", ".tsx", ".js", ".jsx", ".json", ".md", ".css"];
// Configure directories to ignore
const ignoreDirs = ["node_modules", ".next", ".git", "out", "build", "dist"];
// Recursively process directories
async function processDirectory(dir) {
try {
const files = await fs.promises.readdir(dir, { withFileTypes: true });
for (const file of files) {
const fullPath = path.join(dir, file.name);
// Skip ignored directories
if (file.isDirectory()) {
if (!ignoreDirs.includes(file.name)) {
await processDirectory(fullPath);
}
continue;
}
// Process only files with matching extensions
const ext = path.extname(file.name);
if (!fileTypes.includes(ext)) {
continue;
}
try {
// Read and process the file
const content = await fs.promises.readFile(fullPath, "utf8");
// Remove trailing whitespace from each line
const processedContent = content
.split("\n")
.map((line) => line.replace(/\s+$/, ""))
.join("\n");
// Only write if changes were made
if (processedContent !== content) {
await fs.promises.writeFile(fullPath, processedContent, "utf8");
console.log(`Fixed trailing whitespace in ${fullPath}`);
}
} catch (fileError) {
console.error(`Error processing file ${fullPath}:`, fileError);
}
}
} catch (dirError) {
console.error(`Error reading directory ${dir}:`, dirError);
}
}
// Start processing from root directory
const rootDir = process.cwd();
console.log(`Starting whitespace cleanup from ${rootDir}`);
processDirectory(rootDir)
.then(() => console.log("Whitespace cleanup completed"))
.catch((err) => console.error("Error in whitespace cleanup:", err));


@ -1,38 +0,0 @@
// Simple script to test the manual processing trigger
// Usage: node scripts/manual-trigger-test.js
import fetch from 'node-fetch';
async function testManualTrigger() {
try {
console.log('Testing manual processing trigger...');
const response = await fetch('http://localhost:3000/api/admin/trigger-processing', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
// Note: In a real scenario, you'd need to include authentication cookies
// For testing, you might need to login first and copy the session cookie
},
body: JSON.stringify({
batchSize: 5, // Process max 5 sessions
maxConcurrency: 3 // Use 3 concurrent workers
})
});
const result = await response.json();
if (response.ok) {
console.log('✅ Manual trigger successful:');
console.log(JSON.stringify(result, null, 2));
} else {
console.log('❌ Manual trigger failed:');
console.log(JSON.stringify(result, null, 2));
}
} catch (error) {
console.error('❌ Error testing manual trigger:', error.message);
}
}
testManualTrigger();


@ -1,243 +0,0 @@
// Manual trigger scripts for both schedulers
import { fetchAndStoreSessionsForAllCompanies } from "../lib/csvFetcher.js";
import { processAllUnparsedTranscripts } from "../lib/transcriptParser.js";
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');
try {
const envFile = readFileSync(envPath, 'utf8');
const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
envVars.forEach(line => {
const [key, ...valueParts] = line.split('=');
if (key && valueParts.length > 0) {
const value = valueParts.join('=').trim();
if (!process.env[key.trim()]) {
process.env[key.trim()] = value;
}
}
});
console.log("✅ Environment variables loaded from .env.local");
} catch (error) {
console.warn("⚠️ Could not load .env.local file:", error.message);
}
const prisma = new PrismaClient();
/**
* Manually trigger the session refresh scheduler
*/
async function triggerSessionRefresh() {
console.log("=== Manual Session Refresh Trigger ===");
try {
await fetchAndStoreSessionsForAllCompanies();
console.log("✅ Session refresh completed successfully");
} catch (error) {
console.error("❌ Session refresh failed:", error);
}
}
/**
* Manually trigger the processing scheduler
*/
async function triggerProcessingScheduler() {
console.log("=== Manual Processing Scheduler Trigger ===");
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
console.error("❌ OPENAI_API_KEY environment variable is not set");
return;
}
try {
// Find sessions that need processing
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ messages: { some: {} } },
{
OR: [
{ processed: false },
{ processed: null }
]
}
],
},
select: {
id: true,
processed: true,
},
take: 5, // Process 5 sessions for manual testing
});
console.log(`Found ${sessionsToProcess.length} sessions to process:`);
sessionsToProcess.forEach((session) => {
console.log(`- Session ${session.id}: processed=${session.processed}`);
});
if (sessionsToProcess.length === 0) {
console.log("✅ No sessions found requiring processing");
return;
}
// Import and run the processing function
const { processUnprocessedSessions } = await import(
"../lib/processingScheduler.js"
);
await processUnprocessedSessions();
console.log("✅ Processing scheduler completed");
} catch (error) {
console.error("❌ Processing scheduler failed:", error);
}
}
/**
* Manually trigger transcript parsing
*/
async function triggerTranscriptParsing() {
console.log("=== Manual Transcript Parsing Trigger ===");
try {
const result = await processAllUnparsedTranscripts();
console.log(
`✅ Transcript parsing completed: ${result.processed} processed, ${result.errors} errors`
);
} catch (error) {
console.error("❌ Transcript parsing failed:", error);
}
}
/**
* Show current processing status
*/
async function showProcessingStatus() {
console.log("=== Processing Status ===");
try {
const totalSessions = await prisma.session.count();
const processedSessions = await prisma.session.count({
where: { processed: true },
});
const unprocessedSessions = await prisma.session.count({
where: {
OR: [
{ processed: false },
{ processed: null }
]
},
});
const withMessages = await prisma.session.count({
where: {
messages: {
some: {},
},
},
});
const readyForProcessing = await prisma.session.count({
where: {
AND: [
{ messages: { some: {} } },
{
OR: [
{ processed: false },
{ processed: null }
]
}
],
},
});
console.log(`📊 Total sessions: ${totalSessions}`);
console.log(`✅ Processed sessions: ${processedSessions}`);
console.log(`⏳ Unprocessed sessions: ${unprocessedSessions}`);
console.log(`📄 Sessions with messages: ${withMessages}`);
console.log(`🔄 Ready for processing: ${readyForProcessing}`);
// Show some examples of unprocessed sessions
if (readyForProcessing > 0) {
console.log("\n📋 Sample unprocessed sessions:");
const samples = await prisma.session.findMany({
where: {
AND: [
{ messages: { some: {} } },
{
OR: [
{ processed: false },
{ processed: null }
]
}
],
},
select: {
id: true,
processed: true,
startTime: true,
},
take: 3,
});
samples.forEach((session) => {
console.log(
`- ${session.id} (${session.startTime.toISOString()}) - processed: ${session.processed}`
);
});
}
} catch (error) {
console.error("❌ Failed to get processing status:", error);
}
}
// Main execution based on command line argument
const command = process.argv[2];
switch (command) {
case "refresh":
await triggerSessionRefresh();
break;
case "process":
await triggerProcessingScheduler();
break;
case "parse":
await triggerTranscriptParsing();
break;
case "status":
await showProcessingStatus();
break;
case "both":
await triggerSessionRefresh();
console.log("\n" + "=".repeat(50) + "\n");
await triggerProcessingScheduler();
break;
case "all":
await triggerSessionRefresh();
console.log("\n" + "=".repeat(50) + "\n");
await triggerTranscriptParsing();
console.log("\n" + "=".repeat(50) + "\n");
await triggerProcessingScheduler();
break;
default:
console.log("Usage: node scripts/manual-triggers.js [command]");
console.log("Commands:");
console.log(
" refresh - Trigger session refresh (fetch new sessions from CSV)"
);
console.log(" parse - Parse transcripts into structured messages");
console.log(
" process - Trigger processing scheduler (process unprocessed sessions)"
);
console.log(" status - Show current processing status");
console.log(" both - Run both refresh and processing");
console.log(" all - Run refresh, parse, and processing in sequence");
break;
}
await prisma.$disconnect();


@ -1,283 +0,0 @@
// Script to manually process unprocessed sessions with OpenAI
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
/**
* Processes a session transcript using OpenAI API
* @param {string} sessionId The session ID
* @param {string} transcript The transcript content to process
* @returns {Promise<Object>} Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(sessionId, transcript) {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)
Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;
try {
const response = await fetch(OPENAI_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4-turbo",
messages: [
{
role: "system",
content: systemMessage,
},
{
role: "user",
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
response_format: { type: "json_object" },
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
console.error(`Error processing transcript with OpenAI:`, error);
throw error;
}
}
/**
* Validates the OpenAI response against our expected schema
* @param {Object} data The data to validate
*/
function validateOpenAIResponse(data) {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
];
for (const field of requiredFields) {
if (!(field in data)) {
throw new Error(`Missing required field: ${field}`);
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
}
if (typeof data.escalated !== "boolean") {
throw new Error("Invalid escalated. Expected boolean");
}
if (typeof data.forwarded_hr !== "boolean") {
throw new Error("Invalid forwarded_hr. Expected boolean");
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
}
if (typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
}
}
/**
* Main function to process unprocessed sessions
*/
async function processUnprocessedSessions() {
console.log("Starting to process unprocessed sessions...");
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
],
},
select: {
id: true,
transcriptContent: true,
},
});
if (sessionsToProcess.length === 0) {
console.log("No sessions found requiring processing.");
return;
}
console.log(`Found ${sessionsToProcess.length} sessions to process.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
console.warn(
`Session ${session.id} has no transcript content, skipping.`
);
continue;
}
console.log(`Processing transcript for session ${session.id}...`);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
console.log(`Successfully processed session ${session.id}.`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
errorCount++;
}
}
console.log("Session processing complete.");
console.log(`Successfully processed: ${successCount} sessions.`);
console.log(`Failed to process: ${errorCount} sessions.`);
}
// Run the main function
processUnprocessedSessions()
.catch((e) => {
console.error("An error occurred during the script execution:", e);
process.exitCode = 1;
})
.finally(async () => {
await prisma.$disconnect();
});


@ -18,11 +18,37 @@ interface OpenAIProcessedData {
session_id: string;
}
/**
* Fetches transcript content from a URL
*/
async function fetchTranscriptContent(
url: string,
username?: string,
password?: string
): Promise<string | null> {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!response.ok) {
console.warn(`Failed to fetch transcript from ${url}: ${response.statusText}`);
return null;
}
return await response.text();
} catch (error) {
console.warn(`Error fetching transcript from ${url}:`, error);
return null;
}
}
/**
* Processes a session transcript using OpenAI API
* @param sessionId The session ID
* @param transcript The transcript content to process
* @returns Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(
sessionId: string,
@ -32,7 +58,6 @@ async function processTranscriptWithOpenAI(
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
@ -91,7 +116,7 @@ async function processTranscriptWithOpenAI(
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
temperature: 0.3,
response_format: { type: "json_object" },
}),
});
@ -104,9 +129,7 @@ async function processTranscriptWithOpenAI(
const data = (await response.json()) as any;
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
console.error(`Error processing transcript with OpenAI:`, error);
@ -116,22 +139,11 @@ async function processTranscriptWithOpenAI(
/**
* Validates the OpenAI response against our expected schema
* @param data The data to validate
*/
function validateOpenAIResponse(
data: any
): asserts data is OpenAIProcessedData {
// Check required fields
function validateOpenAIResponse(data: any): asserts data is OpenAIProcessedData {
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
"language", "messages_sent", "sentiment", "escalated",
"forwarded_hr", "category", "questions", "summary", "session_id"
];
for (const field of requiredFields) {
@ -140,11 +152,8 @@ function validateOpenAIResponse(
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
throw new Error("Invalid language format. Expected ISO 639-1 code (e.g., 'en')");
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
@ -152,9 +161,7 @@ function validateOpenAIResponse(
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
throw new Error("Invalid sentiment. Expected 'positive', 'neutral', or 'negative'");
}
if (typeof data.escalated !== "boolean") {
@ -166,39 +173,22 @@ function validateOpenAIResponse(
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
"Schedule & Hours", "Leave & Vacation", "Sick Leave & Recovery",
"Salary & Compensation", "Contract & Hours", "Onboarding", "Offboarding",
"Workwear & Staff Pass", "Team & Contacts", "Personal Questions",
"Access & Login", "Social questions", "Unrecognized / Other"
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
throw new Error(`Invalid category. Expected one of: ${validCategories.join(", ")}`);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
if (typeof data.summary !== "string" || data.summary.length < 10 || data.summary.length > 300) {
throw new Error("Invalid summary. Expected string between 10-300 characters");
}
if (typeof data.session_id !== "string") {
@ -207,86 +197,146 @@ function validateOpenAIResponse(
}
/**
* Main function to process unprocessed sessions
* Main function to process SessionImport records that need processing
*/
async function processUnprocessedSessions() {
console.log("Starting to process unprocessed sessions...");
console.log("Starting to process unprocessed SessionImport records...");
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
// Find SessionImport records that are QUEUED and have transcript URLs
const importsToProcess = await prisma.sessionImport.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
],
status: "QUEUED",
fullTranscriptUrl: { not: null },
},
select: {
id: true,
transcriptContent: true,
include: {
company: true,
},
});
if (sessionsToProcess.length === 0) {
console.log("No sessions found requiring processing.");
if (importsToProcess.length === 0) {
console.log("No SessionImport records found requiring processing.");
return;
}
console.log(`Found ${sessionsToProcess.length} sessions to process.`);
console.log(`Found ${importsToProcess.length} SessionImport records to process.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
console.warn(
`Session ${session.id} has no transcript content, skipping.`
);
for (const importRecord of importsToProcess) {
if (!importRecord.fullTranscriptUrl) {
console.warn(`SessionImport ${importRecord.id} has no transcript URL, skipping.`);
continue;
}
console.log(`Processing transcript for session ${session.id}...`);
console.log(`Processing transcript for SessionImport ${importRecord.id}...`);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
// Mark as processing
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: { status: "PROCESSING" },
});
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
importRecord.fullTranscriptUrl,
importRecord.company.csvUsername || undefined,
importRecord.company.csvPassword || undefined
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap: Record<string, number> = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
if (!transcriptContent) {
throw new Error("Failed to fetch transcript content");
}
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
// Process with OpenAI
const processedData = await processTranscriptWithOpenAI(
importRecord.externalSessionId,
transcriptContent
);
// Parse dates from raw strings
const startTime = new Date(importRecord.startTimeRaw);
const endTime = new Date(importRecord.endTimeRaw);
// Create or update Session record
const session = await prisma.session.upsert({
where: { importId: importRecord.id },
update: {
startTime: isNaN(startTime.getTime()) ? new Date() : startTime,
endTime: isNaN(endTime.getTime()) ? new Date() : endTime,
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode,
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
sentiment: { positive: 0.8, neutral: 0.0, negative: -0.8 }[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
tokens: importRecord.tokens,
tokensEur: importRecord.tokensEur,
category: processedData.category,
initialMsg: importRecord.initialMessage,
processed: true,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
},
create: {
companyId: importRecord.companyId,
importId: importRecord.id,
startTime: isNaN(startTime.getTime()) ? new Date() : startTime,
endTime: isNaN(endTime.getTime()) ? new Date() : endTime,
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode,
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: { positive: 0.8, neutral: 0.0, negative: -0.8 }[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
tokens: importRecord.tokens,
tokensEur: importRecord.tokensEur,
category: processedData.category,
initialMsg: importRecord.initialMessage,
processed: true,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
},
});
console.log(`Successfully processed session ${session.id}.`);
// Mark SessionImport as DONE
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: {
status: "DONE",
processedAt: new Date(),
},
});
console.log(`Successfully processed SessionImport ${importRecord.id} -> Session ${session.id}`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
console.error(`Error processing SessionImport ${importRecord.id}:`, error);
// Mark as ERROR
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: {
status: "ERROR",
errorMsg: error instanceof Error ? error.message : String(error),
},
});
errorCount++;
}
}
console.log("Session processing complete.");
console.log(`Successfully processed: ${successCount} sessions.`);
console.log(`Failed to process: ${errorCount} sessions.`);
console.log("SessionImport processing complete.");
console.log(`Successfully processed: ${successCount} records.`);
console.log(`Failed to process: ${errorCount} records.`);
}
// Run the main function


@ -1,75 +0,0 @@
// Script to check processing status and trigger processing
// Usage: node scripts/test-processing-status.js
import { PrismaClient } from '@prisma/client';
const prisma = new PrismaClient();
async function checkProcessingStatus() {
try {
console.log('🔍 Checking processing status...\n');
// Get processing status
const totalSessions = await prisma.session.count();
const processedSessions = await prisma.session.count({
where: { processed: true }
});
const unprocessedSessions = await prisma.session.count({
where: { processed: false }
});
const sessionsWithMessages = await prisma.session.count({
where: {
processed: false,
messages: { some: {} }
}
});
console.log('📊 Processing Status:');
console.log(` Total sessions: ${totalSessions}`);
console.log(` ✅ Processed: ${processedSessions}`);
console.log(` ⏳ Unprocessed: ${unprocessedSessions}`);
console.log(` 📝 Unprocessed with messages: ${sessionsWithMessages}`);
const processedPercentage = ((processedSessions / totalSessions) * 100).toFixed(1);
console.log(` 📈 Processing progress: ${processedPercentage}%\n`);
// Check recent processing activity
const recentlyProcessed = await prisma.session.findMany({
where: {
processed: true,
createdAt: {
gte: new Date(Date.now() - 60 * 60 * 1000) // Last hour
}
},
orderBy: { createdAt: 'desc' },
take: 5,
select: {
id: true,
createdAt: true,
category: true,
sentiment: true
}
});
if (recentlyProcessed.length > 0) {
console.log('🕒 Recently processed sessions:');
recentlyProcessed.forEach(session => {
const timeAgo = Math.round((Date.now() - session.createdAt.getTime()) / 1000 / 60);
console.log(`${session.id.substring(0, 8)}... (${timeAgo}m ago) - ${session.category || 'No category'}`);
});
} else {
console.log('🕒 No sessions processed in the last hour');
}
console.log('\n✨ Processing system is working correctly!');
console.log('💡 The parallel processing successfully processed sessions.');
console.log('🎯 For manual triggers, you need to be logged in as an admin user.');
} catch (error) {
console.error('❌ Error checking status:', error);
} finally {
await prisma.$disconnect();
}
}
checkProcessingStatus();


@ -1,20 +0,0 @@
// Direct trigger for processing scheduler (bypasses authentication)
// Usage: node scripts/trigger-processing-direct.js
import { processUnprocessedSessions } from '../lib/processingScheduler.js';
async function triggerProcessing() {
try {
console.log('🚀 Manually triggering processing scheduler...\n');
// Process with custom parameters
await processUnprocessedSessions(50, 3); // Process 50 sessions with 3 concurrent workers
console.log('\n✅ Processing trigger completed!');
} catch (error) {
console.error('❌ Error triggering processing:', error);
}
}
triggerProcessing();