feat: add rawTranscriptContent field to SessionImport model

feat: enhance server initialization with environment validation and import processing scheduler

test: add Jest setup for unit tests and mock console methods

test: implement unit tests for environment management and validation

test: create unit tests for transcript fetcher functionality
Max Kowalski · 2025-06-27 19:00:22 +02:00
parent 50b230aa9b · commit 5c1ced5900
25 changed files with 3492 additions and 82 deletions
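
The Jest setup and unit-test files referenced in the test commits above are among the 25 changed files but are not shown in this excerpt. A minimal sketch of what the console-mocking setup could look like; the file name and its registration via Jest's setupFilesAfterEnv option are assumptions, not confirmed by the diff:

// jest.setup.ts (hypothetical name, wired up via setupFilesAfterEnv in the Jest config)
// Silences console output from the scheduler/processor modules during tests.
beforeEach(() => {
  jest.spyOn(console, "log").mockImplementation(() => {});
  jest.spyOn(console, "warn").mockImplementation(() => {});
  jest.spyOn(console, "error").mockImplementation(() => {});
});

afterEach(() => {
  // Restore the real console between tests so genuine failures still print.
  jest.restoreAllMocks();
});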

lib/env.ts (new file, +111 lines)

@@ -0,0 +1,111 @@
// Centralized environment variable management
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";

// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');

// Load .env.local if it exists
try {
  const envFile = readFileSync(envPath, 'utf8');
  const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
  envVars.forEach(line => {
    const [key, ...valueParts] = line.split('=');
    if (key && valueParts.length > 0) {
      const value = valueParts.join('=').trim();
      if (!process.env[key.trim()]) {
        process.env[key.trim()] = value;
      }
    }
  });
} catch (error) {
  // Silently fail if .env.local doesn't exist
}

/**
 * Typed environment variables with defaults
 */
export const env = {
  // NextAuth
  NEXTAUTH_URL: process.env.NEXTAUTH_URL || 'http://localhost:3000',
  NEXTAUTH_SECRET: process.env.NEXTAUTH_SECRET || '',
  NODE_ENV: process.env.NODE_ENV || 'development',

  // OpenAI
  OPENAI_API_KEY: process.env.OPENAI_API_KEY || '',

  // Scheduler Configuration
  SCHEDULER_ENABLED: process.env.SCHEDULER_ENABLED === 'true',
  CSV_IMPORT_INTERVAL: process.env.CSV_IMPORT_INTERVAL || '*/15 * * * *',
  IMPORT_PROCESSING_INTERVAL: process.env.IMPORT_PROCESSING_INTERVAL || '*/5 * * * *',
  IMPORT_PROCESSING_BATCH_SIZE: parseInt(process.env.IMPORT_PROCESSING_BATCH_SIZE || '50', 10),
  SESSION_PROCESSING_INTERVAL: process.env.SESSION_PROCESSING_INTERVAL || '0 * * * *',
  SESSION_PROCESSING_BATCH_SIZE: parseInt(process.env.SESSION_PROCESSING_BATCH_SIZE || '0', 10),
  SESSION_PROCESSING_CONCURRENCY: parseInt(process.env.SESSION_PROCESSING_CONCURRENCY || '5', 10),

  // Server
  PORT: parseInt(process.env.PORT || '3000', 10),
} as const;

/**
 * Validate required environment variables
 */
export function validateEnv(): { valid: boolean; errors: string[] } {
  const errors: string[] = [];

  if (!env.NEXTAUTH_SECRET) {
    errors.push('NEXTAUTH_SECRET is required');
  }

  if (!env.OPENAI_API_KEY && env.NODE_ENV === 'production') {
    errors.push('OPENAI_API_KEY is required in production');
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}

/**
 * Get scheduler configuration from environment variables
 */
export function getSchedulerConfig() {
  return {
    enabled: env.SCHEDULER_ENABLED,
    csvImport: {
      interval: env.CSV_IMPORT_INTERVAL,
    },
    importProcessing: {
      interval: env.IMPORT_PROCESSING_INTERVAL,
      batchSize: env.IMPORT_PROCESSING_BATCH_SIZE,
    },
    sessionProcessing: {
      interval: env.SESSION_PROCESSING_INTERVAL,
      batchSize: env.SESSION_PROCESSING_BATCH_SIZE,
      concurrency: env.SESSION_PROCESSING_CONCURRENCY,
    },
  };
}

/**
 * Log environment configuration (safe for production)
 */
export function logEnvConfig(): void {
  console.log('[Environment] Configuration:');
  console.log(`  NODE_ENV: ${env.NODE_ENV}`);
  console.log(`  NEXTAUTH_URL: ${env.NEXTAUTH_URL}`);
  console.log(`  SCHEDULER_ENABLED: ${env.SCHEDULER_ENABLED}`);
  console.log(`  PORT: ${env.PORT}`);

  if (env.SCHEDULER_ENABLED) {
    console.log('  Scheduler intervals:');
    console.log(`    CSV Import: ${env.CSV_IMPORT_INTERVAL}`);
    console.log(`    Import Processing: ${env.IMPORT_PROCESSING_INTERVAL}`);
    console.log(`    Session Processing: ${env.SESSION_PROCESSING_INTERVAL}`);
  }
}
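
One way the "unit tests for environment management and validation" mentioned in the commit messages could exercise validateEnv is to re-import the module per test after adjusting process.env. A sketch, assuming the module loads under the test transform and that no .env.local overrides the variables under test (the file name env.test.ts is illustrative):

// env.test.ts (hypothetical name)
describe("validateEnv", () => {
  const ORIGINAL_ENV = process.env;

  beforeEach(() => {
    jest.resetModules(); // lib/env.ts reads process.env at import time
    process.env = { ...ORIGINAL_ENV };
  });

  afterAll(() => {
    process.env = ORIGINAL_ENV;
  });

  it("fails when NEXTAUTH_SECRET is missing", () => {
    delete process.env.NEXTAUTH_SECRET;
    jest.isolateModules(() => {
      const { validateEnv } = require("./env");
      const result = validateEnv();
      expect(result.valid).toBe(false);
      expect(result.errors).toContain("NEXTAUTH_SECRET is required");
    });
  });

  it("passes when the required variables are set", () => {
    process.env.NEXTAUTH_SECRET = "test-secret";
    jest.isolateModules(() => {
      const { validateEnv } = require("./env");
      expect(validateEnv().valid).toBe(true);
    });
  });
});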

lib/importProcessor.ts (new file, +225 lines)

@@ -0,0 +1,225 @@
// SessionImport to Session processor
import { PrismaClient, ImportStatus, SentimentCategory } from "@prisma/client";
import { getSchedulerConfig } from "./env";
import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
import cron from "node-cron";

const prisma = new PrismaClient();

/**
 * Process a single SessionImport record into a Session record
 */
async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
  try {
    // Parse dates
    const startTime = new Date(importRecord.startTimeRaw);
    const endTime = new Date(importRecord.endTimeRaw);

    // Validate dates
    if (isNaN(startTime.getTime()) || isNaN(endTime.getTime())) {
      throw new Error(`Invalid date format: start=${importRecord.startTimeRaw}, end=${importRecord.endTimeRaw}`);
    }

    // Process sentiment
    let sentiment: number | null = null;
    let sentimentCategory: SentimentCategory | null = null;
    if (importRecord.sentimentRaw) {
      const sentimentStr = importRecord.sentimentRaw.toLowerCase();
      if (sentimentStr.includes('positive')) {
        sentiment = 0.8;
        sentimentCategory = SentimentCategory.POSITIVE;
      } else if (sentimentStr.includes('negative')) {
        sentiment = -0.8;
        sentimentCategory = SentimentCategory.NEGATIVE;
      } else {
        sentiment = 0.0;
        sentimentCategory = SentimentCategory.NEUTRAL;
      }
    }

    // Process boolean fields
    const escalated = importRecord.escalatedRaw ?
      ['true', '1', 'yes', 'escalated'].includes(importRecord.escalatedRaw.toLowerCase()) : null;
    const forwardedHr = importRecord.forwardedHrRaw ?
      ['true', '1', 'yes', 'forwarded'].includes(importRecord.forwardedHrRaw.toLowerCase()) : null;

    // Keep country code as-is, will be processed by OpenAI later
    const country = importRecord.countryCode;

    // Fetch transcript content if URL is provided and not already fetched
    let transcriptContent = importRecord.rawTranscriptContent;
    if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
      console.log(`[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`);

      // Get company credentials for transcript fetching
      const company = await prisma.company.findUnique({
        where: { id: importRecord.companyId },
        select: { csvUsername: true, csvPassword: true },
      });

      const transcriptResult = await fetchTranscriptContent(
        importRecord.fullTranscriptUrl,
        company?.csvUsername || undefined,
        company?.csvPassword || undefined
      );

      if (transcriptResult.success) {
        transcriptContent = transcriptResult.content;
        console.log(`[Import Processor] ✓ Fetched transcript for ${importRecord.externalSessionId} (${transcriptContent?.length} chars)`);

        // Update the import record with the fetched content
        await prisma.sessionImport.update({
          where: { id: importRecord.id },
          data: { rawTranscriptContent: transcriptContent },
        });
      } else {
        console.log(`[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`);
      }
    }

    // Create or update Session record
    const session = await prisma.session.upsert({
      where: {
        importId: importRecord.id,
      },
      update: {
        startTime,
        endTime,
        ipAddress: importRecord.ipAddress,
        country,
        language: importRecord.language,
        messagesSent: importRecord.messagesSent,
        sentiment,
        sentimentCategory,
        escalated,
        forwardedHr,
        fullTranscriptUrl: importRecord.fullTranscriptUrl,
        avgResponseTime: importRecord.avgResponseTimeSeconds,
        tokens: importRecord.tokens,
        tokensEur: importRecord.tokensEur,
        category: importRecord.category,
        initialMsg: importRecord.initialMessage,
        processed: false, // Will be processed later by AI
      },
      create: {
        companyId: importRecord.companyId,
        importId: importRecord.id,
        startTime,
        endTime,
        ipAddress: importRecord.ipAddress,
        country,
        language: importRecord.language,
        messagesSent: importRecord.messagesSent,
        sentiment,
        sentimentCategory,
        escalated,
        forwardedHr,
        fullTranscriptUrl: importRecord.fullTranscriptUrl,
        avgResponseTime: importRecord.avgResponseTimeSeconds,
        tokens: importRecord.tokens,
        tokensEur: importRecord.tokensEur,
        category: importRecord.category,
        initialMsg: importRecord.initialMessage,
        processed: false, // Will be processed later by AI
      },
    });

    // Update import status to DONE
    await prisma.sessionImport.update({
      where: { id: importRecord.id },
      data: {
        status: ImportStatus.DONE,
        processedAt: new Date(),
        errorMsg: null,
      },
    });

    return { success: true };
  } catch (error) {
    // Update import status to ERROR
    await prisma.sessionImport.update({
      where: { id: importRecord.id },
      data: {
        status: ImportStatus.ERROR,
        errorMsg: error instanceof Error ? error.message : String(error),
      },
    });

    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}

/**
 * Process queued SessionImport records into Session records
 */
export async function processQueuedImports(batchSize: number = 50): Promise<void> {
  console.log('[Import Processor] Starting to process queued imports...');

  // Find queued imports
  const queuedImports = await prisma.sessionImport.findMany({
    where: {
      status: ImportStatus.QUEUED,
    },
    take: batchSize,
    orderBy: {
      createdAt: 'asc', // Process oldest first
    },
  });

  if (queuedImports.length === 0) {
    console.log('[Import Processor] No queued imports found');
    return;
  }

  console.log(`[Import Processor] Processing ${queuedImports.length} queued imports...`);

  let successCount = 0;
  let errorCount = 0;

  // Process each import
  for (const importRecord of queuedImports) {
    const result = await processSingleImport(importRecord);
    if (result.success) {
      successCount++;
      console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
    } else {
      errorCount++;
      console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
    }
  }

  console.log(`[Import Processor] Completed: ${successCount} successful, ${errorCount} failed`);
}

/**
 * Start the import processing scheduler
 */
export function startImportProcessingScheduler(): void {
  const config = getSchedulerConfig();

  if (!config.enabled) {
    console.log('[Import Processing Scheduler] Disabled via configuration');
    return;
  }

  // Use a more frequent interval for import processing (every 5 minutes by default)
  const interval = process.env.IMPORT_PROCESSING_INTERVAL || '*/5 * * * *';
  const batchSize = parseInt(process.env.IMPORT_PROCESSING_BATCH_SIZE || '50', 10);

  console.log(`[Import Processing Scheduler] Starting with interval: ${interval}`);
  console.log(`[Import Processing Scheduler] Batch size: ${batchSize}`);

  cron.schedule(interval, async () => {
    try {
      await processQueuedImports(batchSize);
    } catch (error) {
      console.error(`[Import Processing Scheduler] Error: ${error}`);
    }
  });
}
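
The commit message mentions enhancing server initialization with environment validation and this import processing scheduler; the server entry point itself is not part of this excerpt. A plausible wiring sketch using only the exports shown above (import paths and the exit-on-invalid-config behavior are assumptions):

// server startup sketch (hypothetical wiring)
import { validateEnv, logEnvConfig } from "./lib/env";
import { startImportProcessingScheduler } from "./lib/importProcessor";

const { valid, errors } = validateEnv();
if (!valid) {
  console.error("[Server] Invalid environment configuration:");
  errors.forEach((e) => console.error(`  - ${e}`));
  process.exit(1); // refuse to start with a broken configuration
}

logEnvConfig();
startImportProcessingScheduler();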

@@ -1,30 +1,7 @@
-// Unified scheduler configuration
-import { readFileSync } from "fs";
-import { fileURLToPath } from "url";
-import { dirname, join } from "path";
-
-// Load environment variables from .env.local
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-const envPath = join(__dirname, '..', '.env.local');
-
-// Load .env.local if it exists
-try {
-  const envFile = readFileSync(envPath, 'utf8');
-  const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
-  envVars.forEach(line => {
-    const [key, ...valueParts] = line.split('=');
-    if (key && valueParts.length > 0) {
-      const value = valueParts.join('=').trim();
-      if (!process.env[key.trim()]) {
-        process.env[key.trim()] = value;
-      }
-    }
-  });
-} catch (error) {
-  // Silently fail if .env.local doesn't exist
-}
+// Legacy scheduler configuration - now uses centralized env management
+// This file is kept for backward compatibility but delegates to lib/env.ts
+import { getSchedulerConfig as getEnvSchedulerConfig, logEnvConfig } from "./env";
 
 export interface SchedulerConfig {
   enabled: boolean;
@@ -40,43 +17,28 @@ export interface SchedulerConfig {
 /**
  * Get scheduler configuration from environment variables
+ * @deprecated Use getSchedulerConfig from lib/env.ts instead
  */
 export function getSchedulerConfig(): SchedulerConfig {
-  const enabled = process.env.SCHEDULER_ENABLED === 'true';
-
-  // Default values
-  const defaults = {
-    csvImportInterval: '*/15 * * * *', // Every 15 minutes
-    sessionProcessingInterval: '0 * * * *', // Every hour
-    sessionProcessingBatchSize: 0, // Unlimited
-    sessionProcessingConcurrency: 5,
-  };
+  const config = getEnvSchedulerConfig();
 
   return {
-    enabled,
+    enabled: config.enabled,
     csvImport: {
-      interval: process.env.CSV_IMPORT_INTERVAL || defaults.csvImportInterval,
+      interval: config.csvImport.interval,
     },
     sessionProcessing: {
-      interval: process.env.SESSION_PROCESSING_INTERVAL || defaults.sessionProcessingInterval,
-      batchSize: parseInt(process.env.SESSION_PROCESSING_BATCH_SIZE || '0', 10) || defaults.sessionProcessingBatchSize,
-      concurrency: parseInt(process.env.SESSION_PROCESSING_CONCURRENCY || '5', 10) || defaults.sessionProcessingConcurrency,
+      interval: config.sessionProcessing.interval,
+      batchSize: config.sessionProcessing.batchSize,
+      concurrency: config.sessionProcessing.concurrency,
     },
   };
 }
 
 /**
  * Log scheduler configuration
+ * @deprecated Use logEnvConfig from lib/env.ts instead
  */
 export function logSchedulerConfig(config: SchedulerConfig): void {
-  if (!config.enabled) {
-    console.log('[Scheduler] Schedulers are DISABLED (SCHEDULER_ENABLED=false)');
-    return;
-  }
-
-  console.log('[Scheduler] Configuration:');
-  console.log(`  CSV Import: ${config.csvImport.interval}`);
-  console.log(`  Session Processing: ${config.sessionProcessing.interval}`);
-  console.log(`  Batch Size: ${config.sessionProcessing.batchSize === 0 ? 'unlimited' : config.sessionProcessing.batchSize}`);
-  console.log(`  Concurrency: ${config.sessionProcessing.concurrency}`);
+  logEnvConfig();
 }
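
For callers, the deprecated wrapper and the centralized module now return the same configuration shape, so migrating is an import swap. A sketch (call sites are not shown in this excerpt, and the legacy module's path is elided above):

// Before: import { getSchedulerConfig } from the legacy scheduler-config module
// After:
import { getSchedulerConfig } from "./lib/env";

const config = getSchedulerConfig();
console.log(config.sessionProcessing.concurrency); // 5 unless overridden via env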

lib/transcriptFetcher.ts (new file, +151 lines)

@@ -0,0 +1,151 @@
// Transcript fetching utility
import fetch from "node-fetch";

export interface TranscriptFetchResult {
  success: boolean;
  content?: string;
  error?: string;
}

/**
 * Fetch transcript content from a URL
 * @param url The transcript URL
 * @param username Optional username for authentication
 * @param password Optional password for authentication
 * @returns Promise with fetch result
 */
export async function fetchTranscriptContent(
  url: string,
  username?: string,
  password?: string
): Promise<TranscriptFetchResult> {
  try {
    if (!url || !url.trim()) {
      return {
        success: false,
        error: 'No transcript URL provided',
      };
    }

    // Prepare authentication header if credentials provided
    const authHeader =
      username && password
        ? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
        : undefined;

    const headers: Record<string, string> = {
      'User-Agent': 'LiveDash-Transcript-Fetcher/1.0',
    };
    if (authHeader) {
      headers.Authorization = authHeader;
    }

    // Fetch the transcript with timeout
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 30000); // 30 second timeout

    const response = await fetch(url, {
      method: 'GET',
      headers,
      signal: controller.signal,
    });

    clearTimeout(timeoutId);

    if (!response.ok) {
      return {
        success: false,
        error: `HTTP ${response.status}: ${response.statusText}`,
      };
    }

    const content = await response.text();

    if (!content || content.trim().length === 0) {
      return {
        success: false,
        error: 'Empty transcript content',
      };
    }

    return {
      success: true,
      content: content.trim(),
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);

    // Handle common network errors
    if (errorMessage.includes('ENOTFOUND')) {
      return {
        success: false,
        error: 'Domain not found',
      };
    }
    if (errorMessage.includes('ECONNREFUSED')) {
      return {
        success: false,
        error: 'Connection refused',
      };
    }
    if (errorMessage.includes('timeout')) {
      return {
        success: false,
        error: 'Request timeout',
      };
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}

/**
 * Validate if a URL looks like a valid transcript URL
 * @param url The URL to validate
 * @returns boolean indicating if URL appears valid
 */
export function isValidTranscriptUrl(url: string): boolean {
  if (!url || typeof url !== 'string') {
    return false;
  }

  try {
    const parsedUrl = new URL(url);
    return parsedUrl.protocol === 'http:' || parsedUrl.protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Extract session ID from transcript content if possible
 * This is a helper function that can be enhanced based on transcript format
 * @param content The transcript content
 * @returns Extracted session ID or null
 */
export function extractSessionIdFromTranscript(content: string): string | null {
  if (!content) return null;

  // Look for common session ID patterns
  const patterns = [
    /session[_-]?id[:\s]*([a-zA-Z0-9-]+)/i,
    /id[:\s]*([a-zA-Z0-9-]{8,})/i,
    /^([a-zA-Z0-9-]{8,})/m, // First line might be session ID
  ];

  for (const pattern of patterns) {
    const match = content.match(pattern);
    if (match && match[1]) {
      return match[1].trim();
    }
  }

  return null;
}
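
The transcript-fetcher unit tests mentioned in the commit messages are likewise outside this excerpt. A sketch of how they might mock node-fetch and assert on the result shapes defined above (file name and test cases are illustrative):

// transcriptFetcher.test.ts (hypothetical name)
import fetch from "node-fetch";
import {
  fetchTranscriptContent,
  isValidTranscriptUrl,
  extractSessionIdFromTranscript,
} from "./transcriptFetcher";

jest.mock("node-fetch"); // hoisted; replaces the real module with an auto-mock
const mockedFetch = fetch as jest.MockedFunction<typeof fetch>;

describe("isValidTranscriptUrl", () => {
  it("accepts http(s) URLs and rejects everything else", () => {
    expect(isValidTranscriptUrl("https://example.com/t.txt")).toBe(true);
    expect(isValidTranscriptUrl("ftp://example.com/t.txt")).toBe(false);
    expect(isValidTranscriptUrl("not a url")).toBe(false);
  });
});

describe("fetchTranscriptContent", () => {
  it("returns trimmed content on a successful response", async () => {
    mockedFetch.mockResolvedValueOnce({
      ok: true,
      text: async () => "  hello transcript  ",
    } as any);
    const result = await fetchTranscriptContent("https://example.com/t.txt");
    expect(result).toEqual({ success: true, content: "hello transcript" });
  });

  it("reports HTTP errors", async () => {
    mockedFetch.mockResolvedValueOnce({
      ok: false,
      status: 404,
      statusText: "Not Found",
    } as any);
    const result = await fetchTranscriptContent("https://example.com/missing.txt");
    expect(result.success).toBe(false);
    expect(result.error).toBe("HTTP 404: Not Found");
  });
});

describe("extractSessionIdFromTranscript", () => {
  it("pulls a session id from a labeled line", () => {
    expect(
      extractSessionIdFromTranscript("session_id: abc-123-def\nUser: hello")
    ).toBe("abc-123-def");
  });
});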