feat: add rawTranscriptContent field to SessionImport model

feat: enhance server initialization with environment validation and import processing scheduler

test: add Jest setup for unit tests and mock console methods

test: implement unit tests for environment management and validation

test: create unit tests for transcript fetcher functionality
Max Kowalski · 2025-06-27 19:00:22 +02:00
parent 50b230aa9b · commit 5c1ced5900
25 changed files with 3492 additions and 82 deletions
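
The Jest setup and unit-test files referenced in the test commits above are among the 25 changed files but are not shown in this excerpt. A minimal sketch of what the console-mocking setup could look like; the file name and its registration via Jest's setupFilesAfterEnv option are assumptions, not confirmed by the diff:

// jest.setup.ts (hypothetical name, wired up via setupFilesAfterEnv in the Jest config)
// Silences console output from the scheduler/processor modules during tests.
beforeEach(() => {
  jest.spyOn(console, "log").mockImplementation(() => {});
  jest.spyOn(console, "warn").mockImplementation(() => {});
  jest.spyOn(console, "error").mockImplementation(() => {});
});

afterEach(() => {
  // Restore the real console between tests so genuine failures still print.
  jest.restoreAllMocks();
});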

lib/env.ts (new file, +111 lines)

@@ -0,0 +1,111 @@
// Centralized environment variable management
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";

// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');

// Load .env.local if it exists
try {
  const envFile = readFileSync(envPath, 'utf8');
  const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
  envVars.forEach(line => {
    const [key, ...valueParts] = line.split('=');
    if (key && valueParts.length > 0) {
      const value = valueParts.join('=').trim();
      if (!process.env[key.trim()]) {
        process.env[key.trim()] = value;
      }
    }
  });
} catch (error) {
  // Silently fail if .env.local doesn't exist
}

/**
 * Typed environment variables with defaults
 */
export const env = {
  // NextAuth
  NEXTAUTH_URL: process.env.NEXTAUTH_URL || 'http://localhost:3000',
  NEXTAUTH_SECRET: process.env.NEXTAUTH_SECRET || '',
  NODE_ENV: process.env.NODE_ENV || 'development',

  // OpenAI
  OPENAI_API_KEY: process.env.OPENAI_API_KEY || '',

  // Scheduler Configuration
  SCHEDULER_ENABLED: process.env.SCHEDULER_ENABLED === 'true',
  CSV_IMPORT_INTERVAL: process.env.CSV_IMPORT_INTERVAL || '*/15 * * * *',
  IMPORT_PROCESSING_INTERVAL: process.env.IMPORT_PROCESSING_INTERVAL || '*/5 * * * *',
  IMPORT_PROCESSING_BATCH_SIZE: parseInt(process.env.IMPORT_PROCESSING_BATCH_SIZE || '50', 10),
  SESSION_PROCESSING_INTERVAL: process.env.SESSION_PROCESSING_INTERVAL || '0 * * * *',
  SESSION_PROCESSING_BATCH_SIZE: parseInt(process.env.SESSION_PROCESSING_BATCH_SIZE || '0', 10),
  SESSION_PROCESSING_CONCURRENCY: parseInt(process.env.SESSION_PROCESSING_CONCURRENCY || '5', 10),

  // Server
  PORT: parseInt(process.env.PORT || '3000', 10),
} as const;

/**
 * Validate required environment variables
 */
export function validateEnv(): { valid: boolean; errors: string[] } {
  const errors: string[] = [];

  if (!env.NEXTAUTH_SECRET) {
    errors.push('NEXTAUTH_SECRET is required');
  }

  if (!env.OPENAI_API_KEY && env.NODE_ENV === 'production') {
    errors.push('OPENAI_API_KEY is required in production');
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}

/**
 * Get scheduler configuration from environment variables
 */
export function getSchedulerConfig() {
  return {
    enabled: env.SCHEDULER_ENABLED,
    csvImport: {
      interval: env.CSV_IMPORT_INTERVAL,
    },
    importProcessing: {
      interval: env.IMPORT_PROCESSING_INTERVAL,
      batchSize: env.IMPORT_PROCESSING_BATCH_SIZE,
    },
    sessionProcessing: {
      interval: env.SESSION_PROCESSING_INTERVAL,
      batchSize: env.SESSION_PROCESSING_BATCH_SIZE,
      concurrency: env.SESSION_PROCESSING_CONCURRENCY,
    },
  };
}

/**
 * Log environment configuration (safe for production)
 */
export function logEnvConfig(): void {
  console.log('[Environment] Configuration:');
  console.log(`  NODE_ENV: ${env.NODE_ENV}`);
  console.log(`  NEXTAUTH_URL: ${env.NEXTAUTH_URL}`);
  console.log(`  SCHEDULER_ENABLED: ${env.SCHEDULER_ENABLED}`);
  console.log(`  PORT: ${env.PORT}`);

  if (env.SCHEDULER_ENABLED) {
    console.log('  Scheduler intervals:');
    console.log(`    CSV Import: ${env.CSV_IMPORT_INTERVAL}`);
    console.log(`    Import Processing: ${env.IMPORT_PROCESSING_INTERVAL}`);
    console.log(`    Session Processing: ${env.SESSION_PROCESSING_INTERVAL}`);
  }
}
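
One way the "unit tests for environment management and validation" mentioned in the commit messages could exercise validateEnv is to re-import the module per test after adjusting process.env. A sketch, assuming the module loads under the test transform and that no .env.local overrides the variables under test (the file name env.test.ts is illustrative):

// env.test.ts (hypothetical name)
describe("validateEnv", () => {
  const ORIGINAL_ENV = process.env;

  beforeEach(() => {
    jest.resetModules(); // lib/env.ts reads process.env at import time
    process.env = { ...ORIGINAL_ENV };
  });

  afterAll(() => {
    process.env = ORIGINAL_ENV;
  });

  it("fails when NEXTAUTH_SECRET is missing", () => {
    delete process.env.NEXTAUTH_SECRET;
    jest.isolateModules(() => {
      const { validateEnv } = require("./env");
      const result = validateEnv();
      expect(result.valid).toBe(false);
      expect(result.errors).toContain("NEXTAUTH_SECRET is required");
    });
  });

  it("passes when the required variables are set", () => {
    process.env.NEXTAUTH_SECRET = "test-secret";
    jest.isolateModules(() => {
      const { validateEnv } = require("./env");
      expect(validateEnv().valid).toBe(true);
    });
  });
});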

lib/importProcessor.ts (new file, +225 lines)

@@ -0,0 +1,225 @@
// SessionImport to Session processor
import { PrismaClient, ImportStatus, SentimentCategory } from "@prisma/client";
import { getSchedulerConfig } from "./env";
import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
import cron from "node-cron";

const prisma = new PrismaClient();

/**
 * Process a single SessionImport record into a Session record
 */
async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
  try {
    // Parse dates
    const startTime = new Date(importRecord.startTimeRaw);
    const endTime = new Date(importRecord.endTimeRaw);

    // Validate dates
    if (isNaN(startTime.getTime()) || isNaN(endTime.getTime())) {
      throw new Error(`Invalid date format: start=${importRecord.startTimeRaw}, end=${importRecord.endTimeRaw}`);
    }

    // Process sentiment
    let sentiment: number | null = null;
    let sentimentCategory: SentimentCategory | null = null;
    if (importRecord.sentimentRaw) {
      const sentimentStr = importRecord.sentimentRaw.toLowerCase();
      if (sentimentStr.includes('positive')) {
        sentiment = 0.8;
        sentimentCategory = SentimentCategory.POSITIVE;
      } else if (sentimentStr.includes('negative')) {
        sentiment = -0.8;
        sentimentCategory = SentimentCategory.NEGATIVE;
      } else {
        sentiment = 0.0;
        sentimentCategory = SentimentCategory.NEUTRAL;
      }
    }

    // Process boolean fields
    const escalated = importRecord.escalatedRaw ?
      ['true', '1', 'yes', 'escalated'].includes(importRecord.escalatedRaw.toLowerCase()) : null;
    const forwardedHr = importRecord.forwardedHrRaw ?
      ['true', '1', 'yes', 'forwarded'].includes(importRecord.forwardedHrRaw.toLowerCase()) : null;

    // Keep country code as-is, will be processed by OpenAI later
    const country = importRecord.countryCode;

    // Fetch transcript content if URL is provided and not already fetched
    let transcriptContent = importRecord.rawTranscriptContent;
    if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
      console.log(`[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`);

      // Get company credentials for transcript fetching
      const company = await prisma.company.findUnique({
        where: { id: importRecord.companyId },
        select: { csvUsername: true, csvPassword: true },
      });

      const transcriptResult = await fetchTranscriptContent(
        importRecord.fullTranscriptUrl,
        company?.csvUsername || undefined,
        company?.csvPassword || undefined
      );

      if (transcriptResult.success) {
        transcriptContent = transcriptResult.content;
        console.log(`[Import Processor] ✓ Fetched transcript for ${importRecord.externalSessionId} (${transcriptContent?.length} chars)`);

        // Update the import record with the fetched content
        await prisma.sessionImport.update({
          where: { id: importRecord.id },
          data: { rawTranscriptContent: transcriptContent },
        });
      } else {
        console.log(`[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`);
      }
    }

    // Create or update Session record
    const session = await prisma.session.upsert({
      where: {
        importId: importRecord.id,
      },
      update: {
        startTime,
        endTime,
        ipAddress: importRecord.ipAddress,
        country,
        language: importRecord.language,
        messagesSent: importRecord.messagesSent,
        sentiment,
        sentimentCategory,
        escalated,
        forwardedHr,
        fullTranscriptUrl: importRecord.fullTranscriptUrl,
        avgResponseTime: importRecord.avgResponseTimeSeconds,
        tokens: importRecord.tokens,
        tokensEur: importRecord.tokensEur,
        category: importRecord.category,
        initialMsg: importRecord.initialMessage,
        processed: false, // Will be processed later by AI
      },
      create: {
        companyId: importRecord.companyId,
        importId: importRecord.id,
        startTime,
        endTime,
        ipAddress: importRecord.ipAddress,
        country,
        language: importRecord.language,
        messagesSent: importRecord.messagesSent,
        sentiment,
        sentimentCategory,
        escalated,
        forwardedHr,
        fullTranscriptUrl: importRecord.fullTranscriptUrl,
        avgResponseTime: importRecord.avgResponseTimeSeconds,
        tokens: importRecord.tokens,
        tokensEur: importRecord.tokensEur,
        category: importRecord.category,
        initialMsg: importRecord.initialMessage,
        processed: false, // Will be processed later by AI
      },
    });

    // Update import status to DONE
    await prisma.sessionImport.update({
      where: { id: importRecord.id },
      data: {
        status: ImportStatus.DONE,
        processedAt: new Date(),
        errorMsg: null,
      },
    });

    return { success: true };
  } catch (error) {
    // Update import status to ERROR
    await prisma.sessionImport.update({
      where: { id: importRecord.id },
      data: {
        status: ImportStatus.ERROR,
        errorMsg: error instanceof Error ? error.message : String(error),
      },
    });

    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}

/**
 * Process queued SessionImport records into Session records
 */
export async function processQueuedImports(batchSize: number = 50): Promise<void> {
  console.log('[Import Processor] Starting to process queued imports...');

  // Find queued imports
  const queuedImports = await prisma.sessionImport.findMany({
    where: {
      status: ImportStatus.QUEUED,
    },
    take: batchSize,
    orderBy: {
      createdAt: 'asc', // Process oldest first
    },
  });

  if (queuedImports.length === 0) {
    console.log('[Import Processor] No queued imports found');
    return;
  }

  console.log(`[Import Processor] Processing ${queuedImports.length} queued imports...`);

  let successCount = 0;
  let errorCount = 0;

  // Process each import
  for (const importRecord of queuedImports) {
    const result = await processSingleImport(importRecord);
    if (result.success) {
      successCount++;
      console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
    } else {
      errorCount++;
      console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
    }
  }

  console.log(`[Import Processor] Completed: ${successCount} successful, ${errorCount} failed`);
}

/**
 * Start the import processing scheduler
 */
export function startImportProcessingScheduler(): void {
  const config = getSchedulerConfig();

  if (!config.enabled) {
    console.log('[Import Processing Scheduler] Disabled via configuration');
    return;
  }

  // Use a more frequent interval for import processing (every 5 minutes by default)
  const interval = process.env.IMPORT_PROCESSING_INTERVAL || '*/5 * * * *';
  const batchSize = parseInt(process.env.IMPORT_PROCESSING_BATCH_SIZE || '50', 10);

  console.log(`[Import Processing Scheduler] Starting with interval: ${interval}`);
  console.log(`[Import Processing Scheduler] Batch size: ${batchSize}`);

  cron.schedule(interval, async () => {
    try {
      await processQueuedImports(batchSize);
    } catch (error) {
      console.error(`[Import Processing Scheduler] Error: ${error}`);
    }
  });
}
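
The commit message mentions enhancing server initialization with environment validation and this import processing scheduler; the server entry point itself is not part of this excerpt. A plausible wiring sketch using only the exports shown above (import paths and the exit-on-invalid-config behavior are assumptions):

// server startup sketch (hypothetical wiring)
import { validateEnv, logEnvConfig } from "./lib/env";
import { startImportProcessingScheduler } from "./lib/importProcessor";

const { valid, errors } = validateEnv();
if (!valid) {
  console.error("[Server] Invalid environment configuration:");
  errors.forEach((e) => console.error(`  - ${e}`));
  process.exit(1); // refuse to start with a broken configuration
}

logEnvConfig();
startImportProcessingScheduler();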

@@ -1,30 +1,7 @@
-// Unified scheduler configuration
-import { readFileSync } from "fs";
-import { fileURLToPath } from "url";
-import { dirname, join } from "path";
-
-// Load environment variables from .env.local
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-const envPath = join(__dirname, '..', '.env.local');
-
-// Load .env.local if it exists
-try {
-  const envFile = readFileSync(envPath, 'utf8');
-  const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
-  envVars.forEach(line => {
-    const [key, ...valueParts] = line.split('=');
-    if (key && valueParts.length > 0) {
-      const value = valueParts.join('=').trim();
-      if (!process.env[key.trim()]) {
-        process.env[key.trim()] = value;
-      }
-    }
-  });
-} catch (error) {
-  // Silently fail if .env.local doesn't exist
-}
+// Legacy scheduler configuration - now uses centralized env management
+// This file is kept for backward compatibility but delegates to lib/env.ts
+import { getSchedulerConfig as getEnvSchedulerConfig, logEnvConfig } from "./env";
 
 export interface SchedulerConfig {
   enabled: boolean;
@@ -40,43 +17,28 @@ export interface SchedulerConfig {
 /**
  * Get scheduler configuration from environment variables
+ * @deprecated Use getSchedulerConfig from lib/env.ts instead
  */
 export function getSchedulerConfig(): SchedulerConfig {
-  const enabled = process.env.SCHEDULER_ENABLED === 'true';
-
-  // Default values
-  const defaults = {
-    csvImportInterval: '*/15 * * * *', // Every 15 minutes
-    sessionProcessingInterval: '0 * * * *', // Every hour
-    sessionProcessingBatchSize: 0, // Unlimited
-    sessionProcessingConcurrency: 5,
-  };
+  const config = getEnvSchedulerConfig();
 
   return {
-    enabled,
+    enabled: config.enabled,
     csvImport: {
-      interval: process.env.CSV_IMPORT_INTERVAL || defaults.csvImportInterval,
+      interval: config.csvImport.interval,
     },
     sessionProcessing: {
-      interval: process.env.SESSION_PROCESSING_INTERVAL || defaults.sessionProcessingInterval,
-      batchSize: parseInt(process.env.SESSION_PROCESSING_BATCH_SIZE || '0', 10) || defaults.sessionProcessingBatchSize,
-      concurrency: parseInt(process.env.SESSION_PROCESSING_CONCURRENCY || '5', 10) || defaults.sessionProcessingConcurrency,
+      interval: config.sessionProcessing.interval,
+      batchSize: config.sessionProcessing.batchSize,
+      concurrency: config.sessionProcessing.concurrency,
     },
   };
 }
 
 /**
  * Log scheduler configuration
+ * @deprecated Use logEnvConfig from lib/env.ts instead
  */
 export function logSchedulerConfig(config: SchedulerConfig): void {
-  if (!config.enabled) {
-    console.log('[Scheduler] Schedulers are DISABLED (SCHEDULER_ENABLED=false)');
-    return;
-  }
-
-  console.log('[Scheduler] Configuration:');
-  console.log(`  CSV Import: ${config.csvImport.interval}`);
-  console.log(`  Session Processing: ${config.sessionProcessing.interval}`);
-  console.log(`  Batch Size: ${config.sessionProcessing.batchSize === 0 ? 'unlimited' : config.sessionProcessing.batchSize}`);
-  console.log(`  Concurrency: ${config.sessionProcessing.concurrency}`);
+  logEnvConfig();
 }
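
For callers, the deprecated wrapper and the centralized module now return the same configuration shape, so migrating is an import swap. A sketch (call sites are not shown in this excerpt, and the legacy module's path is elided above):

// Before: import { getSchedulerConfig } from the legacy scheduler-config module
// After:
import { getSchedulerConfig } from "./lib/env";

const config = getSchedulerConfig();
console.log(config.sessionProcessing.concurrency); // 5 unless overridden via env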

lib/transcriptFetcher.ts (new file, +151 lines)

@@ -0,0 +1,151 @@
// Transcript fetching utility
import fetch from "node-fetch";

export interface TranscriptFetchResult {
  success: boolean;
  content?: string;
  error?: string;
}

/**
 * Fetch transcript content from a URL
 * @param url The transcript URL
 * @param username Optional username for authentication
 * @param password Optional password for authentication
 * @returns Promise with fetch result
 */
export async function fetchTranscriptContent(
  url: string,
  username?: string,
  password?: string
): Promise<TranscriptFetchResult> {
  try {
    if (!url || !url.trim()) {
      return {
        success: false,
        error: 'No transcript URL provided',
      };
    }

    // Prepare authentication header if credentials provided
    const authHeader =
      username && password
        ? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
        : undefined;

    const headers: Record<string, string> = {
      'User-Agent': 'LiveDash-Transcript-Fetcher/1.0',
    };
    if (authHeader) {
      headers.Authorization = authHeader;
    }

    // Fetch the transcript with timeout
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 30000); // 30 second timeout

    const response = await fetch(url, {
      method: 'GET',
      headers,
      signal: controller.signal,
    });

    clearTimeout(timeoutId);

    if (!response.ok) {
      return {
        success: false,
        error: `HTTP ${response.status}: ${response.statusText}`,
      };
    }

    const content = await response.text();

    if (!content || content.trim().length === 0) {
      return {
        success: false,
        error: 'Empty transcript content',
      };
    }

    return {
      success: true,
      content: content.trim(),
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);

    // Handle common network errors
    if (errorMessage.includes('ENOTFOUND')) {
      return {
        success: false,
        error: 'Domain not found',
      };
    }
    if (errorMessage.includes('ECONNREFUSED')) {
      return {
        success: false,
        error: 'Connection refused',
      };
    }
    if (errorMessage.includes('timeout')) {
      return {
        success: false,
        error: 'Request timeout',
      };
    }

    return {
      success: false,
      error: errorMessage,
    };
  }
}

/**
 * Validate if a URL looks like a valid transcript URL
 * @param url The URL to validate
 * @returns boolean indicating if URL appears valid
 */
export function isValidTranscriptUrl(url: string): boolean {
  if (!url || typeof url !== 'string') {
    return false;
  }

  try {
    const parsedUrl = new URL(url);
    return parsedUrl.protocol === 'http:' || parsedUrl.protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * Extract session ID from transcript content if possible
 * This is a helper function that can be enhanced based on transcript format
 * @param content The transcript content
 * @returns Extracted session ID or null
 */
export function extractSessionIdFromTranscript(content: string): string | null {
  if (!content) return null;

  // Look for common session ID patterns
  const patterns = [
    /session[_-]?id[:\s]*([a-zA-Z0-9-]+)/i,
    /id[:\s]*([a-zA-Z0-9-]{8,})/i,
    /^([a-zA-Z0-9-]{8,})/m, // First line might be session ID
  ];

  for (const pattern of patterns) {
    const match = content.match(pattern);
    if (match && match[1]) {
      return match[1].trim();
    }
  }

  return null;
}
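
The transcript-fetcher unit tests mentioned in the commit messages are likewise outside this excerpt. A sketch of how they might mock node-fetch and assert on the result shapes defined above (file name and test cases are illustrative):

// transcriptFetcher.test.ts (hypothetical name)
import fetch from "node-fetch";
import {
  fetchTranscriptContent,
  isValidTranscriptUrl,
  extractSessionIdFromTranscript,
} from "./transcriptFetcher";

jest.mock("node-fetch"); // hoisted; replaces the real module with an auto-mock
const mockedFetch = fetch as jest.MockedFunction<typeof fetch>;

describe("isValidTranscriptUrl", () => {
  it("accepts http(s) URLs and rejects everything else", () => {
    expect(isValidTranscriptUrl("https://example.com/t.txt")).toBe(true);
    expect(isValidTranscriptUrl("ftp://example.com/t.txt")).toBe(false);
    expect(isValidTranscriptUrl("not a url")).toBe(false);
  });
});

describe("fetchTranscriptContent", () => {
  it("returns trimmed content on a successful response", async () => {
    mockedFetch.mockResolvedValueOnce({
      ok: true,
      text: async () => "  hello transcript  ",
    } as any);
    const result = await fetchTranscriptContent("https://example.com/t.txt");
    expect(result).toEqual({ success: true, content: "hello transcript" });
  });

  it("reports HTTP errors", async () => {
    mockedFetch.mockResolvedValueOnce({
      ok: false,
      status: 404,
      statusText: "Not Found",
    } as any);
    const result = await fetchTranscriptContent("https://example.com/missing.txt");
    expect(result.success).toBe(false);
    expect(result.error).toBe("HTTP 404: Not Found");
  });
});

describe("extractSessionIdFromTranscript", () => {
  it("pulls a session id from a labeled line", () => {
    expect(
      extractSessionIdFromTranscript("session_id: abc-123-def\nUser: hello")
    ).toBe("abc-123-def");
  });
});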