mirror of
https://github.com/kjanat/livedash-node.git
synced 2026-01-16 13:32:08 +01:00
- Convert ProcessingStatusManager from static class to individual functions
- Refactor processSingleImport function to reduce cognitive complexity
- Fix unused parameters in database-pool.ts event handlers
- Add missing DATABASE_URL configuration to env.ts
- Add pg package and @types/pg dependencies for PostgreSQL support
- Fix tsx command execution by updating package.json scripts to use pnpm exec
- Apply biome formatting fixes for import organization
503 lines
14 KiB
TypeScript
503 lines
14 KiB
TypeScript
// SessionImport to Session processor
|
|
import { ProcessingStage, SentimentCategory } from "@prisma/client";
|
|
import cron from "node-cron";
|
|
import { withRetry } from "./database-retry.js";
|
|
import { getSchedulerConfig } from "./env";
|
|
import { prisma } from "./prisma.js";
|
|
import {
|
|
completeStage,
|
|
failStage,
|
|
initializeSession,
|
|
skipStage,
|
|
startStage,
|
|
} from "./processingStatusManager.js";
|
|
import {
|
|
fetchTranscriptContent,
|
|
isValidTranscriptUrl,
|
|
} from "./transcriptFetcher";
|
|
|
|
/**
 * Shape of a raw import row as consumed by the import processor.
 *
 * Only the identifiers and the two raw timestamps are required; every other
 * field may be absent on a given row.
 */
interface ImportRecord {
  id: string;
  companyId: string;
  // Raw timestamps; createSession feeds both through parseEuropeanDate.
  startTimeRaw: string;
  endTimeRaw: string;
  externalSessionId: string;
  sessionId?: string;
  userId?: string;
  category?: string;
  language?: string;
  sentiment?: string;
  escalated?: boolean;
  forwardedHr?: boolean;
  avgResponseTime?: number;
  messagesSent?: number;
  fullTranscriptUrl?: string;
  rawTranscriptContent?: string;
  aiSummary?: string;
  initialMsg?: string;

  // The four fields below are read by createSession but were missing from this
  // interface. They are optional and nullable so existing callers keep
  // compiling. NOTE(review): value types are inferred from the sibling fields
  // above — confirm against the database schema.
  ipAddress?: string | null;
  countryCode?: string | null;
  avgResponseTimeSeconds?: number | null;
  initialMessage?: string | null;
}
|
|
|
|
/**
 * Parse a European-style timestamp (`DD.MM.YYYY HH:mm:ss`) into a `Date`.
 *
 * The components are interpreted in the process's local time zone. Day, month
 * and the time fields may be one or two digits, the year must be four digits,
 * and the seconds are optional. Anything after the time token is ignored.
 *
 * @param dateStr - Timestamp text such as `"31.12.2024 23:59:59"`.
 * @returns The parsed date.
 * @throws Error when the input is empty, not a string, not in the expected
 *   layout, or names a calendar date that does not exist (e.g. `31.02.2024`).
 */
function parseEuropeanDate(dateStr: string): Date {
  if (!dateStr || typeof dateStr !== "string") {
    throw new Error(`Invalid date string: ${dateStr}`);
  }

  // Split on any whitespace run so repeated spaces or a tab between the date
  // and the time do not produce an empty time token.
  const [datePart, timePart] = dateStr.trim().split(/\s+/);

  if (!datePart || !timePart) {
    throw new Error(
      `Invalid date format: ${dateStr}. Expected format: DD.MM.YYYY HH:mm:ss`
    );
  }

  const [day, month, year] = datePart.split(".");

  if (!day || !month || !year) {
    throw new Error(
      `Invalid date part: ${datePart}. Expected format: DD.MM.YYYY`
    );
  }

  // Only used in the error message below; the Date itself is built from
  // numeric components because parsing a non-ISO string such as
  // "YYYY-MM-DD HH:mm:ss" is implementation-defined.
  const isoDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")} ${timePart}`;
  const failure = `Failed to parse date: ${dateStr} -> ${isoDateStr}`;

  const oneOrTwoDigits = /^\d{1,2}$/;
  const timeMatch = /^(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?$/.exec(timePart);

  if (
    !timeMatch ||
    !oneOrTwoDigits.test(day) ||
    !oneOrTwoDigits.test(month) ||
    !/^\d{4}$/.test(year)
  ) {
    throw new Error(failure);
  }

  const yearNum = Number(year);
  const monthNum = Number(month);
  const dayNum = Number(day);
  const hours = Number(timeMatch[1]);
  const minutes = Number(timeMatch[2]);
  const seconds = Number(timeMatch[3] ?? "0");

  if (
    monthNum < 1 ||
    monthNum > 12 ||
    dayNum < 1 ||
    dayNum > 31 ||
    hours > 23 ||
    minutes > 59 ||
    seconds > 59
  ) {
    throw new Error(failure);
  }

  const date = new Date(yearNum, monthNum - 1, dayNum, hours, minutes, seconds);

  // The Date constructor rolls impossible days over into the next month
  // (31.02 -> early March). Compare the calendar fields to reject that. The
  // hour is deliberately not compared: a local DST gap may shift it.
  if (
    Number.isNaN(date.getTime()) ||
    date.getFullYear() !== yearNum ||
    date.getMonth() !== monthNum - 1 ||
    date.getDate() !== dayNum
  ) {
    throw new Error(failure);
  }

  return date;
}
|
|
|
|
/**
 * Fallback sentiment classifier based on a case-insensitive keyword search.
 *
 * @param sentimentRaw - Raw sentiment text, or null when none was supplied.
 * @returns null for null/empty input; POSITIVE when the text contains
 *   "positive"; otherwise NEGATIVE when it contains "negative"; NEUTRAL for
 *   any other non-empty text.
 */
function _parseFallbackSentiment(
  sentimentRaw: string | null
): SentimentCategory | null {
  if (sentimentRaw) {
    const lowered = sentimentRaw.toLowerCase();

    // "positive" is checked first, so text naming both resolves to POSITIVE.
    return lowered.includes("positive")
      ? SentimentCategory.POSITIVE
      : lowered.includes("negative")
        ? SentimentCategory.NEGATIVE
        : SentimentCategory.NEUTRAL;
  }

  return null;
}
|
|
|
|
/**
 * Fallback boolean parser for raw string flags.
 *
 * @param rawValue - Raw flag text, or null when none was supplied.
 * @returns null for null/empty input; true when the lower-cased text is
 *   exactly one of "true", "1", "yes", "escalated" or "forwarded"; false for
 *   anything else. The input is not trimmed.
 */
function _parseFallbackBoolean(rawValue: string | null): boolean | null {
  if (rawValue) {
    const truthyTokens = new Set(["true", "1", "yes", "escalated", "forwarded"]);
    return truthyTokens.has(rawValue.toLowerCase());
  }

  return null;
}
|
|
|
|
/**
 * Rebuild the Message rows of a session from raw transcript text.
 *
 * Existing messages for the session are deleted first. Each non-blank line
 * then becomes one message. Recognised line shapes:
 *   - `Role: text`
 *   - `[DD.MM.YYYY HH:mm:ss] Role: text`
 * where Role is User, Assistant or System (case-insensitive). Lines without a
 * recognised role prefix are stored with role "unknown".
 *
 * @param sessionId - Session whose messages are replaced.
 * @param transcriptContent - Newline-separated transcript text.
 */
async function parseTranscriptIntoMessages(
  sessionId: string,
  transcriptContent: string
): Promise<void> {
  // Start from a clean slate for this session.
  await prisma.message.deleteMany({ where: { sessionId } });

  const bracketedPattern = /^\[([^\]]+)\]\s*(.+)$/;
  const speakerPattern = /^(User|Assistant|System):\s*(.*)$/i;

  const entries = transcriptContent
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((text) => text.length > 0);

  let order = 0;

  for (const entry of entries) {
    let timestamp: Date | null = null;
    let remainder = entry;

    const bracketed = bracketedPattern.exec(entry);
    if (bracketed) {
      try {
        timestamp = parseEuropeanDate(bracketed[1]);
        remainder = bracketed[2];
      } catch {
        // Unparseable bracket prefix: keep the whole line as the content and
        // leave the timestamp empty.
      }
    }

    const speaker = speakerPattern.exec(remainder);
    const role = speaker ? speaker[1].toLowerCase() : "unknown";
    const content = speaker ? speaker[2].trim() : remainder;

    // A role prefix with nothing after it yields no message and does not
    // consume an order slot.
    if (content) {
      await prisma.message.create({
        data: { sessionId, timestamp, role, content, order },
      });
      order += 1;
    }
  }

  console.log(
    `[Import Processor] ✓ Parsed ${order} messages for session ${sessionId}`
  );
}
|
|
|
|
/**
 * Upsert the Session row that corresponds to an import record.
 *
 * The session is looked up by `importId`. When it exists, its times and
 * import-derived fields are refreshed; otherwise a new row is created for the
 * record's company.
 *
 * @param importRecord - Import row to materialise.
 * @returns The id of the created or updated session.
 * @throws Error when either raw timestamp cannot be parsed.
 */
async function createSession(importRecord: ImportRecord): Promise<string> {
  const startTime = parseEuropeanDate(importRecord.startTimeRaw);
  const endTime = parseEuropeanDate(importRecord.endTimeRaw);

  console.log(
    `[Import Processor] Processing ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`
  );

  const { id: importId, companyId } = importRecord;

  // Fields written identically on both the update and the create path.
  const sharedFields = {
    startTime,
    endTime,
    ipAddress: importRecord.ipAddress,
    country: importRecord.countryCode,
    fullTranscriptUrl: importRecord.fullTranscriptUrl,
    avgResponseTime: importRecord.avgResponseTimeSeconds,
    initialMsg: importRecord.initialMessage,
  };

  const { id } = await prisma.session.upsert({
    where: { importId },
    update: { ...sharedFields },
    create: { companyId, importId, ...sharedFields },
  });

  return id;
}
|
|
|
|
/**
 * Resolve the transcript text for a session and record the outcome of the
 * TRANSCRIPT_FETCH stage.
 *
 * Outcomes:
 *   - no transcript URL                -> stage skipped ("No transcript URL provided")
 *   - URL present, content already set -> stage completed (source "already_fetched")
 *   - URL present but not valid        -> stage skipped ("Invalid transcript URL")
 *   - valid URL, fetch succeeds        -> content stored on the import, stage completed
 *   - valid URL, fetch fails           -> stage failed with the fetch error
 *
 * @param sessionId - Session the stage status is recorded against.
 * @param importRecord - Import row carrying the URL and any stored content.
 * @returns The transcript text, or null when none is available.
 */
async function handleTranscriptFetching(
  sessionId: string,
  importRecord: ImportRecord
): Promise<string | null> {
  // Normalise undefined to null so the declared return type is honoured.
  const existingContent: string | null =
    importRecord.rawTranscriptContent ?? null;
  const transcriptUrl = importRecord.fullTranscriptUrl;

  if (!transcriptUrl) {
    await skipStage(
      sessionId,
      ProcessingStage.TRANSCRIPT_FETCH,
      "No transcript URL provided"
    );
    return existingContent;
  }

  if (existingContent) {
    await completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
      contentLength: existingContent.length,
      source: "already_fetched",
    });
    return existingContent;
  }

  // Previously this case fell through to the "already_fetched" branch and was
  // recorded as completed although nothing had been fetched.
  if (!isValidTranscriptUrl(transcriptUrl)) {
    await skipStage(
      sessionId,
      ProcessingStage.TRANSCRIPT_FETCH,
      "Invalid transcript URL"
    );
    return existingContent;
  }

  await startStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH);

  console.log(
    `[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`
  );

  // Company-level csvUsername/csvPassword are forwarded to the fetcher
  // (presumably as credentials for the transcript endpoint).
  const company = await prisma.company.findUnique({
    where: { id: importRecord.companyId },
    select: { csvUsername: true, csvPassword: true },
  });

  const transcriptResult = await fetchTranscriptContent(
    transcriptUrl,
    company?.csvUsername || undefined,
    company?.csvPassword || undefined
  );

  if (!transcriptResult.success) {
    console.log(
      `[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`
    );
    await failStage(
      sessionId,
      ProcessingStage.TRANSCRIPT_FETCH,
      transcriptResult.error || "Unknown error"
    );
    return existingContent;
  }

  const fetchedContent = transcriptResult.content;
  console.log(
    `[Import Processor] ✓ Fetched transcript for ${importRecord.externalSessionId} (${fetchedContent?.length} chars)`
  );

  // Store the fetched text on the import row so a later run can reuse it.
  await prisma.sessionImport.update({
    where: { id: importRecord.id },
    data: { rawTranscriptContent: fetchedContent },
  });

  await completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
    contentLength: fetchedContent?.length || 0,
    url: transcriptUrl,
  });

  return fetchedContent ?? null;
}
|
|
|
|
/**
 * Run the SESSION_CREATION stage: turn the transcript, when there is one,
 * into Message rows and record the stage as completed.
 *
 * @param sessionId - Session being populated.
 * @param transcriptContent - Transcript text, or null when unavailable.
 */
async function handleSessionCreation(
  sessionId: string,
  transcriptContent: string | null
): Promise<void> {
  const stage = ProcessingStage.SESSION_CREATION;

  await startStage(sessionId, stage);

  // A null or empty transcript produces no messages; the stage still completes.
  if (transcriptContent) {
    await parseTranscriptIntoMessages(sessionId, transcriptContent);
  }

  const completionDetails = {
    hasTranscript: Boolean(transcriptContent),
    transcriptLength: transcriptContent ? transcriptContent.length : 0,
  };

  await completeStage(sessionId, stage, completionDetails);
}
|
|
|
|
/**
 * Record a processing failure against the stage the error most likely
 * belongs to, chosen by case-sensitive keywords in the error message:
 *   - "transcript" or "fetch" -> TRANSCRIPT_FETCH
 *   - "message" or "parse"    -> SESSION_CREATION
 *   - anything else           -> CSV_IMPORT
 *
 * Does nothing when no session id is available.
 *
 * @param sessionId - Session to mark, or null when none was created.
 * @param error - The caught value; non-Error values are stringified.
 */
async function handleProcessingError(
  sessionId: string | null,
  error: unknown
): Promise<void> {
  if (sessionId) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const mentionsAny = (...keywords: string[]): boolean =>
      keywords.some((keyword) => errorMessage.includes(keyword));

    // Transcript keywords are tested first, so they win when both groups match.
    const failedStage = mentionsAny("transcript", "fetch")
      ? ProcessingStage.TRANSCRIPT_FETCH
      : mentionsAny("message", "parse")
        ? ProcessingStage.SESSION_CREATION
        : ProcessingStage.CSV_IMPORT;

    await failStage(sessionId, failedStage, errorMessage);
  }
}
|
|
|
|
/**
 * Turn one import record into a Session, tracking each stage.
 *
 * Steps: upsert the session, initialise its status tracking, mark CSV_IMPORT
 * complete, resolve the transcript, then create messages. Any error thrown by
 * these steps is recorded via handleProcessingError and reported in the
 * result instead of being rethrown.
 *
 * @param importRecord - Import row to process.
 * @returns `{ success: true }`, or `{ success: false, error }` with the
 *   error message.
 */
async function processSingleImport(
  importRecord: ImportRecord
): Promise<{ success: boolean; error?: string }> {
  // Stays null until the session exists, so the error handler can tell
  // whether there is anything to mark as failed.
  let sessionId: string | null = null;

  try {
    const createdId = await createSession(importRecord);
    sessionId = createdId;

    await initializeSession(createdId);
    await completeStage(createdId, ProcessingStage.CSV_IMPORT);

    const transcript = await handleTranscriptFetching(createdId, importRecord);
    await handleSessionCreation(createdId, transcript);

    return { success: true };
  } catch (error) {
    await handleProcessingError(sessionId, error);

    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  }
}
|
|
|
|
/**
 * Process all pending imports, wrapping the run in the shared retry helper.
 *
 * @param batchSize - Number of imports fetched per batch (default 50).
 * @throws Rethrows whatever `withRetry` throws, after logging it.
 */
export async function processQueuedImports(batchSize = 50): Promise<void> {
  console.log("[Import Processor] Starting to process unprocessed imports...");

  const retryOptions = {
    maxRetries: 3,
    initialDelay: 2000,
    maxDelay: 10000,
    backoffMultiplier: 2,
  };

  const runOnce = async (): Promise<void> => {
    await processQueuedImportsInternal(batchSize);
  };

  try {
    await withRetry(runOnce, retryOptions, "processQueuedImports");
  } catch (error) {
    console.error("[Import Processor] Failed after all retries:", error);
    throw error;
  }
}
|
|
|
|
/**
 * Drain the queue of imports that have no Session yet, in batches.
 *
 * Each batch fetches up to `batchSize` of the oldest unprocessed imports that
 * belong to ACTIVE companies and processes them at most five at a time. The
 * loop ends when a batch comes back empty or smaller than `batchSize`.
 *
 * Imports that fail are excluded from later batches of the same run. A
 * failure before the Session row exists leaves the import matching
 * `session: null`; without the exclusion it would be re-fetched on every
 * iteration, and a full batch of such failures would loop forever.
 *
 * @param batchSize - Maximum number of imports fetched per batch (default 50).
 */
async function processQueuedImportsInternal(batchSize = 50): Promise<void> {
  const concurrencyLimit = 5;

  let totalSuccessCount = 0;
  let totalErrorCount = 0;
  let batchNumber = 1;

  // Ids that already failed during this run; never re-queried in this run.
  const failedImportIds: string[] = [];

  while (true) {
    // Find SessionImports that don't have a corresponding Session yet
    const unprocessedImports = await prisma.sessionImport.findMany({
      where: {
        session: null, // No session created yet
        company: {
          status: "ACTIVE", // Only process imports from active companies
        },
        // `undefined` means "no filter" to Prisma, so the first batch is
        // unaffected.
        id:
          failedImportIds.length > 0 ? { notIn: failedImportIds } : undefined,
      },
      take: batchSize,
      orderBy: {
        createdAt: "asc", // Process oldest first
      },
    });

    if (unprocessedImports.length === 0) {
      if (batchNumber === 1) {
        console.log("[Import Processor] No unprocessed imports found");
      } else {
        console.log(
          `[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`
        );
      }
      return;
    }

    console.log(
      `[Import Processor] Processing batch ${batchNumber}: ${unprocessedImports.length} imports...`
    );

    let batchSuccessCount = 0;
    let batchErrorCount = 0;

    // Start each chunk only after the previous one has settled. Mapping the
    // whole batch to promises up front would launch every import at once and
    // make the concurrency limit meaningless.
    for (let i = 0; i < unprocessedImports.length; i += concurrencyLimit) {
      const chunk = unprocessedImports.slice(i, i + concurrencyLimit);
      const chunkResults = await Promise.all(
        chunk.map(async (importRecord) => ({
          importRecord,
          result: await processSingleImport(importRecord),
        }))
      );

      for (const { importRecord, result } of chunkResults) {
        if (result.success) {
          batchSuccessCount++;
          totalSuccessCount++;
          console.log(
            `[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`
          );
        } else {
          batchErrorCount++;
          totalErrorCount++;
          failedImportIds.push(importRecord.id);
          console.log(
            `[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`
          );
        }
      }
    }

    console.log(
      `[Import Processor] Batch ${batchNumber} completed: ${batchSuccessCount} successful, ${batchErrorCount} failed`
    );
    batchNumber++;

    // If this batch was smaller than the batch size, we're done
    if (unprocessedImports.length < batchSize) {
      console.log(
        `[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`
      );
      return;
    }
  }
}
|
|
|
|
/**
 * Start the cron-driven import processing scheduler.
 *
 * Does nothing when the scheduler configuration is disabled. Two environment
 * variables are read:
 *   - IMPORT_PROCESSING_INTERVAL: cron expression, default every five minutes.
 *   - IMPORT_PROCESSING_BATCH_SIZE: positive integer, default 50. Values that
 *     are not a positive integer fall back to the default.
 *
 * A tick is skipped while the previous run is still in progress, so two runs
 * never process the same pending imports at once.
 */
export function startImportProcessingScheduler(): void {
  const config = getSchedulerConfig();

  if (!config.enabled) {
    console.log("[Import Processing Scheduler] Disabled via configuration");
    return;
  }

  const defaultBatchSize = 50;

  // Use a more frequent interval for import processing (every 5 minutes by default)
  const interval = process.env.IMPORT_PROCESSING_INTERVAL || "*/5 * * * *";

  const parsedBatchSize = Number.parseInt(
    process.env.IMPORT_PROCESSING_BATCH_SIZE || String(defaultBatchSize),
    10
  );
  // parseInt yields NaN for non-numeric text; NaN, zero and negative values
  // are not usable as a batch size.
  const batchSizeIsValid =
    Number.isInteger(parsedBatchSize) && parsedBatchSize > 0;
  const batchSize = batchSizeIsValid ? parsedBatchSize : defaultBatchSize;

  if (!batchSizeIsValid) {
    console.warn(
      `[Import Processing Scheduler] Invalid IMPORT_PROCESSING_BATCH_SIZE "${process.env.IMPORT_PROCESSING_BATCH_SIZE}", using ${defaultBatchSize}`
    );
  }

  console.log(
    `[Import Processing Scheduler] Starting with interval: ${interval}`
  );
  console.log(`[Import Processing Scheduler] Batch size: ${batchSize}`);

  // Set while a run is active so an overlapping tick can be skipped.
  let isRunning = false;

  cron.schedule(interval, async () => {
    if (isRunning) {
      console.log(
        "[Import Processing Scheduler] Previous run still in progress, skipping this tick"
      );
      return;
    }

    isRunning = true;
    try {
      await processQueuedImports(batchSize);
    } catch (error) {
      console.error(`[Import Processing Scheduler] Error: ${error}`);
    } finally {
      isRunning = false;
    }
  });
}
|