DB refactor

This commit is contained in:
Max Kowalski
2025-06-27 23:05:46 +02:00
parent 185bb6da58
commit 2dfc49f840
20 changed files with 1607 additions and 339 deletions

View File

@ -1,7 +1,8 @@
// SessionImport to Session processor
import { PrismaClient, ImportStatus, SentimentCategory, SessionCategory } from "@prisma/client";
import { PrismaClient, SentimentCategory, SessionCategory, ProcessingStage } from "@prisma/client";
import { getSchedulerConfig } from "./env";
import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
import { ProcessingStatusManager } from "./processingStatusManager";
import cron from "node-cron";
const prisma = new PrismaClient();
@ -62,21 +63,130 @@ function parseFallbackBoolean(rawValue: string | null): boolean | null {
return ['true', '1', 'yes', 'escalated', 'forwarded'].includes(rawValue.toLowerCase());
}
/**
* Parse transcript content into Message records
*/
async function parseTranscriptIntoMessages(sessionId: string, transcriptContent: string): Promise<void> {
// Clear existing messages for this session
await prisma.message.deleteMany({
where: { sessionId }
});
// Split transcript into lines and parse each message
const lines = transcriptContent.split('\n').filter(line => line.trim());
let order = 0;
for (const line of lines) {
const trimmedLine = line.trim();
if (!trimmedLine) continue;
// Try to parse different formats:
// Format 1: "User: message" or "Assistant: message"
// Format 2: "[timestamp] User: message" or "[timestamp] Assistant: message"
let role = 'unknown';
let content = trimmedLine;
let timestamp: Date | null = null;
// Check for timestamp format: [DD.MM.YYYY HH:mm:ss] Role: content
const timestampMatch = trimmedLine.match(/^\[([^\]]+)\]\s*(.+)$/);
if (timestampMatch) {
try {
timestamp = parseEuropeanDate(timestampMatch[1]);
content = timestampMatch[2];
} catch (error) {
// If timestamp parsing fails, treat the whole line as content
content = trimmedLine;
}
}
// Extract role and message content
const roleMatch = content.match(/^(User|Assistant|System):\s*(.*)$/i);
if (roleMatch) {
role = roleMatch[1].toLowerCase();
content = roleMatch[2].trim();
} else {
// If no role prefix found, try to infer from context or use 'unknown'
role = 'unknown';
}
// Skip empty content
if (!content) continue;
// Create message record
await prisma.message.create({
data: {
sessionId,
timestamp,
role,
content,
order,
},
});
order++;
}
console.log(`[Import Processor] ✓ Parsed ${order} messages for session ${sessionId}`);
}
/**
* Process a single SessionImport record into a Session record
* NEW STRATEGY: Only copy minimal fields, let AI processing handle the rest
* Uses new unified processing status tracking
*/
async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
let sessionId: string | null = null;
try {
// Parse dates using European format parser
const startTime = parseEuropeanDate(importRecord.startTimeRaw);
const endTime = parseEuropeanDate(importRecord.endTimeRaw);
console.log(`[Import Processor] Parsed dates for ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);
console.log(`[Import Processor] Processing ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);
// Fetch transcript content if URL is provided and not already fetched
// Create or update Session record with MINIMAL processing
const session = await prisma.session.upsert({
where: {
importId: importRecord.id,
},
update: {
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
},
create: {
companyId: importRecord.companyId,
importId: importRecord.id,
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
},
});
sessionId = session.id;
// Initialize processing status for this session
await ProcessingStatusManager.initializeSession(sessionId);
// Mark CSV_IMPORT as completed
await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.CSV_IMPORT);
// Handle transcript fetching
let transcriptContent = importRecord.rawTranscriptContent;
if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
await ProcessingStatusManager.startStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH);
console.log(`[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`);
// Get company credentials for transcript fetching
@ -100,125 +210,123 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
where: { id: importRecord.id },
data: { rawTranscriptContent: transcriptContent },
});
await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
contentLength: transcriptContent?.length || 0,
url: importRecord.fullTranscriptUrl
});
} else {
console.log(`[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`);
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, transcriptResult.error || 'Unknown error');
}
} else if (!importRecord.fullTranscriptUrl) {
// No transcript URL available - skip this stage
await ProcessingStatusManager.skipStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, 'No transcript URL provided');
} else {
// Transcript already fetched
await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
contentLength: transcriptContent?.length || 0,
source: 'already_fetched'
});
}
// Create or update Session record with MINIMAL processing
// Only copy fields that don't need AI analysis
const session = await prisma.session.upsert({
where: {
importId: importRecord.id,
},
update: {
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
// AI-processed fields: Leave empty, will be filled by AI processing
// language: null, // AI will detect
// messagesSent: null, // AI will count from Messages
// sentiment: null, // AI will analyze
// escalated: null, // AI will detect
// forwardedHr: null, // AI will detect
// category: null, // AI will categorize
// summary: null, // AI will generate
processed: false, // Will be processed later by AI
},
create: {
companyId: importRecord.companyId,
importId: importRecord.id,
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
// AI-processed fields: Leave empty, will be filled by AI processing
// All these will be null initially and filled by AI
processed: false, // Will be processed later by AI
},
});
// Handle session creation (parse messages)
await ProcessingStatusManager.startStage(sessionId, ProcessingStage.SESSION_CREATION);
if (transcriptContent) {
await parseTranscriptIntoMessages(sessionId, transcriptContent);
}
// Update import status to DONE
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: {
status: ImportStatus.DONE,
processedAt: new Date(),
errorMsg: null,
},
await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.SESSION_CREATION, {
hasTranscript: !!transcriptContent,
transcriptLength: transcriptContent?.length || 0
});
return { success: true };
} catch (error) {
// Update import status to ERROR
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: {
status: ImportStatus.ERROR,
errorMsg: error instanceof Error ? error.message : String(error),
},
});
const errorMessage = error instanceof Error ? error.message : String(error);
// Mark the current stage as failed if we have a sessionId
if (sessionId) {
// Determine which stage failed based on the error
if (errorMessage.includes('transcript') || errorMessage.includes('fetch')) {
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, errorMessage);
} else if (errorMessage.includes('message') || errorMessage.includes('parse')) {
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.SESSION_CREATION, errorMessage);
} else {
// General failure - mark CSV_IMPORT as failed
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.CSV_IMPORT, errorMessage);
}
}
return {
success: false,
error: error instanceof Error ? error.message : String(error),
error: errorMessage,
};
}
}
/**
* Process queued SessionImport records into Session records
* Process unprocessed SessionImport records into Session records
* Uses new processing status system to find imports that need processing
*/
export async function processQueuedImports(batchSize: number = 50): Promise<void> {
console.log('[Import Processor] Starting to process queued imports...');
console.log('[Import Processor] Starting to process unprocessed imports...');
// Find queued imports
const queuedImports = await prisma.sessionImport.findMany({
where: {
status: ImportStatus.QUEUED,
},
take: batchSize,
orderBy: {
createdAt: 'asc', // Process oldest first
},
});
let totalSuccessCount = 0;
let totalErrorCount = 0;
let batchNumber = 1;
if (queuedImports.length === 0) {
console.log('[Import Processor] No queued imports found');
return;
}
while (true) {
// Find SessionImports that don't have a corresponding Session yet
const unprocessedImports = await prisma.sessionImport.findMany({
where: {
session: null, // No session created yet
},
take: batchSize,
orderBy: {
createdAt: 'asc', // Process oldest first
},
});
console.log(`[Import Processor] Processing ${queuedImports.length} queued imports...`);
if (unprocessedImports.length === 0) {
if (batchNumber === 1) {
console.log('[Import Processor] No unprocessed imports found');
} else {
console.log(`[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`);
}
return;
}
let successCount = 0;
let errorCount = 0;
console.log(`[Import Processor] Processing batch ${batchNumber}: ${unprocessedImports.length} imports...`);
// Process each import
for (const importRecord of queuedImports) {
const result = await processSingleImport(importRecord);
if (result.success) {
successCount++;
console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
} else {
errorCount++;
console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
let batchSuccessCount = 0;
let batchErrorCount = 0;
// Process each import in this batch
for (const importRecord of unprocessedImports) {
const result = await processSingleImport(importRecord);
if (result.success) {
batchSuccessCount++;
totalSuccessCount++;
console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
} else {
batchErrorCount++;
totalErrorCount++;
console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
}
}
console.log(`[Import Processor] Batch ${batchNumber} completed: ${batchSuccessCount} successful, ${batchErrorCount} failed`);
batchNumber++;
// If this batch was smaller than the batch size, we're done
if (unprocessedImports.length < batchSize) {
console.log(`[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`);
return;
}
}
console.log(`[Import Processor] Completed: ${successCount} successful, ${errorCount} failed`);
}
/**

View File

@ -7,25 +7,62 @@ import { getSchedulerConfig } from "./schedulerConfig";
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
// Model pricing in USD (update as needed)
const MODEL_PRICING = {
'gpt-4o-2024-08-06': {
promptTokenCost: 0.0000025, // $2.50 per 1M tokens
completionTokenCost: 0.00001, // $10.00 per 1M tokens
},
'gpt-4-turbo': {
promptTokenCost: 0.00001, // $10.00 per 1M tokens
completionTokenCost: 0.00003, // $30.00 per 1M tokens
},
'gpt-4o': {
promptTokenCost: 0.000005, // $5.00 per 1M tokens
completionTokenCost: 0.000015, // $15.00 per 1M tokens
}
} as const;
const DEFAULT_MODEL = process.env.OPENAI_MODEL || "gpt-4o";
const USD_TO_EUR_RATE = 0.85; // Update periodically or fetch from API
/**
* Get company's default AI model
*/
async function getCompanyAIModel(companyId: string): Promise<string> {
const companyModel = await prisma.companyAIModel.findFirst({
where: {
companyId,
isDefault: true,
},
include: {
aiModel: true,
},
});
return companyModel?.aiModel.name || DEFAULT_MODEL;
}
/**
* Get current pricing for an AI model
*/
async function getCurrentModelPricing(modelName: string): Promise<{
promptTokenCost: number;
completionTokenCost: number;
} | null> {
const model = await prisma.aIModel.findUnique({
where: { name: modelName },
include: {
pricing: {
where: {
effectiveFrom: { lte: new Date() },
OR: [
{ effectiveUntil: null },
{ effectiveUntil: { gte: new Date() } }
]
},
orderBy: { effectiveFrom: 'desc' },
take: 1,
},
},
});
if (!model || model.pricing.length === 0) {
return null;
}
const pricing = model.pricing[0];
return {
promptTokenCost: pricing.promptTokenCost,
completionTokenCost: pricing.completionTokenCost,
};
}
interface ProcessedData {
language: string;
sentiment: "POSITIVE" | "NEUTRAL" | "NEGATIVE";
@ -53,10 +90,20 @@ async function recordAIProcessingRequest(
): Promise<void> {
const usage = openaiResponse.usage;
const model = openaiResponse.model;
const pricing = MODEL_PRICING[model as keyof typeof MODEL_PRICING] || MODEL_PRICING['gpt-4-turbo']; // fallback
const promptCost = usage.prompt_tokens * pricing.promptTokenCost;
const completionCost = usage.completion_tokens * pricing.completionTokenCost;
// Get current pricing from database
const pricing = await getCurrentModelPricing(model);
// Fallback pricing if not found in database
const fallbackPricing = {
promptTokenCost: 0.00001, // $10.00 per 1M tokens (gpt-4-turbo rate)
completionTokenCost: 0.00003, // $30.00 per 1M tokens
};
const finalPricing = pricing || fallbackPricing;
const promptCost = usage.prompt_tokens * finalPricing.promptTokenCost;
const completionCost = usage.completion_tokens * finalPricing.completionTokenCost;
const totalCostUsd = promptCost + completionCost;
const totalCostEur = totalCostUsd * USD_TO_EUR_RATE;
@ -80,8 +127,8 @@ async function recordAIProcessingRequest(
acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens || null,
rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens || null,
promptTokenCost: pricing.promptTokenCost,
completionTokenCost: pricing.completionTokenCost,
promptTokenCost: finalPricing.promptTokenCost,
completionTokenCost: finalPricing.completionTokenCost,
totalCostEur,
processingType,
@ -177,11 +224,14 @@ async function calculateEndTime(sessionId: string, fallbackEndTime: Date): Promi
/**
* Processes a session transcript using OpenAI API
*/
async function processTranscriptWithOpenAI(sessionId: string, transcript: string): Promise<ProcessedData> {
async function processTranscriptWithOpenAI(sessionId: string, transcript: string, companyId: string): Promise<ProcessedData> {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Get company's AI model
const aiModel = await getCompanyAIModel(companyId);
// Updated system message with exact enum values
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
@ -218,7 +268,7 @@ async function processTranscriptWithOpenAI(sessionId: string, transcript: string
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4o", // Use latest model
model: aiModel, // Use company's configured AI model
messages: [
{
role: "system",
@ -348,7 +398,7 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
)
.join("\n");
const processedData = await processTranscriptWithOpenAI(session.id, transcript);
const processedData = await processTranscriptWithOpenAI(session.id, transcript, session.companyId);
// Calculate messagesSent from actual Message records
const messagesSent = await calculateMessagesSent(session.id);

View File

@ -0,0 +1,295 @@
import { PrismaClient, ProcessingStage, ProcessingStatus } from '@prisma/client';
const prisma = new PrismaClient();
/**
* Centralized processing status management
*/
export class ProcessingStatusManager {
/**
* Initialize processing status for a session with all stages set to PENDING
*/
static async initializeSession(sessionId: string): Promise<void> {
const stages = [
ProcessingStage.CSV_IMPORT,
ProcessingStage.TRANSCRIPT_FETCH,
ProcessingStage.SESSION_CREATION,
ProcessingStage.AI_ANALYSIS,
ProcessingStage.QUESTION_EXTRACTION,
];
// Create all processing status records for this session
await prisma.sessionProcessingStatus.createMany({
data: stages.map(stage => ({
sessionId,
stage,
status: ProcessingStatus.PENDING,
})),
skipDuplicates: true, // In case some already exist
});
}
/**
* Start a processing stage
*/
static async startStage(
sessionId: string,
stage: ProcessingStage,
metadata?: any
): Promise<void> {
await prisma.sessionProcessingStatus.upsert({
where: {
sessionId_stage: { sessionId, stage }
},
update: {
status: ProcessingStatus.IN_PROGRESS,
startedAt: new Date(),
errorMessage: null,
metadata: metadata || null,
},
create: {
sessionId,
stage,
status: ProcessingStatus.IN_PROGRESS,
startedAt: new Date(),
metadata: metadata || null,
},
});
}
/**
* Complete a processing stage successfully
*/
static async completeStage(
sessionId: string,
stage: ProcessingStage,
metadata?: any
): Promise<void> {
await prisma.sessionProcessingStatus.upsert({
where: {
sessionId_stage: { sessionId, stage }
},
update: {
status: ProcessingStatus.COMPLETED,
completedAt: new Date(),
errorMessage: null,
metadata: metadata || null,
},
create: {
sessionId,
stage,
status: ProcessingStatus.COMPLETED,
startedAt: new Date(),
completedAt: new Date(),
metadata: metadata || null,
},
});
}
/**
* Mark a processing stage as failed
*/
static async failStage(
sessionId: string,
stage: ProcessingStage,
errorMessage: string,
metadata?: any
): Promise<void> {
await prisma.sessionProcessingStatus.upsert({
where: {
sessionId_stage: { sessionId, stage }
},
update: {
status: ProcessingStatus.FAILED,
completedAt: new Date(),
errorMessage,
retryCount: { increment: 1 },
metadata: metadata || null,
},
create: {
sessionId,
stage,
status: ProcessingStatus.FAILED,
startedAt: new Date(),
completedAt: new Date(),
errorMessage,
retryCount: 1,
metadata: metadata || null,
},
});
}
/**
* Skip a processing stage (e.g., no transcript URL available)
*/
static async skipStage(
sessionId: string,
stage: ProcessingStage,
reason: string
): Promise<void> {
await prisma.sessionProcessingStatus.upsert({
where: {
sessionId_stage: { sessionId, stage }
},
update: {
status: ProcessingStatus.SKIPPED,
completedAt: new Date(),
errorMessage: reason,
},
create: {
sessionId,
stage,
status: ProcessingStatus.SKIPPED,
startedAt: new Date(),
completedAt: new Date(),
errorMessage: reason,
},
});
}
/**
* Get processing status for a specific session
*/
static async getSessionStatus(sessionId: string) {
return await prisma.sessionProcessingStatus.findMany({
where: { sessionId },
orderBy: { stage: 'asc' },
});
}
/**
* Get sessions that need processing for a specific stage
*/
static async getSessionsNeedingProcessing(
stage: ProcessingStage,
limit: number = 50
) {
return await prisma.sessionProcessingStatus.findMany({
where: {
stage,
status: ProcessingStatus.PENDING,
},
include: {
session: {
include: {
import: true,
company: true,
},
},
},
take: limit,
orderBy: { session: { createdAt: 'asc' } },
});
}
/**
* Get pipeline status overview
*/
static async getPipelineStatus() {
// Get counts by stage and status
const statusCounts = await prisma.sessionProcessingStatus.groupBy({
by: ['stage', 'status'],
_count: { id: true },
});
// Get total sessions
const totalSessions = await prisma.session.count();
// Organize the data
const pipeline: Record<string, Record<string, number>> = {};
for (const { stage, status, _count } of statusCounts) {
if (!pipeline[stage]) {
pipeline[stage] = {};
}
pipeline[stage][status] = _count.id;
}
return {
totalSessions,
pipeline,
};
}
/**
* Get sessions with failed processing
*/
static async getFailedSessions(stage?: ProcessingStage) {
const where: any = {
status: ProcessingStatus.FAILED,
};
if (stage) {
where.stage = stage;
}
return await prisma.sessionProcessingStatus.findMany({
where,
include: {
session: {
include: {
import: true,
},
},
},
orderBy: { completedAt: 'desc' },
});
}
/**
* Reset a failed stage for retry
*/
static async resetStageForRetry(sessionId: string, stage: ProcessingStage): Promise<void> {
await prisma.sessionProcessingStatus.update({
where: {
sessionId_stage: { sessionId, stage }
},
data: {
status: ProcessingStatus.PENDING,
startedAt: null,
completedAt: null,
errorMessage: null,
},
});
}
/**
* Check if a session has completed a specific stage
*/
static async hasCompletedStage(sessionId: string, stage: ProcessingStage): Promise<boolean> {
const status = await prisma.sessionProcessingStatus.findUnique({
where: {
sessionId_stage: { sessionId, stage }
},
});
return status?.status === ProcessingStatus.COMPLETED;
}
/**
* Check if a session is ready for a specific stage (previous stages completed)
*/
static async isReadyForStage(sessionId: string, stage: ProcessingStage): Promise<boolean> {
const stageOrder = [
ProcessingStage.CSV_IMPORT,
ProcessingStage.TRANSCRIPT_FETCH,
ProcessingStage.SESSION_CREATION,
ProcessingStage.AI_ANALYSIS,
ProcessingStage.QUESTION_EXTRACTION,
];
const currentStageIndex = stageOrder.indexOf(stage);
if (currentStageIndex === 0) return true; // First stage is always ready
// Check if all previous stages are completed
const previousStages = stageOrder.slice(0, currentStageIndex);
for (const prevStage of previousStages) {
const isCompleted = await this.hasCompletedStage(sessionId, prevStage);
if (!isCompleted) return false;
}
return true;
}
}