feat: Enhance session processing and metrics

- Updated session processing commands in documentation for clarity.
- Removed transcript content fetching from session processing, allowing on-demand retrieval.
- Improved session metrics calculations and added new metrics for dashboard.
- Refactored processing scheduler to handle sessions in parallel with concurrency limits.
- Added manual trigger API for processing unprocessed sessions with admin checks.
- Implemented scripts for fetching and parsing transcripts, checking transcript content, and testing processing status.
- Updated Prisma schema to enforce default values for processed sessions.
- Added error handling and logging improvements throughout the processing workflow.
This commit is contained in:
Max Kowalski
2025-06-26 17:12:42 +02:00
parent 8f3c1e0f7c
commit 8c43a35632
20 changed files with 851 additions and 229 deletions

View File

@ -561,15 +561,8 @@ export async function fetchAndStoreSessionsForAllCompanies() {
? session.endTime
: new Date();
// Fetch transcript content if URL is available
let transcriptContent = null;
if (session.fullTranscriptUrl) {
transcriptContent = await fetchTranscriptContent(
session.fullTranscriptUrl,
company.csvUsername,
company.csvPassword
);
}
// Note: transcriptContent field was removed from schema
// Transcript content can be fetched on-demand from fullTranscriptUrl
// Check if the session already exists
const existingSession = await prisma.session.findUnique({
@ -608,7 +601,6 @@ export async function fetchAndStoreSessionsForAllCompanies() {
? session.forwardedHr
: null,
fullTranscriptUrl: session.fullTranscriptUrl || null,
transcriptContent: transcriptContent, // Add the transcript content
avgResponseTime:
typeof session.avgResponseTime === "number"
? session.avgResponseTime

View File

@ -349,7 +349,7 @@ export function sessionMetrics(
let totalTokensEur = 0;
const wordCounts: { [key: string]: number } = {};
let alerts = 0;
// New metrics variables
const hourlySessionCounts: { [hour: string]: number } = {};
let resolvedChatsCount = 0;
@ -530,7 +530,7 @@ export function sessionMetrics(
.forEach(msg => {
const content = msg.content.trim();
// Simple heuristic: if message ends with ? or contains question words, treat as question
if (content.endsWith('?') ||
if (content.endsWith('?') ||
/\b(what|when|where|why|how|who|which|can|could|would|will|is|are|do|does|did)\b/i.test(content)) {
questionCounts[content] = (questionCounts[content] || 0) + 1;
}
@ -540,7 +540,7 @@ export function sessionMetrics(
// 3. Extract questions from initial message as fallback
if (session.initialMsg) {
const content = session.initialMsg.trim();
if (content.endsWith('?') ||
if (content.endsWith('?') ||
/\b(what|when|where|why|how|who|which|can|could|would|will|is|are|do|does|did)\b/i.test(content)) {
questionCounts[content] = (questionCounts[content] || 0) + 1;
}
@ -611,10 +611,10 @@ export function sessionMetrics(
);
// Calculate new metrics
// 1. Average Daily Costs (euros)
const avgDailyCosts = numDaysWithSessions > 0 ? totalTokensEur / numDaysWithSessions : 0;
// 2. Peak Usage Time
let peakUsageTime = "N/A";
if (Object.keys(hourlySessionCounts).length > 0) {
@ -624,7 +624,7 @@ export function sessionMetrics(
const endHour = (peakHourNum + 1) % 24;
peakUsageTime = `${peakHour}-${endHour.toString().padStart(2, '0')}:00`;
}
// 3. Resolved Chats Percentage
const resolvedChatsPercentage = totalSessions > 0 ? (resolvedChatsCount / totalSessions) * 100 : 0;
@ -672,7 +672,7 @@ export function sessionMetrics(
lastUpdated: Date.now(),
totalSessionDuration,
validSessionsForDuration,
// New metrics
avgDailyCosts,
peakUsageTime,

View File

@ -14,7 +14,7 @@ const envPath = join(__dirname, '..', '.env.local');
try {
const envFile = readFileSync(envPath, 'utf8');
const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
envVars.forEach(line => {
const [key, ...valueParts] = line.split('=');
if (key && valueParts.length > 0) {
@ -216,24 +216,130 @@ function validateOpenAIResponse(data) {
}
/**
* Process unprocessed sessions
* Process a single session
* @param {Object} session The session to process
* @returns {Promise<Object>} Result object with success/error info
*/
export async function processUnprocessedSessions() {
async function processSingleSession(session) {
if (session.messages.length === 0) {
return {
sessionId: session.id,
success: false,
error: "Session has no messages",
};
}
try {
// Convert messages back to transcript format for OpenAI processing
const transcript = session.messages
.map(
(msg) =>
`[${new Date(msg.timestamp)
.toLocaleString("en-GB", {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
})
.replace(",", "")}] ${msg.role}: ${msg.content}`
)
.join("\n");
const processedData = await processTranscriptWithOpenAI(
session.id,
transcript
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
return {
sessionId: session.id,
success: true,
};
} catch (error) {
return {
sessionId: session.id,
success: false,
error: error.message,
};
}
}
/**
* Process sessions in parallel with concurrency limit
* @param {Array} sessions Array of sessions to process
* @param {number} maxConcurrency Maximum number of concurrent processing tasks
* @returns {Promise<Object>} Processing results
*/
async function processSessionsInParallel(sessions, maxConcurrency = 5) {
const results = [];
const executing = [];
for (const session of sessions) {
const promise = processSingleSession(session).then((result) => {
process.stdout.write(
result.success
? `[ProcessingScheduler] ✓ Successfully processed session ${result.sessionId}\n`
: `[ProcessingScheduler] ✗ Failed to process session ${result.sessionId}: ${result.error}\n`
);
return result;
});
results.push(promise);
executing.push(promise);
if (executing.length >= maxConcurrency) {
await Promise.race(executing);
executing.splice(
executing.findIndex((p) => p === promise),
1
);
}
}
return Promise.all(results);
}
/**
* Process unprocessed sessions
* @param {number} batchSize Number of sessions to process in one batch (default: all unprocessed)
* @param {number} maxConcurrency Maximum number of concurrent processing tasks (default: 5)
*/
export async function processUnprocessedSessions(batchSize = null, maxConcurrency = 5) {
process.stdout.write(
"[ProcessingScheduler] Starting to process unprocessed sessions...\n"
);
// Find sessions that have messages but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
const queryOptions = {
where: {
AND: [
{ messages: { some: {} } }, // Must have messages
{
OR: [
{ processed: false },
{ processed: null }
]
}
{ processed: false }, // Only unprocessed sessions (no longer checking for null)
],
},
include: {
@ -241,8 +347,14 @@ export async function processUnprocessedSessions() {
orderBy: { order: "asc" },
},
},
take: 10, // Process in batches to avoid overloading the system
});
};
// Add batch size limit if specified
if (batchSize && batchSize > 0) {
queryOptions.take = batchSize;
}
const sessionsToProcess = await prisma.session.findMany(queryOptions);
// Filter to only sessions that have messages
const sessionsWithMessages = sessionsToProcess.filter(
@ -257,80 +369,15 @@ export async function processUnprocessedSessions() {
}
process.stdout.write(
`[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process.\n`
`[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process (max concurrency: ${maxConcurrency}).\n`
);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsWithMessages) {
if (session.messages.length === 0) {
process.stderr.write(
`[ProcessingScheduler] Session ${session.id} has no messages, skipping.\n`
);
continue;
}
const startTime = Date.now();
const results = await processSessionsInParallel(sessionsWithMessages, maxConcurrency);
const endTime = Date.now();
process.stdout.write(
`[ProcessingScheduler] Processing messages for session ${session.id}...\n`
);
try {
// Convert messages back to transcript format for OpenAI processing
const transcript = session.messages
.map(
(msg) =>
`[${new Date(msg.timestamp)
.toLocaleString("en-GB", {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
})
.replace(",", "")}] ${msg.role}: ${msg.content}`
)
.join("\n");
const processedData = await processTranscriptWithOpenAI(
session.id,
transcript
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
process.stdout.write(
`[ProcessingScheduler] Successfully processed session ${session.id}.\n`
);
successCount++;
} catch (error) {
process.stderr.write(
`[ProcessingScheduler] Error processing session ${session.id}: ${error}\n`
);
errorCount++;
}
}
const successCount = results.filter((r) => r.success).length;
const errorCount = results.filter((r) => !r.success).length;
process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
process.stdout.write(
@ -339,6 +386,9 @@ export async function processUnprocessedSessions() {
process.stdout.write(
`[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
);
process.stdout.write(
`[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
);
}
/**

View File

@ -1,14 +1,38 @@
// node-cron job to process unprocessed sessions every hour
// Session processing scheduler - TypeScript version
import cron from "node-cron";
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');
try {
const envFile = readFileSync(envPath, 'utf8');
const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
envVars.forEach(line => {
const [key, ...valueParts] = line.split('=');
if (key && valueParts.length > 0) {
const value = valueParts.join('=').trim();
if (!process.env[key.trim()]) {
process.env[key.trim()] = value;
}
}
});
} catch (error) {
// Silently fail if .env.local doesn't exist
}
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
// Define the expected response structure from OpenAI
interface OpenAIProcessedData {
interface ProcessedData {
language: string;
messages_sent: number;
sentiment: "positive" | "neutral" | "negative";
@ -20,16 +44,16 @@ interface OpenAIProcessedData {
session_id: string;
}
interface ProcessingResult {
sessionId: string;
success: boolean;
error?: string;
}
/**
* Processes a session transcript using OpenAI API
* @param sessionId The session ID
* @param transcript The transcript content to process
* @returns Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(
sessionId: string,
transcript: string
): Promise<OpenAIProcessedData> {
async function processTranscriptWithOpenAI(sessionId: string, transcript: string): Promise<ProcessedData> {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
@ -103,7 +127,7 @@ async function processTranscriptWithOpenAI(
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = (await response.json()) as any;
const data: any = await response.json();
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
@ -118,11 +142,8 @@ async function processTranscriptWithOpenAI(
/**
* Validates the OpenAI response against our expected schema
* @param data The data to validate
*/
function validateOpenAIResponse(
data: any
): asserts data is OpenAIProcessedData {
function validateOpenAIResponse(data: any): void {
// Check required fields
const requiredFields = [
"language",
@ -208,31 +229,146 @@ function validateOpenAIResponse(
}
}
/**
* Process a single session
*/
async function processSingleSession(session: any): Promise<ProcessingResult> {
if (session.messages.length === 0) {
return {
sessionId: session.id,
success: false,
error: "Session has no messages",
};
}
try {
// Convert messages back to transcript format for OpenAI processing
const transcript = session.messages
.map(
(msg: any) =>
`[${new Date(msg.timestamp)
.toLocaleString("en-GB", {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
})
.replace(",", "")}] ${msg.role}: ${msg.content}`
)
.join("\n");
const processedData = await processTranscriptWithOpenAI(
session.id,
transcript
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
return {
sessionId: session.id,
success: true,
};
} catch (error) {
return {
sessionId: session.id,
success: false,
error: error instanceof Error ? error.message : String(error),
};
}
}
/**
* Process sessions in parallel with concurrency limit
*/
async function processSessionsInParallel(sessions: any[], maxConcurrency: number = 5): Promise<ProcessingResult[]> {
const results: Promise<ProcessingResult>[] = [];
const executing: Promise<ProcessingResult>[] = [];
for (const session of sessions) {
const promise = processSingleSession(session).then((result) => {
process.stdout.write(
result.success
? `[ProcessingScheduler] ✓ Successfully processed session ${result.sessionId}\n`
: `[ProcessingScheduler] ✗ Failed to process session ${result.sessionId}: ${result.error}\n`
);
return result;
});
results.push(promise);
executing.push(promise);
if (executing.length >= maxConcurrency) {
await Promise.race(executing);
const completedIndex = executing.findIndex(p => p === promise);
if (completedIndex !== -1) {
executing.splice(completedIndex, 1);
}
}
}
return Promise.all(results);
}
/**
* Process unprocessed sessions
*/
async function processUnprocessedSessions() {
export async function processUnprocessedSessions(batchSize: number | null = null, maxConcurrency: number = 5): Promise<void> {
process.stdout.write(
"[ProcessingScheduler] Starting to process unprocessed sessions...\n"
);
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
// Find sessions that have messages but haven't been processed
const queryOptions: any = {
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
{ messages: { some: {} } }, // Must have messages
{ processed: false }, // Only unprocessed sessions
],
},
select: {
id: true,
transcriptContent: true,
include: {
messages: {
orderBy: { order: "asc" },
},
},
take: 10, // Process in batches to avoid overloading the system
});
};
if (sessionsToProcess.length === 0) {
// Add batch size limit if specified
if (batchSize && batchSize > 0) {
queryOptions.take = batchSize;
}
const sessionsToProcess = await prisma.session.findMany(queryOptions);
// Filter to only sessions that have messages
const sessionsWithMessages = sessionsToProcess.filter(
(session: any) => session.messages && session.messages.length > 0
);
if (sessionsWithMessages.length === 0) {
process.stdout.write(
"[ProcessingScheduler] No sessions found requiring processing.\n"
);
@ -240,64 +376,15 @@ async function processUnprocessedSessions() {
}
process.stdout.write(
`[ProcessingScheduler] Found ${sessionsToProcess.length} sessions to process.\n`
`[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process (max concurrency: ${maxConcurrency}).\n`
);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
process.stderr.write(
`[ProcessingScheduler] Session ${session.id} has no transcript content, skipping.\n`
);
continue;
}
const startTime = Date.now();
const results = await processSessionsInParallel(sessionsWithMessages, maxConcurrency);
const endTime = Date.now();
process.stdout.write(
`[ProcessingScheduler] Processing transcript for session ${session.id}...\n`
);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap: Record<string, number> = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
process.stdout.write(
`[ProcessingScheduler] Successfully processed session ${session.id}.\n`
);
successCount++;
} catch (error) {
process.stderr.write(
`[ProcessingScheduler] Error processing session ${session.id}: ${error}\n`
);
errorCount++;
}
}
const successCount = results.filter((r) => r.success).length;
const errorCount = results.filter((r) => !r.success).length;
process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
process.stdout.write(
@ -306,12 +393,15 @@ async function processUnprocessedSessions() {
process.stdout.write(
`[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
);
process.stdout.write(
`[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
);
}
/**
* Start the processing scheduler
*/
export function startProcessingScheduler() {
export function startProcessingScheduler(): void {
// Process unprocessed sessions every hour
cron.schedule("0 * * * *", async () => {
try {

View File

@ -157,7 +157,7 @@ export interface MetricsResult {
usersTrend?: number; // e.g., percentage change in uniqueUsers
avgSessionTimeTrend?: number; // e.g., percentage change in avgSessionLength
avgResponseTimeTrend?: number; // e.g., percentage change in avgResponseTime
// New metrics for enhanced dashboard
avgDailyCosts?: number; // Average daily costs in euros
peakUsageTime?: string; // Peak usage time (e.g., "14:00-15:00")