Files
livedash-node/lib/processingScheduler.ts
Max Kowalski 3196dabdf2 feat: Implement session processing and refresh schedulers
- Added processingScheduler.js and processingScheduler.ts to handle session transcript processing using OpenAI API.
- Implemented a new scheduler (scheduler.js and schedulers.ts) for refreshing sessions every 15 minutes.
- Updated Prisma migrations to add new fields for processed sessions, including questions, sentimentCategory, and summary.
- Created scripts (process_sessions.mjs and process_sessions.ts) for manual processing of unprocessed sessions.
- Enhanced server.js and server.mjs to initialize schedulers on server start.
2025-06-25 16:14:01 +02:00

294 lines
9.0 KiB
TypeScript

// node-cron job to process unprocessed sessions every hour
import cron from "node-cron";
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
// Define the expected response structure from OpenAI
interface OpenAIProcessedData {
language: string;
messages_sent: number;
sentiment: "positive" | "neutral" | "negative";
escalated: boolean;
forwarded_hr: boolean;
category: string;
questions: string[];
summary: string;
session_id: string;
}
/**
* Processes a session transcript using OpenAI API
* @param sessionId The session ID
* @param transcript The transcript content to process
* @returns Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(
sessionId: string,
transcript: string
): Promise<OpenAIProcessedData> {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)
Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;
try {
const response = await fetch(OPENAI_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4-turbo",
messages: [
{
role: "system",
content: systemMessage,
},
{
role: "user",
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
response_format: { type: "json_object" },
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = await response.json() as any;
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
process.stderr.write(`Error processing transcript with OpenAI: ${error}\n`);
throw error;
}
}
/**
* Validates the OpenAI response against our expected schema
* @param data The data to validate
*/
function validateOpenAIResponse(data: any): asserts data is OpenAIProcessedData {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
];
for (const field of requiredFields) {
if (!(field in data)) {
throw new Error(`Missing required field: ${field}`);
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error("Invalid language format. Expected ISO 639-1 code (e.g., 'en')");
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error("Invalid sentiment. Expected 'positive', 'neutral', or 'negative'");
}
if (typeof data.escalated !== "boolean") {
throw new Error("Invalid escalated. Expected boolean");
}
if (typeof data.forwarded_hr !== "boolean") {
throw new Error("Invalid forwarded_hr. Expected boolean");
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
];
if (!validCategories.includes(data.category)) {
throw new Error(`Invalid category. Expected one of: ${validCategories.join(", ")}`);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (typeof data.summary !== "string" || data.summary.length < 10 || data.summary.length > 300) {
throw new Error("Invalid summary. Expected string between 10-300 characters");
}
if (typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
}
}
/**
* Process unprocessed sessions
*/
async function processUnprocessedSessions() {
process.stdout.write("[ProcessingScheduler] Starting to process unprocessed sessions...\n");
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
],
},
select: {
id: true,
transcriptContent: true,
},
take: 10, // Process in batches to avoid overloading the system
});
if (sessionsToProcess.length === 0) {
process.stdout.write("[ProcessingScheduler] No sessions found requiring processing.\n");
return;
}
process.stdout.write(`[ProcessingScheduler] Found ${sessionsToProcess.length} sessions to process.\n`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
process.stderr.write(`[ProcessingScheduler] Session ${session.id} has no transcript content, skipping.\n`);
continue;
}
process.stdout.write(`[ProcessingScheduler] Processing transcript for session ${session.id}...\n`);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap: Record<string, number> = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
process.stdout.write(`[ProcessingScheduler] Successfully processed session ${session.id}.\n`);
successCount++;
} catch (error) {
process.stderr.write(`[ProcessingScheduler] Error processing session ${session.id}: ${error}\n`);
errorCount++;
}
}
process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
process.stdout.write(`[ProcessingScheduler] Successfully processed: ${successCount} sessions.\n`);
process.stdout.write(`[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`);
}
/**
* Start the processing scheduler
*/
export function startProcessingScheduler() {
// Process unprocessed sessions every hour
cron.schedule("0 * * * *", async () => {
try {
await processUnprocessedSessions();
} catch (error) {
process.stderr.write(`[ProcessingScheduler] Error in scheduler: ${error}\n`);
}
});
process.stdout.write("[ProcessingScheduler] Started processing scheduler (runs hourly).\n");
}