Files
livedash-node/scripts/process_sessions.ts
Max Kowalski 9e095e1a43 Refactor code for improved readability and consistency
- Updated formatting in SessionDetails component for better readability.
- Enhanced documentation in scheduler-fixes.md to clarify issues and solutions.
- Improved error handling and logging in csvFetcher.js and processingScheduler.js.
- Standardized code formatting across various scripts and components for consistency.
- Added validation checks for CSV URLs and transcript content to prevent processing errors.
- Enhanced logging messages for better tracking of processing status and errors.
2025-06-25 17:46:23 +02:00

301 lines
8.4 KiB
TypeScript

import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
// Define the expected response structure from OpenAI
interface OpenAIProcessedData {
language: string;
messages_sent: number;
sentiment: "positive" | "neutral" | "negative";
escalated: boolean;
forwarded_hr: boolean;
category: string;
questions: string[];
summary: string;
session_id: string;
}
/**
* Processes a session transcript using OpenAI API
* @param sessionId The session ID
* @param transcript The transcript content to process
* @returns Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(
sessionId: string,
transcript: string
): Promise<OpenAIProcessedData> {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)
Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;
try {
const response = await fetch(OPENAI_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4-turbo",
messages: [
{
role: "system",
content: systemMessage,
},
{
role: "user",
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
response_format: { type: "json_object" },
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = (await response.json()) as any;
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
console.error(`Error processing transcript with OpenAI:`, error);
throw error;
}
}
/**
* Validates the OpenAI response against our expected schema
* @param data The data to validate
*/
function validateOpenAIResponse(
data: any
): asserts data is OpenAIProcessedData {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
];
for (const field of requiredFields) {
if (!(field in data)) {
throw new Error(`Missing required field: ${field}`);
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
}
if (typeof data.escalated !== "boolean") {
throw new Error("Invalid escalated. Expected boolean");
}
if (typeof data.forwarded_hr !== "boolean") {
throw new Error("Invalid forwarded_hr. Expected boolean");
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
}
if (typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
}
}
/**
* Main function to process unprocessed sessions
*/
async function processUnprocessedSessions() {
console.log("Starting to process unprocessed sessions...");
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
],
},
select: {
id: true,
transcriptContent: true,
},
});
if (sessionsToProcess.length === 0) {
console.log("No sessions found requiring processing.");
return;
}
console.log(`Found ${sessionsToProcess.length} sessions to process.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
console.warn(
`Session ${session.id} has no transcript content, skipping.`
);
continue;
}
console.log(`Processing transcript for session ${session.id}...`);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap: Record<string, number> = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
console.log(`Successfully processed session ${session.id}.`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
errorCount++;
}
}
console.log("Session processing complete.");
console.log(`Successfully processed: ${successCount} sessions.`);
console.log(`Failed to process: ${errorCount} sessions.`);
}
// Run the main function
processUnprocessedSessions()
.catch((e) => {
console.error("An error occurred during the script execution:", e);
process.exitCode = 1;
})
.finally(async () => {
await prisma.$disconnect();
});