Mirror of https://github.com/kjanat/livedash-node.git, synced 2026-01-16 11:32:13 +01:00
feat: Refactor data processing pipeline with AI cost tracking and enhanced session management
- Updated environment configuration to include Postgres database settings.
- Enhanced import processing to minimize field copying and rely on AI for analysis.
- Implemented detailed AI processing request tracking, including token usage and costs.
- Added new models for Question and SessionQuestion to manage user inquiries separately.
- Improved session processing scheduler with AI cost reporting functionality.
- Created a test script to validate the refactored pipeline and display processing statistics.
- Updated Prisma schema and migration files to reflect new database structure and relationships.
@@ -20,3 +20,7 @@ IMPORT_PROCESSING_BATCH_SIZE="50" # Number of imports to process at on
 SESSION_PROCESSING_INTERVAL="0 * * * *" # Cron expression for AI session processing (every hour)
 SESSION_PROCESSING_BATCH_SIZE="0" # 0 = unlimited sessions, >0 = specific limit
 SESSION_PROCESSING_CONCURRENCY="5" # How many sessions to process in parallel
+
+# Postgres Database Configuration
+DATABASE_URL_TEST="postgresql://"
+DATABASE_URL="postgresql://"
@@ -17,6 +17,10 @@ SESSION_PROCESSING_INTERVAL="0 * * * *" # Every hour (cron format) - AI pro
 SESSION_PROCESSING_BATCH_SIZE="0" # 0 = process all sessions, >0 = limit number
 SESSION_PROCESSING_CONCURRENCY="5" # Number of sessions to process in parallel
 
+# Postgres Database Configuration
+DATABASE_URL_TEST="postgresql://"
+DATABASE_URL="postgresql://"
+
 # Example configurations:
 # - For development (no schedulers): SCHEDULER_ENABLED=false
 # - For testing (every 5 minutes): CSV_IMPORT_INTERVAL=*/5 * * * *
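The placeholder URLs above carry only the scheme; a complete Postgres connection string follows the usual libpq URI shape. For illustration only (credentials, host, and database name invented, not from this commit):

DATABASE_URL="postgresql://user:password@localhost:5432/livedash?schema=public"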
.gitignore (vendored): 3 changes
@@ -261,3 +261,6 @@ Thumbs.db
 /playwright-report/
 /blob-report/
 /playwright/.cache/
+
+# OpenAI API request samples
+sample-openai-request.json
@@ -1,5 +1,5 @@
 // SessionImport to Session processor
-import { PrismaClient, ImportStatus, SentimentCategory } from "@prisma/client";
+import { PrismaClient, ImportStatus, SentimentCategory, SessionCategory } from "@prisma/client";
 import { getSchedulerConfig } from "./env";
 import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
 import cron from "node-cron";
@@ -38,8 +38,33 @@ function parseEuropeanDate(dateStr: string): Date {
   return date;
 }
 
+/**
+ * Helper function to parse sentiment from raw string (fallback only)
+ */
+function parseFallbackSentiment(sentimentRaw: string | null): SentimentCategory | null {
+  if (!sentimentRaw) return null;
+
+  const sentimentStr = sentimentRaw.toLowerCase();
+  if (sentimentStr.includes('positive')) {
+    return SentimentCategory.POSITIVE;
+  } else if (sentimentStr.includes('negative')) {
+    return SentimentCategory.NEGATIVE;
+  } else {
+    return SentimentCategory.NEUTRAL;
+  }
+}
+
+/**
+ * Helper function to parse boolean from raw string (fallback only)
+ */
+function parseFallbackBoolean(rawValue: string | null): boolean | null {
+  if (!rawValue) return null;
+  return ['true', '1', 'yes', 'escalated', 'forwarded'].includes(rawValue.toLowerCase());
+}
+
 /**
  * Process a single SessionImport record into a Session record
+ * NEW STRATEGY: Only copy minimal fields, let AI processing handle the rest
  */
 async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
   try {
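As a quick sanity check on the fallback parsers above, here is how they behave on a few hypothetical raw values (inputs invented for illustration):

// Hypothetical inputs, for illustration only
parseFallbackSentiment("Mostly Positive");   // SentimentCategory.POSITIVE
parseFallbackSentiment("somewhat negative"); // SentimentCategory.NEGATIVE
parseFallbackSentiment("unclear");           // SentimentCategory.NEUTRAL (default branch)
parseFallbackSentiment(null);                // null
parseFallbackBoolean("Escalated");           // true ('escalated' is in the accepted list)
parseFallbackBoolean("no");                  // false ('no' is not in the accepted list)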
@@ -49,34 +74,6 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
 
     console.log(`[Import Processor] Parsed dates for ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);
 
-    // Process sentiment
-    let sentiment: number | null = null;
-    let sentimentCategory: SentimentCategory | null = null;
-
-    if (importRecord.sentimentRaw) {
-      const sentimentStr = importRecord.sentimentRaw.toLowerCase();
-      if (sentimentStr.includes('positive')) {
-        sentiment = 0.8;
-        sentimentCategory = SentimentCategory.POSITIVE;
-      } else if (sentimentStr.includes('negative')) {
-        sentiment = -0.8;
-        sentimentCategory = SentimentCategory.NEGATIVE;
-      } else {
-        sentiment = 0.0;
-        sentimentCategory = SentimentCategory.NEUTRAL;
-      }
-    }
-
-    // Process boolean fields
-    const escalated = importRecord.escalatedRaw ?
-      ['true', '1', 'yes', 'escalated'].includes(importRecord.escalatedRaw.toLowerCase()) : null;
-
-    const forwardedHr = importRecord.forwardedHrRaw ?
-      ['true', '1', 'yes', 'forwarded'].includes(importRecord.forwardedHrRaw.toLowerCase()) : null;
-
-    // Keep country code as-is, will be processed by OpenAI later
-    const country = importRecord.countryCode;
-
     // Fetch transcript content if URL is provided and not already fetched
     let transcriptContent = importRecord.rawTranscriptContent;
     if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
@@ -108,7 +105,8 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
       }
     }
 
-    // Create or update Session record
+    // Create or update Session record with MINIMAL processing
+    // Only copy fields that don't need AI analysis
    const session = await prisma.session.upsert({
       where: {
         importId: importRecord.id,
@@ -116,20 +114,22 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
       update: {
         startTime,
         endTime,
+        // Direct copies (minimal processing)
         ipAddress: importRecord.ipAddress,
-        country,
-        language: importRecord.language,
-        messagesSent: importRecord.messagesSent,
-        sentiment,
-        sentimentCategory,
-        escalated,
-        forwardedHr,
+        country: importRecord.countryCode, // Keep as country code
         fullTranscriptUrl: importRecord.fullTranscriptUrl,
         avgResponseTime: importRecord.avgResponseTimeSeconds,
-        tokens: importRecord.tokens,
-        tokensEur: importRecord.tokensEur,
-        category: importRecord.category,
         initialMsg: importRecord.initialMessage,
+
+        // AI-processed fields: Leave empty, will be filled by AI processing
+        // language: null, // AI will detect
+        // messagesSent: null, // AI will count from Messages
+        // sentiment: null, // AI will analyze
+        // escalated: null, // AI will detect
+        // forwardedHr: null, // AI will detect
+        // category: null, // AI will categorize
+        // summary: null, // AI will generate
+
         processed: false, // Will be processed later by AI
       },
       create: {
@@ -137,20 +137,15 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
         importId: importRecord.id,
         startTime,
         endTime,
+        // Direct copies (minimal processing)
         ipAddress: importRecord.ipAddress,
-        country,
-        language: importRecord.language,
-        messagesSent: importRecord.messagesSent,
-        sentiment,
-        sentimentCategory,
-        escalated,
-        forwardedHr,
+        country: importRecord.countryCode, // Keep as country code
         fullTranscriptUrl: importRecord.fullTranscriptUrl,
         avgResponseTime: importRecord.avgResponseTimeSeconds,
-        tokens: importRecord.tokens,
-        tokensEur: importRecord.tokensEur,
-        category: importRecord.category,
         initialMsg: importRecord.initialMessage,
+
+        // AI-processed fields: Leave empty, will be filled by AI processing
+        // All these will be null initially and filled by AI
         processed: false, // Will be processed later by AI
       },
     });
@@ -1,6 +1,6 @@
-// Session processing scheduler with configurable intervals and batch sizes
+// Enhanced session processing scheduler with AI cost tracking and question management
 import cron from "node-cron";
-import { PrismaClient } from "@prisma/client";
+import { PrismaClient, SentimentCategory, SessionCategory } from "@prisma/client";
 import fetch from "node-fetch";
 import { getSchedulerConfig } from "./schedulerConfig";
 
@@ -8,13 +8,30 @@ const prisma = new PrismaClient();
 const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
 const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
 
+// Model pricing in USD (update as needed)
+const MODEL_PRICING = {
+  'gpt-4o-2024-08-06': {
+    promptTokenCost: 0.0000025, // $2.50 per 1M tokens
+    completionTokenCost: 0.00001, // $10.00 per 1M tokens
+  },
+  'gpt-4-turbo': {
+    promptTokenCost: 0.00001, // $10.00 per 1M tokens
+    completionTokenCost: 0.00003, // $30.00 per 1M tokens
+  },
+  'gpt-4o': {
+    promptTokenCost: 0.000005, // $5.00 per 1M tokens
+    completionTokenCost: 0.000015, // $15.00 per 1M tokens
+  }
+} as const;
+
+const USD_TO_EUR_RATE = 0.85; // Update periodically or fetch from API
+
 interface ProcessedData {
   language: string;
-  messages_sent: number;
-  sentiment: "positive" | "neutral" | "negative";
+  sentiment: "POSITIVE" | "NEUTRAL" | "NEGATIVE";
   escalated: boolean;
   forwarded_hr: boolean;
-  category: string;
+  category: "SCHEDULE_HOURS" | "LEAVE_VACATION" | "SICK_LEAVE_RECOVERY" | "SALARY_COMPENSATION" | "CONTRACT_HOURS" | "ONBOARDING" | "OFFBOARDING" | "WORKWEAR_STAFF_PASS" | "TEAM_CONTACTS" | "PERSONAL_QUESTIONS" | "ACCESS_LOGIN" | "SOCIAL_QUESTIONS" | "UNRECOGNIZED_OTHER";
   questions: string[];
   summary: string;
   session_id: string;
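To make the pricing table concrete, here is a worked example of the arithmetic that recordAIProcessingRequest performs later in this file (token counts invented for illustration):

// Hypothetical gpt-4o call: 1,200 prompt tokens, 300 completion tokens
const pricing = MODEL_PRICING['gpt-4o'];
const costUsd = 1200 * pricing.promptTokenCost      // 1200 * 0.000005  = $0.0060
              + 300 * pricing.completionTokenCost;  //  300 * 0.000015  = $0.0045
const costEur = costUsd * USD_TO_EUR_RATE;          // $0.0105 * 0.85  ≈ €0.0089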
@@ -26,6 +43,137 @@ interface ProcessingResult {
   error?: string;
 }
 
+/**
+ * Record AI processing request with detailed token tracking
+ */
+async function recordAIProcessingRequest(
+  sessionId: string,
+  openaiResponse: any,
+  processingType: string = 'session_analysis'
+): Promise<void> {
+  const usage = openaiResponse.usage;
+  const model = openaiResponse.model;
+  const pricing = MODEL_PRICING[model as keyof typeof MODEL_PRICING] || MODEL_PRICING['gpt-4-turbo']; // fallback
+
+  const promptCost = usage.prompt_tokens * pricing.promptTokenCost;
+  const completionCost = usage.completion_tokens * pricing.completionTokenCost;
+  const totalCostUsd = promptCost + completionCost;
+  const totalCostEur = totalCostUsd * USD_TO_EUR_RATE;
+
+  await prisma.aIProcessingRequest.create({
+    data: {
+      sessionId,
+      openaiRequestId: openaiResponse.id,
+      model: openaiResponse.model,
+      serviceTier: openaiResponse.service_tier,
+      systemFingerprint: openaiResponse.system_fingerprint,
+
+      promptTokens: usage.prompt_tokens,
+      completionTokens: usage.completion_tokens,
+      totalTokens: usage.total_tokens,
+
+      // Detailed breakdown
+      cachedTokens: usage.prompt_tokens_details?.cached_tokens || null,
+      audioTokensPrompt: usage.prompt_tokens_details?.audio_tokens || null,
+      reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || null,
+      audioTokensCompletion: usage.completion_tokens_details?.audio_tokens || null,
+      acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens || null,
+      rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens || null,
+
+      promptTokenCost: pricing.promptTokenCost,
+      completionTokenCost: pricing.completionTokenCost,
+      totalCostEur,
+
+      processingType,
+      success: true,
+      completedAt: new Date(),
+    }
+  });
+}
+
+/**
+ * Record failed AI processing request
+ */
+async function recordFailedAIProcessingRequest(
+  sessionId: string,
+  processingType: string,
+  errorMessage: string
+): Promise<void> {
+  await prisma.aIProcessingRequest.create({
+    data: {
+      sessionId,
+      model: 'unknown',
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: 0,
+      promptTokenCost: 0,
+      completionTokenCost: 0,
+      totalCostEur: 0,
+      processingType,
+      success: false,
+      errorMessage,
+      completedAt: new Date(),
+    }
+  });
+}
+
+/**
+ * Process questions into separate Question and SessionQuestion tables
+ */
+async function processQuestions(sessionId: string, questions: string[]): Promise<void> {
+  // Clear existing questions for this session
+  await prisma.sessionQuestion.deleteMany({
+    where: { sessionId }
+  });
+
+  // Process each question
+  for (let index = 0; index < questions.length; index++) {
+    const questionText = questions[index];
+    if (!questionText.trim()) continue; // Skip empty questions
+
+    // Find or create question
+    const question = await prisma.question.upsert({
+      where: { content: questionText.trim() },
+      create: { content: questionText.trim() },
+      update: {}
+    });
+
+    // Link to session
+    await prisma.sessionQuestion.create({
+      data: {
+        sessionId,
+        questionId: question.id,
+        order: index
+      }
+    });
+  }
+}
+
+/**
+ * Calculate messagesSent from actual Message records
+ */
+async function calculateMessagesSent(sessionId: string): Promise<number> {
+  const userMessageCount = await prisma.message.count({
+    where: {
+      sessionId,
+      role: { in: ['user', 'User'] } // Handle both cases
+    }
+  });
+  return userMessageCount;
+}
+
+/**
+ * Calculate endTime from latest Message timestamp
+ */
+async function calculateEndTime(sessionId: string, fallbackEndTime: Date): Promise<Date> {
+  const latestMessage = await prisma.message.findFirst({
+    where: { sessionId },
+    orderBy: { timestamp: 'desc' }
+  });
+
+  return latestMessage?.timestamp || fallbackEndTime;
+}
+
 /**
  * Processes a session transcript using OpenAI API
  */
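Note that because Question.content is declared unique (see the schema changes below) and processQuestions upserts on that content, identical question text from different sessions collapses into a single Question row; only the SessionQuestion link rows multiply. A sketch of the resulting data (IDs invented):

// Two sessions that both asked "How do I request vacation days?"
// Question:        { id: "q1", content: "How do I request vacation days?" }  <- one shared row
// SessionQuestion: { sessionId: "s1", questionId: "q1", order: 0 }
// SessionQuestion: { sessionId: "s2", questionId: "q1", order: 2 }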
@@ -34,44 +182,32 @@ async function processTranscriptWithOpenAI(sessionId: string, transcript: string
     throw new Error("OPENAI_API_KEY environment variable is not set");
   }
 
-  // Create a system message with instructions
+  // Updated system message with exact enum values
   const systemMessage = `
 You are an AI assistant tasked with analyzing chat transcripts.
-Extract the following information from the transcript:
-1. The primary language used by the user (ISO 639-1 code)
-2. Number of messages sent by the user
-3. Overall sentiment (positive, neutral, or negative)
-4. Whether the conversation was escalated
-5. Whether HR contact was mentioned or provided
-6. The best-fitting category for the conversation from this list:
-   - Schedule & Hours
-   - Leave & Vacation
-   - Sick Leave & Recovery
-   - Salary & Compensation
-   - Contract & Hours
-   - Onboarding
-   - Offboarding
-   - Workwear & Staff Pass
-   - Team & Contacts
-   - Personal Questions
-   - Access & Login
-   - Social questions
-   - Unrecognized / Other
-7. Up to 5 paraphrased questions asked by the user (in English)
-8. A brief summary of the conversation (10-300 characters)
-
-Return the data in JSON format matching this schema:
+Extract the following information from the transcript and return it in EXACT JSON format:
 {
-  "language": "ISO 639-1 code",
-  "messages_sent": number,
-  "sentiment": "positive|neutral|negative",
+  "language": "ISO 639-1 code (e.g., 'en', 'nl', 'de')",
+  "sentiment": "POSITIVE|NEUTRAL|NEGATIVE",
   "escalated": boolean,
   "forwarded_hr": boolean,
-  "category": "one of the categories listed above",
+  "category": "SCHEDULE_HOURS|LEAVE_VACATION|SICK_LEAVE_RECOVERY|SALARY_COMPENSATION|CONTRACT_HOURS|ONBOARDING|OFFBOARDING|WORKWEAR_STAFF_PASS|TEAM_CONTACTS|PERSONAL_QUESTIONS|ACCESS_LOGIN|SOCIAL_QUESTIONS|UNRECOGNIZED_OTHER",
   "questions": ["question 1", "question 2", ...],
-  "summary": "brief summary",
+  "summary": "brief summary (10-300 chars)",
   "session_id": "${sessionId}"
 }
+
+Rules:
+- language: Primary language used by the user (ISO 639-1 code)
+- sentiment: Overall emotional tone of the conversation
+- escalated: Was the issue escalated to a supervisor/manager?
+- forwarded_hr: Was HR contact mentioned or provided?
+- category: Best fitting category for the main topic (use exact enum values above)
+- questions: Up to 5 paraphrased user questions (in English)
+- summary: Brief conversation summary (10-300 characters)
+
+IMPORTANT: Use EXACT enum values as specified above.
 `;
 
   try {
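For reference, a model reply that satisfies both this prompt and the validator further down would look roughly like the following (all field values invented for illustration):

{
  "language": "nl",
  "sentiment": "NEUTRAL",
  "escalated": false,
  "forwarded_hr": true,
  "category": "LEAVE_VACATION",
  "questions": ["How many vacation days do I have left?"],
  "summary": "Employee asked about remaining vacation days and was referred to HR.",
  "session_id": "abc-123"
}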
@@ -82,7 +218,7 @@ async function processTranscriptWithOpenAI(sessionId: string, transcript: string
         Authorization: `Bearer ${OPENAI_API_KEY}`,
       },
       body: JSON.stringify({
-        model: "gpt-4-turbo",
+        model: "gpt-4o", // Use latest model
         messages: [
           {
             role: "system",
@@ -103,14 +239,25 @@ async function processTranscriptWithOpenAI(sessionId: string, transcript: string
       throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
     }
 
-    const data: any = await response.json();
-    const processedData = JSON.parse(data.choices[0].message.content);
+    const openaiResponse: any = await response.json();
+
+    // Record the AI processing request for cost tracking
+    await recordAIProcessingRequest(sessionId, openaiResponse, 'session_analysis');
+
+    const processedData = JSON.parse(openaiResponse.choices[0].message.content);
+
     // Validate the response against our expected schema
     validateOpenAIResponse(processedData);
 
     return processedData;
   } catch (error) {
+    // Record failed request
+    await recordFailedAIProcessingRequest(
+      sessionId,
+      'session_analysis',
+      error instanceof Error ? error.message : String(error)
+    );
+
     process.stderr.write(`Error processing transcript with OpenAI: ${error}\n`);
     throw error;
   }
@@ -120,17 +267,9 @@ async function processTranscriptWithOpenAI(sessionId: string, transcript: string
 * Validates the OpenAI response against our expected schema
 */
function validateOpenAIResponse(data: any): void {
-  // Check required fields
   const requiredFields = [
-    "language",
-    "messages_sent",
-    "sentiment",
-    "escalated",
-    "forwarded_hr",
-    "category",
-    "questions",
-    "summary",
-    "session_id",
+    "language", "sentiment", "escalated", "forwarded_hr",
+    "category", "questions", "summary", "session_id"
   ];
 
   for (const field of requiredFields) {
@@ -139,21 +278,13 @@ function validateOpenAIResponse(data: any): void {
     }
   }
 
-  // Validate field types
+  // Validate field types and values
   if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
-    throw new Error(
-      "Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
-    );
+    throw new Error("Invalid language format. Expected ISO 639-1 code (e.g., 'en')");
   }
 
-  if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
-    throw new Error("Invalid messages_sent. Expected non-negative number");
-  }
-
-  if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
-    throw new Error(
-      "Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
-    );
+  if (!["POSITIVE", "NEUTRAL", "NEGATIVE"].includes(data.sentiment)) {
+    throw new Error("Invalid sentiment. Expected 'POSITIVE', 'NEUTRAL', or 'NEGATIVE'");
   }
 
   if (typeof data.escalated !== "boolean") {
@@ -165,39 +296,22 @@ function validateOpenAIResponse(data: any): void {
   }
 
   const validCategories = [
-    "Schedule & Hours",
-    "Leave & Vacation",
-    "Sick Leave & Recovery",
-    "Salary & Compensation",
-    "Contract & Hours",
-    "Onboarding",
-    "Offboarding",
-    "Workwear & Staff Pass",
-    "Team & Contacts",
-    "Personal Questions",
-    "Access & Login",
-    "Social questions",
-    "Unrecognized / Other",
+    "SCHEDULE_HOURS", "LEAVE_VACATION", "SICK_LEAVE_RECOVERY", "SALARY_COMPENSATION",
+    "CONTRACT_HOURS", "ONBOARDING", "OFFBOARDING", "WORKWEAR_STAFF_PASS",
+    "TEAM_CONTACTS", "PERSONAL_QUESTIONS", "ACCESS_LOGIN", "SOCIAL_QUESTIONS",
+    "UNRECOGNIZED_OTHER"
   ];
 
   if (!validCategories.includes(data.category)) {
-    throw new Error(
-      `Invalid category. Expected one of: ${validCategories.join(", ")}`
-    );
+    throw new Error(`Invalid category. Expected one of: ${validCategories.join(", ")}`);
   }
 
   if (!Array.isArray(data.questions)) {
     throw new Error("Invalid questions. Expected array of strings");
   }
 
-  if (
-    typeof data.summary !== "string" ||
-    data.summary.length < 10 ||
-    data.summary.length > 300
-  ) {
-    throw new Error(
-      "Invalid summary. Expected string between 10-300 characters"
-    );
+  if (typeof data.summary !== "string" || data.summary.length < 10 || data.summary.length > 300) {
+    throw new Error("Invalid summary. Expected string between 10-300 characters");
   }
 
   if (typeof data.session_id !== "string") {
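The validator throws on the first violation, so a payload still using the old lowercase sentiment values is rejected before anything is written to the database. A hypothetical call:

// Throws: Invalid sentiment. Expected 'POSITIVE', 'NEUTRAL', or 'NEGATIVE'
validateOpenAIResponse({
  language: "en", sentiment: "positive", escalated: false, forwarded_hr: false,
  category: "ONBOARDING", questions: [], summary: "Short onboarding question answered.",
  session_id: "s1",
});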
@@ -220,45 +334,42 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
   try {
     // Convert messages back to transcript format for OpenAI processing
     const transcript = session.messages
-      .map(
-        (msg: any) =>
-          `[${new Date(msg.timestamp)
-            .toLocaleString("en-GB", {
-              day: "2-digit",
-              month: "2-digit",
-              year: "numeric",
-              hour: "2-digit",
-              minute: "2-digit",
-              second: "2-digit",
-            })
-            .replace(",", "")}] ${msg.role}: ${msg.content}`
+      .map((msg: any) =>
+        `[${new Date(msg.timestamp)
+          .toLocaleString("en-GB", {
+            day: "2-digit",
+            month: "2-digit",
+            year: "numeric",
+            hour: "2-digit",
+            minute: "2-digit",
+            second: "2-digit",
+          })
+          .replace(",", "")}] ${msg.role}: ${msg.content}`
       )
       .join("\n");
 
-    const processedData = await processTranscriptWithOpenAI(
-      session.id,
-      transcript
-    );
+    const processedData = await processTranscriptWithOpenAI(session.id, transcript);
 
-    // Map sentiment string to float value for compatibility with existing data
-    const sentimentMap = {
-      positive: 0.8,
-      neutral: 0.0,
-      negative: -0.8,
-    };
+    // Calculate messagesSent from actual Message records
+    const messagesSent = await calculateMessagesSent(session.id);
+
+    // Calculate endTime from latest Message timestamp
+    const calculatedEndTime = await calculateEndTime(session.id, session.endTime);
+
+    // Process questions into separate tables
+    await processQuestions(session.id, processedData.questions);
 
     // Update the session with processed data
     await prisma.session.update({
       where: { id: session.id },
       data: {
         language: processedData.language,
-        messagesSent: processedData.messages_sent,
-        sentiment: sentimentMap[processedData.sentiment] || 0,
-        sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
+        messagesSent: messagesSent, // Calculated from Messages, not AI
+        endTime: calculatedEndTime, // Use calculated endTime if different
+        sentiment: processedData.sentiment as SentimentCategory,
         escalated: processedData.escalated,
         forwardedHr: processedData.forwarded_hr,
-        category: processedData.category,
-        questions: JSON.stringify(processedData.questions),
+        category: processedData.category as SessionCategory,
         summary: processedData.summary,
         processed: true,
       },
@@ -313,9 +424,7 @@ async function processSessionsInParallel(sessions: any[], maxConcurrency: number
 * Process unprocessed sessions
 */
export async function processUnprocessedSessions(batchSize: number | null = null, maxConcurrency: number = 5): Promise<void> {
-  process.stdout.write(
-    "[ProcessingScheduler] Starting to process unprocessed sessions...\n"
-  );
+  process.stdout.write("[ProcessingScheduler] Starting to process unprocessed sessions...\n");
 
   // Find sessions that have messages but haven't been processed
   const queryOptions: any = {
@@ -345,9 +454,7 @@ export async function processUnprocessedSessions(batchSize: number | null = null
   );
 
   if (sessionsWithMessages.length === 0) {
-    process.stdout.write(
-      "[ProcessingScheduler] No sessions found requiring processing.\n"
-    );
+    process.stdout.write("[ProcessingScheduler] No sessions found requiring processing.\n");
     return;
   }
 
@@ -363,15 +470,46 @@ export async function processUnprocessedSessions(batchSize: number | null = null
   const errorCount = results.filter((r) => !r.success).length;
 
   process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
-  process.stdout.write(
-    `[ProcessingScheduler] Successfully processed: ${successCount} sessions.\n`
-  );
-  process.stdout.write(
-    `[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
-  );
-  process.stdout.write(
-    `[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
-  );
+  process.stdout.write(`[ProcessingScheduler] Successfully processed: ${successCount} sessions.\n`);
+  process.stdout.write(`[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`);
+  process.stdout.write(`[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`);
+}
+
+/**
+ * Get total AI processing costs for reporting
+ */
+export async function getAIProcessingCosts(): Promise<{
+  totalCostEur: number;
+  totalTokens: number;
+  requestCount: number;
+  successfulRequests: number;
+  failedRequests: number;
+}> {
+  const result = await prisma.aIProcessingRequest.aggregate({
+    _sum: {
+      totalCostEur: true,
+      totalTokens: true,
+    },
+    _count: {
+      id: true,
+    },
+  });
+
+  const successfulRequests = await prisma.aIProcessingRequest.count({
+    where: { success: true }
+  });
+
+  const failedRequests = await prisma.aIProcessingRequest.count({
+    where: { success: false }
+  });
+
+  return {
+    totalCostEur: result._sum.totalCostEur || 0,
+    totalTokens: result._sum.totalTokens || 0,
+    requestCount: result._count.id || 0,
+    successfulRequests,
+    failedRequests,
+  };
 }
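getAIProcessingCosts can be called from any script that shares the Prisma client; a minimal usage sketch (the log formatting is illustrative):

const costs = await getAIProcessingCosts();
console.log(
  `AI spend: €${costs.totalCostEur.toFixed(4)} across ${costs.requestCount} requests ` +
  `(${costs.successfulRequests} ok, ${costs.failedRequests} failed, ${costs.totalTokens} tokens)`
);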
@@ -396,9 +534,7 @@ export function startProcessingScheduler(): void {
         config.sessionProcessing.concurrency
       );
     } catch (error) {
-      process.stderr.write(
-        `[ProcessingScheduler] Error in scheduler: ${error}\n`
-      );
+      process.stderr.write(`[ProcessingScheduler] Error in scheduler: ${error}\n`);
     }
   });
 }
@@ -5,10 +5,8 @@ const nextConfig = {
   reactStrictMode: true,
   // Allow cross-origin requests from specific origins in development
   allowedDevOrigins: [
-    "192.168.1.2",
     "localhost",
-    "propc",
-    "test123.kjanat.com",
+    "127.0.0.1"
   ],
 };
 
@@ -0,0 +1,183 @@
-- CreateTable
CREATE TABLE "Company" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "name" TEXT NOT NULL,
    "csvUrl" TEXT NOT NULL,
    "csvUsername" TEXT,
    "csvPassword" TEXT,
    "sentimentAlert" REAL,
    "dashboardOpts" JSONB,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" DATETIME NOT NULL
);

-- CreateTable
CREATE TABLE "User" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "email" TEXT NOT NULL,
    "password" TEXT NOT NULL,
    "role" TEXT NOT NULL DEFAULT 'USER',
    "companyId" TEXT NOT NULL,
    "resetToken" TEXT,
    "resetTokenExpiry" DATETIME,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" DATETIME NOT NULL,
    CONSTRAINT "User_companyId_fkey" FOREIGN KEY ("companyId") REFERENCES "Company" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);

-- CreateTable
CREATE TABLE "Session" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "companyId" TEXT NOT NULL,
    "importId" TEXT,
    "startTime" DATETIME NOT NULL,
    "endTime" DATETIME NOT NULL,
    "ipAddress" TEXT,
    "country" TEXT,
    "fullTranscriptUrl" TEXT,
    "avgResponseTime" REAL,
    "initialMsg" TEXT,
    "language" TEXT,
    "messagesSent" INTEGER,
    "sentiment" TEXT,
    "escalated" BOOLEAN,
    "forwardedHr" BOOLEAN,
    "category" TEXT,
    "summary" TEXT,
    "processed" BOOLEAN NOT NULL DEFAULT false,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" DATETIME NOT NULL,
    CONSTRAINT "Session_companyId_fkey" FOREIGN KEY ("companyId") REFERENCES "Company" ("id") ON DELETE CASCADE ON UPDATE CASCADE,
    CONSTRAINT "Session_importId_fkey" FOREIGN KEY ("importId") REFERENCES "SessionImport" ("id") ON DELETE SET NULL ON UPDATE CASCADE
);

-- CreateTable
CREATE TABLE "SessionImport" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "companyId" TEXT NOT NULL,
    "externalSessionId" TEXT NOT NULL,
    "startTimeRaw" TEXT NOT NULL,
    "endTimeRaw" TEXT NOT NULL,
    "ipAddress" TEXT,
    "countryCode" TEXT,
    "language" TEXT,
    "messagesSent" INTEGER,
    "sentimentRaw" TEXT,
    "escalatedRaw" TEXT,
    "forwardedHrRaw" TEXT,
    "fullTranscriptUrl" TEXT,
    "avgResponseTimeSeconds" REAL,
    "tokens" INTEGER,
    "tokensEur" REAL,
    "category" TEXT,
    "initialMessage" TEXT,
    "rawTranscriptContent" TEXT,
    "status" TEXT NOT NULL DEFAULT 'QUEUED',
    "errorMsg" TEXT,
    "processedAt" DATETIME,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT "SessionImport_companyId_fkey" FOREIGN KEY ("companyId") REFERENCES "Company" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);

-- CreateTable
CREATE TABLE "Message" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "sessionId" TEXT NOT NULL,
    "timestamp" DATETIME,
    "role" TEXT NOT NULL,
    "content" TEXT NOT NULL,
    "order" INTEGER NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT "Message_sessionId_fkey" FOREIGN KEY ("sessionId") REFERENCES "Session" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);

-- CreateTable
CREATE TABLE "Question" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "content" TEXT NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- CreateTable
CREATE TABLE "SessionQuestion" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "sessionId" TEXT NOT NULL,
    "questionId" TEXT NOT NULL,
    "order" INTEGER NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT "SessionQuestion_sessionId_fkey" FOREIGN KEY ("sessionId") REFERENCES "Session" ("id") ON DELETE CASCADE ON UPDATE CASCADE,
    CONSTRAINT "SessionQuestion_questionId_fkey" FOREIGN KEY ("questionId") REFERENCES "Question" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);

-- CreateTable
CREATE TABLE "AIProcessingRequest" (
    "id" TEXT NOT NULL PRIMARY KEY,
    "sessionId" TEXT NOT NULL,
    "openaiRequestId" TEXT,
    "model" TEXT NOT NULL,
    "serviceTier" TEXT,
    "systemFingerprint" TEXT,
    "promptTokens" INTEGER NOT NULL,
    "completionTokens" INTEGER NOT NULL,
    "totalTokens" INTEGER NOT NULL,
    "cachedTokens" INTEGER,
    "audioTokensPrompt" INTEGER,
    "reasoningTokens" INTEGER,
    "audioTokensCompletion" INTEGER,
    "acceptedPredictionTokens" INTEGER,
    "rejectedPredictionTokens" INTEGER,
    "promptTokenCost" REAL NOT NULL,
    "completionTokenCost" REAL NOT NULL,
    "totalCostEur" REAL NOT NULL,
    "processingType" TEXT NOT NULL,
    "success" BOOLEAN NOT NULL,
    "errorMessage" TEXT,
    "requestedAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "completedAt" DATETIME,
    CONSTRAINT "AIProcessingRequest_sessionId_fkey" FOREIGN KEY ("sessionId") REFERENCES "Session" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);

-- CreateIndex
CREATE UNIQUE INDEX "User_email_key" ON "User"("email");

-- CreateIndex
CREATE UNIQUE INDEX "Session_importId_key" ON "Session"("importId");

-- CreateIndex
CREATE INDEX "Session_companyId_startTime_idx" ON "Session"("companyId", "startTime");

-- CreateIndex
CREATE UNIQUE INDEX "SessionImport_externalSessionId_key" ON "SessionImport"("externalSessionId");

-- CreateIndex
CREATE INDEX "SessionImport_status_idx" ON "SessionImport"("status");

-- CreateIndex
CREATE UNIQUE INDEX "SessionImport_companyId_externalSessionId_key" ON "SessionImport"("companyId", "externalSessionId");

-- CreateIndex
CREATE INDEX "Message_sessionId_order_idx" ON "Message"("sessionId", "order");

-- CreateIndex
CREATE UNIQUE INDEX "Message_sessionId_order_key" ON "Message"("sessionId", "order");

-- CreateIndex
CREATE UNIQUE INDEX "Question_content_key" ON "Question"("content");

-- CreateIndex
CREATE INDEX "SessionQuestion_sessionId_idx" ON "SessionQuestion"("sessionId");

-- CreateIndex
CREATE UNIQUE INDEX "SessionQuestion_sessionId_questionId_key" ON "SessionQuestion"("sessionId", "questionId");

-- CreateIndex
CREATE UNIQUE INDEX "SessionQuestion_sessionId_order_key" ON "SessionQuestion"("sessionId", "order");

-- CreateIndex
CREATE INDEX "AIProcessingRequest_sessionId_idx" ON "AIProcessingRequest"("sessionId");

-- CreateIndex
CREATE INDEX "AIProcessingRequest_requestedAt_idx" ON "AIProcessingRequest"("requestedAt");

-- CreateIndex
CREATE INDEX "AIProcessingRequest_model_idx" ON "AIProcessingRequest"("model");
prisma/migrations/migration_lock.toml (new file): 3 lines
@@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "sqlite"
@@ -22,6 +22,22 @@ enum SentimentCategory {
   NEGATIVE
 }
 
+enum SessionCategory {
+  SCHEDULE_HOURS
+  LEAVE_VACATION
+  SICK_LEAVE_RECOVERY
+  SALARY_COMPENSATION
+  CONTRACT_HOURS
+  ONBOARDING
+  OFFBOARDING
+  WORKWEAR_STAFF_PASS
+  TEAM_CONTACTS
+  PERSONAL_QUESTIONS
+  ACCESS_LOGIN
+  SOCIAL_QUESTIONS
+  UNRECOGNIZED_OTHER
+}
+
 /**
  * COMPANY (multi-tenant root)
  */
@@ -85,31 +101,33 @@ model Session {
   startTime DateTime
   endTime   DateTime
 
-  // Processed fields from SessionImport data
+  // Direct copies from SessionImport (minimal processing)
   ipAddress String?
-  country String? // processed from countryCode
-  language String? // processed from language
-  messagesSent Int?
-  sentiment Float? // processed from sentimentRaw
-  sentimentCategory SentimentCategory?
-  escalated Boolean?
-  forwardedHr Boolean?
+  country String? // from countryCode
   fullTranscriptUrl String?
-  avgResponseTime Float? // processed from avgResponseTimeSeconds
-  tokens Int?
-  tokensEur Float?
-  category String?
-  initialMsg String? // processed from initialMessage
+  avgResponseTime Float? // from avgResponseTimeSeconds
+  initialMsg String? // from initialMessage
+
+  // AI-processed fields (calculated from Messages or AI analysis)
+  language String? // AI-detected from Messages
+  messagesSent Int? // Calculated from Message count
+  sentiment SentimentCategory? // AI-analyzed (changed from Float to enum)
+  escalated Boolean? // AI-detected
+  forwardedHr Boolean? // AI-detected
+  category SessionCategory? // AI-categorized (changed to enum)
+
+  // AI-generated fields
+  summary String? // AI-generated summary
 
   // Processing metadata
   processed Boolean @default(false)
-  questions String? // JSON array of extracted questions
-  summary String? // AI-generated summary
 
   /**
-   * ---------- the missing opposite side ----------
+   * Relationships
   */
-  messages Message[] // <-- satisfies Message.session
+  messages Message[] // Individual conversation messages
+  sessionQuestions SessionQuestion[] // Questions asked in this session
+  aiProcessingRequests AIProcessingRequest[] // AI processing cost tracking
 
   createdAt DateTime @default(now())
   updatedAt DateTime @updatedAt
@@ -187,3 +205,79 @@ model Message {
   @@unique([sessionId, order]) // guards against duplicate order values
   @@index([sessionId, order])
 }
+
+/**
+ * QUESTION MANAGEMENT (separate from Session for better analytics)
+ */
+model Question {
+  id        String   @id @default(uuid())
+  content   String   @unique // The actual question text
+  createdAt DateTime @default(now())
+
+  // Relationships
+  sessionQuestions SessionQuestion[]
+}
+
+model SessionQuestion {
+  id         String   @id @default(uuid())
+  sessionId  String
+  questionId String
+  order      Int // Order within the session
+  createdAt  DateTime @default(now())
+
+  // Relationships
+  session  Session  @relation(fields: [sessionId], references: [id], onDelete: Cascade)
+  question Question @relation(fields: [questionId], references: [id])
+
+  @@unique([sessionId, questionId]) // Prevent duplicate questions per session
+  @@unique([sessionId, order]) // Ensure unique ordering
+  @@index([sessionId])
+}
+
+/**
+ * AI PROCESSING COST TRACKING
+ */
+model AIProcessingRequest {
+  id        String @id @default(uuid())
+  sessionId String
+
+  // OpenAI Request Details
+  openaiRequestId   String? // "chatcmpl-Bn8IH9UM8t7luZVWnwZG7CVJ0kjPo"
+  model             String  // "gpt-4o-2024-08-06"
+  serviceTier       String? // "default"
+  systemFingerprint String? // "fp_07871e2ad8"
+
+  // Token Usage (from usage object)
+  promptTokens     Int // 11
+  completionTokens Int // 9
+  totalTokens      Int // 20
+
+  // Detailed Token Breakdown
+  cachedTokens             Int? // prompt_tokens_details.cached_tokens
+  audioTokensPrompt        Int? // prompt_tokens_details.audio_tokens
+  reasoningTokens          Int? // completion_tokens_details.reasoning_tokens
+  audioTokensCompletion    Int? // completion_tokens_details.audio_tokens
+  acceptedPredictionTokens Int? // completion_tokens_details.accepted_prediction_tokens
+  rejectedPredictionTokens Int? // completion_tokens_details.rejected_prediction_tokens
+
+  // Cost Calculation
+  promptTokenCost     Float // Cost per prompt token (varies by model)
+  completionTokenCost Float // Cost per completion token (varies by model)
+  totalCostEur        Float // Calculated total cost in EUR
+
+  // Processing Context
+  processingType String  // "session_analysis", "reprocessing", etc.
+  success        Boolean // Whether the request succeeded
+  errorMessage   String? // If failed, what went wrong
+
+  // Timestamps
+  requestedAt DateTime  @default(now())
+  completedAt DateTime?
+
+  // Relationships
+  session Session @relation(fields: [sessionId], references: [id], onDelete: Cascade)
+
+  @@index([sessionId])
+  @@index([requestedAt])
+  @@index([model])
+}
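One payoff of the Question/SessionQuestion split is that cross-session analytics reduce to a single query. A sketch of a "most frequently asked questions" lookup, which is not part of this commit but follows directly from the schema (relation-count ordering as supported by recent Prisma versions):

// Top 10 most-asked questions across all sessions (illustrative, not in this commit)
const topQuestions = await prisma.question.findMany({
  take: 10,
  orderBy: { sessionQuestions: { _count: 'desc' } },
  include: { _count: { select: { sessionQuestions: true } } },
});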
test-refactored-pipeline.js (new file): 129 lines
@@ -0,0 +1,129 @@
// Test script for the refactored data processing pipeline
import { PrismaClient } from '@prisma/client';
import { processQueuedImports } from './lib/importProcessor.ts';
import { processAllUnparsedTranscripts } from './lib/transcriptParser.ts';
import { processUnprocessedSessions, getAIProcessingCosts } from './lib/processingScheduler.ts';

const prisma = new PrismaClient();

async function testRefactoredPipeline() {
  console.log('🧪 Testing Refactored Data Processing Pipeline\n');

  // Step 1: Check current state
  console.log('📊 Current Database State:');
  const stats = await getDatabaseStats();
  console.log(stats);
  console.log('');

  // Step 2: Test import processing (minimal fields only)
  console.log('🔄 Testing Import Processing (Phase 1)...');
  await processQueuedImports(5); // Process 5 imports
  console.log('');

  // Step 3: Test transcript parsing
  console.log('📝 Testing Transcript Parsing (Phase 2)...');
  await processAllUnparsedTranscripts();
  console.log('');

  // Step 4: Test AI processing with cost tracking
  console.log('🤖 Testing AI Processing with Cost Tracking (Phase 3)...');
  await processUnprocessedSessions(3, 2); // Process 3 sessions with concurrency 2
  console.log('');

  // Step 5: Show final results
  console.log('📈 Final Results:');
  const finalStats = await getDatabaseStats();
  console.log(finalStats);
  console.log('');

  // Step 6: Show AI processing costs
  console.log('💰 AI Processing Costs:');
  const costs = await getAIProcessingCosts();
  console.log(costs);
  console.log('');

  // Step 7: Show sample processed session
  console.log('🔍 Sample Processed Session:');
  const sampleSession = await getSampleProcessedSession();
  if (sampleSession) {
    console.log(`Session ID: ${sampleSession.id}`);
    console.log(`Language: ${sampleSession.language}`);
    console.log(`Messages Sent: ${sampleSession.messagesSent}`);
    console.log(`Sentiment: ${sampleSession.sentiment}`);
    console.log(`Category: ${sampleSession.category}`);
    console.log(`Escalated: ${sampleSession.escalated}`);
    console.log(`Forwarded HR: ${sampleSession.forwardedHr}`);
    console.log(`Summary: ${sampleSession.summary}`);
    console.log(`Questions: ${sampleSession.sessionQuestions.length} questions`);
    console.log(`AI Requests: ${sampleSession.aiProcessingRequests.length} requests`);

    if (sampleSession.sessionQuestions.length > 0) {
      console.log('Sample Questions:');
      sampleSession.sessionQuestions.slice(0, 3).forEach((sq, i) => {
        console.log(`  ${i + 1}. ${sq.question.content}`);
      });
    }
  }
  console.log('');

  console.log('✅ Pipeline test completed!');
}

async function getDatabaseStats() {
  const [
    totalSessions,
    sessionsWithImports,
    sessionsWithMessages,
    processedSessions,
    totalMessages,
    totalQuestions,
    totalSessionQuestions,
    totalAIRequests
  ] = await Promise.all([
    prisma.session.count(),
    prisma.session.count({ where: { importId: { not: null } } }),
    prisma.session.count({ where: { messages: { some: {} } } }),
    prisma.session.count({ where: { processed: true } }),
    prisma.message.count(),
    prisma.question.count(),
    prisma.sessionQuestion.count(),
    prisma.aIProcessingRequest.count()
  ]);

  return {
    totalSessions,
    sessionsWithImports,
    sessionsWithMessages,
    processedSessions,
    unprocessedSessions: sessionsWithMessages - processedSessions,
    totalMessages,
    totalQuestions,
    totalSessionQuestions,
    totalAIRequests
  };
}

async function getSampleProcessedSession() {
  return await prisma.session.findFirst({
    where: {
      processed: true,
      messages: { some: {} }
    },
    include: {
      sessionQuestions: {
        include: {
          question: true
        },
        orderBy: { order: 'asc' }
      },
      aiProcessingRequests: {
        orderBy: { requestedAt: 'desc' }
      }
    }
  });
}

// Run the test
testRefactoredPipeline()
  .catch(console.error)
  .finally(() => prisma.$disconnect());
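Note that although the test script is a .js file, it imports TypeScript modules directly (./lib/*.ts), so it needs a TypeScript-aware runner such as tsx, or ts-node with ESM support; plain node will not resolve those imports without extra tooling.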