DB refactor

This commit is contained in:
Max Kowalski
2025-06-27 23:05:46 +02:00
parent 185bb6da58
commit 2dfc49f840
20 changed files with 1607 additions and 339 deletions

1
.gitignore vendored
View File

@ -264,3 +264,4 @@ Thumbs.db
# OpenAI API request samples
sample-openai-request.json
admin-user.txt

96
check-pipeline-status.ts Normal file
View File

@ -0,0 +1,96 @@
import { PrismaClient } from '@prisma/client';

const prisma = new PrismaClient();

/**
 * Print a stage-by-stage status report for the processing pipeline:
 * CSV import -> session creation -> AI processing -> question extraction,
 * followed by a summary of the work still outstanding.
 *
 * Read-only diagnostic: issues only count/groupBy/findMany queries.
 * Sets a non-zero exit code on failure so cron/CI can detect problems
 * (previously errors were logged but the script still exited 0).
 */
async function checkPipelineStatus(): Promise<void> {
  try {
    console.log('=== COMPLETE PIPELINE STATUS ===\n');

    // Stage 1: SessionImport status, broken down by status value.
    // Independent queries are parallelized instead of awaited one-by-one.
    console.log('1. SessionImport Status:');
    const [importCounts, totalImports] = await Promise.all([
      prisma.sessionImport.groupBy({
        by: ['status'],
        _count: { status: true },
      }),
      prisma.sessionImport.count(),
    ]);
    console.log(` Total imports: ${totalImports}`);
    importCounts.forEach(({ status, _count }) => {
      console.log(` ${status}: ${_count.status}`);
    });

    // Stage 2: Session creation status.
    console.log('\n2. Session Creation Status:');
    const [totalSessions, sessionsWithMessages, sessionsWithoutMessages] = await Promise.all([
      prisma.session.count(),
      prisma.session.count({ where: { messages: { some: {} } } }),
      prisma.session.count({ where: { messages: { none: {} } } }),
    ]);
    console.log(` Total sessions: ${totalSessions}`);
    console.log(` Sessions with messages: ${sessionsWithMessages}`);
    console.log(` Sessions without messages: ${sessionsWithoutMessages}`);

    // Stage 3: AI processing status.
    console.log('\n3. AI Processing Status:');
    const [processedSessions, unprocessedSessions] = await Promise.all([
      prisma.session.count({ where: { processed: true } }),
      prisma.session.count({ where: { processed: false } }),
    ]);
    console.log(` Processed sessions: ${processedSessions}`);
    console.log(` Unprocessed sessions: ${unprocessedSessions}`);

    // Stage 4: Question extraction.
    console.log('\n4. Question Extraction Status:');
    const [sessionsWithQuestions, totalQuestions] = await Promise.all([
      prisma.session.count({ where: { sessionQuestions: { some: {} } } }),
      prisma.question.count(),
    ]);
    console.log(` Sessions with questions: ${sessionsWithQuestions}`);
    console.log(` Total unique questions: ${totalQuestions}`);

    // Summarize the remaining work so the operator knows what to trigger next.
    console.log('\n=== WHAT NEEDS PROCESSING ===');
    const [queuedImports, sessionsNeedingAI] = await Promise.all([
      prisma.sessionImport.count({ where: { status: 'QUEUED' } }),
      prisma.session.count({
        where: {
          AND: [{ messages: { some: {} } }, { processed: false }],
        },
      }),
    ]);
    console.log(`${queuedImports} SessionImports need import processing`);
    console.log(`${sessionsNeedingAI} Sessions need AI processing`);

    // Show a small sample of queued imports so they can be spot-checked.
    if (queuedImports > 0) {
      console.log('\nSample queued imports:');
      const sampleImports = await prisma.sessionImport.findMany({
        where: { status: 'QUEUED' },
        select: { externalSessionId: true, createdAt: true },
        take: 5,
      });
      sampleImports.forEach(imp => {
        console.log(` ${imp.externalSessionId} (created: ${imp.createdAt})`);
      });
    }
  } catch (error) {
    console.error('Error checking pipeline status:', error);
    // Surface the failure to the shell; exit code was always 0 before.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}

checkPipelineStatus();

View File

@ -0,0 +1,78 @@
import { PrismaClient } from '@prisma/client';
import { ProcessingStatusManager } from './lib/processingStatusManager';

const prisma = new PrismaClient();

// Pipeline stages in processing order; this is also the display order.
const STAGES = [
  'CSV_IMPORT',
  'TRANSCRIPT_FETCH',
  'SESSION_CREATION',
  'AI_ANALYSIS',
  'QUESTION_EXTRACTION',
] as const;

/**
 * Print the per-status breakdown for one stage.
 * Statuses missing from stageData are reported as 0.
 */
function printStageBreakdown(stage: string, stageData: Record<string, number>): void {
  console.log(`${stage}:`);
  console.log(` PENDING: ${stageData.PENDING || 0}`);
  console.log(` IN_PROGRESS: ${stageData.IN_PROGRESS || 0}`);
  console.log(` COMPLETED: ${stageData.COMPLETED || 0}`);
  console.log(` FAILED: ${stageData.FAILED || 0}`);
  console.log(` SKIPPED: ${stageData.SKIPPED || 0}`);
  console.log('');
}

/**
 * Report pipeline status via the refactored ProcessingStatusManager:
 * per-stage breakdowns, outstanding work, failed sessions, and sessions
 * ready for AI analysis.
 *
 * Read-only diagnostic. Sets a non-zero exit code on failure so cron/CI
 * can detect problems (previously the script always exited 0).
 */
async function checkRefactoredPipelineStatus(): Promise<void> {
  try {
    console.log('=== REFACTORED PIPELINE STATUS ===\n');

    const pipelineStatus = await ProcessingStatusManager.getPipelineStatus();
    console.log(`Total Sessions: ${pipelineStatus.totalSessions}\n`);

    // Full breakdown for every stage (duplicated display logic extracted
    // into printStageBreakdown).
    for (const stage of STAGES) {
      printStageBreakdown(stage, pipelineStatus.pipeline[stage] || {});
    }

    // Only stages with outstanding (pending or failed) work are listed here.
    console.log('=== WHAT NEEDS PROCESSING ===');
    for (const stage of STAGES) {
      const stageData = pipelineStatus.pipeline[stage] || {};
      const pending = stageData.PENDING || 0;
      const failed = stageData.FAILED || 0;
      if (pending > 0 || failed > 0) {
        console.log(`${stage}: ${pending} pending, ${failed} failed`);
      }
    }

    // Show up to five failed sessions with their stage and error message.
    const failedSessions = await ProcessingStatusManager.getFailedSessions();
    if (failedSessions.length > 0) {
      console.log('\n=== FAILED SESSIONS ===');
      failedSessions.slice(0, 5).forEach(failure => {
        console.log(` ${failure.session.import?.externalSessionId || failure.sessionId}: ${failure.stage} - ${failure.errorMessage}`);
      });
      if (failedSessions.length > 5) {
        console.log(` ... and ${failedSessions.length - 5} more failed sessions`);
      }
    }

    // Preview the next batch eligible for AI analysis.
    const readyForAI = await ProcessingStatusManager.getSessionsNeedingProcessing('AI_ANALYSIS', 5);
    if (readyForAI.length > 0) {
      console.log('\n=== SESSIONS READY FOR AI PROCESSING ===');
      readyForAI.forEach(status => {
        console.log(` ${status.session.import?.externalSessionId || status.sessionId} (created: ${status.session.createdAt})`);
      });
    }
  } catch (error) {
    console.error('Error checking pipeline status:', error);
    process.exitCode = 1; // surface failure to the shell
  } finally {
    await prisma.$disconnect();
  }
}

checkRefactoredPipelineStatus();

81
debug-import-status.ts Normal file
View File

@ -0,0 +1,81 @@
import { PrismaClient } from '@prisma/client';
import { ProcessingStatusManager } from './lib/processingStatusManager';

const prisma = new PrismaClient();

// Pipeline stages in processing order; this is also the display order.
const PIPELINE_STAGES = [
  'CSV_IMPORT',
  'TRANSCRIPT_FETCH',
  'SESSION_CREATION',
  'AI_ANALYSIS',
  'QUESTION_EXTRACTION',
] as const;

/**
 * Print the per-status breakdown for one stage.
 * Statuses missing from stageData are reported as 0.
 */
function printStageStatus(stage: string, stageData: Record<string, number>): void {
  console.log(`${stage}:`);
  console.log(` PENDING: ${stageData.PENDING || 0}`);
  console.log(` IN_PROGRESS: ${stageData.IN_PROGRESS || 0}`);
  console.log(` COMPLETED: ${stageData.COMPLETED || 0}`);
  console.log(` FAILED: ${stageData.FAILED || 0}`);
  console.log(` SKIPPED: ${stageData.SKIPPED || 0}`);
  console.log('');
}

/**
 * Debugging aid for the refactored status system: per-stage breakdowns,
 * the Session <-> SessionImport linkage counts, failed sessions, and a
 * summary of outstanding work.
 *
 * Read-only diagnostic. Sets a non-zero exit code on failure so cron/CI
 * can detect problems (previously the script always exited 0).
 */
async function debugImportStatus(): Promise<void> {
  try {
    console.log('=== DEBUGGING PROCESSING STATUS (REFACTORED SYSTEM) ===\n');

    const pipelineStatus = await ProcessingStatusManager.getPipelineStatus();
    console.log(`Total Sessions: ${pipelineStatus.totalSessions}\n`);

    for (const stage of PIPELINE_STAGES) {
      printStageStatus(stage, pipelineStatus.pipeline[stage] || {});
    }

    // Sanity-check how many sessions are linked back to a SessionImport.
    // The two counts are independent, so run them in parallel.
    console.log('=== SESSION IMPORT RELATIONSHIP ===');
    const [sessionsWithImports, totalSessions] = await Promise.all([
      prisma.session.count({ where: { importId: { not: null } } }),
      prisma.session.count(),
    ]);
    console.log(` Sessions with importId: ${sessionsWithImports}`);
    console.log(` Total sessions: ${totalSessions}`);

    // Show up to ten failed sessions with their stage and error message.
    const failedSessions = await ProcessingStatusManager.getFailedSessions();
    if (failedSessions.length > 0) {
      console.log('\n=== FAILED SESSIONS ===');
      failedSessions.slice(0, 10).forEach(failure => {
        console.log(` ${failure.session.import?.externalSessionId || failure.sessionId}: ${failure.stage} - ${failure.errorMessage}`);
      });
      if (failedSessions.length > 10) {
        console.log(` ... and ${failedSessions.length - 10} more failed sessions`);
      }
    } else {
      console.log('\n✓ No failed sessions found');
    }

    // Only stages with outstanding (pending or failed) work are listed here.
    console.log('\n=== WHAT NEEDS PROCESSING ===');
    for (const stage of PIPELINE_STAGES) {
      const stageData = pipelineStatus.pipeline[stage] || {};
      const pending = stageData.PENDING || 0;
      const failed = stageData.FAILED || 0;
      if (pending > 0 || failed > 0) {
        console.log(`${stage}: ${pending} pending, ${failed} failed`);
      }
    }
  } catch (error) {
    console.error('Error debugging processing status:', error);
    process.exitCode = 1; // surface failure to the shell
  } finally {
    await prisma.$disconnect();
  }
}

debugImportStatus();

View File

@ -1,2 +0,0 @@
user: admin@demo.com
password: admin123

View File

@ -1,12 +1,14 @@
# Scheduler Workflow Documentation

## Overview

The LiveDash system has two main schedulers that work together to fetch and process session data:
1. **Session Refresh Scheduler** - Fetches new sessions from CSV files 1. **Session Refresh Scheduler** - Fetches new sessions from CSV files
2. **Processing Scheduler** - Processes session transcripts with AI 2. **Processing Scheduler** - Processes session transcripts with AI
## Current Status (as of latest check) ## Current Status (as of latest check)
- **Total sessions**: 107 - **Total sessions**: 107
- **Processed sessions**: 0 - **Processed sessions**: 0
- **Sessions with transcript**: 0 - **Sessions with transcript**: 0
@ -15,10 +17,12 @@ The LiveDash system has two main schedulers that work together to fetch and proc
## How the `processed` Field Works ## How the `processed` Field Works
The ProcessingScheduler picks up sessions where `processed` is **NOT** `true`, which includes: The ProcessingScheduler picks up sessions where `processed` is **NOT** `true`, which includes:
- `processed = false` - `processed = false`
- `processed = null` - `processed = null`
**Query used:** **Query used:**
```javascript ```javascript
{ processed: { not: true } } // Either false or null { processed: { not: true } } // Either false or null
``` ```
@ -26,7 +30,9 @@ The ProcessingScheduler picks up sessions where `processed` is **NOT** `true`, w
## Complete Workflow ## Complete Workflow
### Step 1: Session Refresh (CSV Fetching) ### Step 1: Session Refresh (CSV Fetching)
**What it does:** **What it does:**
- Fetches session data from company CSV URLs - Fetches session data from company CSV URLs
- Creates session records in database with basic metadata - Creates session records in database with basic metadata
- Sets `transcriptContent = null` initially - Sets `transcriptContent = null` initially
@ -35,7 +41,9 @@ The ProcessingScheduler picks up sessions where `processed` is **NOT** `true`, w
**Runs:** Every 30 minutes (cron: `*/30 * * * *`) **Runs:** Every 30 minutes (cron: `*/30 * * * *`)
### Step 2: Transcript Fetching ### Step 2: Transcript Fetching
**What it does:** **What it does:**
- Downloads full transcript content for sessions - Downloads full transcript content for sessions
- Updates `transcriptContent` field with actual conversation data - Updates `transcriptContent` field with actual conversation data
- Sessions remain `processed = null` until AI processing - Sessions remain `processed = null` until AI processing
@ -43,7 +51,9 @@ The ProcessingScheduler picks up sessions where `processed` is **NOT** `true`, w
**Runs:** As part of session refresh process **Runs:** As part of session refresh process
### Step 3: AI Processing ### Step 3: AI Processing
**What it does:** **What it does:**
- Finds sessions with transcript content where `processed != true` - Finds sessions with transcript content where `processed != true`
- Sends transcripts to OpenAI for analysis - Sends transcripts to OpenAI for analysis
- Extracts: sentiment, category, questions, summary, etc. - Extracts: sentiment, category, questions, summary, etc.
@ -55,21 +65,25 @@ The ProcessingScheduler picks up sessions where `processed` is **NOT** `true`, w
## Manual Trigger Commands ## Manual Trigger Commands
### Check Current Status ### Check Current Status
```bash ```bash
node scripts/manual-triggers.js status node scripts/manual-triggers.js status
``` ```
### Trigger Session Refresh (Fetch new sessions from CSV) ### Trigger Session Refresh (Fetch new sessions from CSV)
```bash ```bash
node scripts/manual-triggers.js refresh node scripts/manual-triggers.js refresh
``` ```
### Trigger AI Processing (Process unprocessed sessions) ### Trigger AI Processing (Process unprocessed sessions)
```bash ```bash
node scripts/manual-triggers.js process node scripts/manual-triggers.js process
``` ```
### Run Both Schedulers ### Run Both Schedulers
```bash ```bash
node scripts/manual-triggers.js both node scripts/manual-triggers.js both
``` ```
@ -77,7 +91,9 @@ node scripts/manual-triggers.js both
## Troubleshooting ## Troubleshooting
### No Sessions Being Processed? ### No Sessions Being Processed?
1. **Check if sessions have transcripts:** 1. **Check if sessions have transcripts:**
```bash ```bash
node scripts/manual-triggers.js status node scripts/manual-triggers.js status
``` ```
@ -93,20 +109,24 @@ node scripts/manual-triggers.js both
### Common Issues ### Common Issues
#### "No sessions found requiring processing" #### "No sessions found requiring processing"
- All sessions with transcripts have been processed (`processed = true`) - All sessions with transcripts have been processed (`processed = true`)
- Or no sessions have transcript content yet - Or no sessions have transcript content yet
#### "OPENAI_API_KEY environment variable is not set" #### "OPENAI_API_KEY environment variable is not set"
- Add OpenAI API key to `.env.development` file - Add OpenAI API key to `.env.development` file
- Restart the application - Restart the application
#### "Error fetching transcript: Unauthorized" #### "Error fetching transcript: Unauthorized"
- CSV credentials are incorrect or expired - CSV credentials are incorrect or expired
- Check company CSV username/password in database - Check company CSV username/password in database
## Database Field Mapping ## Database Field Mapping
### Before AI Processing ### Before AI Processing
```javascript ```javascript
{ {
id: "session-uuid", id: "session-uuid",
@ -120,6 +140,7 @@ node scripts/manual-triggers.js both
``` ```
### After AI Processing ### After AI Processing
```javascript ```javascript
{ {
id: "session-uuid", id: "session-uuid",
@ -141,11 +162,13 @@ node scripts/manual-triggers.js both
## Scheduler Configuration ## Scheduler Configuration
### Session Refresh Scheduler ### Session Refresh Scheduler
- **File**: `lib/scheduler.js` - **File**: `lib/scheduler.js`
- **Frequency**: Every 30 minutes - **Frequency**: Every 30 minutes
- **Cron**: `*/30 * * * *` - **Cron**: `*/30 * * * *`
### Processing Scheduler ### Processing Scheduler
- **File**: `lib/processingScheduler.js` - **File**: `lib/processingScheduler.js`
- **Frequency**: Every hour - **Frequency**: Every hour
- **Cron**: `0 * * * *` - **Cron**: `0 * * * *`
@ -168,16 +191,19 @@ NEXTAUTH_URL="http://localhost:3000"
## Next Steps for Testing ## Next Steps for Testing
1. **Trigger session refresh** to fetch transcripts: 1. **Trigger session refresh** to fetch transcripts:
```bash ```bash
node scripts/manual-triggers.js refresh node scripts/manual-triggers.js refresh
``` ```
2. **Check status** to see if transcripts were fetched: 2. **Check status** to see if transcripts were fetched:
```bash ```bash
node scripts/manual-triggers.js status node scripts/manual-triggers.js status
``` ```
3. **Trigger processing** if transcripts are available: 3. **Trigger processing** if transcripts are available:
```bash ```bash
node scripts/manual-triggers.js process node scripts/manual-triggers.js process
``` ```

View File

@ -1,11 +1,13 @@
# Transcript Parsing Implementation

## Overview

Added structured message parsing to the LiveDash system, allowing transcripts to be broken down into individual messages with timestamps, roles, and content. This provides a much better user experience for viewing conversations.
## Database Changes ## Database Changes
### New Message Table ### New Message Table
```sql ```sql
CREATE TABLE Message ( CREATE TABLE Message (
id TEXT PRIMARY KEY DEFAULT (uuid()), id TEXT PRIMARY KEY DEFAULT (uuid()),
@ -22,12 +24,14 @@ CREATE INDEX Message_sessionId_order_idx ON Message(sessionId, order);
``` ```
### Updated Session Table ### Updated Session Table
- Added `messages` relation to Session model - Added `messages` relation to Session model
- Sessions can now have both raw transcript content AND parsed messages - Sessions can now have both raw transcript content AND parsed messages
## New Components ## New Components
### 1. Message Interface (`lib/types.ts`) ### 1. Message Interface (`lib/types.ts`)
```typescript ```typescript
export interface Message { export interface Message {
id: string; id: string;
@ -41,6 +45,7 @@ export interface Message {
``` ```
### 2. Transcript Parser (`lib/transcriptParser.js`) ### 2. Transcript Parser (`lib/transcriptParser.js`)
- **`parseChatLogToJSON(logString)`** - Parses raw transcript text into structured messages - **`parseChatLogToJSON(logString)`** - Parses raw transcript text into structured messages
- **`storeMessagesForSession(sessionId, messages)`** - Stores parsed messages in database - **`storeMessagesForSession(sessionId, messages)`** - Stores parsed messages in database
- **`processTranscriptForSession(sessionId, transcriptContent)`** - Complete processing for one session - **`processTranscriptForSession(sessionId, transcriptContent)`** - Complete processing for one session
@ -48,6 +53,7 @@ export interface Message {
- **`getMessagesForSession(sessionId)`** - Retrieve messages for a session - **`getMessagesForSession(sessionId)`** - Retrieve messages for a session
### 3. MessageViewer Component (`components/MessageViewer.tsx`) ### 3. MessageViewer Component (`components/MessageViewer.tsx`)
- Chat-like interface for displaying parsed messages - Chat-like interface for displaying parsed messages
- Color-coded by role (User: blue, Assistant: gray, System: yellow) - Color-coded by role (User: blue, Assistant: gray, System: yellow)
- Shows timestamps and message order - Shows timestamps and message order
@ -56,21 +62,26 @@ export interface Message {
## Updated Components ## Updated Components
### 1. Session API (`pages/api/dashboard/session/[id].ts`) ### 1. Session API (`pages/api/dashboard/session/[id].ts`)
- Now includes parsed messages in session response - Now includes parsed messages in session response
- Messages are ordered by `order` field (ascending) - Messages are ordered by `order` field (ascending)
### 2. Session Details Page (`app/dashboard/sessions/[id]/page.tsx`) ### 2. Session Details Page (`app/dashboard/sessions/[id]/page.tsx`)
- Added MessageViewer component - Added MessageViewer component
- Shows both parsed messages AND raw transcript - Shows both parsed messages AND raw transcript
- Prioritizes parsed messages when available - Prioritizes parsed messages when available
### 3. ChatSession Interface (`lib/types.ts`) ### 3. ChatSession Interface (`lib/types.ts`)
- Added optional `messages?: Message[]` field - Added optional `messages?: Message[]` field
## Parsing Logic ## Parsing Logic
### Supported Format ### Supported Format
The parser expects transcript format: The parser expects transcript format:
``` ```
[DD.MM.YYYY HH:MM:SS] Role: Message content [DD.MM.YYYY HH:MM:SS] Role: Message content
[DD.MM.YYYY HH:MM:SS] User: Hello, I need help [DD.MM.YYYY HH:MM:SS] User: Hello, I need help
@ -78,6 +89,7 @@ The parser expects transcript format:
``` ```
### Features ### Features
- **Multi-line support** - Messages can span multiple lines - **Multi-line support** - Messages can span multiple lines
- **Timestamp parsing** - Converts DD.MM.YYYY HH:MM:SS to ISO format - **Timestamp parsing** - Converts DD.MM.YYYY HH:MM:SS to ISO format
- **Role detection** - Extracts sender role from each message - **Role detection** - Extracts sender role from each message
@ -87,6 +99,7 @@ The parser expects transcript format:
## Manual Commands ## Manual Commands
### New Commands Added ### New Commands Added
```bash ```bash
# Parse transcripts into structured messages # Parse transcripts into structured messages
node scripts/manual-triggers.js parse node scripts/manual-triggers.js parse
@ -99,17 +112,20 @@ node scripts/manual-triggers.js status
``` ```
### Updated Commands ### Updated Commands
- **`status`** - Now shows transcript and parsing statistics - **`status`** - Now shows transcript and parsing statistics
- **`all`** - New command that runs refresh → parse → process in sequence - **`all`** - New command that runs refresh → parse → process in sequence
## Workflow Integration ## Workflow Integration
### Complete Processing Pipeline ### Complete Processing Pipeline
1. **Session Refresh** - Fetch sessions from CSV, download transcripts 1. **Session Refresh** - Fetch sessions from CSV, download transcripts
2. **Transcript Parsing** - Parse raw transcripts into structured messages 2. **Transcript Parsing** - Parse raw transcripts into structured messages
3. **AI Processing** - Process sessions with OpenAI for sentiment, categories, etc. 3. **AI Processing** - Process sessions with OpenAI for sentiment, categories, etc.
### Database States ### Database States
```javascript ```javascript
// After CSV fetch // After CSV fetch
{ {
@ -139,11 +155,13 @@ node scripts/manual-triggers.js status
## User Experience Improvements ## User Experience Improvements
### Before ### Before
- Only raw transcript text in a text area - Only raw transcript text in a text area
- Difficult to follow conversation flow - Difficult to follow conversation flow
- No clear distinction between speakers - No clear distinction between speakers
### After ### After
- **Chat-like interface** with message bubbles - **Chat-like interface** with message bubbles
- **Color-coded roles** for easy identification - **Color-coded roles** for easy identification
- **Timestamps** for each message - **Timestamps** for each message
@ -154,6 +172,7 @@ node scripts/manual-triggers.js status
## Testing ## Testing
### Manual Testing Commands ### Manual Testing Commands
```bash ```bash
# Check current status # Check current status
node scripts/manual-triggers.js status node scripts/manual-triggers.js status
@ -166,6 +185,7 @@ node scripts/manual-triggers.js all
``` ```
### Expected Results ### Expected Results
1. Sessions with transcript content get parsed into individual messages 1. Sessions with transcript content get parsed into individual messages
2. Session detail pages show chat-like interface 2. Session detail pages show chat-like interface
3. Both parsed messages and raw transcript are available 3. Both parsed messages and raw transcript are available
@ -174,16 +194,19 @@ node scripts/manual-triggers.js all
## Technical Benefits ## Technical Benefits
### Performance ### Performance
- **Indexed queries** - Messages indexed by sessionId and order - **Indexed queries** - Messages indexed by sessionId and order
- **Efficient loading** - Only load messages when needed - **Efficient loading** - Only load messages when needed
- **Cascading deletes** - Messages automatically deleted with sessions - **Cascading deletes** - Messages automatically deleted with sessions
### Maintainability ### Maintainability
- **Separation of concerns** - Parsing logic isolated in dedicated module - **Separation of concerns** - Parsing logic isolated in dedicated module
- **Type safety** - Full TypeScript support for Message interface - **Type safety** - Full TypeScript support for Message interface
- **Error handling** - Graceful fallbacks when parsing fails - **Error handling** - Graceful fallbacks when parsing fails
### Extensibility ### Extensibility
- **Role flexibility** - Supports any role names (User, Assistant, System, etc.) - **Role flexibility** - Supports any role names (User, Assistant, System, etc.)
- **Content preservation** - Multi-line messages fully supported - **Content preservation** - Multi-line messages fully supported
- **Metadata ready** - Easy to add message-level metadata in future - **Metadata ready** - Easy to add message-level metadata in future
@ -191,11 +214,13 @@ node scripts/manual-triggers.js all
## Migration Notes ## Migration Notes
### Existing Data ### Existing Data
- **No data loss** - Original transcript content preserved - **No data loss** - Original transcript content preserved
- **Backward compatibility** - Pages work with or without parsed messages - **Backward compatibility** - Pages work with or without parsed messages
- **Gradual migration** - Can parse transcripts incrementally - **Gradual migration** - Can parse transcripts incrementally
### Database Migration ### Database Migration
- New Message table created with foreign key constraints - New Message table created with foreign key constraints
- Existing Session table unchanged (only added relation) - Existing Session table unchanged (only added relation)
- Index created for efficient message queries - Index created for efficient message queries

88
fix-import-status.ts Normal file
View File

@ -0,0 +1,88 @@
import { PrismaClient, ProcessingStage, ProcessingStatus } from '@prisma/client';
import { ProcessingStatusManager } from './lib/processingStatusManager';

const prisma = new PrismaClient();

// A stage still IN_PROGRESS after this long is assumed to be stuck
// (e.g. its worker crashed). Was an inline magic number before.
const STUCK_THRESHOLD_MS = 30 * 60 * 1000;

/**
 * Diagnose processing problems in the refactored status system:
 * summarize failed stages, detect sessions stuck in IN_PROGRESS, and
 * print the current pipeline status.
 *
 * Read-only despite the name — it tells the operator what to reset but
 * performs no resets itself. Sets a non-zero exit code on failure so
 * cron/CI can detect problems (previously the script always exited 0).
 */
async function fixProcessingStatus(): Promise<void> {
  try {
    console.log('=== FIXING PROCESSING STATUS (REFACTORED SYSTEM) ===\n');

    // Failed stages that might need retry.
    const failedSessions = await ProcessingStatusManager.getFailedSessions();
    console.log(`Found ${failedSessions.length} failed processing stages`);

    if (failedSessions.length > 0) {
      // Aggregate failure counts per stage for a quick overview.
      console.log('\nFailed sessions by stage:');
      const failuresByStage: Record<string, number> = {};
      failedSessions.forEach(failure => {
        failuresByStage[failure.stage] = (failuresByStage[failure.stage] || 0) + 1;
      });
      Object.entries(failuresByStage).forEach(([stage, count]) => {
        console.log(` ${stage}: ${count} failures`);
      });

      console.log('\nSample failed sessions:');
      failedSessions.slice(0, 5).forEach(failure => {
        console.log(` ${failure.session.import?.externalSessionId || failure.sessionId}: ${failure.stage} - ${failure.errorMessage}`);
      });

      // Point the operator at the retry API rather than mutating here.
      console.log('\nTo reset failed stages for retry, you can use:');
      console.log('ProcessingStatusManager.resetStageForRetry(sessionId, stage)');
    }

    // Stages that started long ago and never finished are likely orphaned.
    const stuckSessions = await prisma.sessionProcessingStatus.findMany({
      where: {
        status: ProcessingStatus.IN_PROGRESS,
        startedAt: {
          lt: new Date(Date.now() - STUCK_THRESHOLD_MS),
        },
      },
      include: {
        session: {
          include: {
            import: true,
          },
        },
      },
    });
    if (stuckSessions.length > 0) {
      console.log(`\nFound ${stuckSessions.length} sessions stuck in IN_PROGRESS state:`);
      stuckSessions.forEach(stuck => {
        console.log(` ${stuck.session.import?.externalSessionId || stuck.sessionId}: ${stuck.stage} (started: ${stuck.startedAt})`);
      });
      console.log('\nThese sessions may need to be reset to PENDING status for retry.');
    }

    // One-line summary per stage of the current pipeline state.
    console.log('\n=== CURRENT PIPELINE STATUS ===');
    const pipelineStatus = await ProcessingStatusManager.getPipelineStatus();
    const stages = ['CSV_IMPORT', 'TRANSCRIPT_FETCH', 'SESSION_CREATION', 'AI_ANALYSIS', 'QUESTION_EXTRACTION'];
    for (const stage of stages) {
      const stageData = pipelineStatus.pipeline[stage] || {};
      const pending = stageData.PENDING || 0;
      const inProgress = stageData.IN_PROGRESS || 0;
      const completed = stageData.COMPLETED || 0;
      const failed = stageData.FAILED || 0;
      const skipped = stageData.SKIPPED || 0;
      console.log(`${stage}: ${completed} completed, ${pending} pending, ${inProgress} in progress, ${failed} failed, ${skipped} skipped`);
    }
  } catch (error) {
    console.error('Error fixing processing status:', error);
    process.exitCode = 1; // surface failure to the shell
  } finally {
    await prisma.$disconnect();
  }
}

fixProcessingStatus();

View File

@ -1,7 +1,8 @@
// SessionImport to Session processor // SessionImport to Session processor
import { PrismaClient, ImportStatus, SentimentCategory, SessionCategory } from "@prisma/client"; import { PrismaClient, SentimentCategory, SessionCategory, ProcessingStage } from "@prisma/client";
import { getSchedulerConfig } from "./env"; import { getSchedulerConfig } from "./env";
import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher"; import { fetchTranscriptContent, isValidTranscriptUrl } from "./transcriptFetcher";
import { ProcessingStatusManager } from "./processingStatusManager";
import cron from "node-cron"; import cron from "node-cron";
const prisma = new PrismaClient(); const prisma = new PrismaClient();
@ -62,21 +63,130 @@ function parseFallbackBoolean(rawValue: string | null): boolean | null {
return ['true', '1', 'yes', 'escalated', 'forwarded'].includes(rawValue.toLowerCase()); return ['true', '1', 'yes', 'escalated', 'forwarded'].includes(rawValue.toLowerCase());
} }
/**
 * Parse transcript content into Message records
 *
 * Replaces any existing messages for the session (so reprocessing is
 * idempotent), then parses the raw transcript line-by-line. Supported
 * line formats:
 *   "Role: message"                         (Role in User/Assistant/System)
 *   "[DD.MM.YYYY HH:mm:ss] Role: message"
 * Lines without a recognized role prefix are stored with role 'unknown';
 * empty lines and lines with empty content are skipped.
 */
async function parseTranscriptIntoMessages(sessionId: string, transcriptContent: string): Promise<void> {
  // Clear existing messages for this session
  await prisma.message.deleteMany({
    where: { sessionId }
  });

  // Split transcript into lines and parse each message
  const lines = transcriptContent.split('\n').filter(line => line.trim());
  const records: { sessionId: string; timestamp: Date | null; role: string; content: string; order: number }[] = [];
  let order = 0;

  for (const line of lines) {
    const trimmedLine = line.trim();
    if (!trimmedLine) continue;

    // Try to parse different formats:
    // Format 1: "User: message" or "Assistant: message"
    // Format 2: "[timestamp] User: message" or "[timestamp] Assistant: message"
    let role = 'unknown';
    let content = trimmedLine;
    let timestamp: Date | null = null;

    // Check for timestamp format: [DD.MM.YYYY HH:mm:ss] Role: content
    const timestampMatch = trimmedLine.match(/^\[([^\]]+)\]\s*(.+)$/);
    if (timestampMatch) {
      try {
        timestamp = parseEuropeanDate(timestampMatch[1]);
        content = timestampMatch[2];
      } catch (error) {
        // If timestamp parsing fails, treat the whole line as content
        content = trimmedLine;
      }
    }

    // Extract role and message content; anything unrecognized stays 'unknown'
    const roleMatch = content.match(/^(User|Assistant|System):\s*(.*)$/i);
    if (roleMatch) {
      role = roleMatch[1].toLowerCase();
      content = roleMatch[2].trim();
    }

    // Skip empty content
    if (!content) continue;

    records.push({ sessionId, timestamp, role, content, order });
    order++;
  }

  // Bulk insert in one statement instead of one INSERT per message,
  // avoiding N round trips to the database for long transcripts.
  if (records.length > 0) {
    await prisma.message.createMany({ data: records });
  }

  console.log(`[Import Processor] ✓ Parsed ${order} messages for session ${sessionId}`);
}
/** /**
* Process a single SessionImport record into a Session record * Process a single SessionImport record into a Session record
* NEW STRATEGY: Only copy minimal fields, let AI processing handle the rest * Uses new unified processing status tracking
*/ */
async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> { async function processSingleImport(importRecord: any): Promise<{ success: boolean; error?: string }> {
let sessionId: string | null = null;
try { try {
// Parse dates using European format parser // Parse dates using European format parser
const startTime = parseEuropeanDate(importRecord.startTimeRaw); const startTime = parseEuropeanDate(importRecord.startTimeRaw);
const endTime = parseEuropeanDate(importRecord.endTimeRaw); const endTime = parseEuropeanDate(importRecord.endTimeRaw);
console.log(`[Import Processor] Parsed dates for ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`); console.log(`[Import Processor] Processing ${importRecord.externalSessionId}: ${startTime.toISOString()} - ${endTime.toISOString()}`);
// Fetch transcript content if URL is provided and not already fetched // Create or update Session record with MINIMAL processing
const session = await prisma.session.upsert({
where: {
importId: importRecord.id,
},
update: {
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
},
create: {
companyId: importRecord.companyId,
importId: importRecord.id,
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
},
});
sessionId = session.id;
// Initialize processing status for this session
await ProcessingStatusManager.initializeSession(sessionId);
// Mark CSV_IMPORT as completed
await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.CSV_IMPORT);
// Handle transcript fetching
let transcriptContent = importRecord.rawTranscriptContent; let transcriptContent = importRecord.rawTranscriptContent;
if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) { if (!transcriptContent && importRecord.fullTranscriptUrl && isValidTranscriptUrl(importRecord.fullTranscriptUrl)) {
await ProcessingStatusManager.startStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH);
console.log(`[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`); console.log(`[Import Processor] Fetching transcript for ${importRecord.externalSessionId}...`);
// Get company credentials for transcript fetching // Get company credentials for transcript fetching
@ -100,94 +210,78 @@ async function processSingleImport(importRecord: any): Promise<{ success: boolea
where: { id: importRecord.id }, where: { id: importRecord.id },
data: { rawTranscriptContent: transcriptContent }, data: { rawTranscriptContent: transcriptContent },
}); });
await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
contentLength: transcriptContent?.length || 0,
url: importRecord.fullTranscriptUrl
});
} else { } else {
console.log(`[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`); console.log(`[Import Processor] ⚠️ Failed to fetch transcript for ${importRecord.externalSessionId}: ${transcriptResult.error}`);
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, transcriptResult.error || 'Unknown error');
} }
} } else if (!importRecord.fullTranscriptUrl) {
// No transcript URL available - skip this stage
// Create or update Session record with MINIMAL processing await ProcessingStatusManager.skipStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, 'No transcript URL provided');
// Only copy fields that don't need AI analysis } else {
const session = await prisma.session.upsert({ // Transcript already fetched
where: { await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, {
importId: importRecord.id, contentLength: transcriptContent?.length || 0,
}, source: 'already_fetched'
update: {
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
// AI-processed fields: Leave empty, will be filled by AI processing
// language: null, // AI will detect
// messagesSent: null, // AI will count from Messages
// sentiment: null, // AI will analyze
// escalated: null, // AI will detect
// forwardedHr: null, // AI will detect
// category: null, // AI will categorize
// summary: null, // AI will generate
processed: false, // Will be processed later by AI
},
create: {
companyId: importRecord.companyId,
importId: importRecord.id,
startTime,
endTime,
// Direct copies (minimal processing)
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode, // Keep as country code
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
initialMsg: importRecord.initialMessage,
// AI-processed fields: Leave empty, will be filled by AI processing
// All these will be null initially and filled by AI
processed: false, // Will be processed later by AI
},
}); });
}
// Update import status to DONE // Handle session creation (parse messages)
await prisma.sessionImport.update({ await ProcessingStatusManager.startStage(sessionId, ProcessingStage.SESSION_CREATION);
where: { id: importRecord.id },
data: { if (transcriptContent) {
status: ImportStatus.DONE, await parseTranscriptIntoMessages(sessionId, transcriptContent);
processedAt: new Date(), }
errorMsg: null,
}, await ProcessingStatusManager.completeStage(sessionId, ProcessingStage.SESSION_CREATION, {
hasTranscript: !!transcriptContent,
transcriptLength: transcriptContent?.length || 0
}); });
return { success: true }; return { success: true };
} catch (error) { } catch (error) {
// Update import status to ERROR const errorMessage = error instanceof Error ? error.message : String(error);
await prisma.sessionImport.update({
where: { id: importRecord.id }, // Mark the current stage as failed if we have a sessionId
data: { if (sessionId) {
status: ImportStatus.ERROR, // Determine which stage failed based on the error
errorMsg: error instanceof Error ? error.message : String(error), if (errorMessage.includes('transcript') || errorMessage.includes('fetch')) {
}, await ProcessingStatusManager.failStage(sessionId, ProcessingStage.TRANSCRIPT_FETCH, errorMessage);
}); } else if (errorMessage.includes('message') || errorMessage.includes('parse')) {
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.SESSION_CREATION, errorMessage);
} else {
// General failure - mark CSV_IMPORT as failed
await ProcessingStatusManager.failStage(sessionId, ProcessingStage.CSV_IMPORT, errorMessage);
}
}
return { return {
success: false, success: false,
error: error instanceof Error ? error.message : String(error), error: errorMessage,
}; };
} }
} }
/** /**
* Process queued SessionImport records into Session records * Process unprocessed SessionImport records into Session records
* Uses new processing status system to find imports that need processing
*/ */
export async function processQueuedImports(batchSize: number = 50): Promise<void> { export async function processQueuedImports(batchSize: number = 50): Promise<void> {
console.log('[Import Processor] Starting to process queued imports...'); console.log('[Import Processor] Starting to process unprocessed imports...');
// Find queued imports let totalSuccessCount = 0;
const queuedImports = await prisma.sessionImport.findMany({ let totalErrorCount = 0;
let batchNumber = 1;
while (true) {
// Find SessionImports that don't have a corresponding Session yet
const unprocessedImports = await prisma.sessionImport.findMany({
where: { where: {
status: ImportStatus.QUEUED, session: null, // No session created yet
}, },
take: batchSize, take: batchSize,
orderBy: { orderBy: {
@ -195,30 +289,44 @@ export async function processQueuedImports(batchSize: number = 50): Promise<void
}, },
}); });
if (queuedImports.length === 0) { if (unprocessedImports.length === 0) {
console.log('[Import Processor] No queued imports found'); if (batchNumber === 1) {
console.log('[Import Processor] No unprocessed imports found');
} else {
console.log(`[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`);
}
return; return;
} }
console.log(`[Import Processor] Processing ${queuedImports.length} queued imports...`); console.log(`[Import Processor] Processing batch ${batchNumber}: ${unprocessedImports.length} imports...`);
let successCount = 0; let batchSuccessCount = 0;
let errorCount = 0; let batchErrorCount = 0;
// Process each import // Process each import in this batch
for (const importRecord of queuedImports) { for (const importRecord of unprocessedImports) {
const result = await processSingleImport(importRecord); const result = await processSingleImport(importRecord);
if (result.success) { if (result.success) {
successCount++; batchSuccessCount++;
totalSuccessCount++;
console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`); console.log(`[Import Processor] ✓ Processed import ${importRecord.externalSessionId}`);
} else { } else {
errorCount++; batchErrorCount++;
totalErrorCount++;
console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`); console.log(`[Import Processor] ✗ Failed to process import ${importRecord.externalSessionId}: ${result.error}`);
} }
} }
console.log(`[Import Processor] Completed: ${successCount} successful, ${errorCount} failed`); console.log(`[Import Processor] Batch ${batchNumber} completed: ${batchSuccessCount} successful, ${batchErrorCount} failed`);
batchNumber++;
// If this batch was smaller than the batch size, we're done
if (unprocessedImports.length < batchSize) {
console.log(`[Import Processor] All batches completed. Total: ${totalSuccessCount} successful, ${totalErrorCount} failed`);
return;
}
}
} }
/** /**

View File

@ -7,25 +7,62 @@ import { getSchedulerConfig } from "./schedulerConfig";
const prisma = new PrismaClient(); const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY; const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"; const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
const DEFAULT_MODEL = process.env.OPENAI_MODEL || "gpt-4o";
// Model pricing in USD (update as needed)
const MODEL_PRICING = {
'gpt-4o-2024-08-06': {
promptTokenCost: 0.0000025, // $2.50 per 1M tokens
completionTokenCost: 0.00001, // $10.00 per 1M tokens
},
'gpt-4-turbo': {
promptTokenCost: 0.00001, // $10.00 per 1M tokens
completionTokenCost: 0.00003, // $30.00 per 1M tokens
},
'gpt-4o': {
promptTokenCost: 0.000005, // $5.00 per 1M tokens
completionTokenCost: 0.000015, // $15.00 per 1M tokens
}
} as const;
const USD_TO_EUR_RATE = 0.85; // Update periodically or fetch from API const USD_TO_EUR_RATE = 0.85; // Update periodically or fetch from API
/**
* Get company's default AI model
*/
async function getCompanyAIModel(companyId: string): Promise<string> {
const companyModel = await prisma.companyAIModel.findFirst({
where: {
companyId,
isDefault: true,
},
include: {
aiModel: true,
},
});
return companyModel?.aiModel.name || DEFAULT_MODEL;
}
/**
* Get current pricing for an AI model
*/
async function getCurrentModelPricing(modelName: string): Promise<{
promptTokenCost: number;
completionTokenCost: number;
} | null> {
const model = await prisma.aIModel.findUnique({
where: { name: modelName },
include: {
pricing: {
where: {
effectiveFrom: { lte: new Date() },
OR: [
{ effectiveUntil: null },
{ effectiveUntil: { gte: new Date() } }
]
},
orderBy: { effectiveFrom: 'desc' },
take: 1,
},
},
});
if (!model || model.pricing.length === 0) {
return null;
}
const pricing = model.pricing[0];
return {
promptTokenCost: pricing.promptTokenCost,
completionTokenCost: pricing.completionTokenCost,
};
}
interface ProcessedData { interface ProcessedData {
language: string; language: string;
sentiment: "POSITIVE" | "NEUTRAL" | "NEGATIVE"; sentiment: "POSITIVE" | "NEUTRAL" | "NEGATIVE";
@ -53,10 +90,20 @@ async function recordAIProcessingRequest(
): Promise<void> { ): Promise<void> {
const usage = openaiResponse.usage; const usage = openaiResponse.usage;
const model = openaiResponse.model; const model = openaiResponse.model;
const pricing = MODEL_PRICING[model as keyof typeof MODEL_PRICING] || MODEL_PRICING['gpt-4-turbo']; // fallback
const promptCost = usage.prompt_tokens * pricing.promptTokenCost; // Get current pricing from database
const completionCost = usage.completion_tokens * pricing.completionTokenCost; const pricing = await getCurrentModelPricing(model);
// Fallback pricing if not found in database
const fallbackPricing = {
promptTokenCost: 0.00001, // $10.00 per 1M tokens (gpt-4-turbo rate)
completionTokenCost: 0.00003, // $30.00 per 1M tokens
};
const finalPricing = pricing || fallbackPricing;
const promptCost = usage.prompt_tokens * finalPricing.promptTokenCost;
const completionCost = usage.completion_tokens * finalPricing.completionTokenCost;
const totalCostUsd = promptCost + completionCost; const totalCostUsd = promptCost + completionCost;
const totalCostEur = totalCostUsd * USD_TO_EUR_RATE; const totalCostEur = totalCostUsd * USD_TO_EUR_RATE;
@ -80,8 +127,8 @@ async function recordAIProcessingRequest(
acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens || null, acceptedPredictionTokens: usage.completion_tokens_details?.accepted_prediction_tokens || null,
rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens || null, rejectedPredictionTokens: usage.completion_tokens_details?.rejected_prediction_tokens || null,
promptTokenCost: pricing.promptTokenCost, promptTokenCost: finalPricing.promptTokenCost,
completionTokenCost: pricing.completionTokenCost, completionTokenCost: finalPricing.completionTokenCost,
totalCostEur, totalCostEur,
processingType, processingType,
@ -177,11 +224,14 @@ async function calculateEndTime(sessionId: string, fallbackEndTime: Date): Promi
/** /**
* Processes a session transcript using OpenAI API * Processes a session transcript using OpenAI API
*/ */
async function processTranscriptWithOpenAI(sessionId: string, transcript: string): Promise<ProcessedData> { async function processTranscriptWithOpenAI(sessionId: string, transcript: string, companyId: string): Promise<ProcessedData> {
if (!OPENAI_API_KEY) { if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set"); throw new Error("OPENAI_API_KEY environment variable is not set");
} }
// Get company's AI model
const aiModel = await getCompanyAIModel(companyId);
// Updated system message with exact enum values // Updated system message with exact enum values
const systemMessage = ` const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts. You are an AI assistant tasked with analyzing chat transcripts.
@ -218,7 +268,7 @@ async function processTranscriptWithOpenAI(sessionId: string, transcript: string
Authorization: `Bearer ${OPENAI_API_KEY}`, Authorization: `Bearer ${OPENAI_API_KEY}`,
}, },
body: JSON.stringify({ body: JSON.stringify({
model: "gpt-4o", // Use latest model model: aiModel, // Use company's configured AI model
messages: [ messages: [
{ {
role: "system", role: "system",
@ -348,7 +398,7 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
) )
.join("\n"); .join("\n");
const processedData = await processTranscriptWithOpenAI(session.id, transcript); const processedData = await processTranscriptWithOpenAI(session.id, transcript, session.companyId);
// Calculate messagesSent from actual Message records // Calculate messagesSent from actual Message records
const messagesSent = await calculateMessagesSent(session.id); const messagesSent = await calculateMessagesSent(session.id);

View File

@ -0,0 +1,295 @@
import { PrismaClient, ProcessingStage, ProcessingStatus } from '@prisma/client';
const prisma = new PrismaClient();
/**
* Centralized processing status management
*/
/**
 * Centralized processing status management for the session pipeline.
 *
 * Every session owns exactly one SessionProcessingStatus row per pipeline
 * stage (unique on [sessionId, stage]); this class is the single place
 * that creates and transitions those rows.
 */
export class ProcessingStatusManager {
  /** Pipeline stages in their execution order (drives readiness checks). */
  private static readonly STAGE_ORDER: ProcessingStage[] = [
    ProcessingStage.CSV_IMPORT,
    ProcessingStage.TRANSCRIPT_FETCH,
    ProcessingStage.SESSION_CREATION,
    ProcessingStage.AI_ANALYSIS,
    ProcessingStage.QUESTION_EXTRACTION,
  ];

  /**
   * Initialize processing status for a session: create one PENDING row per
   * stage. Idempotent — rows that already exist are left untouched.
   */
  static async initializeSession(sessionId: string): Promise<void> {
    await prisma.sessionProcessingStatus.createMany({
      data: ProcessingStatusManager.STAGE_ORDER.map(stage => ({
        sessionId,
        stage,
        status: ProcessingStatus.PENDING,
      })),
      skipDuplicates: true, // in case some already exist
    });
  }

  /**
   * Mark a stage as IN_PROGRESS and stamp its start time.
   * Clears any previous error; replaces metadata when provided.
   */
  static async startStage(
    sessionId: string,
    stage: ProcessingStage,
    metadata?: any
  ): Promise<void> {
    await prisma.sessionProcessingStatus.upsert({
      where: {
        sessionId_stage: { sessionId, stage }
      },
      update: {
        status: ProcessingStatus.IN_PROGRESS,
        startedAt: new Date(),
        errorMessage: null,
        // ?? (not ||) so falsy-but-valid metadata (0, '', false) is kept
        metadata: metadata ?? null,
      },
      create: {
        sessionId,
        stage,
        status: ProcessingStatus.IN_PROGRESS,
        startedAt: new Date(),
        metadata: metadata ?? null,
      },
    });
  }

  /**
   * Mark a stage as COMPLETED and stamp its completion time.
   * Clears any previous error; replaces metadata when provided.
   */
  static async completeStage(
    sessionId: string,
    stage: ProcessingStage,
    metadata?: any
  ): Promise<void> {
    await prisma.sessionProcessingStatus.upsert({
      where: {
        sessionId_stage: { sessionId, stage }
      },
      update: {
        status: ProcessingStatus.COMPLETED,
        completedAt: new Date(),
        errorMessage: null,
        metadata: metadata ?? null,
      },
      create: {
        sessionId,
        stage,
        status: ProcessingStatus.COMPLETED,
        startedAt: new Date(),
        completedAt: new Date(),
        metadata: metadata ?? null,
      },
    });
  }

  /**
   * Mark a stage as FAILED, record the error and bump the retry counter.
   * completedAt is stamped so failed attempts are orderable by time.
   */
  static async failStage(
    sessionId: string,
    stage: ProcessingStage,
    errorMessage: string,
    metadata?: any
  ): Promise<void> {
    await prisma.sessionProcessingStatus.upsert({
      where: {
        sessionId_stage: { sessionId, stage }
      },
      update: {
        status: ProcessingStatus.FAILED,
        completedAt: new Date(),
        errorMessage,
        retryCount: { increment: 1 },
        metadata: metadata ?? null,
      },
      create: {
        sessionId,
        stage,
        status: ProcessingStatus.FAILED,
        startedAt: new Date(),
        completedAt: new Date(),
        errorMessage,
        retryCount: 1,
        metadata: metadata ?? null,
      },
    });
  }

  /**
   * Mark a stage as SKIPPED (e.g. no transcript URL available).
   * The reason is stored in errorMessage for visibility.
   */
  static async skipStage(
    sessionId: string,
    stage: ProcessingStage,
    reason: string
  ): Promise<void> {
    await prisma.sessionProcessingStatus.upsert({
      where: {
        sessionId_stage: { sessionId, stage }
      },
      update: {
        status: ProcessingStatus.SKIPPED,
        completedAt: new Date(),
        errorMessage: reason,
      },
      create: {
        sessionId,
        stage,
        status: ProcessingStatus.SKIPPED,
        startedAt: new Date(),
        completedAt: new Date(),
        errorMessage: reason,
      },
    });
  }

  /**
   * Get all stage-status rows for a specific session.
   */
  static async getSessionStatus(sessionId: string) {
    return await prisma.sessionProcessingStatus.findMany({
      where: { sessionId },
      orderBy: { stage: 'asc' },
    });
  }

  /**
   * Get sessions whose given stage is still PENDING, oldest sessions first,
   * with the related import and company records included.
   */
  static async getSessionsNeedingProcessing(
    stage: ProcessingStage,
    limit: number = 50
  ) {
    return await prisma.sessionProcessingStatus.findMany({
      where: {
        stage,
        status: ProcessingStatus.PENDING,
      },
      include: {
        session: {
          include: {
            import: true,
            company: true,
          },
        },
      },
      take: limit,
      orderBy: { session: { createdAt: 'asc' } },
    });
  }

  /**
   * Get a pipeline overview: total session count plus a
   * { stage: { status: count } } breakdown.
   */
  static async getPipelineStatus() {
    // Counts grouped by (stage, status) in one query
    const statusCounts = await prisma.sessionProcessingStatus.groupBy({
      by: ['stage', 'status'],
      _count: { id: true },
    });
    const totalSessions = await prisma.session.count();

    // Pivot the flat groupBy rows into a nested lookup
    const pipeline: Record<string, Record<string, number>> = {};
    for (const { stage, status, _count } of statusCounts) {
      if (!pipeline[stage]) {
        pipeline[stage] = {};
      }
      pipeline[stage][status] = _count.id;
    }

    return {
      totalSessions,
      pipeline,
    };
  }

  /**
   * Get FAILED stage rows (optionally restricted to one stage),
   * most recently failed first, with the related import included.
   */
  static async getFailedSessions(stage?: ProcessingStage) {
    const where: any = {
      status: ProcessingStatus.FAILED,
    };
    if (stage) {
      where.stage = stage;
    }
    return await prisma.sessionProcessingStatus.findMany({
      where,
      include: {
        session: {
          include: {
            import: true,
          },
        },
      },
      orderBy: { completedAt: 'desc' },
    });
  }

  /**
   * Reset a failed stage back to PENDING so the processors retry it.
   * retryCount is deliberately preserved so repeated failures stay visible.
   */
  static async resetStageForRetry(sessionId: string, stage: ProcessingStage): Promise<void> {
    await prisma.sessionProcessingStatus.update({
      where: {
        sessionId_stage: { sessionId, stage }
      },
      data: {
        status: ProcessingStatus.PENDING,
        startedAt: null,
        completedAt: null,
        errorMessage: null,
      },
    });
  }

  /**
   * Check if a session has COMPLETED a specific stage.
   * Note: SKIPPED does not count as completed here.
   */
  static async hasCompletedStage(sessionId: string, stage: ProcessingStage): Promise<boolean> {
    const status = await prisma.sessionProcessingStatus.findUnique({
      where: {
        sessionId_stage: { sessionId, stage }
      },
    });
    return status?.status === ProcessingStatus.COMPLETED;
  }

  /**
   * Check if a session is ready for a specific stage, i.e. every earlier
   * stage in the pipeline is finished. A SKIPPED stage counts as satisfied:
   * skipStage() is used for stages that intentionally cannot run (e.g. no
   * transcript URL), and treating those as unfinished would permanently
   * block every downstream stage.
   */
  static async isReadyForStage(sessionId: string, stage: ProcessingStage): Promise<boolean> {
    const currentStageIndex = ProcessingStatusManager.STAGE_ORDER.indexOf(stage);
    if (currentStageIndex === 0) return true; // first stage is always ready

    // One query for all prerequisite stages instead of one lookup each
    const previousStages = ProcessingStatusManager.STAGE_ORDER.slice(0, currentStageIndex);
    const satisfied = await prisma.sessionProcessingStatus.count({
      where: {
        sessionId,
        stage: { in: previousStages },
        status: { in: [ProcessingStatus.COMPLETED, ProcessingStatus.SKIPPED] },
      },
    });
    return satisfied === previousStages.length;
  }
}

View File

@ -0,0 +1,129 @@
import { PrismaClient, ProcessingStage, ProcessingStatus } from '@prisma/client';
import { ProcessingStatusManager } from './lib/processingStatusManager';
const prisma = new PrismaClient();
/**
 * One-off migration: back-fill SessionProcessingStatus rows for sessions
 * created before the unified pipeline tracking existed. The state of each
 * stage is inferred from data already on the session (transcript content,
 * parsed messages, AI fields, extracted questions); stages whose completion
 * cannot be inferred are left PENDING so the regular processors pick them up.
 */
async function migrateToRefactoredSystem() {
  try {
    console.log('=== MIGRATING TO REFACTORED PROCESSING SYSTEM ===\n');

    // All existing sessions, with the related records needed for inference
    const sessions = await prisma.session.findMany({
      include: {
        import: true,
        messages: true,
        sessionQuestions: true,
      },
      orderBy: { createdAt: 'asc' }
    });

    console.log(`Found ${sessions.length} sessions to migrate...\n`);

    let migratedCount = 0;
    let failedCount = 0;

    for (const session of sessions) {
      console.log(`Migrating session ${session.import?.externalSessionId || session.id}...`);
      try {
        await migrateSingleSession(session);
        migratedCount++;
      } catch (error) {
        // One broken session must not abort the whole migration run
        failedCount++;
        console.error(`  ✗ Failed to migrate session ${session.id}:`, error);
      }

      const processed = migratedCount + failedCount;
      if (processed % 10 === 0) {
        console.log(`  Migrated ${processed}/${sessions.length} sessions...`);
      }
    }

    console.log(`\n✓ Successfully migrated ${migratedCount} sessions to the new processing system`);
    if (failedCount > 0) {
      console.log(`✗ ${failedCount} sessions failed to migrate`);
      process.exitCode = 1; // make partial failure visible to callers/CI
    }

    // Show final status
    console.log('\n=== MIGRATION COMPLETE - FINAL STATUS ===');
    const pipelineStatus = await ProcessingStatusManager.getPipelineStatus();

    const stages = ['CSV_IMPORT', 'TRANSCRIPT_FETCH', 'SESSION_CREATION', 'AI_ANALYSIS', 'QUESTION_EXTRACTION'];
    for (const stage of stages) {
      const stageData = pipelineStatus.pipeline[stage] || {};
      const pending = stageData.PENDING || 0;
      const completed = stageData.COMPLETED || 0;
      const skipped = stageData.SKIPPED || 0;
      console.log(`${stage}: ${completed} completed, ${pending} pending, ${skipped} skipped`);
    }
  } catch (error) {
    console.error('Error migrating to refactored system:', error);
    process.exitCode = 1; // exit non-zero instead of silently reporting success
  } finally {
    await prisma.$disconnect();
  }
}

/**
 * Infer and record the pipeline status of every stage for one session.
 *
 * @param session Row from prisma.session.findMany with `import`, `messages`
 *   and `sessionQuestions` included (typed `any` to avoid depending on the
 *   generated Prisma payload type here).
 */
async function migrateSingleSession(session: any): Promise<void> {
  // Ensure a PENDING row exists for every stage (idempotent)
  await ProcessingStatusManager.initializeSession(session.id);

  // 1. CSV_IMPORT - always completed if the session exists at all
  await ProcessingStatusManager.completeStage(session.id, ProcessingStage.CSV_IMPORT, {
    migratedFrom: 'existing_session',
    importId: session.importId
  });

  // 2. TRANSCRIPT_FETCH - completed when content exists, skipped when there
  // is no URL, otherwise left PENDING for retry
  if (session.import?.rawTranscriptContent) {
    await ProcessingStatusManager.completeStage(session.id, ProcessingStage.TRANSCRIPT_FETCH, {
      migratedFrom: 'existing_transcript',
      contentLength: session.import.rawTranscriptContent.length
    });
  } else if (!session.import?.fullTranscriptUrl) {
    await ProcessingStatusManager.skipStage(session.id, ProcessingStage.TRANSCRIPT_FETCH, 'No transcript URL in original import');
  } else {
    console.log(`  - Transcript fetch pending for ${session.import.externalSessionId}`);
  }

  // 3. SESSION_CREATION - completed when messages were already parsed
  if (session.messages.length > 0) {
    await ProcessingStatusManager.completeStage(session.id, ProcessingStage.SESSION_CREATION, {
      migratedFrom: 'existing_messages',
      messageCount: session.messages.length
    });
  } else if (session.import?.rawTranscriptContent) {
    // Transcript exists but was never parsed into messages - leave PENDING
    console.log(`  - Session creation pending for ${session.import.externalSessionId} (has transcript but no messages)`);
  } else if (!session.import?.fullTranscriptUrl) {
    // Nothing to parse and nothing to fetch - this stage can never run
    await ProcessingStatusManager.skipStage(session.id, ProcessingStage.SESSION_CREATION, 'No transcript content available');
  }

  // 4. AI_ANALYSIS - completed when any AI-generated field is populated
  const hasAIAnalysis = session.summary || session.sentiment || session.category || session.language;
  if (hasAIAnalysis) {
    await ProcessingStatusManager.completeStage(session.id, ProcessingStage.AI_ANALYSIS, {
      migratedFrom: 'existing_ai_analysis',
      hasSummary: !!session.summary,
      hasSentiment: !!session.sentiment,
      hasCategory: !!session.category,
      hasLanguage: !!session.language
    });
  } else if (session.messages.length > 0) {
    // Session creation done but AI never ran - leave PENDING
    console.log(`  - AI analysis pending for ${session.import?.externalSessionId}`);
  }

  // 5. QUESTION_EXTRACTION - completed when questions already exist
  if (session.sessionQuestions.length > 0) {
    await ProcessingStatusManager.completeStage(session.id, ProcessingStage.QUESTION_EXTRACTION, {
      migratedFrom: 'existing_questions',
      questionCount: session.sessionQuestions.length
    });
  } else if (hasAIAnalysis) {
    // AI ran but extraction did not - leave PENDING
    console.log(`  - Question extraction pending for ${session.import?.externalSessionId}`);
  }
}

migrateToRefactoredSystem();

View File

@ -0,0 +1,63 @@
-- Migration: AI model management system.
-- Adds three tables:
--   AIModel        - catalog of models per provider (name is globally unique)
--   AIModelPricing - time-windowed per-token USD pricing for each model
--   CompanyAIModel - per-company model assignments with an isDefault flag
-- NOTE(review): generated by Prisma Migrate; do not edit statements after
-- the migration has been applied (checksum mismatch) - comments only here.

-- CreateTable
CREATE TABLE "AIModel" (
    "id" TEXT NOT NULL,
    "name" TEXT NOT NULL,
    "provider" TEXT NOT NULL,
    "maxTokens" INTEGER,
    "isActive" BOOLEAN NOT NULL DEFAULT true,
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "AIModel_pkey" PRIMARY KEY ("id")
);

-- CreateTable
-- effectiveFrom/effectiveUntil define the validity window of a price row;
-- a NULL effectiveUntil marks the currently active price.
CREATE TABLE "AIModelPricing" (
    "id" TEXT NOT NULL,
    "aiModelId" TEXT NOT NULL,
    "promptTokenCost" DOUBLE PRECISION NOT NULL,
    "completionTokenCost" DOUBLE PRECISION NOT NULL,
    "effectiveFrom" TIMESTAMP(3) NOT NULL,
    "effectiveUntil" TIMESTAMP(3),
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "AIModelPricing_pkey" PRIMARY KEY ("id")
);

-- CreateTable
CREATE TABLE "CompanyAIModel" (
    "id" TEXT NOT NULL,
    "companyId" TEXT NOT NULL,
    "aiModelId" TEXT NOT NULL,
    "isDefault" BOOLEAN NOT NULL DEFAULT false,
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "CompanyAIModel_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
CREATE UNIQUE INDEX "AIModel_name_key" ON "AIModel"("name");

-- CreateIndex
CREATE INDEX "AIModel_provider_isActive_idx" ON "AIModel"("provider", "isActive");

-- CreateIndex
CREATE INDEX "AIModelPricing_aiModelId_effectiveFrom_idx" ON "AIModelPricing"("aiModelId", "effectiveFrom");

-- CreateIndex
CREATE INDEX "AIModelPricing_effectiveFrom_effectiveUntil_idx" ON "AIModelPricing"("effectiveFrom", "effectiveUntil");

-- CreateIndex
CREATE INDEX "CompanyAIModel_companyId_isDefault_idx" ON "CompanyAIModel"("companyId", "isDefault");

-- CreateIndex
-- A company may be assigned a given model at most once
CREATE UNIQUE INDEX "CompanyAIModel_companyId_aiModelId_key" ON "CompanyAIModel"("companyId", "aiModelId");

-- AddForeignKey
ALTER TABLE "AIModelPricing" ADD CONSTRAINT "AIModelPricing_aiModelId_fkey" FOREIGN KEY ("aiModelId") REFERENCES "AIModel"("id") ON DELETE CASCADE ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "CompanyAIModel" ADD CONSTRAINT "CompanyAIModel_companyId_fkey" FOREIGN KEY ("companyId") REFERENCES "Company"("id") ON DELETE CASCADE ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "CompanyAIModel" ADD CONSTRAINT "CompanyAIModel_aiModelId_fkey" FOREIGN KEY ("aiModelId") REFERENCES "AIModel"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@ -1,10 +1,12 @@
generator client { generator client {
provider = "prisma-client-js" provider = "prisma-client-js"
previewFeatures = ["driverAdapters"]
} }
datasource db { datasource db {
provider = "postgresql" provider = "postgresql"
url = env("DATABASE_URL") url = env("DATABASE_URL")
directUrl = env("DATABASE_URL_DIRECT")
} }
/** /**
@ -38,6 +40,22 @@ enum SessionCategory {
UNRECOGNIZED_OTHER UNRECOGNIZED_OTHER
} }
/// Ordered stages of the session processing pipeline.
enum ProcessingStage {
  CSV_IMPORT // SessionImport created
  TRANSCRIPT_FETCH // Transcript content fetched
  SESSION_CREATION // Session + Messages created
  AI_ANALYSIS // AI processing completed
  QUESTION_EXTRACTION // Questions extracted
}
/// Lifecycle state of a single pipeline stage for one session.
enum ProcessingStatus {
  PENDING // Not started yet
  IN_PROGRESS // Currently running
  COMPLETED // Finished successfully
  FAILED // Errored; details in SessionProcessingStatus.errorMessage
  SKIPPED // Intentionally not run (e.g. no transcript URL available)
}
/** /**
* COMPANY (multi-tenant root) * COMPANY (multi-tenant root)
*/ */
@ -53,6 +71,7 @@ model Company {
users User[] @relation("CompanyUsers") users User[] @relation("CompanyUsers")
sessions Session[] sessions Session[]
imports SessionImport[] imports SessionImport[]
companyAiModels CompanyAIModel[]
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
@ -119,15 +138,13 @@ model Session {
// AI-generated fields // AI-generated fields
summary String? // AI-generated summary summary String? // AI-generated summary
// Processing metadata
processed Boolean @default(false)
/** /**
* Relationships * Relationships
*/ */
messages Message[] // Individual conversation messages messages Message[] // Individual conversation messages
sessionQuestions SessionQuestion[] // Questions asked in this session sessionQuestions SessionQuestion[] // Questions asked in this session
aiProcessingRequests AIProcessingRequest[] // AI processing cost tracking aiProcessingRequests AIProcessingRequest[] // AI processing cost tracking
processingStatus SessionProcessingStatus[] // Processing pipeline status
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
@ -136,15 +153,8 @@ model Session {
} }
/** /**
* 2. Raw CSV row waiting to be processed ---------- * 2. Raw CSV row (pure data storage) ----------
*/ */
enum ImportStatus {
QUEUED
PROCESSING
DONE
ERROR
}
model SessionImport { model SessionImport {
id String @id @default(uuid()) id String @id @default(uuid())
company Company @relation(fields: [companyId], references: [id], onDelete: Cascade) company Company @relation(fields: [companyId], references: [id], onDelete: Cascade)
@ -177,13 +187,9 @@ model SessionImport {
rawTranscriptContent String? // Fetched content from fullTranscriptUrl rawTranscriptContent String? // Fetched content from fullTranscriptUrl
// ─── bookkeeping ───────────────────────────────── // ─── bookkeeping ─────────────────────────────────
status ImportStatus @default(QUEUED)
errorMsg String?
processedAt DateTime?
createdAt DateTime @default(now()) createdAt DateTime @default(now())
@@unique([companyId, externalSessionId]) // idempotent re-imports @@unique([companyId, externalSessionId]) // idempotent re-imports
@@index([status])
} }
/** /**
@ -206,6 +212,30 @@ model Message {
@@index([sessionId, order]) @@index([sessionId, order])
} }
/**
* UNIFIED PROCESSING STATUS TRACKING
*/
/// One row per (session, stage): tracks where each session is in the
/// processing pipeline, including timing, error and retry details.
model SessionProcessingStatus {
  id String @id @default(uuid())
  sessionId String
  stage ProcessingStage // Which pipeline stage this row tracks
  status ProcessingStatus @default(PENDING) // Current lifecycle state
  startedAt DateTime? // When the stage last started (null = never started)
  completedAt DateTime? // When the stage finished, failed or was skipped
  errorMessage String? // Failure reason (also holds the skip reason for SKIPPED)
  retryCount Int @default(0) // Number of failed attempts so far

  // Stage-specific metadata (e.g., AI costs, token usage, fetch details)
  metadata Json?

  session Session @relation(fields: [sessionId], references: [id], onDelete: Cascade)

  @@unique([sessionId, stage]) // exactly one row per stage per session
  @@index([stage, status]) // fast "what needs processing" queries
  @@index([sessionId])
}
/** /**
* QUESTION MANAGEMENT (separate from Session for better analytics) * QUESTION MANAGEMENT (separate from Session for better analytics)
*/ */
@ -281,3 +311,66 @@ model AIProcessingRequest {
@@index([requestedAt]) @@index([requestedAt])
@@index([model]) @@index([model])
} }
/**
* AI MODEL MANAGEMENT SYSTEM
*/
/**
 * AI MODEL MANAGEMENT SYSTEM
 */
/**
 * AI Model definitions (without pricing — price history lives in
 * AIModelPricing so rates can change over time without touching the model).
 */
model AIModel {
id String @id @default(uuid())
name String @unique // "gpt-4o", "gpt-4-turbo", etc.
provider String // "openai", "anthropic", etc.
maxTokens Int? // Maximum tokens for this model
/// Soft-disable flag; inactive models are kept rather than deleted.
isActive Boolean @default(true)
// Relationships
pricing AIModelPricing[] // Time-based price history for this model
companyModels CompanyAIModel[] // Per-company assignments of this model
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
/// Supports "active models for provider P" lookups.
@@index([provider, isActive])
}
/**
 * Time-based pricing for AI models.
 *
 * Each row is a pricing window; the current price is presumably the row
 * whose effectiveUntil is null — TODO confirm against the query code.
 */
model AIModelPricing {
id String @id @default(uuid())
/// FK to the priced AIModel (relation field declared below).
aiModelId String
promptTokenCost Float // Cost per prompt token in USD
completionTokenCost Float // Cost per completion token in USD
effectiveFrom DateTime // When this pricing becomes effective
effectiveUntil DateTime? // When this pricing expires (null = current)
// Relationships
/// Deleting a model cascades to its pricing history.
aiModel AIModel @relation(fields: [aiModelId], references: [id], onDelete: Cascade)
createdAt DateTime @default(now())
/// Supports "pricing for model M ordered by start date" queries.
@@index([aiModelId, effectiveFrom])
/// Supports "which pricing window covers time T" queries.
@@index([effectiveFrom, effectiveUntil])
}
/**
 * Company-specific AI model assignments (join table between Company and
 * AIModel, with a per-company default flag).
 */
model CompanyAIModel {
id String @id @default(uuid())
/// FK to the Company side of the assignment.
companyId String
/// FK to the AIModel side of the assignment.
aiModelId String
/// Is this the default model for the company? NOTE(review): the schema
/// does not enforce "at most one default per company" — confirm the
/// application layer guarantees it.
isDefault Boolean @default(false)
// Relationships
company Company @relation(fields: [companyId], references: [id], onDelete: Cascade)
aiModel AIModel @relation(fields: [aiModelId], references: [id], onDelete: Cascade)
createdAt DateTime @default(now())
@@unique([companyId, aiModelId]) // Prevent duplicate assignments
/// Supports "default model for company C" lookups.
@@index([companyId, isDefault])
}

View File

@ -1,4 +1,4 @@
// seed.ts - Create initial admin user and company // seed.ts - Create initial admin user, company, and AI models
import { PrismaClient } from "@prisma/client"; import { PrismaClient } from "@prisma/client";
import bcrypt from "bcryptjs"; import bcrypt from "bcryptjs";
@ -6,30 +6,133 @@ const prisma = new PrismaClient();
async function main() { async function main() {
try { try {
// Create a company console.log("🌱 Starting database seeding...");
// Create the Jumbo company
const company = await prisma.company.create({ const company = await prisma.company.create({
data: { data: {
name: "Demo Company", name: "Jumbo Bas Bobbeldijk",
csvUrl: "https://proto.notso.ai/jumbo/chats", // Replace with a real URL if available csvUrl: "https://proto.notso.ai/jumbo/chats",
csvUsername: "jumboadmin",
csvPassword: "jumboadmin",
}, },
}); });
console.log(`✅ Created company: ${company.name}`);
// Create an admin user // Create admin user
const hashedPassword = await bcrypt.hash("admin123", 10); const hashedPassword = await bcrypt.hash("8QbL26tB7fWS", 10);
await prisma.user.create({ const adminUser = await prisma.user.create({
data: { data: {
email: "admin@demo.com", email: "max.kowalski.contact@gmail.com",
password: hashedPassword, password: hashedPassword,
role: "ADMIN", role: "ADMIN",
companyId: company.id, companyId: company.id,
}, },
}); });
console.log(`✅ Created admin user: ${adminUser.email}`);
// Create AI Models
const aiModels = [
{
name: "gpt-4o",
provider: "openai",
maxTokens: 128000,
isActive: true,
},
{
name: "gpt-4o-2024-08-06",
provider: "openai",
maxTokens: 128000,
isActive: true,
},
{
name: "gpt-4-turbo",
provider: "openai",
maxTokens: 128000,
isActive: true,
},
{
name: "gpt-4o-mini",
provider: "openai",
maxTokens: 128000,
isActive: true,
},
];
const createdModels: any[] = [];
for (const modelData of aiModels) {
const model = await prisma.aIModel.create({
data: modelData,
});
createdModels.push(model);
console.log(`✅ Created AI model: ${model.name}`);
}
// Create current pricing for AI models (as of December 2024)
const currentTime = new Date();
const pricingData = [
{
modelName: "gpt-4o",
promptTokenCost: 0.0000025, // $2.50 per 1M tokens
completionTokenCost: 0.00001, // $10.00 per 1M tokens
},
{
modelName: "gpt-4o-2024-08-06",
promptTokenCost: 0.0000025, // $2.50 per 1M tokens
completionTokenCost: 0.00001, // $10.00 per 1M tokens
},
{
modelName: "gpt-4-turbo",
promptTokenCost: 0.00001, // $10.00 per 1M tokens
completionTokenCost: 0.00003, // $30.00 per 1M tokens
},
{
modelName: "gpt-4o-mini",
promptTokenCost: 0.00000015, // $0.15 per 1M tokens
completionTokenCost: 0.0000006, // $0.60 per 1M tokens
},
];
for (const pricing of pricingData) {
const model = createdModels.find(m => m.name === pricing.modelName);
if (model) {
await prisma.aIModelPricing.create({
data: {
aiModelId: model.id,
promptTokenCost: pricing.promptTokenCost,
completionTokenCost: pricing.completionTokenCost,
effectiveFrom: currentTime,
effectiveUntil: null, // Current pricing
},
});
console.log(`✅ Created pricing for: ${model.name}`);
}
}
// Assign default AI model to company (gpt-4o)
const defaultModel = createdModels.find(m => m.name === "gpt-4o");
if (defaultModel) {
await prisma.companyAIModel.create({
data: {
companyId: company.id,
aiModelId: defaultModel.id,
isDefault: true,
},
});
console.log(`✅ Set default AI model for company: ${defaultModel.name}`);
}
console.log("\n🎉 Database seeding completed successfully!");
console.log("\n📋 Summary:");
console.log(`Company: ${company.name}`);
console.log(`Admin user: ${adminUser.email}`);
console.log(`Password: 8QbL26tB7fWS`);
console.log(`AI Models: ${createdModels.length} models created with current pricing`);
console.log(`Default model: ${defaultModel?.name}`);
console.log("\n🚀 Ready to start importing CSV data!");
console.log("Seed data created successfully:");
console.log("Company: Demo Company");
console.log("Admin user: admin@demo.com (password: admin123)");
} catch (error) { } catch (error) {
console.error("Error seeding database:", error); console.error("Error seeding database:", error);
process.exit(1); process.exit(1);
} finally { } finally {
await prisma.$disconnect(); await prisma.$disconnect();

17
test-ai-processing.ts Normal file
View File

@ -0,0 +1,17 @@
import { processUnprocessedSessions } from './lib/processingScheduler';
/**
 * Manual smoke-test driver for the AI processing stage.
 *
 * Runs the scheduler over all unprocessed sessions and reports success or
 * failure on the console. Fix vs. original: a failure now sets a non-zero
 * exit code (the original logged the error but still exited 0, so CI or a
 * shell script could not detect the failure), and the top-level promise is
 * explicitly marked fire-and-forget with `void`.
 */
async function testAIProcessing(): Promise<void> {
  console.log('=== TESTING AI PROCESSING ===\n');
  try {
    // Batch size 10 with max concurrency 3 exercises multiple batches
    // (per the original note, the dataset had ~109 unprocessed sessions).
    await processUnprocessedSessions(10, 3);
    console.log('\n=== AI PROCESSING COMPLETED ===');
  } catch (error: unknown) {
    console.error('Error during AI processing:', error);
    // Surface the failure to the caller instead of exiting 0; exitCode
    // (unlike process.exit) lets pending I/O such as logs flush first.
    process.exitCode = 1;
  }
}

void testAIProcessing();

17
test-import-processing.ts Normal file
View File

@ -0,0 +1,17 @@
import { processQueuedImports } from './lib/importProcessor';
/**
 * Manual smoke-test driver for the CSV import-processing stage.
 *
 * Runs the import processor over queued imports and reports success or
 * failure on the console. Fix vs. original: a failure now sets a non-zero
 * exit code (the original logged the error but still exited 0, so CI or a
 * shell script could not detect the failure), and the top-level promise is
 * explicitly marked fire-and-forget with `void`.
 */
async function testImportProcessing(): Promise<void> {
  console.log('=== TESTING IMPORT PROCESSING ===\n');
  try {
    // Batch size 50 to exercise multiple batches.
    await processQueuedImports(50);
    console.log('\n=== IMPORT PROCESSING COMPLETED ===');
  } catch (error: unknown) {
    console.error('Error during import processing:', error);
    // Surface the failure to the caller instead of exiting 0; exitCode
    // (unlike process.exit) lets pending I/O such as logs flush first.
    process.exitCode = 1;
  }
}

void testImportProcessing();