feat: Refactor data processing pipeline with AI cost tracking and enhanced session management

- Updated environment configuration to include Postgres database settings.
- Enhanced import processing to minimize field copying and rely on AI for analysis.
- Implemented detailed AI processing request tracking, including token usage and costs.
- Added new models for Question and SessionQuestion to manage user inquiries separately.
- Improved session processing scheduler with AI cost reporting functionality.
- Created a test script to validate the refactored pipeline and display processing statistics.
- Updated Prisma schema and migration files to reflect new database structure and relationships.
This commit is contained in:
Max Kowalski
2025-06-27 21:15:44 +02:00
parent 601e2e4026
commit 6f9ac219c2
10 changed files with 747 additions and 198 deletions

View File

@ -0,0 +1,183 @@
-- Company: parent row for "User", "Session" and "SessionImport", each of
-- which points back here through a "companyId" foreign key.
CREATE TABLE "Company" (
    "id"             TEXT     NOT NULL PRIMARY KEY,
    "name"           TEXT     NOT NULL,
    -- Presumably the CSV export location and its optional credentials.
    -- NOTE(review): "csvPassword" is a plain TEXT column; confirm the
    -- application protects the value before it is written.
    "csvUrl"         TEXT     NOT NULL,
    "csvUsername"    TEXT,
    "csvPassword"    TEXT,
    "sentimentAlert" REAL,
    "dashboardOpts"  JSONB,
    "createdAt"      DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- No DEFAULT here: every INSERT has to provide "updatedAt" itself.
    "updatedAt"      DATETIME NOT NULL
);
-- User: account row (email, password, role) owned by exactly one company.
CREATE TABLE "User" (
    "id"               TEXT     NOT NULL PRIMARY KEY,
    "email"            TEXT     NOT NULL,
    -- NOTE(review): presumably a password hash rather than clear text; verify
    -- against the code that writes this column.
    "password"         TEXT     NOT NULL,
    "role"             TEXT     NOT NULL DEFAULT 'USER',
    "companyId"        TEXT     NOT NULL,
    -- Both reset columns are nullable.
    "resetToken"       TEXT,
    "resetTokenExpiry" DATETIME,
    "createdAt"        DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- No DEFAULT here: every INSERT has to provide "updatedAt" itself.
    "updatedAt"        DATETIME NOT NULL,

    -- Deleting a company removes its users as well.
    CONSTRAINT "User_companyId_fkey"
        FOREIGN KEY ("companyId") REFERENCES "Company" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE
);
-- Session: one conversation belonging to a company, optionally linked to the
-- "SessionImport" row identified by "importId".
CREATE TABLE "Session" (
    "id"                TEXT     NOT NULL PRIMARY KEY,
    "companyId"         TEXT     NOT NULL,
    "importId"          TEXT,
    "startTime"         DATETIME NOT NULL,
    "endTime"           DATETIME NOT NULL,
    "ipAddress"         TEXT,
    "country"           TEXT,
    "fullTranscriptUrl" TEXT,
    "avgResponseTime"   REAL,
    "initialMsg"        TEXT,
    "language"          TEXT,
    "messagesSent"      INTEGER,
    -- "sentiment" and "category" are stored as free TEXT; no CHECK constraint
    -- limits the accepted values at the database level.
    "sentiment"         TEXT,
    "escalated"         BOOLEAN,
    "forwardedHr"       BOOLEAN,
    "category"          TEXT,
    "summary"           TEXT,
    -- Rows start out unprocessed.
    "processed"         BOOLEAN  NOT NULL DEFAULT false,
    "createdAt"         DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- No DEFAULT here: every INSERT has to provide "updatedAt" itself.
    "updatedAt"         DATETIME NOT NULL,

    -- Deleting a company removes its sessions as well.
    CONSTRAINT "Session_companyId_fkey"
        FOREIGN KEY ("companyId") REFERENCES "Company" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE,

    -- "SessionImport" is created by a later statement in this script; SQLite
    -- accepts a foreign key whose parent table does not exist yet.
    -- Deleting the import keeps the session and only clears "importId".
    CONSTRAINT "Session_importId_fkey"
        FOREIGN KEY ("importId") REFERENCES "SessionImport" ("id")
        ON DELETE SET NULL ON UPDATE CASCADE
);
-- SessionImport: staging row for an externally supplied session. Columns
-- suffixed "Raw" are TEXT, i.e. values are kept as strings at this stage.
CREATE TABLE "SessionImport" (
    "id"                     TEXT     NOT NULL PRIMARY KEY,
    "companyId"              TEXT     NOT NULL,
    "externalSessionId"      TEXT     NOT NULL,
    "startTimeRaw"           TEXT     NOT NULL,
    "endTimeRaw"             TEXT     NOT NULL,
    "ipAddress"              TEXT,
    "countryCode"            TEXT,
    "language"               TEXT,
    "messagesSent"           INTEGER,
    "sentimentRaw"           TEXT,
    "escalatedRaw"           TEXT,
    "forwardedHrRaw"         TEXT,
    "fullTranscriptUrl"      TEXT,
    "avgResponseTimeSeconds" REAL,
    "tokens"                 INTEGER,
    "tokensEur"              REAL,
    "category"               TEXT,
    "initialMessage"         TEXT,
    "rawTranscriptContent"   TEXT,
    -- New rows default to 'QUEUED'; the remaining status values are not
    -- constrained at the database level.
    "status"                 TEXT     NOT NULL DEFAULT 'QUEUED',
    "errorMsg"               TEXT,
    "processedAt"            DATETIME,
    "createdAt"              DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- Deleting a company removes its import rows as well.
    CONSTRAINT "SessionImport_companyId_fkey"
        FOREIGN KEY ("companyId") REFERENCES "Company" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE
);
-- Message: a single entry of a session's conversation, positioned within the
-- session by "order".
CREATE TABLE "Message" (
    "id"        TEXT     NOT NULL PRIMARY KEY,
    "sessionId" TEXT     NOT NULL,
    -- Nullable: a message may be stored without a timestamp.
    "timestamp" DATETIME,
    "role"      TEXT     NOT NULL,
    "content"   TEXT     NOT NULL,
    "order"     INTEGER  NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- Deleting a session removes its messages as well.
    CONSTRAINT "Message_sessionId_fkey"
        FOREIGN KEY ("sessionId") REFERENCES "Session" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE
);
-- Question: question text kept in its own table and linked to sessions
-- through "SessionQuestion".
CREATE TABLE "Question" (
    "id"        TEXT     NOT NULL PRIMARY KEY,
    "content"   TEXT     NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- SessionQuestion: join table placing a "Question" at position "order"
-- inside a "Session".
CREATE TABLE "SessionQuestion" (
    "id"         TEXT     NOT NULL PRIMARY KEY,
    "sessionId"  TEXT     NOT NULL,
    "questionId" TEXT     NOT NULL,
    "order"      INTEGER  NOT NULL,
    "createdAt"  DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- Deleting a session removes its link rows as well.
    CONSTRAINT "SessionQuestion_sessionId_fkey"
        FOREIGN KEY ("sessionId") REFERENCES "Session" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE,

    -- RESTRICT: a question that is still linked to a session cannot be deleted.
    CONSTRAINT "SessionQuestion_questionId_fkey"
        FOREIGN KEY ("questionId") REFERENCES "Question" ("id")
        ON DELETE RESTRICT ON UPDATE CASCADE
);
-- AIProcessingRequest: one row per AI request made for a session, recording
-- the model used, token counts, cost figures and the outcome.
CREATE TABLE "AIProcessingRequest" (
    "id"                       TEXT     NOT NULL PRIMARY KEY,
    "sessionId"                TEXT     NOT NULL,
    "openaiRequestId"          TEXT,
    "model"                    TEXT     NOT NULL,
    "serviceTier"              TEXT,
    "systemFingerprint"        TEXT,

    -- Mandatory token totals.
    -- NOTE(review): these are NOT NULL even when "success" is false; confirm
    -- what the writer stores for a request that failed before usage was known.
    "promptTokens"             INTEGER  NOT NULL,
    "completionTokens"         INTEGER  NOT NULL,
    "totalTokens"              INTEGER  NOT NULL,

    -- Optional token breakdown.
    "cachedTokens"             INTEGER,
    "audioTokensPrompt"        INTEGER,
    "reasoningTokens"          INTEGER,
    "audioTokensCompletion"    INTEGER,
    "acceptedPredictionTokens" INTEGER,
    "rejectedPredictionTokens" INTEGER,

    -- Cost figures.
    -- NOTE(review): REAL is binary floating point; if these values are summed
    -- for billing, an exact decimal representation may be preferable.
    "promptTokenCost"          REAL     NOT NULL,
    "completionTokenCost"      REAL     NOT NULL,
    "totalCostEur"             REAL     NOT NULL,

    "processingType"           TEXT     NOT NULL,
    "success"                  BOOLEAN  NOT NULL,
    "errorMessage"             TEXT,
    "requestedAt"              DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "completedAt"              DATETIME,

    -- Deleting a session removes its request rows as well.
    CONSTRAINT "AIProcessingRequest_sessionId_fkey"
        FOREIGN KEY ("sessionId") REFERENCES "Session" ("id")
        ON DELETE CASCADE ON UPDATE CASCADE
);
-- ---------------------------------------------------------------------------
-- Indexes, grouped by table. Statement order is unchanged.
-- ---------------------------------------------------------------------------

-- User: an email address may appear only once.
CREATE UNIQUE INDEX "User_email_key"
    ON "User" ("email");

-- Session: at most one session per import row, plus a lookup index on
-- company and start time.
CREATE UNIQUE INDEX "Session_importId_key"
    ON "Session" ("importId");
CREATE INDEX "Session_companyId_startTime_idx"
    ON "Session" ("companyId", "startTime");

-- SessionImport.
-- NOTE(review): "SessionImport_externalSessionId_key" makes the external id
-- unique across ALL companies, which already implies the per-company
-- uniqueness enforced by "SessionImport_companyId_externalSessionId_key".
-- Confirm whether global uniqueness is intended.
CREATE UNIQUE INDEX "SessionImport_externalSessionId_key"
    ON "SessionImport" ("externalSessionId");
CREATE INDEX "SessionImport_status_idx"
    ON "SessionImport" ("status");
CREATE UNIQUE INDEX "SessionImport_companyId_externalSessionId_key"
    ON "SessionImport" ("companyId", "externalSessionId");

-- Message.
-- NOTE(review): the plain index and the unique index below cover the same
-- column list ("sessionId", "order"); the plain one looks redundant.
CREATE INDEX "Message_sessionId_order_idx"
    ON "Message" ("sessionId", "order");
CREATE UNIQUE INDEX "Message_sessionId_order_key"
    ON "Message" ("sessionId", "order");

-- Question: identical question text is stored only once.
CREATE UNIQUE INDEX "Question_content_key"
    ON "Question" ("content");

-- SessionQuestion: a question may be linked to a session once, and each
-- position within a session may be used once.
-- NOTE(review): both unique indexes start with "sessionId", so the
-- single-column "SessionQuestion_sessionId_idx" looks redundant.
CREATE INDEX "SessionQuestion_sessionId_idx"
    ON "SessionQuestion" ("sessionId");
CREATE UNIQUE INDEX "SessionQuestion_sessionId_questionId_key"
    ON "SessionQuestion" ("sessionId", "questionId");
CREATE UNIQUE INDEX "SessionQuestion_sessionId_order_key"
    ON "SessionQuestion" ("sessionId", "order");

-- AIProcessingRequest: lookup indexes on session, request time and model.
CREATE INDEX "AIProcessingRequest_sessionId_idx"
    ON "AIProcessingRequest" ("sessionId");
CREATE INDEX "AIProcessingRequest_requestedAt_idx"
    ON "AIProcessingRequest" ("requestedAt");
CREATE INDEX "AIProcessingRequest_model_idx"
    ON "AIProcessingRequest" ("model");

View File

@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "sqlite"

View File

@ -22,6 +22,22 @@ enum SentimentCategory {
NEGATIVE
}
// Closed set of category labels. Member names are part of the stored data
// and of the generated client API, so they must not be renamed casually.
enum SessionCategory {
  SCHEDULE_HOURS
  LEAVE_VACATION
  SICK_LEAVE_RECOVERY
  SALARY_COMPENSATION
  CONTRACT_HOURS
  ONBOARDING
  OFFBOARDING
  WORKWEAR_STAFF_PASS
  TEAM_CONTACTS
  PERSONAL_QUESTIONS
  ACCESS_LOGIN
  SOCIAL_QUESTIONS
  // Catch-all member.
  UNRECOGNIZED_OTHER
}
/**
* COMPANY (multi-tenant root)
*/
@ -85,31 +101,33 @@ model Session {
startTime DateTime
endTime DateTime
// Processed fields from SessionImport data
// Direct copies from SessionImport (minimal processing)
ipAddress String?
country String? // processed from countryCode
language String? // processed from language
messagesSent Int?
sentiment Float? // processed from sentimentRaw
sentimentCategory SentimentCategory?
escalated Boolean?
forwardedHr Boolean?
country String? // from countryCode
fullTranscriptUrl String?
avgResponseTime Float? // processed from avgResponseTimeSeconds
tokens Int?
tokensEur Float?
category String?
initialMsg String? // processed from initialMessage
avgResponseTime Float? // from avgResponseTimeSeconds
initialMsg String? // from initialMessage
// AI-processed fields (calculated from Messages or AI analysis)
language String? // AI-detected from Messages
messagesSent Int? // Calculated from Message count
sentiment SentimentCategory? // AI-analyzed (changed from Float to enum)
escalated Boolean? // AI-detected
forwardedHr Boolean? // AI-detected
category SessionCategory? // AI-categorized (changed to enum)
// AI-generated fields
summary String? // AI-generated summary
// Processing metadata
processed Boolean @default(false)
questions String? // JSON array of extracted questions
summary String? // AI-generated summary
/**
* ---------- the missing opposite side ----------
* Relationships
*/
messages Message[] // <-- satisfies Message.session
messages Message[] // Individual conversation messages
sessionQuestions SessionQuestion[] // Questions asked in this session
aiProcessingRequests AIProcessingRequest[] // AI processing cost tracking
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@ -187,3 +205,79 @@ model Message {
@@unique([sessionId, order]) // guards against duplicate order values
@@index([sessionId, order])
}
/**
* QUESTION MANAGEMENT (separate from Session for better analytics)
*/
model Question {
  // Primary key; defaults to a generated UUID.
  id        String   @id @default(uuid())
  // The actual question text. Unique, so identical text maps to a single row.
  content   String   @unique
  createdAt DateTime @default(now())

  // Relationships
  // Back-relation: one entry per session in which this question occurs.
  sessionQuestions SessionQuestion[]
}
// Join model linking a Session to a Question at a given position.
model SessionQuestion {
  id         String   @id @default(uuid())
  sessionId  String
  questionId String
  // Order within the session
  order      Int
  createdAt  DateTime @default(now())

  // Relationships
  // Link rows are deleted together with their session.
  session  Session  @relation(fields: [sessionId], references: [id], onDelete: Cascade)
  // No onDelete given, so Prisma's default referential action applies.
  question Question @relation(fields: [questionId], references: [id])

  // Prevent duplicate questions per session
  @@unique([sessionId, questionId])
  // Ensure unique ordering
  @@unique([sessionId, order])
  @@index([sessionId])
}
/**
* AI PROCESSING COST TRACKING
*/
// One record per AI request issued for a session: which model was called,
// how many tokens were used, what it cost and whether it succeeded.
model AIProcessingRequest {
  id        String @id @default(uuid())
  sessionId String

  // OpenAI Request Details (example values in the trailing comments)
  openaiRequestId   String? // "chatcmpl-Bn8IH9UM8t7luZVWnwZG7CVJ0kjPo"
  model             String  // "gpt-4o-2024-08-06"
  serviceTier       String? // "default"
  systemFingerprint String? // "fp_07871e2ad8"

  // Token Usage (from usage object; example values in the trailing comments)
  promptTokens     Int // 11
  completionTokens Int // 9
  totalTokens      Int // 20

  // Detailed Token Breakdown (all optional)
  cachedTokens             Int? // prompt_tokens_details.cached_tokens
  audioTokensPrompt        Int? // prompt_tokens_details.audio_tokens
  reasoningTokens          Int? // completion_tokens_details.reasoning_tokens
  audioTokensCompletion    Int? // completion_tokens_details.audio_tokens
  acceptedPredictionTokens Int? // completion_tokens_details.accepted_prediction_tokens
  rejectedPredictionTokens Int? // completion_tokens_details.rejected_prediction_tokens

  // Cost Calculation
  // NOTE(review): Float is binary floating point; confirm that is acceptable
  // for monetary values that get aggregated.
  promptTokenCost     Float // Cost per prompt token (varies by model)
  completionTokenCost Float // Cost per completion token (varies by model)
  totalCostEur        Float // Calculated total cost in EUR

  // Processing Context
  processingType String  // "session_analysis", "reprocessing", etc.
  success        Boolean // Whether the request succeeded
  errorMessage   String? // If failed, what went wrong

  // Timestamps
  requestedAt DateTime  @default(now())
  completedAt DateTime?

  // Relationships
  // Request records are deleted together with their session.
  session Session @relation(fields: [sessionId], references: [id], onDelete: Cascade)

  @@index([sessionId])
  @@index([requestedAt])
  @@index([model])
}