mirror of
https://github.com/kjanat/livedash-node.git
synced 2026-01-16 12:12:09 +01:00
Refactor transcript fetching and processing scripts
- Introduced a new function `fetchTranscriptContent` to handle fetching transcripts with optional authentication. - Enhanced error handling and logging for transcript fetching. - Updated the `parseTranscriptToMessages` function to improve message parsing logic. - Replaced the old session processing logic with a new approach that utilizes `SessionImport` records. - Removed obsolete scripts related to manual triggers and whitespace fixing. - Updated the server initialization to remove direct server handling, transitioning to a more modular approach. - Improved overall code structure and readability across various scripts.
This commit is contained in:
@ -1,73 +0,0 @@
|
||||
// Script to check what's in the transcript files
|
||||
// Usage: node scripts/check-transcript-content.js
|
||||
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Diagnostic helper: looks up at most three sessions that have a transcript
 * URL but no stored messages, downloads each transcript and prints its size,
 * a short preview, and how many non-blank lines match the
 * `[timestamp] role: content` pattern.
 *
 * Per-session failures are logged and do not stop the loop. The Prisma client
 * is disconnected before the function returns, whatever happened.
 */
async function checkTranscriptContent() {
  const transcriptLinePattern = /^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/;

  // Prints a preview whose form depends on how long the body is.
  const printPreview = (body) => {
    if (body.length === 0) {
      console.log(` ⚠️ Empty file`);
      return;
    }
    if (body.length < 100) {
      console.log(` 📝 Full content: "${body}"`);
      return;
    }
    console.log(` 📝 First 200 chars: "${body.substring(0, 200)}..."`);
  };

  // Downloads and reports on a single session's transcript.
  const inspectSession = async (session) => {
    const company = session.company;
    const requestHeaders = {};
    // Basic auth is only sent when the company has both credentials.
    if (company.csvUsername && company.csvPassword) {
      requestHeaders.Authorization =
        "Basic " + Buffer.from(`${company.csvUsername}:${company.csvPassword}`).toString("base64");
    }

    const response = await fetch(session.fullTranscriptUrl, {
      headers: requestHeaders,
      timeout: 10000,
    });

    if (!response.ok) {
      console.log(` ❌ HTTP ${response.status}: ${response.statusText}`);
      return;
    }

    const body = await response.text();
    console.log(` 📏 Content length: ${body.length} characters`);
    printPreview(body);

    // Check if it matches our expected format
    const nonBlankLines = body.split('\n').filter((text) => text.trim());
    const wellFormed = nonBlankLines.filter((text) => text.match(transcriptLinePattern));
    console.log(` 🔍 Lines total: ${nonBlankLines.length}, Format matches: ${wellFormed.length}`);
  };

  try {
    // Get a few sessions without messages
    const candidates = await prisma.session.findMany({
      where: {
        AND: [
          { fullTranscriptUrl: { not: null } },
          { messages: { none: {} } },
        ],
      },
      include: { company: true },
      take: 3,
    });

    for (const candidate of candidates) {
      console.log(`\n📄 Checking session ${candidate.id}:`);
      console.log(` URL: ${candidate.fullTranscriptUrl}`);

      try {
        await inspectSession(candidate);
      } catch (error) {
        console.log(` ❌ Error: ${error.message}`);
      }
    }
  } catch (error) {
    console.error('❌ Error:', error);
  } finally {
    await prisma.$disconnect();
  }
}
|
||||
|
||||
checkTranscriptContent();
|
||||
@ -1,185 +0,0 @@
|
||||
// Script to fetch transcripts and parse them into messages
|
||||
// Usage: node scripts/fetch-and-parse-transcripts.js
|
||||
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Downloads a transcript and returns its body as text.
 *
 * @param {string} url - Location of the transcript.
 * @param {string} [username] - Basic-auth user; used only together with `password`.
 * @param {string} [password] - Basic-auth password; used only together with `username`.
 * @returns {Promise<string|null>} The response body, or `null` when the
 *   response status is not OK or the request throws (both cases are logged).
 */
async function fetchTranscriptContent(url, username, password) {
  try {
    const requestHeaders = {};
    if (username && password) {
      const encoded = Buffer.from(`${username}:${password}`).toString("base64");
      requestHeaders.Authorization = "Basic " + encoded;
    }

    const response = await fetch(url, {
      headers: requestHeaders,
      timeout: 10000,
    });

    if (response.ok) {
      return await response.text();
    }

    console.log(`❌ Failed to fetch ${url}: ${response.status} ${response.statusText}`);
    return null;
  } catch (error) {
    console.log(`❌ Error fetching ${url}: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Parses raw transcript text into message records.
 *
 * Two line formats are recognised:
 *   1. `[DD-MM-YYYY HH:MM:SS] Role: Content` - the timestamp comes from the
 *      line; if the bracketed text is not in that exact form the current
 *      time is used instead.
 *   2. `Role: Content` - timestamps are synthesised one minute apart,
 *      starting one minute after the moment this function is called.
 * Blank lines and lines matching neither format are skipped.
 *
 * @param {string} transcript - Raw transcript text (LF, CRLF or CR line endings).
 * @param {string} sessionId - Copied onto every returned message.
 * @returns {Array<{sessionId: string, role: string, content: string, timestamp: Date, order: number}>}
 *   Messages in transcript order; `role` is lower-cased, `order` starts at 0.
 *   Empty array for empty/blank/missing input.
 */
function parseTranscriptToMessages(transcript, sessionId) {
  if (!transcript || transcript.trim() === '') {
    return [];
  }

  // Split on any line ending. With a plain '\n' split a CRLF transcript keeps
  // a trailing '\r' on every line, and because `.` never matches '\r' the
  // `(.+)$` tail of both patterns below would fail on every single line.
  const lines = transcript.split(/\r\n?|\n/).filter((line) => line.trim());
  const messages = [];
  let messageOrder = 0;
  let currentTimestamp = new Date();

  for (const line of lines) {
    // Try format 1: [DD-MM-YYYY HH:MM:SS] Role: Content
    const timestampMatch = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);

    if (timestampMatch) {
      const [, timestamp, role, content] = timestampMatch;

      // Parse timestamp (DD-MM-YYYY HH:MM:SS)
      const dateMatch = timestamp.match(
        /^(\d{1,2})-(\d{1,2})-(\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/
      );
      let parsedTimestamp = new Date();

      if (dateMatch) {
        const [, day, month, year, hour, minute, second] = dateMatch;
        parsedTimestamp = new Date(
          Number.parseInt(year, 10),
          Number.parseInt(month, 10) - 1, // Month is 0-indexed
          Number.parseInt(day, 10),
          Number.parseInt(hour, 10),
          Number.parseInt(minute, 10),
          Number.parseInt(second, 10)
        );
      }

      messages.push({
        sessionId,
        role: role.trim().toLowerCase(),
        content: content.trim(),
        timestamp: parsedTimestamp,
        order: messageOrder++,
      });
      continue;
    }

    // Try format 2: Role: Content (simple format)
    const simpleMatch = line.match(/^([^:]+):\s*(.+)$/);

    if (simpleMatch) {
      const [, role, content] = simpleMatch;

      // Use incremental timestamps (add 1 minute per message)
      currentTimestamp = new Date(currentTimestamp.getTime() + 60000);

      messages.push({
        sessionId,
        role: role.trim().toLowerCase(),
        content: content.trim(),
        timestamp: new Date(currentTimestamp),
        order: messageOrder++,
      });
    }
  }

  return messages;
}
|
||||
|
||||
/**
 * Backfills messages for sessions that have a transcript URL but no messages.
 *
 * Handles at most 20 sessions per run: each transcript is downloaded with the
 * company's CSV credentials, parsed, and the resulting messages are inserted
 * in bulk. A session counts as failed when the download yields nothing, no
 * messages can be parsed, or anything throws. Totals are printed at the end
 * and the Prisma client is always disconnected.
 */
async function fetchAndParseTranscripts() {
  // Imports one session; resolves to true when messages were stored and to
  // false when the transcript was unavailable or produced no messages.
  const importSession = async (session) => {
    const rawText = await fetchTranscriptContent(
      session.fullTranscriptUrl,
      session.company.csvUsername,
      session.company.csvPassword
    );
    if (!rawText) {
      console.log(` ⚠️ No transcript content available`);
      return false;
    }

    const parsed = parseTranscriptToMessages(rawText, session.id);
    if (parsed.length === 0) {
      console.log(` ⚠️ No messages found in transcript`);
      return false;
    }

    await prisma.message.createMany({ data: parsed });
    console.log(` ✅ Added ${parsed.length} messages`);
    return true;
  };

  try {
    console.log('🔍 Finding sessions without messages...\n');

    // Sessions with a transcript URL and zero related messages.
    const pending = await prisma.session.findMany({
      where: {
        AND: [
          { fullTranscriptUrl: { not: null } },
          { messages: { none: {} } },
        ],
      },
      include: { company: true },
      take: 20, // keep each run small
    });

    if (pending.length === 0) {
      console.log('✅ All sessions with transcript URLs already have messages!');
      return;
    }

    console.log(`📥 Found ${pending.length} sessions to process\n`);

    let imported = 0;
    let failed = 0;

    for (const session of pending) {
      console.log(`📄 Processing session ${session.id.substring(0, 8)}...`);

      try {
        if (await importSession(session)) {
          imported += 1;
        } else {
          failed += 1;
        }
      } catch (error) {
        console.log(` ❌ Error: ${error.message}`);
        failed += 1;
      }
    }

    console.log(`\n📊 Results:`);
    console.log(` ✅ Successfully processed: ${imported} sessions`);
    console.log(` ❌ Failed to process: ${failed} sessions`);
    console.log(`\n💡 Now you can run the processing scheduler to analyze these sessions!`);
  } catch (error) {
    console.error('❌ Error:', error);
  } finally {
    await prisma.$disconnect();
  }
}
|
||||
|
||||
fetchAndParseTranscripts();
|
||||
@ -1,83 +1,182 @@
|
||||
import { PrismaClient } from "@prisma/client";
|
||||
import fetch from "node-fetch";
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
async function main() {
|
||||
console.log("Starting to fetch missing transcripts...");
|
||||
/**
|
||||
* Fetches transcript content from a URL with optional authentication
|
||||
*/
|
||||
async function fetchTranscriptContent(
|
||||
url: string,
|
||||
username?: string,
|
||||
password?: string
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
const authHeader =
|
||||
username && password
|
||||
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
|
||||
: undefined;
|
||||
|
||||
const sessionsToUpdate = await prisma.session.findMany({
|
||||
const response = await fetch(url, {
|
||||
headers: authHeader ? { Authorization: authHeader } : {},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
console.warn(`Failed to fetch transcript from ${url}: ${response.statusText}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
return await response.text();
|
||||
} catch (error) {
|
||||
console.warn(`Error fetching transcript from ${url}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse transcript content into individual messages.
 *
 * Lines of the form `[timestamp] role: content` each start a new message.
 * Any other non-blank line is appended (after a newline) to the previous
 * message; if there is no previous message yet, it becomes a message with
 * role `unknown` and a null timestamp.
 *
 * Timestamp handling: a `DD-MM-YYYY HH:MM:SS` value is read day-first in
 * local time; anything else is handed to the native `Date` parser. Values
 * that cannot be parsed yield `null`.
 * NOTE(review): day-first is assumed for the dashed numeric form - confirm
 * against the system that produces the transcripts.
 *
 * @param transcriptContent Raw transcript text (LF, CRLF or CR line endings).
 * @returns Messages in transcript order, `order` counting from 0.
 */
function parseTranscriptToMessages(transcriptContent: string): Array<{
  timestamp: Date | null;
  role: string;
  content: string;
  order: number;
}> {
  // Split on any line ending: a '\r' left at the end of a line would stop
  // `(.+)$` from matching, because `.` does not match carriage returns.
  const lines = transcriptContent.split(/\r\n?|\n/).filter((line) => line.trim());
  const messages: Array<{
    timestamp: Date | null;
    role: string;
    content: string;
    order: number;
  }> = [];

  const dayFirstPattern = /^(\d{1,2})-(\d{1,2})-(\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;

  // Native Date parsing of non-ISO text is implementation-defined (V8 reads
  // "01-02-2024" month-first and rejects days above 12), so the dashed
  // day-first form is decoded by hand before falling back to `new Date`.
  const parseTimestamp = (raw: string): Date | null => {
    const dayFirst = raw.trim().match(dayFirstPattern);
    const parsed = dayFirst
      ? new Date(
          Number(dayFirst[3]),
          Number(dayFirst[2]) - 1, // months are 0-indexed
          Number(dayFirst[1]),
          Number(dayFirst[4]),
          Number(dayFirst[5]),
          Number(dayFirst[6])
        )
      : new Date(raw);
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  };

  let order = 0;

  for (const line of lines) {
    // Try to parse lines in format: [timestamp] role: content
    const match = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);

    if (match) {
      const [, timestampStr, role, content] = match;

      messages.push({
        timestamp: parseTimestamp(timestampStr),
        role: role.trim(),
        content: content.trim(),
        order: order++,
      });
    } else if (messages.length > 0) {
      // Line doesn't match the expected format: treat as content continuation
      messages[messages.length - 1].content += '\n' + line;
    } else {
      // First line doesn't match format, create a generic message
      messages.push({
        timestamp: null,
        role: 'unknown',
        content: line,
        order: order++,
      });
    }
  }

  return messages;
}
|
||||
|
||||
/**
|
||||
* Main function to fetch transcripts for sessions that don't have messages yet
|
||||
*/
|
||||
async function fetchTranscriptsForSessions() {
|
||||
console.log("Starting to fetch transcripts for sessions without messages...");
|
||||
|
||||
// Find sessions that have transcript URLs but no messages
|
||||
const sessionsNeedingTranscripts = await prisma.session.findMany({
|
||||
where: {
|
||||
AND: [
|
||||
{ fullTranscriptUrl: { not: null } },
|
||||
{ fullTranscriptUrl: { not: "" } }, // Ensure URL is not an empty string
|
||||
{ transcriptContent: null },
|
||||
{ messages: { none: {} } }, // No messages yet
|
||||
],
|
||||
},
|
||||
select: {
|
||||
id: true,
|
||||
fullTranscriptUrl: true,
|
||||
include: {
|
||||
company: true,
|
||||
messages: true,
|
||||
},
|
||||
});
|
||||
|
||||
if (sessionsToUpdate.length === 0) {
|
||||
console.log("No sessions found requiring transcript fetching.");
|
||||
if (sessionsNeedingTranscripts.length === 0) {
|
||||
console.log("No sessions found that need transcript fetching.");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Found ${sessionsToUpdate.length} sessions to update.`);
|
||||
console.log(`Found ${sessionsNeedingTranscripts.length} sessions that need transcript fetching.`);
|
||||
let successCount = 0;
|
||||
let errorCount = 0;
|
||||
|
||||
for (const session of sessionsToUpdate) {
|
||||
for (const session of sessionsNeedingTranscripts) {
|
||||
if (!session.fullTranscriptUrl) {
|
||||
// Should not happen due to query, but good for type safety
|
||||
console.warn(`Session ${session.id} has no fullTranscriptUrl, skipping.`);
|
||||
console.warn(`Session ${session.id} has no transcript URL, skipping.`);
|
||||
continue;
|
||||
}
|
||||
|
||||
console.log(
|
||||
`Fetching transcript for session ${session.id} from ${session.fullTranscriptUrl}...`
|
||||
);
|
||||
console.log(`Fetching transcript for session ${session.id}...`);
|
||||
|
||||
try {
|
||||
const response = await fetch(session.fullTranscriptUrl);
|
||||
if (!response.ok) {
|
||||
console.error(
|
||||
`Failed to fetch transcript for session ${session.id}: ${response.status} ${response.statusText}`
|
||||
);
|
||||
const errorBody = await response.text();
|
||||
console.error(`Error details: ${errorBody.substring(0, 500)}`); // Log first 500 chars of error
|
||||
errorCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const transcriptText = await response.text();
|
||||
|
||||
if (transcriptText.trim() === "") {
|
||||
console.warn(
|
||||
`Fetched empty transcript for session ${session.id}. Storing as empty string.`
|
||||
);
|
||||
}
|
||||
|
||||
await prisma.session.update({
|
||||
where: { id: session.id },
|
||||
data: { transcriptContent: transcriptText },
|
||||
});
|
||||
console.log(
|
||||
`Successfully fetched and stored transcript for session ${session.id}.`
|
||||
// Fetch transcript content
|
||||
const transcriptContent = await fetchTranscriptContent(
|
||||
session.fullTranscriptUrl,
|
||||
session.company.csvUsername || undefined,
|
||||
session.company.csvPassword || undefined
|
||||
);
|
||||
|
||||
if (!transcriptContent) {
|
||||
throw new Error("Failed to fetch transcript content");
|
||||
}
|
||||
|
||||
// Parse transcript into messages
|
||||
const messages = parseTranscriptToMessages(transcriptContent);
|
||||
|
||||
if (messages.length === 0) {
|
||||
throw new Error("No messages found in transcript");
|
||||
}
|
||||
|
||||
// Create messages in database
|
||||
await prisma.message.createMany({
|
||||
data: messages.map(msg => ({
|
||||
sessionId: session.id,
|
||||
timestamp: msg.timestamp,
|
||||
role: msg.role,
|
||||
content: msg.content,
|
||||
order: msg.order,
|
||||
})),
|
||||
});
|
||||
|
||||
console.log(`Successfully fetched transcript for session ${session.id} (${messages.length} messages)`);
|
||||
successCount++;
|
||||
} catch (error) {
|
||||
console.error(`Error processing session ${session.id}:`, error);
|
||||
console.error(`Error fetching transcript for session ${session.id}:`, error);
|
||||
errorCount++;
|
||||
}
|
||||
}
|
||||
|
||||
console.log("Transcript fetching complete.");
|
||||
console.log(`Successfully updated: ${successCount} sessions.`);
|
||||
console.log(`Failed to update: ${errorCount} sessions.`);
|
||||
console.log(`Successfully fetched: ${successCount} transcripts.`);
|
||||
console.log(`Failed to fetch: ${errorCount} transcripts.`);
|
||||
}
|
||||
|
||||
main()
|
||||
// Run the main function
|
||||
fetchTranscriptsForSessions()
|
||||
.catch((e) => {
|
||||
console.error("An error occurred during the script execution:", e);
|
||||
process.exitCode = 1;
|
||||
|
||||
@ -1,68 +0,0 @@
|
||||
// Fix Trailing Whitespace
|
||||
// This script removes trailing whitespace from specified file types
|
||||
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// Configure which file types to process
|
||||
const fileTypes = [".ts", ".tsx", ".js", ".jsx", ".json", ".md", ".css"];
|
||||
|
||||
// Configure directories to ignore
|
||||
const ignoreDirs = ["node_modules", ".next", ".git", "out", "build", "dist"];
|
||||
|
||||
/**
 * Walks `dir` recursively and strips trailing whitespace from every line of
 * each file whose extension is listed in `fileTypes`. Sub-directories named
 * in `ignoreDirs` are not entered. A file is rewritten only when stripping
 * actually changed its content.
 *
 * Errors are logged, never thrown: a failing file does not stop the rest of
 * its directory, and an unreadable directory is reported and skipped.
 *
 * @param {string} dir - Directory to process.
 * @returns {Promise<void>}
 */
async function processDirectory(dir) {
  try {
    const entries = await fs.promises.readdir(dir, { withFileTypes: true });

    for (const entry of entries) {
      const entryPath = path.join(dir, entry.name);

      if (entry.isDirectory()) {
        const isIgnored = ignoreDirs.includes(entry.name);
        if (!isIgnored) {
          await processDirectory(entryPath);
        }
        continue;
      }

      // Only files with a configured extension are touched.
      const hasWantedExtension = fileTypes.includes(path.extname(entry.name));
      if (!hasWantedExtension) {
        continue;
      }

      try {
        const before = await fs.promises.readFile(entryPath, "utf8");
        const after = before
          .split("\n")
          .map((text) => text.trimEnd())
          .join("\n");

        // Leave untouched files alone so they are not rewritten needlessly.
        if (after === before) {
          continue;
        }

        await fs.promises.writeFile(entryPath, after, "utf8");
        console.log(`Fixed trailing whitespace in ${entryPath}`);
      } catch (fileError) {
        console.error(`Error processing file ${entryPath}:`, fileError);
      }
    }
  } catch (dirError) {
    console.error(`Error reading directory ${dir}:`, dirError);
  }
}
|
||||
|
||||
// Start processing from root directory
|
||||
const rootDir = process.cwd();
|
||||
console.log(`Starting whitespace cleanup from ${rootDir}`);
|
||||
processDirectory(rootDir)
|
||||
.then(() => console.log("Whitespace cleanup completed"))
|
||||
.catch((err) => console.error("Error in whitespace cleanup:", err));
|
||||
@ -1,38 +0,0 @@
|
||||
// Simple script to test the manual processing trigger
|
||||
// Usage: node scripts/manual-trigger-test.js
|
||||
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
/**
 * Sends one POST to the local admin "trigger processing" endpoint and prints
 * the JSON reply, prefixed with a success or failure banner depending on the
 * HTTP status. Any thrown error is reported by message only.
 */
async function testManualTrigger() {
  const endpoint = 'http://localhost:3000/api/admin/trigger-processing';
  const payload = {
    batchSize: 5, // Process max 5 sessions
    maxConcurrency: 3 // Use 3 concurrent workers
  };

  try {
    console.log('Testing manual processing trigger...');

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Note: In a real scenario, you'd need to include authentication cookies
        // For testing, you might need to login first and copy the session cookie
      },
      body: JSON.stringify(payload)
    });

    const reply = await response.json();
    const banner = response.ok
      ? '✅ Manual trigger successful:'
      : '❌ Manual trigger failed:';

    console.log(banner);
    console.log(JSON.stringify(reply, null, 2));
  } catch (error) {
    console.error('❌ Error testing manual trigger:', error.message);
  }
}
|
||||
|
||||
testManualTrigger();
|
||||
@ -1,243 +0,0 @@
|
||||
// Manual trigger scripts for both schedulers
|
||||
import { fetchAndStoreSessionsForAllCompanies } from "../lib/csvFetcher.js";
|
||||
import { processAllUnparsedTranscripts } from "../lib/transcriptParser.js";
|
||||
import { PrismaClient } from "@prisma/client";
|
||||
import fetch from "node-fetch";
|
||||
import { readFileSync } from "fs";
|
||||
import { fileURLToPath } from "url";
|
||||
import { dirname, join } from "path";
|
||||
|
||||
// Load environment variables from .env.local
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const envPath = join(__dirname, '..', '.env.local');
|
||||
|
||||
// Read KEY=VALUE pairs from the file at `envPath` into process.env.
// Blank lines and lines starting with '#' are skipped, a value may itself
// contain '=', and variables that already hold a non-empty value are kept.
// A missing or unreadable file only produces a warning.
try {
  const assignments = readFileSync(envPath, 'utf8')
    .split('\n')
    .filter((text) => text.trim() && !text.startsWith('#'));

  for (const assignment of assignments) {
    const [key, ...valueParts] = assignment.split('=');
    if (!key || valueParts.length === 0) {
      continue;
    }

    const name = key.trim();
    if (process.env[name]) {
      continue;
    }
    process.env[name] = valueParts.join('=').trim();
  }

  console.log("✅ Environment variables loaded from .env.local");
} catch (error) {
  console.warn("⚠️ Could not load .env.local file:", error.message);
}
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Runs the session refresh once by calling
 * `fetchAndStoreSessionsForAllCompanies`, then reports success or failure on
 * the console. Failures are logged, not rethrown.
 */
async function triggerSessionRefresh() {
  console.log("=== Manual Session Refresh Trigger ===");

  let failure = null;
  try {
    await fetchAndStoreSessionsForAllCompanies();
  } catch (error) {
    // Wrapped so that even a falsy thrown value is recognised as a failure.
    failure = { error };
  }

  if (failure) {
    console.error("❌ Session refresh failed:", failure.error);
  } else {
    console.log("✅ Session refresh completed successfully");
  }
}
|
||||
|
||||
/**
 * Runs the processing scheduler once by hand.
 *
 * Stops early when OPENAI_API_KEY is unset. Otherwise lists up to five
 * sessions that have messages and are not marked processed, and - if there
 * are any - loads `../lib/processingScheduler.js` and calls its
 * `processUnprocessedSessions`. Errors are logged, not rethrown.
 */
async function triggerProcessingScheduler() {
  console.log("=== Manual Processing Scheduler Trigger ===");

  if (!process.env.OPENAI_API_KEY) {
    console.error("❌ OPENAI_API_KEY environment variable is not set");
    return;
  }

  // Has at least one message AND processed is false or null.
  const needsProcessing = {
    AND: [
      { messages: { some: {} } },
      {
        OR: [
          { processed: false },
          { processed: null }
        ]
      }
    ],
  };

  try {
    const candidates = await prisma.session.findMany({
      where: needsProcessing,
      select: {
        id: true,
        processed: true,
      },
      take: 5, // Process 5 sessions for manual testing
    });

    console.log(`Found ${candidates.length} sessions to process:`);
    for (const candidate of candidates) {
      console.log(`- Session ${candidate.id}: processed=${candidate.processed}`);
    }

    if (candidates.length === 0) {
      console.log("✅ No sessions found requiring processing");
      return;
    }

    // Loaded lazily, only once there is something to process.
    const { processUnprocessedSessions } = await import(
      "../lib/processingScheduler.js"
    );
    await processUnprocessedSessions();

    console.log("✅ Processing scheduler completed");
  } catch (error) {
    console.error("❌ Processing scheduler failed:", error);
  }
}
|
||||
|
||||
/**
 * Runs `processAllUnparsedTranscripts` once and prints the `processed` and
 * `errors` figures from its result. Failures are logged, not rethrown.
 */
async function triggerTranscriptParsing() {
  console.log("=== Manual Transcript Parsing Trigger ===");

  try {
    const summary = await processAllUnparsedTranscripts();
    const report = `✅ Transcript parsing completed: ${summary.processed} processed, ${summary.errors} errors`;
    console.log(report);
  } catch (error) {
    console.error("❌ Transcript parsing failed:", error);
  }
}
|
||||
|
||||
/**
 * Prints session counts - total, processed, unprocessed, with messages, and
 * ready for processing (has messages and is not processed) - followed by up
 * to three example sessions that are ready. Errors are logged, not rethrown.
 */
async function showProcessingStatus() {
  console.log("=== Processing Status ===");

  // Reusable filter fragments for the queries below.
  const notProcessed = {
    OR: [
      { processed: false },
      { processed: null }
    ]
  };
  const hasMessages = {
    messages: {
      some: {},
    },
  };
  const readyFilter = {
    AND: [
      { messages: { some: {} } },
      notProcessed
    ],
  };

  try {
    const total = await prisma.session.count();
    const processed = await prisma.session.count({
      where: { processed: true },
    });
    const unprocessed = await prisma.session.count({ where: notProcessed });
    const withMessages = await prisma.session.count({ where: hasMessages });
    const ready = await prisma.session.count({ where: readyFilter });

    console.log(`📊 Total sessions: ${total}`);
    console.log(`✅ Processed sessions: ${processed}`);
    console.log(`⏳ Unprocessed sessions: ${unprocessed}`);
    console.log(`📄 Sessions with messages: ${withMessages}`);
    console.log(`🔄 Ready for processing: ${ready}`);

    if (ready <= 0) {
      return;
    }

    // Show some examples of unprocessed sessions
    console.log("\n📋 Sample unprocessed sessions:");
    const examples = await prisma.session.findMany({
      where: readyFilter,
      select: {
        id: true,
        processed: true,
        startTime: true,
      },
      take: 3,
    });

    for (const example of examples) {
      console.log(
        `- ${example.id} (${example.startTime.toISOString()}) - processed: ${example.processed}`
      );
    }
  } catch (error) {
    console.error("❌ Failed to get processing status:", error);
  }
}
|
||||
|
||||
// Main execution based on command line argument
|
||||
const command = process.argv[2];
|
||||
|
||||
// Dispatch on the CLI command: each known command maps to the handler that
// runs it; anything else (including no command) prints the usage text.
const printStageSeparator = () => {
  console.log("\n" + "=".repeat(50) + "\n");
};

const commandHandlers = new Map([
  ["refresh", () => triggerSessionRefresh()],
  ["process", () => triggerProcessingScheduler()],
  ["parse", () => triggerTranscriptParsing()],
  ["status", () => showProcessingStatus()],
  [
    "both",
    async () => {
      await triggerSessionRefresh();
      printStageSeparator();
      await triggerProcessingScheduler();
    },
  ],
  [
    "all",
    async () => {
      await triggerSessionRefresh();
      printStageSeparator();
      await triggerTranscriptParsing();
      printStageSeparator();
      await triggerProcessingScheduler();
    },
  ],
]);

const selectedHandler = commandHandlers.get(command);

if (selectedHandler) {
  await selectedHandler();
} else {
  const usageLines = [
    "Usage: node scripts/manual-triggers.js [command]",
    "Commands:",
    " refresh - Trigger session refresh (fetch new sessions from CSV)",
    " parse - Parse transcripts into structured messages",
    " process - Trigger processing scheduler (process unprocessed sessions)",
    " status - Show current processing status",
    " both - Run both refresh and processing",
    " all - Run refresh, parse, and processing in sequence",
  ];
  for (const usageLine of usageLines) {
    console.log(usageLine);
  }
}
|
||||
|
||||
await prisma.$disconnect();
|
||||
@ -1,283 +0,0 @@
|
||||
// Script to manually process unprocessed sessions with OpenAI
|
||||
import { PrismaClient } from "@prisma/client";
|
||||
import fetch from "node-fetch";
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
|
||||
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
|
||||
|
||||
/**
 * Processes a session transcript using the OpenAI chat completions API.
 *
 * Sends a system prompt describing the fields to extract together with the
 * transcript, asks for a JSON object reply, parses it and checks it with
 * `validateOpenAIResponse`.
 *
 * @param {string} sessionId The session ID (embedded in the prompt's schema)
 * @param {string} transcript The transcript content to process
 * @returns {Promise<Object>} Parsed and validated data from OpenAI
 * @throws {Error} When OPENAI_API_KEY is unset, the HTTP status is not OK,
 *   the reply carries no message content, the content is not valid JSON, or
 *   validation fails. Errors raised after the key check are logged and then
 *   rethrown.
 */
async function processTranscriptWithOpenAI(sessionId, transcript) {
  if (!OPENAI_API_KEY) {
    throw new Error("OPENAI_API_KEY environment variable is not set");
  }

  // Create a system message with instructions
  const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)

Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;

  try {
    const response = await fetch(OPENAI_API_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${OPENAI_API_KEY}`,
      },
      body: JSON.stringify({
        model: "gpt-4-turbo",
        messages: [
          {
            role: "system",
            content: systemMessage,
          },
          {
            role: "user",
            content: transcript,
          },
        ],
        temperature: 0.3, // Lower temperature for more consistent results
        response_format: { type: "json_object" },
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();

    // A reply without choices (or without message content) would otherwise
    // fail below with an unhelpful "cannot read properties of undefined".
    const rawContent = data?.choices?.[0]?.message?.content;
    if (typeof rawContent !== "string") {
      throw new Error("OpenAI API response contained no message content");
    }

    const processedData = JSON.parse(rawContent);

    // Validate the response against our expected schema
    validateOpenAIResponse(processedData);

    return processedData;
  } catch (error) {
    console.error(`Error processing transcript with OpenAI:`, error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Validates the OpenAI response against our expected schema.
 *
 * Checks, in order: that `data` is an object, that every required field is
 * present, and then the type/format of each field. Returns nothing on
 * success.
 *
 * @param {Object} data The data to validate
 * @throws {Error} Describing the first check that fails.
 */
function validateOpenAIResponse(data) {
  // The `in` operator throws a TypeError on null and primitives, so reject
  // those up front with a readable message.
  if (data === null || typeof data !== "object") {
    throw new Error("Invalid response. Expected a JSON object");
  }

  // Check required fields
  const requiredFields = [
    "language",
    "messages_sent",
    "sentiment",
    "escalated",
    "forwarded_hr",
    "category",
    "questions",
    "summary",
    "session_id",
  ];

  for (const field of requiredFields) {
    if (!(field in data)) {
      throw new Error(`Missing required field: ${field}`);
    }
  }

  // Validate field types
  if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
    throw new Error(
      "Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
    );
  }

  // A message count must be a whole number; this also rejects NaN/Infinity.
  if (!Number.isInteger(data.messages_sent) || data.messages_sent < 0) {
    throw new Error("Invalid messages_sent. Expected non-negative number");
  }

  if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
    throw new Error(
      "Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
    );
  }

  if (typeof data.escalated !== "boolean") {
    throw new Error("Invalid escalated. Expected boolean");
  }

  if (typeof data.forwarded_hr !== "boolean") {
    throw new Error("Invalid forwarded_hr. Expected boolean");
  }

  const validCategories = [
    "Schedule & Hours",
    "Leave & Vacation",
    "Sick Leave & Recovery",
    "Salary & Compensation",
    "Contract & Hours",
    "Onboarding",
    "Offboarding",
    "Workwear & Staff Pass",
    "Team & Contacts",
    "Personal Questions",
    "Access & Login",
    "Social questions",
    "Unrecognized / Other",
  ];

  if (!validCategories.includes(data.category)) {
    throw new Error(
      `Invalid category. Expected one of: ${validCategories.join(", ")}`
    );
  }

  // Every element must be a string, as the error message promises.
  const questionsAreStrings =
    Array.isArray(data.questions) &&
    data.questions.every((question) => typeof question === "string");
  if (!questionsAreStrings) {
    throw new Error("Invalid questions. Expected array of strings");
  }

  if (
    typeof data.summary !== "string" ||
    data.summary.length < 10 ||
    data.summary.length > 300
  ) {
    throw new Error(
      "Invalid summary. Expected string between 10-300 characters"
    );
  }

  if (typeof data.session_id !== "string") {
    throw new Error("Invalid session_id. Expected string");
  }
}
|
||||
|
||||
/**
 * Runs the OpenAI analysis over every stored session that still needs it.
 *
 * Selects sessions whose transcriptContent is neither null nor empty and
 * whose processed flag is not true, sends each transcript to
 * processTranscriptWithOpenAI one at a time, and writes the extracted fields
 * back to the session with processed set to true. A failure on one session
 * is logged and counted, and the loop continues with the next session.
 *
 * @returns {Promise<void>} Resolves once every selected session was attempted
 */
async function processUnprocessedSessions() {
  console.log("Starting to process unprocessed sessions...");

  // Only the id and the transcript text are needed for processing.
  const pending = await prisma.session.findMany({
    where: {
      AND: [
        { transcriptContent: { not: null } },
        { transcriptContent: { not: "" } },
        { processed: { not: true } },
      ],
    },
    select: { id: true, transcriptContent: true },
  });

  if (pending.length === 0) {
    console.log("No sessions found requiring processing.");
    return;
  }

  console.log(`Found ${pending.length} sessions to process.`);

  // Numeric score stored next to the sentiment label, for compatibility
  // with existing data.
  const sentimentScores = { positive: 0.8, neutral: 0.0, negative: -0.8 };
  const tally = { succeeded: 0, failed: 0 };

  for (const { id, transcriptContent } of pending) {
    // The query should already exclude these; kept as a defensive guard.
    if (!transcriptContent) {
      console.warn(`Session ${id} has no transcript content, skipping.`);
      continue;
    }

    console.log(`Processing transcript for session ${id}...`);

    try {
      const analysis = await processTranscriptWithOpenAI(id, transcriptContent);

      // Persist the extracted fields and flag the session as processed.
      await prisma.session.update({
        where: { id },
        data: {
          language: analysis.language,
          messagesSent: analysis.messages_sent,
          sentiment: sentimentScores[analysis.sentiment] || 0,
          sentimentCategory: analysis.sentiment,
          escalated: analysis.escalated,
          forwardedHr: analysis.forwarded_hr,
          category: analysis.category,
          questions: JSON.stringify(analysis.questions),
          summary: analysis.summary,
          processed: true,
        },
      });

      console.log(`Successfully processed session ${id}.`);
      tally.succeeded += 1;
    } catch (failure) {
      console.error(`Error processing session ${id}:`, failure);
      tally.failed += 1;
    }
  }

  console.log("Session processing complete.");
  console.log(`Successfully processed: ${tally.succeeded} sessions.`);
  console.log(`Failed to process: ${tally.failed} sessions.`);
}
|
||||
|
||||
// Script entry point: run the processing pass, report any failure through
// the process exit code, and always close the Prisma connection afterwards.
void (async () => {
  try {
    await processUnprocessedSessions();
  } catch (e) {
    console.error("An error occurred during the script execution:", e);
    // Set the exit code instead of exiting so the cleanup below still runs.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
})();
|
||||
@ -18,11 +18,37 @@ interface OpenAIProcessedData {
|
||||
session_id: string;
|
||||
}
|
||||
|
||||
/**
 * Fetches transcript content from a URL.
 *
 * HTTP Basic credentials are sent only when both `username` and `password`
 * are provided. Every failure (non-2xx response, network error, timeout) is
 * logged with console.warn and reported as `null` rather than thrown.
 *
 * @param url The transcript URL to download
 * @param username Optional Basic-auth user name
 * @param password Optional Basic-auth password
 * @param timeoutMs Maximum time to wait for the request, in milliseconds
 * @returns The response body as text, or `null` when the fetch failed
 */
async function fetchTranscriptContent(
  url: string,
  username?: string,
  password?: string,
  timeoutMs: number = 10000
): Promise<string | null> {
  try {
    const headers: Record<string, string> = {};
    if (username && password) {
      const credentials = Buffer.from(`${username}:${password}`).toString("base64");
      headers.Authorization = `Basic ${credentials}`;
    }

    const response = await fetch(url, {
      headers,
      // fetch has no default deadline; without one a stalled server would
      // keep this call pending forever.
      signal: AbortSignal.timeout(timeoutMs),
    });

    if (!response.ok) {
      // statusText can be empty, so the numeric status is logged as well.
      console.warn(
        `Failed to fetch transcript from ${url}: ${response.status} ${response.statusText}`
      );
      return null;
    }
    return await response.text();
  } catch (error) {
    console.warn(`Error fetching transcript from ${url}:`, error);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Processes a session transcript using OpenAI API
|
||||
* @param sessionId The session ID
|
||||
* @param transcript The transcript content to process
|
||||
* @returns Processed data from OpenAI
|
||||
*/
|
||||
async function processTranscriptWithOpenAI(
|
||||
sessionId: string,
|
||||
@ -32,7 +58,6 @@ async function processTranscriptWithOpenAI(
|
||||
throw new Error("OPENAI_API_KEY environment variable is not set");
|
||||
}
|
||||
|
||||
// Create a system message with instructions
|
||||
const systemMessage = `
|
||||
You are an AI assistant tasked with analyzing chat transcripts.
|
||||
Extract the following information from the transcript:
|
||||
@ -91,7 +116,7 @@ async function processTranscriptWithOpenAI(
|
||||
content: transcript,
|
||||
},
|
||||
],
|
||||
temperature: 0.3, // Lower temperature for more consistent results
|
||||
temperature: 0.3,
|
||||
response_format: { type: "json_object" },
|
||||
}),
|
||||
});
|
||||
@ -104,9 +129,7 @@ async function processTranscriptWithOpenAI(
|
||||
const data = (await response.json()) as any;
|
||||
const processedData = JSON.parse(data.choices[0].message.content);
|
||||
|
||||
// Validate the response against our expected schema
|
||||
validateOpenAIResponse(processedData);
|
||||
|
||||
return processedData;
|
||||
} catch (error) {
|
||||
console.error(`Error processing transcript with OpenAI:`, error);
|
||||
@ -116,22 +139,11 @@ async function processTranscriptWithOpenAI(
|
||||
|
||||
/**
|
||||
* Validates the OpenAI response against our expected schema
|
||||
* @param data The data to validate
|
||||
*/
|
||||
function validateOpenAIResponse(
|
||||
data: any
|
||||
): asserts data is OpenAIProcessedData {
|
||||
// Check required fields
|
||||
function validateOpenAIResponse(data: any): asserts data is OpenAIProcessedData {
|
||||
const requiredFields = [
|
||||
"language",
|
||||
"messages_sent",
|
||||
"sentiment",
|
||||
"escalated",
|
||||
"forwarded_hr",
|
||||
"category",
|
||||
"questions",
|
||||
"summary",
|
||||
"session_id",
|
||||
"language", "messages_sent", "sentiment", "escalated",
|
||||
"forwarded_hr", "category", "questions", "summary", "session_id"
|
||||
];
|
||||
|
||||
for (const field of requiredFields) {
|
||||
@ -140,11 +152,8 @@ function validateOpenAIResponse(
|
||||
}
|
||||
}
|
||||
|
||||
// Validate field types
|
||||
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
|
||||
throw new Error(
|
||||
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
|
||||
);
|
||||
throw new Error("Invalid language format. Expected ISO 639-1 code (e.g., 'en')");
|
||||
}
|
||||
|
||||
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
|
||||
@ -152,9 +161,7 @@ function validateOpenAIResponse(
|
||||
}
|
||||
|
||||
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
|
||||
throw new Error(
|
||||
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
|
||||
);
|
||||
throw new Error("Invalid sentiment. Expected 'positive', 'neutral', or 'negative'");
|
||||
}
|
||||
|
||||
if (typeof data.escalated !== "boolean") {
|
||||
@ -166,39 +173,22 @@ function validateOpenAIResponse(
|
||||
}
|
||||
|
||||
const validCategories = [
|
||||
"Schedule & Hours",
|
||||
"Leave & Vacation",
|
||||
"Sick Leave & Recovery",
|
||||
"Salary & Compensation",
|
||||
"Contract & Hours",
|
||||
"Onboarding",
|
||||
"Offboarding",
|
||||
"Workwear & Staff Pass",
|
||||
"Team & Contacts",
|
||||
"Personal Questions",
|
||||
"Access & Login",
|
||||
"Social questions",
|
||||
"Unrecognized / Other",
|
||||
"Schedule & Hours", "Leave & Vacation", "Sick Leave & Recovery",
|
||||
"Salary & Compensation", "Contract & Hours", "Onboarding", "Offboarding",
|
||||
"Workwear & Staff Pass", "Team & Contacts", "Personal Questions",
|
||||
"Access & Login", "Social questions", "Unrecognized / Other"
|
||||
];
|
||||
|
||||
if (!validCategories.includes(data.category)) {
|
||||
throw new Error(
|
||||
`Invalid category. Expected one of: ${validCategories.join(", ")}`
|
||||
);
|
||||
throw new Error(`Invalid category. Expected one of: ${validCategories.join(", ")}`);
|
||||
}
|
||||
|
||||
if (!Array.isArray(data.questions)) {
|
||||
throw new Error("Invalid questions. Expected array of strings");
|
||||
}
|
||||
|
||||
if (
|
||||
typeof data.summary !== "string" ||
|
||||
data.summary.length < 10 ||
|
||||
data.summary.length > 300
|
||||
) {
|
||||
throw new Error(
|
||||
"Invalid summary. Expected string between 10-300 characters"
|
||||
);
|
||||
if (typeof data.summary !== "string" || data.summary.length < 10 || data.summary.length > 300) {
|
||||
throw new Error("Invalid summary. Expected string between 10-300 characters");
|
||||
}
|
||||
|
||||
if (typeof data.session_id !== "string") {
|
||||
@ -207,86 +197,146 @@ function validateOpenAIResponse(
|
||||
}
|
||||
|
||||
/**
|
||||
* Main function to process unprocessed sessions
|
||||
* Main function to process SessionImport records that need processing
|
||||
*/
|
||||
async function processUnprocessedSessions() {
|
||||
console.log("Starting to process unprocessed sessions...");
|
||||
console.log("Starting to process unprocessed SessionImport records...");
|
||||
|
||||
// Find sessions that have transcript content but haven't been processed
|
||||
const sessionsToProcess = await prisma.session.findMany({
|
||||
// Find SessionImport records that are QUEUED and have transcript URLs
|
||||
const importsToProcess = await prisma.sessionImport.findMany({
|
||||
where: {
|
||||
AND: [
|
||||
{ transcriptContent: { not: null } },
|
||||
{ transcriptContent: { not: "" } },
|
||||
{ processed: { not: true } }, // Either false or null
|
||||
],
|
||||
status: "QUEUED",
|
||||
fullTranscriptUrl: { not: null },
|
||||
},
|
||||
select: {
|
||||
id: true,
|
||||
transcriptContent: true,
|
||||
include: {
|
||||
company: true,
|
||||
},
|
||||
});
|
||||
|
||||
if (sessionsToProcess.length === 0) {
|
||||
console.log("No sessions found requiring processing.");
|
||||
if (importsToProcess.length === 0) {
|
||||
console.log("No SessionImport records found requiring processing.");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Found ${sessionsToProcess.length} sessions to process.`);
|
||||
console.log(`Found ${importsToProcess.length} SessionImport records to process.`);
|
||||
let successCount = 0;
|
||||
let errorCount = 0;
|
||||
|
||||
for (const session of sessionsToProcess) {
|
||||
if (!session.transcriptContent) {
|
||||
// Should not happen due to query, but good for type safety
|
||||
console.warn(
|
||||
`Session ${session.id} has no transcript content, skipping.`
|
||||
);
|
||||
for (const importRecord of importsToProcess) {
|
||||
if (!importRecord.fullTranscriptUrl) {
|
||||
console.warn(`SessionImport ${importRecord.id} has no transcript URL, skipping.`);
|
||||
continue;
|
||||
}
|
||||
|
||||
console.log(`Processing transcript for session ${session.id}...`);
|
||||
console.log(`Processing transcript for SessionImport ${importRecord.id}...`);
|
||||
|
||||
try {
|
||||
const processedData = await processTranscriptWithOpenAI(
|
||||
session.id,
|
||||
session.transcriptContent
|
||||
// Mark as processing
|
||||
await prisma.sessionImport.update({
|
||||
where: { id: importRecord.id },
|
||||
data: { status: "PROCESSING" },
|
||||
});
|
||||
|
||||
// Fetch transcript content
|
||||
const transcriptContent = await fetchTranscriptContent(
|
||||
importRecord.fullTranscriptUrl,
|
||||
importRecord.company.csvUsername || undefined,
|
||||
importRecord.company.csvPassword || undefined
|
||||
);
|
||||
|
||||
// Map sentiment string to float value for compatibility with existing data
|
||||
const sentimentMap: Record<string, number> = {
|
||||
positive: 0.8,
|
||||
neutral: 0.0,
|
||||
negative: -0.8,
|
||||
};
|
||||
if (!transcriptContent) {
|
||||
throw new Error("Failed to fetch transcript content");
|
||||
}
|
||||
|
||||
// Update the session with processed data
|
||||
await prisma.session.update({
|
||||
where: { id: session.id },
|
||||
data: {
|
||||
// Process with OpenAI
|
||||
const processedData = await processTranscriptWithOpenAI(
|
||||
importRecord.externalSessionId,
|
||||
transcriptContent
|
||||
);
|
||||
|
||||
// Parse dates from raw strings
|
||||
const startTime = new Date(importRecord.startTimeRaw);
|
||||
const endTime = new Date(importRecord.endTimeRaw);
|
||||
|
||||
// Create or update Session record
|
||||
const session = await prisma.session.upsert({
|
||||
where: { importId: importRecord.id },
|
||||
update: {
|
||||
startTime: isNaN(startTime.getTime()) ? new Date() : startTime,
|
||||
endTime: isNaN(endTime.getTime()) ? new Date() : endTime,
|
||||
ipAddress: importRecord.ipAddress,
|
||||
country: importRecord.countryCode,
|
||||
language: processedData.language,
|
||||
messagesSent: processedData.messages_sent,
|
||||
sentiment: sentimentMap[processedData.sentiment] || 0,
|
||||
sentimentCategory: processedData.sentiment,
|
||||
sentiment: { positive: 0.8, neutral: 0.0, negative: -0.8 }[processedData.sentiment] || 0,
|
||||
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
|
||||
escalated: processedData.escalated,
|
||||
forwardedHr: processedData.forwarded_hr,
|
||||
fullTranscriptUrl: importRecord.fullTranscriptUrl,
|
||||
avgResponseTime: importRecord.avgResponseTimeSeconds,
|
||||
tokens: importRecord.tokens,
|
||||
tokensEur: importRecord.tokensEur,
|
||||
category: processedData.category,
|
||||
initialMsg: importRecord.initialMessage,
|
||||
processed: true,
|
||||
questions: JSON.stringify(processedData.questions),
|
||||
summary: processedData.summary,
|
||||
},
|
||||
create: {
|
||||
companyId: importRecord.companyId,
|
||||
importId: importRecord.id,
|
||||
startTime: isNaN(startTime.getTime()) ? new Date() : startTime,
|
||||
endTime: isNaN(endTime.getTime()) ? new Date() : endTime,
|
||||
ipAddress: importRecord.ipAddress,
|
||||
country: importRecord.countryCode,
|
||||
language: processedData.language,
|
||||
messagesSent: processedData.messages_sent,
|
||||
sentiment: { positive: 0.8, neutral: 0.0, negative: -0.8 }[processedData.sentiment] || 0,
|
||||
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
|
||||
escalated: processedData.escalated,
|
||||
forwardedHr: processedData.forwarded_hr,
|
||||
fullTranscriptUrl: importRecord.fullTranscriptUrl,
|
||||
avgResponseTime: importRecord.avgResponseTimeSeconds,
|
||||
tokens: importRecord.tokens,
|
||||
tokensEur: importRecord.tokensEur,
|
||||
category: processedData.category,
|
||||
initialMsg: importRecord.initialMessage,
|
||||
processed: true,
|
||||
questions: JSON.stringify(processedData.questions),
|
||||
summary: processedData.summary,
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`Successfully processed session ${session.id}.`);
|
||||
// Mark SessionImport as DONE
|
||||
await prisma.sessionImport.update({
|
||||
where: { id: importRecord.id },
|
||||
data: {
|
||||
status: "DONE",
|
||||
processedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`Successfully processed SessionImport ${importRecord.id} -> Session ${session.id}`);
|
||||
successCount++;
|
||||
} catch (error) {
|
||||
console.error(`Error processing session ${session.id}:`, error);
|
||||
console.error(`Error processing SessionImport ${importRecord.id}:`, error);
|
||||
|
||||
// Mark as ERROR
|
||||
await prisma.sessionImport.update({
|
||||
where: { id: importRecord.id },
|
||||
data: {
|
||||
status: "ERROR",
|
||||
errorMsg: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
});
|
||||
|
||||
errorCount++;
|
||||
}
|
||||
}
|
||||
|
||||
console.log("Session processing complete.");
|
||||
console.log(`Successfully processed: ${successCount} sessions.`);
|
||||
console.log(`Failed to process: ${errorCount} sessions.`);
|
||||
console.log("SessionImport processing complete.");
|
||||
console.log(`Successfully processed: ${successCount} records.`);
|
||||
console.log(`Failed to process: ${errorCount} records.`);
|
||||
}
|
||||
|
||||
// Run the main function
|
||||
|
||||
@ -1,75 +0,0 @@
|
||||
// Script to check processing status and trigger processing
|
||||
// Usage: node scripts/test-processing-status.js
|
||||
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
/**
 * Prints a summary of session processing status to the console.
 *
 * Reports total, processed and unprocessed session counts, the number of
 * unprocessed sessions that already have messages, the processed percentage,
 * and up to five processed sessions created within the last hour. Errors are
 * logged, not thrown, and the Prisma connection is always closed.
 *
 * @returns {Promise<void>}
 */
async function checkProcessingStatus() {
  try {
    console.log('🔍 Checking processing status...\n');

    // The four counts are independent of each other, so run them together.
    const [
      totalSessions,
      processedSessions,
      unprocessedSessions,
      sessionsWithMessages,
    ] = await Promise.all([
      prisma.session.count(),
      prisma.session.count({
        where: { processed: true }
      }),
      prisma.session.count({
        where: { processed: false }
      }),
      prisma.session.count({
        where: {
          processed: false,
          messages: { some: {} }
        }
      }),
    ]);

    console.log('📊 Processing Status:');
    console.log(` Total sessions: ${totalSessions}`);
    console.log(` ✅ Processed: ${processedSessions}`);
    console.log(` ⏳ Unprocessed: ${unprocessedSessions}`);
    console.log(` 📝 Unprocessed with messages: ${sessionsWithMessages}`);

    // With no sessions at all, 0 / 0 would be printed as "NaN%".
    const processedPercentage = totalSessions > 0
      ? ((processedSessions / totalSessions) * 100).toFixed(1)
      : '0.0';
    console.log(` 📈 Processing progress: ${processedPercentage}%\n`);

    // Check recent processing activity.
    // NOTE(review): the filter and the "ago" figure use createdAt, i.e. when
    // the session row was created, not when it was processed.
    const recentlyProcessed = await prisma.session.findMany({
      where: {
        processed: true,
        createdAt: {
          gte: new Date(Date.now() - 60 * 60 * 1000) // Last hour
        }
      },
      orderBy: { createdAt: 'desc' },
      take: 5,
      select: {
        id: true,
        createdAt: true,
        category: true,
        sentiment: true
      }
    });

    if (recentlyProcessed.length > 0) {
      console.log('🕒 Recently processed sessions:');
      for (const session of recentlyProcessed) {
        const timeAgo = Math.round((Date.now() - session.createdAt.getTime()) / 1000 / 60);
        console.log(` • ${session.id.substring(0, 8)}... (${timeAgo}m ago) - ${session.category || 'No category'}`);
      }
    } else {
      console.log('🕒 No sessions processed in the last hour');
    }

    // NOTE(review): these lines are printed whenever the queries succeed,
    // regardless of the numbers reported above.
    console.log('\n✨ Processing system is working correctly!');
    console.log('💡 The parallel processing successfully processed sessions.');
    console.log('🎯 For manual triggers, you need to be logged in as an admin user.');

  } catch (error) {
    console.error('❌ Error checking status:', error);
  } finally {
    await prisma.$disconnect();
  }
}
|
||||
|
||||
checkProcessingStatus();
|
||||
@ -1,20 +0,0 @@
|
||||
// Direct trigger for processing scheduler (bypasses authentication)
|
||||
// Usage: node scripts/trigger-processing-direct.js
|
||||
|
||||
import { processUnprocessedSessions } from '../lib/processingScheduler.js';
|
||||
|
||||
/**
 * Kicks off one run of the processing scheduler from the command line.
 *
 * Any error raised by the run is logged and not rethrown.
 *
 * @returns {Promise<void>}
 */
async function triggerProcessing() {
  // Custom parameters for this manual run: 50 sessions, 3 concurrent workers.
  const sessionCount = 50;
  const workerCount = 3;

  try {
    console.log('🚀 Manually triggering processing scheduler...\n');

    await processUnprocessedSessions(sessionCount, workerCount);

    console.log('\n✅ Processing trigger completed!');
  } catch (failure) {
    console.error('❌ Error triggering processing:', failure);
  }
}
|
||||
|
||||
triggerProcessing();
|
||||
Reference in New Issue
Block a user