Refactor transcript fetching and processing scripts

- Introduced a new function `fetchTranscriptContent` to handle fetching transcripts with optional authentication.
- Enhanced error handling and logging for transcript fetching.
- Updated the `parseTranscriptToMessages` function to improve message parsing logic.
- Replaced the old session processing logic with a new approach that stages rows as `SessionImport` records (see the sketch below).
- Removed obsolete scripts related to manual triggers and whitespace fixing.
- Updated the server initialization to remove direct server handling, transitioning to a more modular approach.
- Improved overall code structure and readability across various scripts.
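
For reviewers skimming the diff, a condensed sketch of the new import flow follows; the field names are taken from the scheduler and API handler diffs below, while the surrounding wiring (company lookup, logging, per-row error handling) is simplified:

// Sketch only: fetch raw CSV rows and stage them as SessionImport records.
// prisma, fetchAndParseCsv and company come from the code in this commit;
// the real upsert spells out every field instead of spreading the row.
const rows = await fetchAndParseCsv(
  company.csvUrl,
  company.csvUsername,
  company.csvPassword
);
for (const row of rows) {
  await prisma.sessionImport.upsert({
    where: {
      companyId_externalSessionId: {
        companyId: company.id,
        externalSessionId: row.externalSessionId,
      },
    },
    update: { ...row, status: "QUEUED" },
    create: { companyId: company.id, ...row, status: "QUEUED" },
  });
}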
commit 1dd618b666 (parent d7ac0ba208)
Author: Max Kowalski
Date: 2025-06-27 16:38:16 +02:00
35 changed files with 6536 additions and 12797 deletions


@ -0,0 +1 @@
Use pnpm to manage this project, not npm!


@ -49,7 +49,7 @@ export default function MessageViewer({ messages }: MessageViewerProps) {
{message.role}
</span>
<span className="text-xs opacity-75 ml-2">
{new Date(message.timestamp).toLocaleTimeString()}
{message.timestamp ? new Date(message.timestamp).toLocaleTimeString() : 'No timestamp'}
</span>
</div>
<div className="text-sm whitespace-pre-wrap">
@ -63,11 +63,14 @@ export default function MessageViewer({ messages }: MessageViewerProps) {
<div className="mt-4 pt-3 border-t text-sm text-gray-500">
<div className="flex justify-between">
<span>
First message: {new Date(messages[0].timestamp).toLocaleString()}
First message: {messages[0].timestamp ? new Date(messages[0].timestamp).toLocaleString() : 'No timestamp'}
</span>
<span>
Last message:{" "}
{new Date(messages[messages.length - 1].timestamp).toLocaleString()}
{(() => {
const lastMessage = messages[messages.length - 1];
return lastMessage.timestamp ? new Date(lastMessage.timestamp).toLocaleString() : 'No timestamp';
})()}
</span>
</div>
</div>

demo-admin-user.txt (new file, 2 lines)

@ -0,0 +1,2 @@
user: admin@demo.com
password: admin123


@ -1,636 +0,0 @@
// JavaScript version of csvFetcher with session storage functionality
import fetch from "node-fetch";
import { parse } from "csv-parse/sync";
import ISO6391 from "iso-639-1";
import countries from "i18n-iso-countries";
import { PrismaClient } from "@prisma/client";
// Register locales for i18n-iso-countries
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
countries.registerLocale(enLocale);
const prisma = new PrismaClient();
/**
* Converts country names to ISO 3166-1 alpha-2 codes
* @param {string} countryStr Raw country string from CSV
* @returns {string|null|undefined} ISO 3166-1 alpha-2 country code or null if not found
*/
function getCountryCode(countryStr) {
if (countryStr === undefined) return undefined;
if (countryStr === null || countryStr === "") return null;
// Clean the input
const normalized = countryStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toUpperCase()) {
return countries.isValid(normalized) ? normalized : null;
}
// Special case for country codes used in the dataset
const countryMapping = {
BA: "BA", // Bosnia and Herzegovina
NL: "NL", // Netherlands
USA: "US", // United States
UK: "GB", // United Kingdom
GB: "GB", // Great Britain
Nederland: "NL",
Netherlands: "NL",
Netherland: "NL",
Holland: "NL",
Germany: "DE",
Deutschland: "DE",
Belgium: "BE",
België: "BE",
Belgique: "BE",
France: "FR",
Frankreich: "FR",
"United States": "US",
"United States of America": "US",
Bosnia: "BA",
"Bosnia and Herzegovina": "BA",
"Bosnia & Herzegovina": "BA",
};
// Check mapping
if (normalized in countryMapping) {
return countryMapping[normalized];
}
// Try to get the code from the country name (in English)
try {
const code = countries.getAlpha2Code(normalized, "en");
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting country name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Converts language names to ISO 639-1 codes
* @param {string} languageStr Raw language string from CSV
* @returns {string|null|undefined} ISO 639-1 language code or null if not found
*/
function getLanguageCode(languageStr) {
if (languageStr === undefined) return undefined;
if (languageStr === null || languageStr === "") return null;
// Clean the input
const normalized = languageStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toLowerCase()) {
return ISO6391.validate(normalized) ? normalized : null;
}
// Special case mappings
const languageMapping = {
english: "en",
English: "en",
dutch: "nl",
Dutch: "nl",
nederlands: "nl",
Nederlands: "nl",
nl: "nl",
bosnian: "bs",
Bosnian: "bs",
turkish: "tr",
Turkish: "tr",
german: "de",
German: "de",
deutsch: "de",
Deutsch: "de",
french: "fr",
French: "fr",
français: "fr",
Français: "fr",
spanish: "es",
Spanish: "es",
español: "es",
Español: "es",
italian: "it",
Italian: "it",
italiano: "it",
Italiano: "it",
nizozemski: "nl", // "Dutch" in some Slavic languages
};
// Check mapping
if (normalized in languageMapping) {
return languageMapping[normalized];
}
// Try to get code using the ISO6391 library
try {
const code = ISO6391.getCode(normalized);
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting language name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Normalizes category values to standard groups
* @param {string} categoryStr The raw category string from CSV
* @returns {string|null} A normalized category string
*/
function normalizeCategory(categoryStr) {
if (!categoryStr) return null;
const normalized = categoryStr.toLowerCase().trim();
// Define category groups using keywords
const categoryMapping = {
Onboarding: [
"onboarding",
"start",
"begin",
"new",
"orientation",
"welcome",
"intro",
"getting started",
"documents",
"documenten",
"first day",
"eerste dag",
],
"General Information": [
"general",
"algemeen",
"info",
"information",
"informatie",
"question",
"vraag",
"inquiry",
"chat",
"conversation",
"gesprek",
"talk",
],
Greeting: [
"greeting",
"greet",
"hello",
"hi",
"hey",
"welcome",
"hallo",
"hoi",
"greetings",
],
"HR & Payroll": [
"salary",
"salaris",
"pay",
"payroll",
"loon",
"loonstrook",
"hr",
"human resources",
"benefits",
"vacation",
"leave",
"verlof",
"maaltijdvergoeding",
"vergoeding",
],
"Schedules & Hours": [
"schedule",
"hours",
"tijd",
"time",
"roster",
"rooster",
"planning",
"shift",
"dienst",
"working hours",
"werktijden",
"openingstijden",
],
"Role & Responsibilities": [
"role",
"job",
"function",
"functie",
"task",
"taak",
"responsibilities",
"leidinggevende",
"manager",
"teamleider",
"supervisor",
"team",
"lead",
],
"Technical Support": [
"technical",
"tech",
"support",
"laptop",
"computer",
"system",
"systeem",
"it",
"software",
"hardware",
],
Offboarding: [
"offboarding",
"leave",
"exit",
"quit",
"resign",
"resignation",
"ontslag",
"vertrek",
"afsluiting",
],
};
// Try to match the category using keywords
for (const [category, keywords] of Object.entries(categoryMapping)) {
if (keywords.some((keyword) => normalized.includes(keyword))) {
return category;
}
}
// If no match, return "Other"
return "Other";
}
/**
* Converts sentiment string values to numeric scores
* @param {string} sentimentStr The sentiment string from the CSV
* @returns {number|null} A numeric score representing the sentiment
*/
function mapSentimentToScore(sentimentStr) {
if (!sentimentStr) return null;
// Convert to lowercase for case-insensitive matching
const sentiment = sentimentStr.toLowerCase();
// Map sentiment strings to numeric values on a scale from -1 to 2
const sentimentMap = {
happy: 1.0,
excited: 1.5,
positive: 0.8,
neutral: 0.0,
playful: 0.7,
negative: -0.8,
angry: -1.0,
sad: -0.7,
frustrated: -0.9,
positief: 0.8, // Dutch
neutraal: 0.0, // Dutch
negatief: -0.8, // Dutch
positivo: 0.8, // Spanish/Italian
neutro: 0.0, // Spanish/Italian
negativo: -0.8, // Spanish/Italian
yes: 0.5, // For any "yes" sentiment
no: -0.5, // For any "no" sentiment
};
return sentimentMap[sentiment] !== undefined
? sentimentMap[sentiment]
: isNaN(parseFloat(sentiment))
? null
: parseFloat(sentiment);
}
/**
* Checks if a string value should be considered as boolean true
* @param {string} value The string value to check
* @returns {boolean} True if the string indicates a positive/true value
*/
function isTruthyValue(value) {
if (!value) return false;
const truthyValues = [
"1",
"true",
"yes",
"y",
"ja",
"si",
"oui",
"да",
"да",
"はい",
];
return truthyValues.includes(value.toLowerCase());
}
/**
* Safely parses a date string into a Date object.
* @param {string} dateStr The date string to parse.
* @returns {Date|null} A Date object or null if parsing fails.
*/
function safeParseDate(dateStr) {
if (!dateStr) return null;
// Try to parse D-M-YYYY HH:MM:SS format (with hyphens or dots)
const dateTimeRegex =
/^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
const match = dateStr.match(dateTimeRegex);
if (match) {
const day = match[1];
const month = match[2];
const year = match[3];
const hour = match[4];
const minute = match[5];
const second = match[6];
// Reformat to YYYY-MM-DDTHH:MM:SS (ISO-like, but local time)
// Ensure month and day are two digits
const formattedDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}T${hour.padStart(2, "0")}:${minute.padStart(2, "0")}:${second.padStart(2, "0")}`;
try {
const date = new Date(formattedDateStr);
// Basic validation: check if the constructed date is valid
if (!isNaN(date.getTime())) {
return date;
}
} catch (e) {
console.warn(
`[safeParseDate] Error parsing reformatted string ${formattedDateStr} from ${dateStr}:`,
e
);
}
}
// Fallback for other potential formats (e.g., direct ISO 8601) or if the primary parse failed
try {
const parsedDate = new Date(dateStr);
if (!isNaN(parsedDate.getTime())) {
return parsedDate;
}
} catch (e) {
console.warn(`[safeParseDate] Error parsing with fallback ${dateStr}:`, e);
}
console.warn(`Failed to parse date string: ${dateStr}`);
return null;
}
/**
* Fetches transcript content from a URL
* @param {string} url The URL to fetch the transcript from
* @param {string} username Optional username for authentication
* @param {string} password Optional password for authentication
* @returns {Promise<string|null>} The transcript content or null if fetching fails
*/
async function fetchTranscriptContent(url, username, password) {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
timeout: 10000, // 10 second timeout
});
if (!response.ok) {
// Only log error once per batch, not for every transcript
if (Math.random() < 0.1) {
// Log ~10% of errors to avoid spam
console.warn(
`[CSV] Transcript fetch failed for ${url}: ${response.status} ${response.statusText}`
);
}
return null;
}
return await response.text();
} catch (error) {
// Only log error once per batch, not for every transcript
if (Math.random() < 0.1) {
// Log ~10% of errors to avoid spam
console.warn(`[CSV] Transcript fetch error for ${url}:`, error.message);
}
return null;
}
}
/**
* Fetches and parses CSV data from a URL
* @param {string} url The CSV URL
* @param {string} username Optional username for authentication
* @param {string} password Optional password for authentication
* @returns {Promise<Object[]>} Array of parsed session data
*/
export async function fetchAndParseCsv(url, username, password) {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const res = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
const text = await res.text();
// Parse without expecting headers, using known order
const records = parse(text, {
delimiter: ",",
columns: [
"session_id",
"start_time",
"end_time",
"ip_address",
"country",
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"full_transcript_url",
"avg_response_time",
"tokens",
"tokens_eur",
"category",
"initial_msg",
],
from_line: 1,
relax_column_count: true,
skip_empty_lines: true,
trim: true,
});
// Coerce types for relevant columns
return records.map((r) => ({
id: r.session_id,
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
endTime: safeParseDate(r.end_time),
ipAddress: r.ip_address,
country: getCountryCode(r.country),
language: getLanguageCode(r.language),
messagesSent: Number(r.messages_sent) || 0,
sentiment: mapSentimentToScore(r.sentiment),
escalated: isTruthyValue(r.escalated),
forwardedHr: isTruthyValue(r.forwarded_hr),
fullTranscriptUrl: r.full_transcript_url,
avgResponseTime: r.avg_response_time
? parseFloat(r.avg_response_time)
: null,
tokens: Number(r.tokens) || 0,
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
category: normalizeCategory(r.category),
initialMsg: r.initial_msg,
}));
}
/**
* Fetches and stores sessions for all companies
*/
export async function fetchAndStoreSessionsForAllCompanies() {
try {
// Get all companies
const companies = await prisma.company.findMany();
for (const company of companies) {
if (!company.csvUrl) {
console.log(
`[Scheduler] Skipping company ${company.id} - no CSV URL configured`
);
continue;
}
// Skip companies with invalid/example URLs
if (
company.csvUrl.includes("example.com") ||
company.csvUrl === "https://example.com/data.csv"
) {
console.log(
`[Scheduler] Skipping company ${company.id} - invalid/example CSV URL: ${company.csvUrl}`
);
continue;
}
console.log(`[Scheduler] Processing sessions for company: ${company.id}`);
try {
const sessions = await fetchAndParseCsv(
company.csvUrl,
company.csvUsername,
company.csvPassword
);
// Only add sessions that don't already exist in the database
let addedCount = 0;
for (const session of sessions) {
const sessionData = {
...session,
companyId: company.id,
id:
session.id ||
session.sessionId ||
`sess_${Date.now()}_${Math.random().toString(36).substring(2, 7)}`,
// Ensure startTime is not undefined
startTime: session.startTime || new Date(),
};
// Validate dates to prevent "Invalid Date" errors
const startTime =
sessionData.startTime instanceof Date &&
!isNaN(sessionData.startTime.getTime())
? sessionData.startTime
: new Date();
const endTime =
session.endTime instanceof Date && !isNaN(session.endTime.getTime())
? session.endTime
: new Date();
// Note: transcriptContent field was removed from schema
// Transcript content can be fetched on-demand from fullTranscriptUrl
// Check if the session already exists
const existingSession = await prisma.session.findUnique({
where: { id: sessionData.id },
});
if (existingSession) {
// Skip this session as it already exists
continue;
}
// Only include fields that are properly typed for Prisma
await prisma.session.create({
data: {
id: sessionData.id,
companyId: sessionData.companyId,
startTime: startTime,
endTime: endTime,
ipAddress: session.ipAddress || null,
country: session.country || null,
language: session.language || null,
messagesSent:
typeof session.messagesSent === "number"
? session.messagesSent
: 0,
sentiment:
typeof session.sentiment === "number"
? session.sentiment
: null,
escalated:
typeof session.escalated === "boolean"
? session.escalated
: null,
forwardedHr:
typeof session.forwardedHr === "boolean"
? session.forwardedHr
: null,
fullTranscriptUrl: session.fullTranscriptUrl || null,
avgResponseTime:
typeof session.avgResponseTime === "number"
? session.avgResponseTime
: null,
tokens:
typeof session.tokens === "number" ? session.tokens : null,
tokensEur:
typeof session.tokensEur === "number"
? session.tokensEur
: null,
category: session.category || null,
initialMsg: session.initialMsg || null,
},
});
addedCount++;
}
console.log(
`[Scheduler] Added ${addedCount} new sessions for company ${company.id}`
);
} catch (error) {
console.error(
`[Scheduler] Error processing company ${company.id}:`,
error
);
}
}
} catch (error) {
console.error("[Scheduler] Error fetching companies:", error);
throw error;
}
}


@ -1,440 +1,41 @@
// Fetches, parses, and returns chat session data for a company from a CSV URL
// Simplified CSV fetcher - fetches and parses CSV data without any processing
// Maps directly to SessionImport table fields
import fetch from "node-fetch";
import { parse } from "csv-parse/sync";
import ISO6391 from "iso-639-1";
import countries from "i18n-iso-countries";
// Register locales for i18n-iso-countries
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
countries.registerLocale(enLocale);
// This type is used internally for parsing the CSV records
interface CSVRecord {
session_id: string;
start_time: string;
end_time?: string;
ip_address?: string;
country?: string;
language?: string;
messages_sent?: string;
sentiment?: string;
escalated?: string;
forwarded_hr?: string;
full_transcript_url?: string;
avg_response_time?: string;
tokens?: string;
tokens_eur?: string;
category?: string;
initial_msg?: string;
[key: string]: string | undefined;
}
interface SessionData {
id: string;
sessionId: string;
startTime: Date;
endTime: Date | null;
ipAddress?: string;
country?: string | null; // Will store ISO 3166-1 alpha-2 country code or null/undefined
language?: string | null; // Will store ISO 639-1 language code or null/undefined
messagesSent: number;
sentiment: number | null;
escalated: boolean;
forwardedHr: boolean;
fullTranscriptUrl?: string | null;
avgResponseTime: number | null;
tokens: number;
tokensEur: number;
category?: string | null;
initialMsg?: string;
// Raw CSV data interface matching SessionImport schema
interface RawSessionImport {
externalSessionId: string;
startTimeRaw: string;
endTimeRaw: string;
ipAddress: string | null;
countryCode: string | null;
language: string | null;
messagesSent: number | null;
sentimentRaw: string | null;
escalatedRaw: string | null;
forwardedHrRaw: string | null;
fullTranscriptUrl: string | null;
avgResponseTimeSeconds: number | null;
tokens: number | null;
tokensEur: number | null;
category: string | null;
initialMessage: string | null;
}
/**
* Converts country names to ISO 3166-1 alpha-2 codes
* @param countryStr Raw country string from CSV
* @returns ISO 3166-1 alpha-2 country code or null if not found
* Fetches and parses CSV data from a URL without any processing
* Maps CSV columns by position to SessionImport fields
* @param url The CSV URL
* @param username Optional username for authentication
* @param password Optional password for authentication
* @returns Array of raw session import data
*/
function getCountryCode(countryStr?: string): string | null | undefined {
if (countryStr === undefined) return undefined;
if (countryStr === null || countryStr === "") return null;
// Clean the input
const normalized = countryStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toUpperCase()) {
return countries.isValid(normalized) ? normalized : null;
}
// Special case for country codes used in the dataset
const countryMapping: Record<string, string> = {
BA: "BA", // Bosnia and Herzegovina
NL: "NL", // Netherlands
USA: "US", // United States
UK: "GB", // United Kingdom
GB: "GB", // Great Britain
Nederland: "NL",
Netherlands: "NL",
Netherland: "NL",
Holland: "NL",
Germany: "DE",
Deutschland: "DE",
Belgium: "BE",
België: "BE",
Belgique: "BE",
France: "FR",
Frankreich: "FR",
"United States": "US",
"United States of America": "US",
Bosnia: "BA",
"Bosnia and Herzegovina": "BA",
"Bosnia & Herzegovina": "BA",
};
// Check mapping
if (normalized in countryMapping) {
return countryMapping[normalized];
}
// Try to get the code from the country name (in English)
try {
const code = countries.getAlpha2Code(normalized, "en");
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting country name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Converts language names to ISO 639-1 codes
* @param languageStr Raw language string from CSV
* @returns ISO 639-1 language code or null if not found
*/
function getLanguageCode(languageStr?: string): string | null | undefined {
if (languageStr === undefined) return undefined;
if (languageStr === null || languageStr === "") return null;
// Clean the input
const normalized = languageStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toLowerCase()) {
return ISO6391.validate(normalized) ? normalized : null;
}
// Special case mappings
const languageMapping: Record<string, string> = {
english: "en",
English: "en",
dutch: "nl",
Dutch: "nl",
nederlands: "nl",
Nederlands: "nl",
nl: "nl",
bosnian: "bs",
Bosnian: "bs",
turkish: "tr",
Turkish: "tr",
german: "de",
German: "de",
deutsch: "de",
Deutsch: "de",
french: "fr",
French: "fr",
français: "fr",
Français: "fr",
spanish: "es",
Spanish: "es",
español: "es",
Español: "es",
italian: "it",
Italian: "it",
italiano: "it",
Italiano: "it",
nizozemski: "nl", // "Dutch" in some Slavic languages
};
// Check mapping
if (normalized in languageMapping) {
return languageMapping[normalized];
}
// Try to get code using the ISO6391 library
try {
const code = ISO6391.getCode(normalized);
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting language name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Normalizes category values to standard groups
* @param categoryStr The raw category string from CSV
* @returns A normalized category string
*/
function normalizeCategory(categoryStr?: string): string | null {
if (!categoryStr) return null;
const normalized = categoryStr.toLowerCase().trim();
// Define category groups using keywords
const categoryMapping: Record<string, string[]> = {
Onboarding: [
"onboarding",
"start",
"begin",
"new",
"orientation",
"welcome",
"intro",
"getting started",
"documents",
"documenten",
"first day",
"eerste dag",
],
"General Information": [
"general",
"algemeen",
"info",
"information",
"informatie",
"question",
"vraag",
"inquiry",
"chat",
"conversation",
"gesprek",
"talk",
],
Greeting: [
"greeting",
"greet",
"hello",
"hi",
"hey",
"welcome",
"hallo",
"hoi",
"greetings",
],
"HR & Payroll": [
"salary",
"salaris",
"pay",
"payroll",
"loon",
"loonstrook",
"hr",
"human resources",
"benefits",
"vacation",
"leave",
"verlof",
"maaltijdvergoeding",
"vergoeding",
],
"Schedules & Hours": [
"schedule",
"hours",
"tijd",
"time",
"roster",
"rooster",
"planning",
"shift",
"dienst",
"working hours",
"werktijden",
"openingstijden",
],
"Role & Responsibilities": [
"role",
"job",
"function",
"functie",
"task",
"taak",
"responsibilities",
"leidinggevende",
"manager",
"teamleider",
"supervisor",
"team",
"lead",
],
"Technical Support": [
"technical",
"tech",
"support",
"laptop",
"computer",
"system",
"systeem",
"it",
"software",
"hardware",
],
Offboarding: [
"offboarding",
"leave",
"exit",
"quit",
"resign",
"resignation",
"ontslag",
"vertrek",
"afsluiting",
],
};
// Try to match the category using keywords
for (const [category, keywords] of Object.entries(categoryMapping)) {
if (keywords.some((keyword) => normalized.includes(keyword))) {
return category;
}
}
// If no match, return "Other"
return "Other";
}
/**
* Converts sentiment string values to numeric scores
* @param sentimentStr The sentiment string from the CSV
* @returns A numeric score representing the sentiment
*/
function mapSentimentToScore(sentimentStr?: string): number | null {
if (!sentimentStr) return null;
// Convert to lowercase for case-insensitive matching
const sentiment = sentimentStr.toLowerCase();
// Map sentiment strings to numeric values on a scale from -1 to 2
const sentimentMap: Record<string, number> = {
happy: 1.0,
excited: 1.5,
positive: 0.8,
neutral: 0.0,
playful: 0.7,
negative: -0.8,
angry: -1.0,
sad: -0.7,
frustrated: -0.9,
positief: 0.8, // Dutch
neutraal: 0.0, // Dutch
negatief: -0.8, // Dutch
positivo: 0.8, // Spanish/Italian
neutro: 0.0, // Spanish/Italian
negativo: -0.8, // Spanish/Italian
yes: 0.5, // For any "yes" sentiment
no: -0.5, // For any "no" sentiment
};
return sentimentMap[sentiment] !== undefined
? sentimentMap[sentiment]
: isNaN(parseFloat(sentiment))
? null
: parseFloat(sentiment);
}
/**
* Checks if a string value should be considered as boolean true
* @param value The string value to check
* @returns True if the string indicates a positive/true value
*/
function isTruthyValue(value?: string): boolean {
if (!value) return false;
const truthyValues = [
"1",
"true",
"yes",
"y",
"ja",
"si",
"oui",
"да",
"да",
"はい",
];
return truthyValues.includes(value.toLowerCase());
}
/**
* Safely parses a date string into a Date object.
* Handles potential errors and various formats, prioritizing D-M-YYYY HH:MM:SS.
* @param dateStr The date string to parse.
* @returns A Date object or null if parsing fails.
*/
function safeParseDate(dateStr?: string): Date | null {
if (!dateStr) return null;
// Try to parse D-M-YYYY HH:MM:SS format (with hyphens or dots)
const dateTimeRegex =
/^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
const match = dateStr.match(dateTimeRegex);
if (match) {
const day = match[1];
const month = match[2];
const year = match[3];
const hour = match[4];
const minute = match[5];
const second = match[6];
// Reformat to YYYY-MM-DDTHH:MM:SS (ISO-like, but local time)
// Ensure month and day are two digits
const formattedDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}T${hour.padStart(2, "0")}:${minute.padStart(2, "0")}:${second.padStart(2, "0")}`;
try {
const date = new Date(formattedDateStr);
// Basic validation: check if the constructed date is valid
if (!isNaN(date.getTime())) {
// console.log(`[safeParseDate] Parsed from D-M-YYYY: ${dateStr} -> ${formattedDateStr} -> ${date.toISOString()}`);
return date;
}
} catch (e) {
console.warn(
`[safeParseDate] Error parsing reformatted string ${formattedDateStr} from ${dateStr}:`,
e
);
}
}
// Fallback for other potential formats (e.g., direct ISO 8601) or if the primary parse failed
try {
const parsedDate = new Date(dateStr);
if (!isNaN(parsedDate.getTime())) {
// console.log(`[safeParseDate] Parsed with fallback: ${dateStr} -> ${parsedDate.toISOString()}`);
return parsedDate;
}
} catch (e) {
console.warn(`[safeParseDate] Error parsing with fallback ${dateStr}:`, e);
}
console.warn(`Failed to parse date string: ${dateStr}`);
return null;
}
export async function fetchAndParseCsv(
url: string,
username?: string,
password?: string
): Promise<Partial<SessionData>[]> {
): Promise<RawSessionImport[]> {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
@ -443,56 +44,39 @@ export async function fetchAndParseCsv(
const res = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
if (!res.ok) {
throw new Error(`Failed to fetch CSV: ${res.status} ${res.statusText}`);
}
const text = await res.text();
// Parse without expecting headers, using known order
const records: CSVRecord[] = parse(text, {
// Parse CSV without headers, using positional column mapping
const records: string[][] = parse(text, {
delimiter: ",",
columns: [
"session_id",
"start_time",
"end_time",
"ip_address",
"country",
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"full_transcript_url",
"avg_response_time",
"tokens",
"tokens_eur",
"category",
"initial_msg",
],
from_line: 1,
from_line: 1, // Start from first line (no headers)
relax_column_count: true,
skip_empty_lines: true,
trim: true,
});
// Coerce types for relevant columns
return records.map((r) => ({
id: r.session_id,
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
endTime: safeParseDate(r.end_time),
ipAddress: r.ip_address,
country: getCountryCode(r.country),
language: getLanguageCode(r.language),
messagesSent: Number(r.messages_sent) || 0,
sentiment: mapSentimentToScore(r.sentiment),
escalated: isTruthyValue(r.escalated),
forwardedHr: isTruthyValue(r.forwarded_hr),
fullTranscriptUrl: r.full_transcript_url,
avgResponseTime: r.avg_response_time
? parseFloat(r.avg_response_time)
: null,
tokens: Number(r.tokens) || 0,
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
category: normalizeCategory(r.category),
initialMsg: r.initial_msg,
// Map CSV columns by position to SessionImport fields
return records.map((row) => ({
externalSessionId: row[0] || "",
startTimeRaw: row[1] || "",
endTimeRaw: row[2] || "",
ipAddress: row[3] || null,
countryCode: row[4] || null,
language: row[5] || null,
messagesSent: row[6] ? parseInt(row[6], 10) || null : null,
sentimentRaw: row[7] || null,
escalatedRaw: row[8] || null,
forwardedHrRaw: row[9] || null,
fullTranscriptUrl: row[10] || null,
avgResponseTimeSeconds: row[11] ? parseFloat(row[11]) || null : null,
tokens: row[12] ? parseInt(row[12], 10) || null : null,
tokensEur: row[13] ? parseFloat(row[13]) || null : null,
category: row[14] || null,
initialMessage: row[15] || null,
}));
}
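
For reference, the rewritten fetcher can be exercised roughly like this; the URL and the credential environment variables are placeholders, not values from this commit:

// Each entry is a RawSessionImport: timestamps and flags stay raw strings
// (startTimeRaw, sentimentRaw, escalatedRaw, ...), and only numeric columns
// are coerced (messagesSent, avgResponseTimeSeconds, tokens, tokensEur).
const imports = await fetchAndParseCsv(
  "https://data.example.org/sessions.csv", // placeholder URL
  process.env.CSV_USERNAME, // placeholder credentials
  process.env.CSV_PASSWORD
);
console.log(imports.length, imports[0]?.externalSessionId, imports[0]?.startTimeRaw);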


@ -1,412 +0,0 @@
// Session processing scheduler - JavaScript version
import cron from "node-cron";
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');
try {
const envFile = readFileSync(envPath, 'utf8');
const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
envVars.forEach(line => {
const [key, ...valueParts] = line.split('=');
if (key && valueParts.length > 0) {
const value = valueParts.join('=').trim();
if (!process.env[key.trim()]) {
process.env[key.trim()] = value;
}
}
});
} catch (error) {
// Silently fail if .env.local doesn't exist
}
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
/**
* Processes a session transcript using OpenAI API
* @param {string} sessionId The session ID
* @param {string} transcript The transcript content to process
* @returns {Promise<Object>} Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(sessionId, transcript) {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)
Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;
try {
const response = await fetch(OPENAI_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4-turbo",
messages: [
{
role: "system",
content: systemMessage,
},
{
role: "user",
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
response_format: { type: "json_object" },
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
process.stderr.write(`Error processing transcript with OpenAI: ${error}\n`);
throw error;
}
}
/**
* Validates the OpenAI response against our expected schema
* @param {Object} data The data to validate
*/
function validateOpenAIResponse(data) {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
];
for (const field of requiredFields) {
if (!(field in data)) {
throw new Error(`Missing required field: ${field}`);
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
}
if (typeof data.escalated !== "boolean") {
throw new Error("Invalid escalated. Expected boolean");
}
if (typeof data.forwarded_hr !== "boolean") {
throw new Error("Invalid forwarded_hr. Expected boolean");
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
}
if (typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
}
}
/**
* Process a single session
* @param {Object} session The session to process
* @returns {Promise<Object>} Result object with success/error info
*/
async function processSingleSession(session) {
if (session.messages.length === 0) {
return {
sessionId: session.id,
success: false,
error: "Session has no messages",
};
}
try {
// Convert messages back to transcript format for OpenAI processing
const transcript = session.messages
.map(
(msg) =>
`[${new Date(msg.timestamp)
.toLocaleString("en-GB", {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
})
.replace(",", "")}] ${msg.role}: ${msg.content}`
)
.join("\n");
const processedData = await processTranscriptWithOpenAI(
session.id,
transcript
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
return {
sessionId: session.id,
success: true,
};
} catch (error) {
return {
sessionId: session.id,
success: false,
error: error.message,
};
}
}
/**
* Process sessions in parallel with concurrency limit
* @param {Array} sessions Array of sessions to process
* @param {number} maxConcurrency Maximum number of concurrent processing tasks
* @returns {Promise<Object>} Processing results
*/
async function processSessionsInParallel(sessions, maxConcurrency = 5) {
const results = [];
const executing = [];
for (const session of sessions) {
const promise = processSingleSession(session).then((result) => {
process.stdout.write(
result.success
? `[ProcessingScheduler] ✓ Successfully processed session ${result.sessionId}\n`
: `[ProcessingScheduler] ✗ Failed to process session ${result.sessionId}: ${result.error}\n`
);
return result;
});
results.push(promise);
executing.push(promise);
if (executing.length >= maxConcurrency) {
await Promise.race(executing);
executing.splice(
executing.findIndex((p) => p === promise),
1
);
}
}
return Promise.all(results);
}
/**
* Process unprocessed sessions
* @param {number} batchSize Number of sessions to process in one batch (default: all unprocessed)
* @param {number} maxConcurrency Maximum number of concurrent processing tasks (default: 5)
*/
export async function processUnprocessedSessions(batchSize = null, maxConcurrency = 5) {
process.stdout.write(
"[ProcessingScheduler] Starting to process unprocessed sessions...\n"
);
// Find sessions that have messages but haven't been processed
const queryOptions = {
where: {
AND: [
{ messages: { some: {} } }, // Must have messages
{ processed: false }, // Only unprocessed sessions (no longer checking for null)
],
},
include: {
messages: {
orderBy: { order: "asc" },
},
},
};
// Add batch size limit if specified
if (batchSize && batchSize > 0) {
queryOptions.take = batchSize;
}
const sessionsToProcess = await prisma.session.findMany(queryOptions);
// Filter to only sessions that have messages
const sessionsWithMessages = sessionsToProcess.filter(
(session) => session.messages.length > 0
);
if (sessionsWithMessages.length === 0) {
process.stdout.write(
"[ProcessingScheduler] No sessions found requiring processing.\n"
);
return;
}
process.stdout.write(
`[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process (max concurrency: ${maxConcurrency}).\n`
);
const startTime = Date.now();
const results = await processSessionsInParallel(sessionsWithMessages, maxConcurrency);
const endTime = Date.now();
const successCount = results.filter((r) => r.success).length;
const errorCount = results.filter((r) => !r.success).length;
process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
process.stdout.write(
`[ProcessingScheduler] Successfully processed: ${successCount} sessions.\n`
);
process.stdout.write(
`[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
);
process.stdout.write(
`[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
);
}
/**
* Start the processing scheduler
*/
export function startProcessingScheduler() {
// Process unprocessed sessions every hour
cron.schedule("0 * * * *", async () => {
try {
await processUnprocessedSessions();
} catch (error) {
process.stderr.write(
`[ProcessingScheduler] Error in scheduler: ${error}\n`
);
}
});
process.stdout.write(
"[ProcessingScheduler] Started processing scheduler (runs hourly).\n"
);
}


@ -278,7 +278,7 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,


@ -1,37 +0,0 @@
// Session refresh scheduler - JavaScript version
import cron from "node-cron";
import { PrismaClient } from "@prisma/client";
import { fetchAndStoreSessionsForAllCompanies } from "./csvFetcher.js";
const prisma = new PrismaClient();
/**
* Refresh sessions for all companies
*/
async function refreshSessions() {
console.log("[Scheduler] Starting session refresh...");
try {
await fetchAndStoreSessionsForAllCompanies();
console.log("[Scheduler] Session refresh completed successfully.");
} catch (error) {
console.error("[Scheduler] Error during session refresh:", error);
}
}
/**
* Start the session refresh scheduler
*/
export function startScheduler() {
// Run every 15 minutes
cron.schedule("*/15 * * * *", async () => {
try {
await refreshSessions();
} catch (error) {
console.error("[Scheduler] Error in scheduler:", error);
}
});
console.log(
"[Scheduler] Started session refresh scheduler (runs every 15 minutes)."
);
}


@ -3,74 +3,82 @@ import cron from "node-cron";
import { prisma } from "./prisma";
import { fetchAndParseCsv } from "./csvFetcher";
interface SessionCreateData {
id: string;
startTime: Date;
companyId: string;
[key: string]: unknown;
}
export function startScheduler() {
cron.schedule("*/15 * * * *", async () => {
const companies = await prisma.company.findMany();
for (const company of companies) {
try {
const sessions = await fetchAndParseCsv(
const rawSessionData = await fetchAndParseCsv(
company.csvUrl,
company.csvUsername as string | undefined,
company.csvPassword as string | undefined
);
// Only add sessions that don't already exist in the database
for (const session of sessions) {
const sessionData: SessionCreateData = {
...session,
companyId: company.id,
id: session.id || session.sessionId || `sess_${Date.now()}`,
// Ensure startTime is not undefined
startTime: session.startTime || new Date(),
};
// Check if the session already exists
const existingSession = await prisma.session.findUnique({
where: { id: sessionData.id },
});
if (existingSession) {
// Skip this session as it already exists
continue;
// Create SessionImport records for new data
for (const rawSession of rawSessionData) {
try {
// Use upsert to handle duplicates gracefully
await prisma.sessionImport.upsert({
where: {
companyId_externalSessionId: {
companyId: company.id,
externalSessionId: rawSession.externalSessionId,
},
},
update: {
// Update existing record with latest data
startTimeRaw: rawSession.startTimeRaw,
endTimeRaw: rawSession.endTimeRaw,
ipAddress: rawSession.ipAddress,
countryCode: rawSession.countryCode,
language: rawSession.language,
messagesSent: rawSession.messagesSent,
sentimentRaw: rawSession.sentimentRaw,
escalatedRaw: rawSession.escalatedRaw,
forwardedHrRaw: rawSession.forwardedHrRaw,
fullTranscriptUrl: rawSession.fullTranscriptUrl,
avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
tokens: rawSession.tokens,
tokensEur: rawSession.tokensEur,
category: rawSession.category,
initialMessage: rawSession.initialMessage,
status: "QUEUED", // Reset status for reprocessing if needed
},
create: {
companyId: company.id,
externalSessionId: rawSession.externalSessionId,
startTimeRaw: rawSession.startTimeRaw,
endTimeRaw: rawSession.endTimeRaw,
ipAddress: rawSession.ipAddress,
countryCode: rawSession.countryCode,
language: rawSession.language,
messagesSent: rawSession.messagesSent,
sentimentRaw: rawSession.sentimentRaw,
escalatedRaw: rawSession.escalatedRaw,
forwardedHrRaw: rawSession.forwardedHrRaw,
fullTranscriptUrl: rawSession.fullTranscriptUrl,
avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
tokens: rawSession.tokens,
tokensEur: rawSession.tokensEur,
category: rawSession.category,
initialMessage: rawSession.initialMessage,
status: "QUEUED",
},
});
} catch (error) {
// Log individual session import errors but continue processing
process.stderr.write(
`[Scheduler] Failed to import session ${rawSession.externalSessionId} for company ${company.name}: ${error}\n`
);
}
// Only include fields that are properly typed for Prisma
await prisma.session.create({
data: {
id: sessionData.id,
companyId: sessionData.companyId,
startTime: sessionData.startTime,
// endTime is required in the schema, so use startTime if not available
endTime: session.endTime || new Date(),
ipAddress: session.ipAddress || null,
country: session.country || null,
language: session.language || null,
sentiment:
typeof session.sentiment === "number"
? session.sentiment
: null,
messagesSent:
typeof session.messagesSent === "number"
? session.messagesSent
: 0,
category: session.category || null,
},
});
}
// Using process.stdout.write instead of console.log to avoid ESLint warning
process.stdout.write(
`[Scheduler] Refreshed sessions for company: ${company.name}\n`
`[Scheduler] Imported ${rawSessionData.length} session records for company: ${company.name}\n`
);
} catch (e) {
// Using process.stderr.write instead of console.error to avoid ESLint warning
process.stderr.write(
`[Scheduler] Failed for company: ${company.name} - ${e}\n`
`[Scheduler] Failed to fetch CSV for company: ${company.name} - ${e}\n`
);
}
}


@ -1,261 +0,0 @@
// Transcript parser utility - converts raw transcript text to structured messages
import { PrismaClient } from "@prisma/client";
const prisma = new PrismaClient();
/**
* Parses chat log string to JSON format with individual messages
* @param {string} logString - Raw transcript content
* @returns {Object} Parsed data with messages array and metadata
*/
export function parseChatLogToJSON(logString) {
// Convert to string if it's not already
const stringData =
typeof logString === "string" ? logString : String(logString);
// Split by lines and filter out empty lines
const lines = stringData.split("\n").filter((line) => line.trim() !== "");
const messages = [];
let currentMessage = null;
for (const line of lines) {
// Check if line starts with a timestamp pattern [DD.MM.YYYY HH:MM:SS]
const timestampMatch = line.match(
/^\[(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2})\] (.+?): (.*)$/
);
if (timestampMatch) {
// If we have a previous message, push it to the array
if (currentMessage) {
messages.push(currentMessage);
}
// Parse the timestamp
const [, timestamp, sender, content] = timestampMatch;
// Convert DD.MM.YYYY HH:MM:SS to ISO format
const [datePart, timePart] = timestamp.split(" ");
const [day, month, year] = datePart.split(".");
const [hour, minute, second] = timePart.split(":");
const dateObject = new Date(year, month - 1, day, hour, minute, second);
// Create new message object
currentMessage = {
timestamp: dateObject.toISOString(),
role: sender,
content: content,
};
} else if (currentMessage) {
// This is a continuation of the previous message (multiline)
currentMessage.content += "\n" + line;
}
}
// Don't forget the last message
if (currentMessage) {
messages.push(currentMessage);
}
return {
messages: messages.sort((a, b) => {
// First sort by timestamp (ascending)
const timeComparison = new Date(a.timestamp) - new Date(b.timestamp);
if (timeComparison !== 0) {
return timeComparison;
}
// If timestamps are equal, sort by role (descending)
// This puts "User" before "Assistant" when timestamps are the same
return b.role.localeCompare(a.role);
}),
totalMessages: messages.length,
};
}
/**
* Stores parsed messages in the database for a session
* @param {string} sessionId - The session ID
* @param {Array} messages - Array of parsed message objects
*/
export async function storeMessagesForSession(sessionId, messages) {
try {
// First, delete any existing messages for this session
await prisma.message.deleteMany({
where: { sessionId },
});
// Then insert the new messages
const messageData = messages.map((message, index) => ({
sessionId,
timestamp: new Date(message.timestamp),
role: message.role,
content: message.content,
order: index,
}));
if (messageData.length > 0) {
await prisma.message.createMany({
data: messageData,
});
// Extract actual end time from the latest message
const latestMessage = messages.reduce((latest, current) => {
return new Date(current.timestamp) > new Date(latest.timestamp) ? current : latest;
});
// Update the session's endTime with the actual conversation end time
await prisma.session.update({
where: { id: sessionId },
data: {
endTime: new Date(latestMessage.timestamp),
},
});
process.stdout.write(
`[TranscriptParser] Updated session ${sessionId} endTime to ${latestMessage.timestamp}\n`
);
}
process.stdout.write(
`[TranscriptParser] Stored ${messageData.length} messages for session ${sessionId}\n`
);
return messageData.length;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error storing messages for session ${sessionId}: ${error}\n`
);
throw error;
}
}
/**
* Processes and stores transcript for a single session
* @param {string} sessionId - The session ID
* @param {string} transcriptContent - Raw transcript content
* @returns {Promise<Object>} Processing result with message count
*/
export async function processTranscriptForSession(
sessionId,
transcriptContent
) {
if (!transcriptContent || transcriptContent.trim() === "") {
throw new Error("No transcript content provided");
}
try {
// Parse the transcript
const parsed = parseChatLogToJSON(transcriptContent);
// Store messages in database
const messageCount = await storeMessagesForSession(
sessionId,
parsed.messages
);
return {
sessionId,
messageCount,
totalMessages: parsed.totalMessages,
success: true,
};
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error processing transcript for session ${sessionId}: ${error}\n`
);
throw error;
}
}
/**
* Processes transcripts for all sessions that have transcript content but no parsed messages
*/
export async function processAllUnparsedTranscripts() {
process.stdout.write(
"[TranscriptParser] Starting to process unparsed transcripts...\n"
);
try {
// Find sessions with transcript content but no messages
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
],
},
include: {
messages: true,
},
});
// Filter to only sessions without messages
const unparsedSessions = sessionsToProcess.filter(
(session) => session.messages.length === 0
);
if (unparsedSessions.length === 0) {
process.stdout.write(
"[TranscriptParser] No unparsed transcripts found.\n"
);
return { processed: 0, errors: 0 };
}
process.stdout.write(
`[TranscriptParser] Found ${unparsedSessions.length} sessions with unparsed transcripts.\n`
);
let successCount = 0;
let errorCount = 0;
for (const session of unparsedSessions) {
try {
const result = await processTranscriptForSession(
session.id,
session.transcriptContent
);
process.stdout.write(
`[TranscriptParser] Processed session ${session.id}: ${result.messageCount} messages\n`
);
successCount++;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Failed to process session ${session.id}: ${error}\n`
);
errorCount++;
}
}
process.stdout.write(
`[TranscriptParser] Completed processing. Success: ${successCount}, Errors: ${errorCount}\n`
);
return { processed: successCount, errors: errorCount };
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error in processAllUnparsedTranscripts: ${error}\n`
);
throw error;
}
}
/**
* Gets parsed messages for a session
* @param {string} sessionId - The session ID
* @returns {Promise<Array>} Array of message objects
*/
export async function getMessagesForSession(sessionId) {
try {
const messages = await prisma.message.findMany({
where: { sessionId },
orderBy: { order: "asc" },
});
return messages;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error getting messages for session ${sessionId}: ${error}\n`
);
throw error;
}
}


@ -38,7 +38,7 @@ export interface User {
export interface Message {
id: string;
sessionId: string;
timestamp: Date;
timestamp: Date | null;
role: string; // "User", "Assistant", "System", etc.
content: string;
order: number; // Order within the conversation (0, 1, 2, ...)

package-lock.json (generated, 9506 lines changed): diff not shown (too large)

@ -121,5 +121,6 @@
".git",
"*.json"
]
}
},
"packageManager": "pnpm@10.12.4"
}


@ -3,49 +3,6 @@ import { NextApiRequest, NextApiResponse } from "next";
import { fetchAndParseCsv } from "../../../lib/csvFetcher";
import { prisma } from "../../../lib/prisma";
interface SessionCreateData {
id: string;
startTime: Date;
companyId: string;
sessionId?: string;
[key: string]: unknown;
}
/**
* Fetches transcript content from a URL
* @param url The URL to fetch the transcript from
* @param username Optional username for authentication
* @param password Optional password for authentication
* @returns The transcript content or null if fetching fails
*/
async function fetchTranscriptContent(
url: string,
username?: string,
password?: string
): Promise<string | null> {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!response.ok) {
process.stderr.write(
`Error fetching transcript: ${response.statusText}\n`
);
return null;
}
return await response.text();
} catch (error) {
process.stderr.write(`Failed to fetch transcript: ${error}\n`);
return null;
}
}
export default async function handler(
req: NextApiRequest,
res: NextApiResponse
@ -88,85 +45,80 @@ export default async function handler(
if (!company) return res.status(404).json({ error: "Company not found" });
try {
const sessions = await fetchAndParseCsv(
const rawSessionData = await fetchAndParseCsv(
company.csvUrl,
company.csvUsername as string | undefined,
company.csvPassword as string | undefined
);
// Only add sessions that don't already exist in the database
for (const session of sessions) {
const sessionData: SessionCreateData = {
...session,
companyId: company.id,
id:
session.id ||
session.sessionId ||
`sess_${Date.now()}_${Math.random().toString(36).substring(2, 7)}`,
// Ensure startTime is not undefined
startTime: session.startTime || new Date(),
};
let importedCount = 0;
// Validate dates to prevent "Invalid Date" errors
const startTime =
sessionData.startTime instanceof Date &&
!isNaN(sessionData.startTime.getTime())
? sessionData.startTime
: new Date();
const endTime =
session.endTime instanceof Date && !isNaN(session.endTime.getTime())
? session.endTime
: new Date();
// Note: transcriptContent field was removed from schema
// Transcript content can be fetched on-demand from fullTranscriptUrl
// Check if the session already exists
const existingSession = await prisma.session.findUnique({
where: { id: sessionData.id },
});
if (existingSession) {
// Skip this session as it already exists
continue;
// Create SessionImport records for new data
for (const rawSession of rawSessionData) {
try {
// Use upsert to handle duplicates gracefully
await prisma.sessionImport.upsert({
where: {
companyId_externalSessionId: {
companyId: company.id,
externalSessionId: rawSession.externalSessionId,
},
},
update: {
// Update existing record with latest data
startTimeRaw: rawSession.startTimeRaw,
endTimeRaw: rawSession.endTimeRaw,
ipAddress: rawSession.ipAddress,
countryCode: rawSession.countryCode,
language: rawSession.language,
messagesSent: rawSession.messagesSent,
sentimentRaw: rawSession.sentimentRaw,
escalatedRaw: rawSession.escalatedRaw,
forwardedHrRaw: rawSession.forwardedHrRaw,
fullTranscriptUrl: rawSession.fullTranscriptUrl,
avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
tokens: rawSession.tokens,
tokensEur: rawSession.tokensEur,
category: rawSession.category,
initialMessage: rawSession.initialMessage,
status: "QUEUED", // Reset status for reprocessing if needed
},
create: {
companyId: company.id,
externalSessionId: rawSession.externalSessionId,
startTimeRaw: rawSession.startTimeRaw,
endTimeRaw: rawSession.endTimeRaw,
ipAddress: rawSession.ipAddress,
countryCode: rawSession.countryCode,
language: rawSession.language,
messagesSent: rawSession.messagesSent,
sentimentRaw: rawSession.sentimentRaw,
escalatedRaw: rawSession.escalatedRaw,
forwardedHrRaw: rawSession.forwardedHrRaw,
fullTranscriptUrl: rawSession.fullTranscriptUrl,
avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
tokens: rawSession.tokens,
tokensEur: rawSession.tokensEur,
category: rawSession.category,
initialMessage: rawSession.initialMessage,
status: "QUEUED",
},
});
importedCount++;
} catch (error) {
// Log individual session import errors but continue processing
process.stderr.write(
`Failed to import session ${rawSession.externalSessionId}: ${error}\n`
);
}
// Only include fields that are properly typed for Prisma
await prisma.session.create({
data: {
id: sessionData.id,
companyId: sessionData.companyId,
startTime: startTime,
endTime: endTime,
ipAddress: session.ipAddress || null,
country: session.country || null,
language: session.language || null,
messagesSent:
typeof session.messagesSent === "number" ? session.messagesSent : 0,
sentiment:
typeof session.sentiment === "number" ? session.sentiment : null,
escalated:
typeof session.escalated === "boolean" ? session.escalated : null,
forwardedHr:
typeof session.forwardedHr === "boolean"
? session.forwardedHr
: null,
fullTranscriptUrl: session.fullTranscriptUrl || null,
avgResponseTime:
typeof session.avgResponseTime === "number"
? session.avgResponseTime
: null,
tokens: typeof session.tokens === "number" ? session.tokens : null,
tokensEur:
typeof session.tokensEur === "number" ? session.tokensEur : null,
category: session.category || null,
initialMsg: session.initialMsg || null,
},
});
}
res.json({ ok: true, imported: sessions.length });
res.json({
ok: true,
imported: importedCount,
total: rawSessionData.length,
message: `Successfully imported ${importedCount} session records to SessionImport table`
});
} catch (e) {
const error = e instanceof Error ? e.message : "An unknown error occurred";
res.status(500).json({ error });
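For reference, a minimal TypeScript sketch of the row shape this handler assumes fetchAndParseCsv to return. The interface name RawSessionImport is hypothetical and the optionality of each field is an assumption; the parser itself is not part of this hunk, so the field list is inferred from the upsert above.

// Assumed shape of each entry in rawSessionData (field names mirror the upsert above).
interface RawSessionImport {
  externalSessionId: string;
  startTimeRaw: string;
  endTimeRaw: string;
  ipAddress?: string | null;
  countryCode?: string | null;
  language?: string | null;
  messagesSent?: number | null;
  sentimentRaw?: string | null;
  escalatedRaw?: string | null;
  forwardedHrRaw?: string | null;
  fullTranscriptUrl?: string | null;
  avgResponseTimeSeconds?: number | null;
  tokens?: number | null;
  tokensEur?: number | null;
  category?: string | null;
  initialMessage?: string | null;
}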

View File

@ -41,7 +41,7 @@ export default async function handler(
}
// Check if user has admin role
if (user.role !== "admin") {
if (user.role !== "ADMIN") {
return res.status(403).json({ error: "Admin access required" });
}

View File

@ -67,7 +67,7 @@ export default async function handler(
prismaSession.messages?.map((msg) => ({
id: msg.id,
sessionId: msg.sessionId,
timestamp: new Date(msg.timestamp),
timestamp: msg.timestamp ? new Date(msg.timestamp) : new Date(),
role: msg.role,
content: msg.content,
order: msg.order,

View File

@ -150,7 +150,7 @@ export default async function handler(
tokensEur: ps.tokensEur ?? undefined,
initialMsg: ps.initialMsg ?? undefined,
fullTranscriptUrl: ps.fullTranscriptUrl ?? null,
transcriptContent: ps.transcriptContent ?? null,
transcriptContent: null, // Transcript content is now fetched from fullTranscriptUrl when needed
}));
return res.status(200).json({ sessions, totalSessions });

View File

@ -46,7 +46,7 @@ export default async function handler(
email,
password: hashed,
companyId: newCompany.id,
role: "admin",
role: "ADMIN",
},
});
res.status(201).json({

6029
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large

View File

@ -80,11 +80,31 @@ model Session {
importId String? @unique
/**
* session-level data
* session-level data (processed from SessionImport)
*/
startTime DateTime
endTime DateTime
// … whatever other scalar fields you have here …
// Processed fields from SessionImport data
ipAddress String?
country String? // processed from countryCode
language String? // processed from language
messagesSent Int?
sentiment Float? // processed from sentimentRaw
sentimentCategory SentimentCategory?
escalated Boolean?
forwardedHr Boolean?
fullTranscriptUrl String?
avgResponseTime Float? // processed from avgResponseTimeSeconds
tokens Int?
tokensEur Float?
category String?
initialMsg String? // processed from initialMessage
// Processing metadata
processed Boolean @default(false)
questions String? // JSON array of extracted questions
summary String? // AI-generated summary
/**
* ---------- the missing opposite side ----------
@ -154,7 +174,7 @@ model Message {
session Session @relation(fields: [sessionId], references: [id], onDelete: Cascade)
sessionId String
timestamp DateTime
timestamp DateTime?
role String // "user" | "assistant" | "system" free-form keeps migration easy
content String
order Int
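Because Message.timestamp is now optional, consumers should order messages by the explicit order column rather than the timestamp. A minimal sketch, assuming the standard generated Prisma client; the helper name is illustrative:

import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

// Fetch a session's messages in transcript order, tolerating null timestamps.
async function getOrderedMessages(sessionId: string) {
  return prisma.message.findMany({
    where: { sessionId },
    orderBy: { order: "asc" }, // `order` is always set; `timestamp` may be null
  });
}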

View File

@ -1,39 +0,0 @@
// seed.js - Create initial admin user and company
import { PrismaClient } from "@prisma/client";
import bcrypt from "bcryptjs";
const prisma = new PrismaClient();
async function main() {
// Create a company
const company = await prisma.company.create({
data: {
name: "Demo Company",
csvUrl: "https://example.com/data.csv", // Replace with a real URL if available
},
});
// Create an admin user
const hashedPassword = await bcrypt.hash("admin123", 10);
await prisma.user.create({
data: {
email: "admin@demo.com",
password: hashedPassword,
role: "admin",
companyId: company.id,
},
});
console.log("Seed data created successfully:");
console.log("Company: Demo Company");
console.log("Admin user: admin@demo.com (password: admin123)");
}
main()
.catch((e) => {
console.error("Error seeding database:", e);
process.exit(1);
})
.finally(async () => {
await prisma.$disconnect();
});

View File

@ -1,39 +0,0 @@
// Seed script for creating initial data
import { PrismaClient } from "@prisma/client";
import bcrypt from "bcryptjs";
const prisma = new PrismaClient();
async function main() {
try {
// Create a company
const company = await prisma.company.create({
data: {
name: "Demo Company",
csvUrl: "https://example.com/data.csv", // Replace with a real URL if available
},
});
// Create an admin user
const hashedPassword = await bcrypt.hash("admin123", 10);
await prisma.user.create({
data: {
email: "admin@demo.com",
password: hashedPassword,
role: "admin",
companyId: company.id,
},
});
console.log("Seed data created successfully:");
console.log("Company: Demo Company");
console.log("Admin user: admin@demo.com (password: admin123)");
} catch (error) {
console.error("Error seeding database:", error);
process.exit(1);
} finally {
await prisma.$disconnect();
}
}
main();

View File

@ -20,7 +20,7 @@ async function main() {
data: {
email: "admin@demo.com",
password: hashedPassword,
role: "admin",
role: "ADMIN",
companyId: company.id,
},
});

View File

@ -1,73 +0,0 @@
// Script to check what's in the transcript files
// Usage: node scripts/check-transcript-content.js
import { PrismaClient } from '@prisma/client';
import fetch from 'node-fetch';
const prisma = new PrismaClient();
async function checkTranscriptContent() {
try {
// Get a few sessions without messages
const sessions = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ messages: { none: {} } },
]
},
include: { company: true },
take: 3,
});
for (const session of sessions) {
console.log(`\n📄 Checking session ${session.id}:`);
console.log(` URL: ${session.fullTranscriptUrl}`);
try {
const authHeader = session.company.csvUsername && session.company.csvPassword
? "Basic " + Buffer.from(`${session.company.csvUsername}:${session.company.csvPassword}`).toString("base64")
: undefined;
const response = await fetch(session.fullTranscriptUrl, {
headers: authHeader ? { Authorization: authHeader } : {},
timeout: 10000,
});
if (!response.ok) {
console.log(` ❌ HTTP ${response.status}: ${response.statusText}`);
continue;
}
const content = await response.text();
console.log(` 📏 Content length: ${content.length} characters`);
if (content.length === 0) {
console.log(` ⚠️ Empty file`);
} else if (content.length < 100) {
console.log(` 📝 Full content: "${content}"`);
} else {
console.log(` 📝 First 200 chars: "${content.substring(0, 200)}..."`);
}
// Check if it matches our expected format
const lines = content.split('\n').filter(line => line.trim());
const formatMatches = lines.filter(line =>
line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/)
);
console.log(` 🔍 Lines total: ${lines.length}, Format matches: ${formatMatches.length}`);
} catch (error) {
console.log(` ❌ Error: ${error.message}`);
}
}
} catch (error) {
console.error('❌ Error:', error);
} finally {
await prisma.$disconnect();
}
}
checkTranscriptContent();

View File

@ -1,185 +0,0 @@
// Script to fetch transcripts and parse them into messages
// Usage: node scripts/fetch-and-parse-transcripts.js
import { PrismaClient } from '@prisma/client';
import fetch from 'node-fetch';
const prisma = new PrismaClient();
/**
* Fetches transcript content from a URL
*/
async function fetchTranscriptContent(url, username, password) {
try {
const authHeader = username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
timeout: 10000,
});
if (!response.ok) {
console.log(`❌ Failed to fetch ${url}: ${response.status} ${response.statusText}`);
return null;
}
return await response.text();
} catch (error) {
console.log(`❌ Error fetching ${url}: ${error.message}`);
return null;
}
}
/**
* Parses transcript content into messages
*/
function parseTranscriptToMessages(transcript, sessionId) {
if (!transcript || transcript.trim() === '') {
return [];
}
const lines = transcript.split('\n').filter(line => line.trim());
const messages = [];
let messageOrder = 0;
let currentTimestamp = new Date();
for (const line of lines) {
// Try format 1: [DD-MM-YYYY HH:MM:SS] Role: Content
const timestampMatch = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
if (timestampMatch) {
const [, timestamp, role, content] = timestampMatch;
// Parse timestamp (DD-MM-YYYY HH:MM:SS)
const dateMatch = timestamp.match(/^(\d{1,2})-(\d{1,2})-(\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/);
let parsedTimestamp = new Date();
if (dateMatch) {
const [, day, month, year, hour, minute, second] = dateMatch;
parsedTimestamp = new Date(
parseInt(year),
parseInt(month) - 1, // Month is 0-indexed
parseInt(day),
parseInt(hour),
parseInt(minute),
parseInt(second)
);
}
messages.push({
sessionId,
role: role.trim().toLowerCase(),
content: content.trim(),
timestamp: parsedTimestamp,
order: messageOrder++,
});
continue;
}
// Try format 2: Role: Content (simple format)
const simpleMatch = line.match(/^([^:]+):\s*(.+)$/);
if (simpleMatch) {
const [, role, content] = simpleMatch;
// Use incremental timestamps (add 1 minute per message)
currentTimestamp = new Date(currentTimestamp.getTime() + 60000);
messages.push({
sessionId,
role: role.trim().toLowerCase(),
content: content.trim(),
timestamp: new Date(currentTimestamp),
order: messageOrder++,
});
}
}
return messages;
}
/**
* Process sessions without messages
*/
async function fetchAndParseTranscripts() {
try {
console.log('🔍 Finding sessions without messages...\n');
// Get sessions that have fullTranscriptUrl but no messages
const sessionsWithoutMessages = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ messages: { none: {} } }, // No messages
]
},
include: {
company: true,
},
take: 20, // Process 20 at a time to avoid overwhelming
});
if (sessionsWithoutMessages.length === 0) {
console.log('✅ All sessions with transcript URLs already have messages!');
return;
}
console.log(`📥 Found ${sessionsWithoutMessages.length} sessions to process\n`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsWithoutMessages) {
console.log(`📄 Processing session ${session.id.substring(0, 8)}...`);
try {
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
session.fullTranscriptUrl,
session.company.csvUsername,
session.company.csvPassword
);
if (!transcriptContent) {
console.log(` ⚠️ No transcript content available`);
errorCount++;
continue;
}
// Parse transcript into messages
const messages = parseTranscriptToMessages(transcriptContent, session.id);
if (messages.length === 0) {
console.log(` ⚠️ No messages found in transcript`);
errorCount++;
continue;
}
// Save messages to database
await prisma.message.createMany({
data: messages,
});
console.log(` ✅ Added ${messages.length} messages`);
successCount++;
} catch (error) {
console.log(` ❌ Error: ${error.message}`);
errorCount++;
}
}
console.log(`\n📊 Results:`);
console.log(` ✅ Successfully processed: ${successCount} sessions`);
console.log(` ❌ Failed to process: ${errorCount} sessions`);
console.log(`\n💡 Now you can run the processing scheduler to analyze these sessions!`);
} catch (error) {
console.error('❌ Error:', error);
} finally {
await prisma.$disconnect();
}
}
fetchAndParseTranscripts();

View File

@ -1,83 +1,182 @@
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
async function main() {
console.log("Starting to fetch missing transcripts...");
/**
* Fetches transcript content from a URL with optional authentication
*/
async function fetchTranscriptContent(
url: string,
username?: string,
password?: string
): Promise<string | null> {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const sessionsToUpdate = await prisma.session.findMany({
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!response.ok) {
console.warn(`Failed to fetch transcript from ${url}: ${response.statusText}`);
return null;
}
return await response.text();
} catch (error) {
console.warn(`Error fetching transcript from ${url}:`, error);
return null;
}
}
/**
* Parse transcript content into individual messages
*/
function parseTranscriptToMessages(transcriptContent: string): Array<{
timestamp: Date | null;
role: string;
content: string;
order: number;
}> {
const lines = transcriptContent.split('\n').filter(line => line.trim());
const messages: Array<{
timestamp: Date | null;
role: string;
content: string;
order: number;
}> = [];
let order = 0;
for (const line of lines) {
// Try to parse lines in format: [timestamp] role: content
const match = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
if (match) {
const [, timestampStr, role, content] = match;
// Try to parse the timestamp
let timestamp: Date | null = null;
try {
timestamp = new Date(timestampStr);
if (isNaN(timestamp.getTime())) {
timestamp = null;
}
} catch {
timestamp = null;
}
messages.push({
timestamp,
role: role.trim(),
content: content.trim(),
order: order++,
});
} else {
// If line doesn't match expected format, treat as content continuation
if (messages.length > 0) {
messages[messages.length - 1].content += '\n' + line;
} else {
// First line doesn't match format, create a generic message
messages.push({
timestamp: null,
role: 'unknown',
content: line,
order: order++,
});
}
}
}
return messages;
}
/**
* Main function to fetch transcripts for sessions that don't have messages yet
*/
async function fetchTranscriptsForSessions() {
console.log("Starting to fetch transcripts for sessions without messages...");
// Find sessions that have transcript URLs but no messages
const sessionsNeedingTranscripts = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ fullTranscriptUrl: { not: "" } }, // Ensure URL is not an empty string
{ transcriptContent: null },
{ messages: { none: {} } }, // No messages yet
],
},
select: {
id: true,
fullTranscriptUrl: true,
include: {
company: true,
messages: true,
},
});
if (sessionsToUpdate.length === 0) {
console.log("No sessions found requiring transcript fetching.");
if (sessionsNeedingTranscripts.length === 0) {
console.log("No sessions found that need transcript fetching.");
return;
}
console.log(`Found ${sessionsToUpdate.length} sessions to update.`);
console.log(`Found ${sessionsNeedingTranscripts.length} sessions that need transcript fetching.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToUpdate) {
for (const session of sessionsNeedingTranscripts) {
if (!session.fullTranscriptUrl) {
// Should not happen due to query, but good for type safety
console.warn(`Session ${session.id} has no fullTranscriptUrl, skipping.`);
console.warn(`Session ${session.id} has no transcript URL, skipping.`);
continue;
}
console.log(
`Fetching transcript for session ${session.id} from ${session.fullTranscriptUrl}...`
);
console.log(`Fetching transcript for session ${session.id}...`);
try {
const response = await fetch(session.fullTranscriptUrl);
if (!response.ok) {
console.error(
`Failed to fetch transcript for session ${session.id}: ${response.status} ${response.statusText}`
);
const errorBody = await response.text();
console.error(`Error details: ${errorBody.substring(0, 500)}`); // Log first 500 chars of error
errorCount++;
continue;
}
const transcriptText = await response.text();
if (transcriptText.trim() === "") {
console.warn(
`Fetched empty transcript for session ${session.id}. Storing as empty string.`
);
}
await prisma.session.update({
where: { id: session.id },
data: { transcriptContent: transcriptText },
});
console.log(
`Successfully fetched and stored transcript for session ${session.id}.`
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
session.fullTranscriptUrl,
session.company.csvUsername || undefined,
session.company.csvPassword || undefined
);
if (!transcriptContent) {
throw new Error("Failed to fetch transcript content");
}
// Parse transcript into messages
const messages = parseTranscriptToMessages(transcriptContent);
if (messages.length === 0) {
throw new Error("No messages found in transcript");
}
// Create messages in database
await prisma.message.createMany({
data: messages.map(msg => ({
sessionId: session.id,
timestamp: msg.timestamp,
role: msg.role,
content: msg.content,
order: msg.order,
})),
});
console.log(`Successfully fetched transcript for session ${session.id} (${messages.length} messages)`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
console.error(`Error fetching transcript for session ${session.id}:`, error);
errorCount++;
}
}
console.log("Transcript fetching complete.");
console.log(`Successfully updated: ${successCount} sessions.`);
console.log(`Failed to update: ${errorCount} sessions.`);
console.log(`Successfully fetched: ${successCount} transcripts.`);
console.log(`Failed to fetch: ${errorCount} transcripts.`);
}
main()
// Run the main function
fetchTranscriptsForSessions()
.catch((e) => {
console.error("An error occurred during the script execution:", e);
process.exitCode = 1;
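To illustrate the line format parseTranscriptToMessages handles, a quick sketch assuming it runs in the same module; the sample transcript text is made up:

// "[timestamp] role: content" lines are parsed into messages; unmatched lines
// are appended to the previous message as continuations.
const sample = [
  "[2025-06-27T10:15:00Z] user: How many vacation days do I have left?",
  "[2025-06-27T10:15:05Z] assistant: You have 12 days remaining.",
  "This line has no timestamp prefix, so it extends the assistant message.",
].join("\n");

const parsed = parseTranscriptToMessages(sample);
// parsed.length === 2, parsed[1].order === 1, and parsed[1].content spans two lines.
console.log(parsed);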

View File

@ -1,68 +0,0 @@
// Fix Trailing Whitespace
// This script removes trailing whitespace from specified file types
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Configure which file types to process
const fileTypes = [".ts", ".tsx", ".js", ".jsx", ".json", ".md", ".css"];
// Configure directories to ignore
const ignoreDirs = ["node_modules", ".next", ".git", "out", "build", "dist"];
// Recursively process directories
async function processDirectory(dir) {
try {
const files = await fs.promises.readdir(dir, { withFileTypes: true });
for (const file of files) {
const fullPath = path.join(dir, file.name);
// Skip ignored directories
if (file.isDirectory()) {
if (!ignoreDirs.includes(file.name)) {
await processDirectory(fullPath);
}
continue;
}
// Process only files with matching extensions
const ext = path.extname(file.name);
if (!fileTypes.includes(ext)) {
continue;
}
try {
// Read and process the file
const content = await fs.promises.readFile(fullPath, "utf8");
// Remove trailing whitespace from each line
const processedContent = content
.split("\n")
.map((line) => line.replace(/\s+$/, ""))
.join("\n");
// Only write if changes were made
if (processedContent !== content) {
await fs.promises.writeFile(fullPath, processedContent, "utf8");
console.log(`Fixed trailing whitespace in ${fullPath}`);
}
} catch (fileError) {
console.error(`Error processing file ${fullPath}:`, fileError);
}
}
} catch (dirError) {
console.error(`Error reading directory ${dir}:`, dirError);
}
}
// Start processing from root directory
const rootDir = process.cwd();
console.log(`Starting whitespace cleanup from ${rootDir}`);
processDirectory(rootDir)
.then(() => console.log("Whitespace cleanup completed"))
.catch((err) => console.error("Error in whitespace cleanup:", err));

View File

@ -1,38 +0,0 @@
// Simple script to test the manual processing trigger
// Usage: node scripts/manual-trigger-test.js
import fetch from 'node-fetch';
async function testManualTrigger() {
try {
console.log('Testing manual processing trigger...');
const response = await fetch('http://localhost:3000/api/admin/trigger-processing', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
// Note: In a real scenario, you'd need to include authentication cookies
// For testing, you might need to login first and copy the session cookie
},
body: JSON.stringify({
batchSize: 5, // Process max 5 sessions
maxConcurrency: 3 // Use 3 concurrent workers
})
});
const result = await response.json();
if (response.ok) {
console.log('✅ Manual trigger successful:');
console.log(JSON.stringify(result, null, 2));
} else {
console.log('❌ Manual trigger failed:');
console.log(JSON.stringify(result, null, 2));
}
} catch (error) {
console.error('❌ Error testing manual trigger:', error.message);
}
}
testManualTrigger();

View File

@ -1,243 +0,0 @@
// Manual trigger scripts for both schedulers
import { fetchAndStoreSessionsForAllCompanies } from "../lib/csvFetcher.js";
import { processAllUnparsedTranscripts } from "../lib/transcriptParser.js";
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');
try {
const envFile = readFileSync(envPath, 'utf8');
const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
envVars.forEach(line => {
const [key, ...valueParts] = line.split('=');
if (key && valueParts.length > 0) {
const value = valueParts.join('=').trim();
if (!process.env[key.trim()]) {
process.env[key.trim()] = value;
}
}
});
console.log("✅ Environment variables loaded from .env.local");
} catch (error) {
console.warn("⚠️ Could not load .env.local file:", error.message);
}
const prisma = new PrismaClient();
/**
* Manually trigger the session refresh scheduler
*/
async function triggerSessionRefresh() {
console.log("=== Manual Session Refresh Trigger ===");
try {
await fetchAndStoreSessionsForAllCompanies();
console.log("✅ Session refresh completed successfully");
} catch (error) {
console.error("❌ Session refresh failed:", error);
}
}
/**
* Manually trigger the processing scheduler
*/
async function triggerProcessingScheduler() {
console.log("=== Manual Processing Scheduler Trigger ===");
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
console.error("❌ OPENAI_API_KEY environment variable is not set");
return;
}
try {
// Find sessions that need processing
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ messages: { some: {} } },
{
OR: [
{ processed: false },
{ processed: null }
]
}
],
},
select: {
id: true,
processed: true,
},
take: 5, // Process 5 sessions for manual testing
});
console.log(`Found ${sessionsToProcess.length} sessions to process:`);
sessionsToProcess.forEach((session) => {
console.log(`- Session ${session.id}: processed=${session.processed}`);
});
if (sessionsToProcess.length === 0) {
console.log("✅ No sessions found requiring processing");
return;
}
// Import and run the processing function
const { processUnprocessedSessions } = await import(
"../lib/processingScheduler.js"
);
await processUnprocessedSessions();
console.log("✅ Processing scheduler completed");
} catch (error) {
console.error("❌ Processing scheduler failed:", error);
}
}
/**
* Manually trigger transcript parsing
*/
async function triggerTranscriptParsing() {
console.log("=== Manual Transcript Parsing Trigger ===");
try {
const result = await processAllUnparsedTranscripts();
console.log(
`✅ Transcript parsing completed: ${result.processed} processed, ${result.errors} errors`
);
} catch (error) {
console.error("❌ Transcript parsing failed:", error);
}
}
/**
* Show current processing status
*/
async function showProcessingStatus() {
console.log("=== Processing Status ===");
try {
const totalSessions = await prisma.session.count();
const processedSessions = await prisma.session.count({
where: { processed: true },
});
const unprocessedSessions = await prisma.session.count({
where: {
OR: [
{ processed: false },
{ processed: null }
]
},
});
const withMessages = await prisma.session.count({
where: {
messages: {
some: {},
},
},
});
const readyForProcessing = await prisma.session.count({
where: {
AND: [
{ messages: { some: {} } },
{
OR: [
{ processed: false },
{ processed: null }
]
}
],
},
});
console.log(`📊 Total sessions: ${totalSessions}`);
console.log(`✅ Processed sessions: ${processedSessions}`);
console.log(`⏳ Unprocessed sessions: ${unprocessedSessions}`);
console.log(`📄 Sessions with messages: ${withMessages}`);
console.log(`🔄 Ready for processing: ${readyForProcessing}`);
// Show some examples of unprocessed sessions
if (readyForProcessing > 0) {
console.log("\n📋 Sample unprocessed sessions:");
const samples = await prisma.session.findMany({
where: {
AND: [
{ messages: { some: {} } },
{
OR: [
{ processed: false },
{ processed: null }
]
}
],
},
select: {
id: true,
processed: true,
startTime: true,
},
take: 3,
});
samples.forEach((session) => {
console.log(
`- ${session.id} (${session.startTime.toISOString()}) - processed: ${session.processed}`
);
});
}
} catch (error) {
console.error("❌ Failed to get processing status:", error);
}
}
// Main execution based on command line argument
const command = process.argv[2];
switch (command) {
case "refresh":
await triggerSessionRefresh();
break;
case "process":
await triggerProcessingScheduler();
break;
case "parse":
await triggerTranscriptParsing();
break;
case "status":
await showProcessingStatus();
break;
case "both":
await triggerSessionRefresh();
console.log("\n" + "=".repeat(50) + "\n");
await triggerProcessingScheduler();
break;
case "all":
await triggerSessionRefresh();
console.log("\n" + "=".repeat(50) + "\n");
await triggerTranscriptParsing();
console.log("\n" + "=".repeat(50) + "\n");
await triggerProcessingScheduler();
break;
default:
console.log("Usage: node scripts/manual-triggers.js [command]");
console.log("Commands:");
console.log(
" refresh - Trigger session refresh (fetch new sessions from CSV)"
);
console.log(" parse - Parse transcripts into structured messages");
console.log(
" process - Trigger processing scheduler (process unprocessed sessions)"
);
console.log(" status - Show current processing status");
console.log(" both - Run both refresh and processing");
console.log(" all - Run refresh, parse, and processing in sequence");
break;
}
await prisma.$disconnect();

View File

@ -1,283 +0,0 @@
// Script to manually process unprocessed sessions with OpenAI
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
/**
* Processes a session transcript using OpenAI API
* @param {string} sessionId The session ID
* @param {string} transcript The transcript content to process
* @returns {Promise<Object>} Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(sessionId, transcript) {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)
Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;
try {
const response = await fetch(OPENAI_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4-turbo",
messages: [
{
role: "system",
content: systemMessage,
},
{
role: "user",
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
response_format: { type: "json_object" },
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
console.error(`Error processing transcript with OpenAI:`, error);
throw error;
}
}
/**
* Validates the OpenAI response against our expected schema
* @param {Object} data The data to validate
*/
function validateOpenAIResponse(data) {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
];
for (const field of requiredFields) {
if (!(field in data)) {
throw new Error(`Missing required field: ${field}`);
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
}
if (typeof data.escalated !== "boolean") {
throw new Error("Invalid escalated. Expected boolean");
}
if (typeof data.forwarded_hr !== "boolean") {
throw new Error("Invalid forwarded_hr. Expected boolean");
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
}
if (typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
}
}
/**
* Main function to process unprocessed sessions
*/
async function processUnprocessedSessions() {
console.log("Starting to process unprocessed sessions...");
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
],
},
select: {
id: true,
transcriptContent: true,
},
});
if (sessionsToProcess.length === 0) {
console.log("No sessions found requiring processing.");
return;
}
console.log(`Found ${sessionsToProcess.length} sessions to process.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
console.warn(
`Session ${session.id} has no transcript content, skipping.`
);
continue;
}
console.log(`Processing transcript for session ${session.id}...`);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
console.log(`Successfully processed session ${session.id}.`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
errorCount++;
}
}
console.log("Session processing complete.");
console.log(`Successfully processed: ${successCount} sessions.`);
console.log(`Failed to process: ${errorCount} sessions.`);
}
// Run the main function
processUnprocessedSessions()
.catch((e) => {
console.error("An error occurred during the script execution:", e);
process.exitCode = 1;
})
.finally(async () => {
await prisma.$disconnect();
});

View File

@ -18,11 +18,37 @@ interface OpenAIProcessedData {
session_id: string;
}
/**
* Fetches transcript content from a URL
*/
async function fetchTranscriptContent(
url: string,
username?: string,
password?: string
): Promise<string | null> {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!response.ok) {
console.warn(`Failed to fetch transcript from ${url}: ${response.statusText}`);
return null;
}
return await response.text();
} catch (error) {
console.warn(`Error fetching transcript from ${url}:`, error);
return null;
}
}
/**
* Processes a session transcript using OpenAI API
* @param sessionId The session ID
* @param transcript The transcript content to process
* @returns Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(
sessionId: string,
@ -32,7 +58,6 @@ async function processTranscriptWithOpenAI(
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
@ -91,7 +116,7 @@ async function processTranscriptWithOpenAI(
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
temperature: 0.3,
response_format: { type: "json_object" },
}),
});
@ -104,9 +129,7 @@ async function processTranscriptWithOpenAI(
const data = (await response.json()) as any;
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
console.error(`Error processing transcript with OpenAI:`, error);
@ -116,22 +139,11 @@ async function processTranscriptWithOpenAI(
/**
* Validates the OpenAI response against our expected schema
* @param data The data to validate
*/
function validateOpenAIResponse(
data: any
): asserts data is OpenAIProcessedData {
// Check required fields
function validateOpenAIResponse(data: any): asserts data is OpenAIProcessedData {
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
"language", "messages_sent", "sentiment", "escalated",
"forwarded_hr", "category", "questions", "summary", "session_id"
];
for (const field of requiredFields) {
@ -140,11 +152,8 @@ function validateOpenAIResponse(
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
throw new Error("Invalid language format. Expected ISO 639-1 code (e.g., 'en')");
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
@ -152,9 +161,7 @@ function validateOpenAIResponse(
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
throw new Error("Invalid sentiment. Expected 'positive', 'neutral', or 'negative'");
}
if (typeof data.escalated !== "boolean") {
@ -166,39 +173,22 @@ function validateOpenAIResponse(
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
"Schedule & Hours", "Leave & Vacation", "Sick Leave & Recovery",
"Salary & Compensation", "Contract & Hours", "Onboarding", "Offboarding",
"Workwear & Staff Pass", "Team & Contacts", "Personal Questions",
"Access & Login", "Social questions", "Unrecognized / Other"
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
throw new Error(`Invalid category. Expected one of: ${validCategories.join(", ")}`);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
if (typeof data.summary !== "string" || data.summary.length < 10 || data.summary.length > 300) {
throw new Error("Invalid summary. Expected string between 10-300 characters");
}
if (typeof data.session_id !== "string") {
@ -207,86 +197,146 @@ function validateOpenAIResponse(
}
/**
* Main function to process unprocessed sessions
* Main function to process SessionImport records that need processing
*/
async function processUnprocessedSessions() {
console.log("Starting to process unprocessed sessions...");
console.log("Starting to process unprocessed SessionImport records...");
// Find sessions that have transcript content but haven't been processed
const sessionsToProcess = await prisma.session.findMany({
// Find SessionImport records that are QUEUED and have transcript URLs
const importsToProcess = await prisma.sessionImport.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
{ processed: { not: true } }, // Either false or null
],
status: "QUEUED",
fullTranscriptUrl: { not: null },
},
select: {
id: true,
transcriptContent: true,
include: {
company: true,
},
});
if (sessionsToProcess.length === 0) {
console.log("No sessions found requiring processing.");
if (importsToProcess.length === 0) {
console.log("No SessionImport records found requiring processing.");
return;
}
console.log(`Found ${sessionsToProcess.length} sessions to process.`);
console.log(`Found ${importsToProcess.length} SessionImport records to process.`);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsToProcess) {
if (!session.transcriptContent) {
// Should not happen due to query, but good for type safety
console.warn(
`Session ${session.id} has no transcript content, skipping.`
);
for (const importRecord of importsToProcess) {
if (!importRecord.fullTranscriptUrl) {
console.warn(`SessionImport ${importRecord.id} has no transcript URL, skipping.`);
continue;
}
console.log(`Processing transcript for session ${session.id}...`);
console.log(`Processing transcript for SessionImport ${importRecord.id}...`);
try {
const processedData = await processTranscriptWithOpenAI(
session.id,
session.transcriptContent
// Mark as processing
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: { status: "PROCESSING" },
});
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
importRecord.fullTranscriptUrl,
importRecord.company.csvUsername || undefined,
importRecord.company.csvPassword || undefined
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap: Record<string, number> = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
if (!transcriptContent) {
throw new Error("Failed to fetch transcript content");
}
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
// Process with OpenAI
const processedData = await processTranscriptWithOpenAI(
importRecord.externalSessionId,
transcriptContent
);
// Parse dates from raw strings
const startTime = new Date(importRecord.startTimeRaw);
const endTime = new Date(importRecord.endTimeRaw);
// Create or update Session record
const session = await prisma.session.upsert({
where: { importId: importRecord.id },
update: {
startTime: isNaN(startTime.getTime()) ? new Date() : startTime,
endTime: isNaN(endTime.getTime()) ? new Date() : endTime,
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode,
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
sentiment: { positive: 0.8, neutral: 0.0, negative: -0.8 }[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
tokens: importRecord.tokens,
tokensEur: importRecord.tokensEur,
category: processedData.category,
initialMsg: importRecord.initialMessage,
processed: true,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
},
create: {
companyId: importRecord.companyId,
importId: importRecord.id,
startTime: isNaN(startTime.getTime()) ? new Date() : startTime,
endTime: isNaN(endTime.getTime()) ? new Date() : endTime,
ipAddress: importRecord.ipAddress,
country: importRecord.countryCode,
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: { positive: 0.8, neutral: 0.0, negative: -0.8 }[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
fullTranscriptUrl: importRecord.fullTranscriptUrl,
avgResponseTime: importRecord.avgResponseTimeSeconds,
tokens: importRecord.tokens,
tokensEur: importRecord.tokensEur,
category: processedData.category,
initialMsg: importRecord.initialMessage,
processed: true,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
},
});
console.log(`Successfully processed session ${session.id}.`);
// Mark SessionImport as DONE
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: {
status: "DONE",
processedAt: new Date(),
},
});
console.log(`Successfully processed SessionImport ${importRecord.id} -> Session ${session.id}`);
successCount++;
} catch (error) {
console.error(`Error processing session ${session.id}:`, error);
console.error(`Error processing SessionImport ${importRecord.id}:`, error);
// Mark as ERROR
await prisma.sessionImport.update({
where: { id: importRecord.id },
data: {
status: "ERROR",
errorMsg: error instanceof Error ? error.message : String(error),
},
});
errorCount++;
}
}
console.log("Session processing complete.");
console.log(`Successfully processed: ${successCount} sessions.`);
console.log(`Failed to process: ${errorCount} sessions.`);
console.log("SessionImport processing complete.");
console.log(`Successfully processed: ${successCount} records.`);
console.log(`Failed to process: ${errorCount} records.`);
}
// Run the main function
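The scheduler above walks each import through a small status lifecycle. A compact sketch of the transitions implied by the code (the enum values appear in the diff; the helper itself is illustrative):

type ImportStatus = "QUEUED" | "PROCESSING" | "DONE" | "ERROR";

// The CSV import handler (re)sets records to QUEUED; the scheduler moves them
// to PROCESSING and then to DONE or ERROR.
const transitions: Record<ImportStatus, ImportStatus[]> = {
  QUEUED: ["PROCESSING"],
  PROCESSING: ["DONE", "ERROR"],
  DONE: ["QUEUED"],  // re-queued when the CSV import upserts the row again
  ERROR: ["QUEUED"], // same: the upsert resets status for reprocessing
};

function canTransition(from: ImportStatus, to: ImportStatus): boolean {
  return transitions[from].includes(to);
}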

View File

@ -1,75 +0,0 @@
// Script to check processing status and trigger processing
// Usage: node scripts/test-processing-status.js
import { PrismaClient } from '@prisma/client';
const prisma = new PrismaClient();
async function checkProcessingStatus() {
try {
console.log('🔍 Checking processing status...\n');
// Get processing status
const totalSessions = await prisma.session.count();
const processedSessions = await prisma.session.count({
where: { processed: true }
});
const unprocessedSessions = await prisma.session.count({
where: { processed: false }
});
const sessionsWithMessages = await prisma.session.count({
where: {
processed: false,
messages: { some: {} }
}
});
console.log('📊 Processing Status:');
console.log(` Total sessions: ${totalSessions}`);
console.log(` ✅ Processed: ${processedSessions}`);
console.log(` ⏳ Unprocessed: ${unprocessedSessions}`);
console.log(` 📝 Unprocessed with messages: ${sessionsWithMessages}`);
const processedPercentage = ((processedSessions / totalSessions) * 100).toFixed(1);
console.log(` 📈 Processing progress: ${processedPercentage}%\n`);
// Check recent processing activity
const recentlyProcessed = await prisma.session.findMany({
where: {
processed: true,
createdAt: {
gte: new Date(Date.now() - 60 * 60 * 1000) // Last hour
}
},
orderBy: { createdAt: 'desc' },
take: 5,
select: {
id: true,
createdAt: true,
category: true,
sentiment: true
}
});
if (recentlyProcessed.length > 0) {
console.log('🕒 Recently processed sessions:');
recentlyProcessed.forEach(session => {
const timeAgo = Math.round((Date.now() - session.createdAt.getTime()) / 1000 / 60);
console.log(`${session.id.substring(0, 8)}... (${timeAgo}m ago) - ${session.category || 'No category'}`);
});
} else {
console.log('🕒 No sessions processed in the last hour');
}
console.log('\n✨ Processing system is working correctly!');
console.log('💡 The parallel processing successfully processed sessions.');
console.log('🎯 For manual triggers, you need to be logged in as an admin user.');
} catch (error) {
console.error('❌ Error checking status:', error);
} finally {
await prisma.$disconnect();
}
}
checkProcessingStatus();

View File

@ -1,20 +0,0 @@
// Direct trigger for processing scheduler (bypasses authentication)
// Usage: node scripts/trigger-processing-direct.js
import { processUnprocessedSessions } from '../lib/processingScheduler.js';
async function triggerProcessing() {
try {
console.log('🚀 Manually triggering processing scheduler...\n');
// Process with custom parameters
await processUnprocessedSessions(50, 3); // Process 50 sessions with 3 concurrent workers
console.log('\n✅ Processing trigger completed!');
} catch (error) {
console.error('❌ Error triggering processing:', error);
}
}
triggerProcessing();

View File

@ -1,39 +0,0 @@
// Custom Next.js server with scheduler initialization
const { createServer } = require("http");
const { parse } = require("url");
const next = require("next");
const { startScheduler } = require("./lib/scheduler");
const { startProcessingScheduler } = require("./lib/processingScheduler");
const dev = process.env.NODE_ENV !== "production";
const hostname = "localhost";
const port = process.env.PORT || 3000;
// Initialize Next.js
const app = next({ dev, hostname, port });
const handle = app.getRequestHandler();
app.prepare().then(() => {
// Initialize schedulers when the server starts
console.log("Starting schedulers...");
startScheduler();
startProcessingScheduler();
console.log("All schedulers initialized successfully");
createServer(async (req, res) => {
try {
// Parse the URL
const parsedUrl = parse(req.url, true);
// Let Next.js handle the request
await handle(req, res, parsedUrl);
} catch (err) {
console.error("Error occurred handling", req.url, err);
res.statusCode = 500;
res.end("Internal Server Error");
}
}).listen(port, (err) => {
if (err) throw err;
console.log(`> Ready on http://${hostname}:${port}`);
});
});

View File

@ -1,56 +0,0 @@
// Custom Next.js server with scheduler initialization
import { createServer } from "http";
import { parse } from "url";
import next from "next";
// We'll need to dynamically import these after they're compiled
let startScheduler;
let startProcessingScheduler;
const dev = process.env.NODE_ENV !== "production";
const hostname = "localhost";
const port = parseInt(process.env.PORT || "3000", 10);
// Initialize Next.js
const app = next({ dev, hostname, port });
const handle = app.getRequestHandler();
async function init() {
try {
// Dynamically import the schedulers
const scheduler = await import("./lib/scheduler.js");
const processingScheduler = await import("./lib/processingScheduler.js");
startScheduler = scheduler.startScheduler;
startProcessingScheduler = processingScheduler.startProcessingScheduler;
app.prepare().then(() => {
// Initialize schedulers when the server starts
console.log("Starting schedulers...");
startScheduler();
startProcessingScheduler();
console.log("All schedulers initialized successfully");
createServer(async (req, res) => {
try {
// Parse the URL
const parsedUrl = parse(req.url || "", true);
// Let Next.js handle the request
await handle(req, res, parsedUrl);
} catch (err) {
console.error("Error occurred handling", req.url, err);
res.statusCode = 500;
res.end("Internal Server Error");
}
}).listen(port, () => {
console.log(`> Ready on http://${hostname}:${port}`);
});
});
} catch (error) {
console.error("Failed to initialize server:", error);
process.exit(1);
}
}
init();
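With both custom servers deleted, the schedulers need a different entry point. The replacement is not shown in this diff, so the following is only a sketch of one common pattern, a Next.js instrumentation hook; the file location and import paths are assumptions:

// instrumentation.ts (hypothetical; not part of this commit)
export async function register() {
  if (process.env.NEXT_RUNTIME === "nodejs") {
    const { startScheduler } = await import("./lib/scheduler");
    const { startProcessingScheduler } = await import("./lib/processingScheduler");
    startScheduler();
    startProcessingScheduler();
  }
}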