Refactor transcript fetching and processing scripts

- Introduced a new function `fetchTranscriptContent` to handle fetching transcripts with optional authentication.
- Enhanced error handling and logging for transcript fetching.
- Updated the `parseTranscriptToMessages` function to improve message parsing logic.
- Replaced the old session processing logic with a new approach that utilizes `SessionImport` records.
- Removed obsolete scripts related to manual triggers and whitespace fixing.
- Updated the server initialization to remove direct server handling, transitioning to a more modular approach.
- Improved overall code structure and readability across various scripts.
This commit is contained in:
Max Kowalski
2025-06-27 16:38:16 +02:00
parent d7ac0ba208
commit 1dd618b666
35 changed files with 6536 additions and 12797 deletions

View File

@ -1,636 +0,0 @@
// JavaScript version of csvFetcher with session storage functionality
import fetch from "node-fetch";
import { parse } from "csv-parse/sync";
import ISO6391 from "iso-639-1";
import countries from "i18n-iso-countries";
import { PrismaClient } from "@prisma/client";
// Register locales for i18n-iso-countries
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
countries.registerLocale(enLocale);
const prisma = new PrismaClient();
/**
* Converts country names to ISO 3166-1 alpha-2 codes
* @param {string} countryStr Raw country string from CSV
* @returns {string|null|undefined} ISO 3166-1 alpha-2 country code or null if not found
*/
function getCountryCode(countryStr) {
if (countryStr === undefined) return undefined;
if (countryStr === null || countryStr === "") return null;
// Clean the input
const normalized = countryStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toUpperCase()) {
return countries.isValid(normalized) ? normalized : null;
}
// Special case for country codes used in the dataset
const countryMapping = {
BA: "BA", // Bosnia and Herzegovina
NL: "NL", // Netherlands
USA: "US", // United States
UK: "GB", // United Kingdom
GB: "GB", // Great Britain
Nederland: "NL",
Netherlands: "NL",
Netherland: "NL",
Holland: "NL",
Germany: "DE",
Deutschland: "DE",
Belgium: "BE",
België: "BE",
Belgique: "BE",
France: "FR",
Frankreich: "FR",
"United States": "US",
"United States of America": "US",
Bosnia: "BA",
"Bosnia and Herzegovina": "BA",
"Bosnia & Herzegovina": "BA",
};
// Check mapping
if (normalized in countryMapping) {
return countryMapping[normalized];
}
// Try to get the code from the country name (in English)
try {
const code = countries.getAlpha2Code(normalized, "en");
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting country name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Converts language names to ISO 639-1 codes
* @param {string} languageStr Raw language string from CSV
* @returns {string|null|undefined} ISO 639-1 language code or null if not found
*/
function getLanguageCode(languageStr) {
if (languageStr === undefined) return undefined;
if (languageStr === null || languageStr === "") return null;
// Clean the input
const normalized = languageStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toLowerCase()) {
return ISO6391.validate(normalized) ? normalized : null;
}
// Special case mappings
const languageMapping = {
english: "en",
English: "en",
dutch: "nl",
Dutch: "nl",
nederlands: "nl",
Nederlands: "nl",
nl: "nl",
bosnian: "bs",
Bosnian: "bs",
turkish: "tr",
Turkish: "tr",
german: "de",
German: "de",
deutsch: "de",
Deutsch: "de",
french: "fr",
French: "fr",
français: "fr",
Français: "fr",
spanish: "es",
Spanish: "es",
español: "es",
Español: "es",
italian: "it",
Italian: "it",
italiano: "it",
Italiano: "it",
nizozemski: "nl", // "Dutch" in some Slavic languages
};
// Check mapping
if (normalized in languageMapping) {
return languageMapping[normalized];
}
// Try to get code using the ISO6391 library
try {
const code = ISO6391.getCode(normalized);
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting language name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Normalizes category values to standard groups
* @param {string} categoryStr The raw category string from CSV
* @returns {string|null} A normalized category string
*/
function normalizeCategory(categoryStr) {
if (!categoryStr) return null;
const normalized = categoryStr.toLowerCase().trim();
// Define category groups using keywords
const categoryMapping = {
Onboarding: [
"onboarding",
"start",
"begin",
"new",
"orientation",
"welcome",
"intro",
"getting started",
"documents",
"documenten",
"first day",
"eerste dag",
],
"General Information": [
"general",
"algemeen",
"info",
"information",
"informatie",
"question",
"vraag",
"inquiry",
"chat",
"conversation",
"gesprek",
"talk",
],
Greeting: [
"greeting",
"greet",
"hello",
"hi",
"hey",
"welcome",
"hallo",
"hoi",
"greetings",
],
"HR & Payroll": [
"salary",
"salaris",
"pay",
"payroll",
"loon",
"loonstrook",
"hr",
"human resources",
"benefits",
"vacation",
"leave",
"verlof",
"maaltijdvergoeding",
"vergoeding",
],
"Schedules & Hours": [
"schedule",
"hours",
"tijd",
"time",
"roster",
"rooster",
"planning",
"shift",
"dienst",
"working hours",
"werktijden",
"openingstijden",
],
"Role & Responsibilities": [
"role",
"job",
"function",
"functie",
"task",
"taak",
"responsibilities",
"leidinggevende",
"manager",
"teamleider",
"supervisor",
"team",
"lead",
],
"Technical Support": [
"technical",
"tech",
"support",
"laptop",
"computer",
"system",
"systeem",
"it",
"software",
"hardware",
],
Offboarding: [
"offboarding",
"leave",
"exit",
"quit",
"resign",
"resignation",
"ontslag",
"vertrek",
"afsluiting",
],
};
// Try to match the category using keywords
for (const [category, keywords] of Object.entries(categoryMapping)) {
if (keywords.some((keyword) => normalized.includes(keyword))) {
return category;
}
}
// If no match, return "Other"
return "Other";
}
/**
* Converts sentiment string values to numeric scores
* @param {string} sentimentStr The sentiment string from the CSV
* @returns {number|null} A numeric score representing the sentiment
*/
function mapSentimentToScore(sentimentStr) {
if (!sentimentStr) return null;
// Convert to lowercase for case-insensitive matching
const sentiment = sentimentStr.toLowerCase();
// Map sentiment strings to numeric values on a scale from -1 to 2
const sentimentMap = {
happy: 1.0,
excited: 1.5,
positive: 0.8,
neutral: 0.0,
playful: 0.7,
negative: -0.8,
angry: -1.0,
sad: -0.7,
frustrated: -0.9,
positief: 0.8, // Dutch
neutraal: 0.0, // Dutch
negatief: -0.8, // Dutch
positivo: 0.8, // Spanish/Italian
neutro: 0.0, // Spanish/Italian
negativo: -0.8, // Spanish/Italian
yes: 0.5, // For any "yes" sentiment
no: -0.5, // For any "no" sentiment
};
return sentimentMap[sentiment] !== undefined
? sentimentMap[sentiment]
: isNaN(parseFloat(sentiment))
? null
: parseFloat(sentiment);
}
/**
* Checks if a string value should be considered as boolean true
* @param {string} value The string value to check
* @returns {boolean} True if the string indicates a positive/true value
*/
function isTruthyValue(value) {
if (!value) return false;
const truthyValues = [
"1",
"true",
"yes",
"y",
"ja",
"si",
"oui",
"да",
"да",
"はい",
];
return truthyValues.includes(value.toLowerCase());
}
/**
* Safely parses a date string into a Date object.
* @param {string} dateStr The date string to parse.
* @returns {Date|null} A Date object or null if parsing fails.
*/
function safeParseDate(dateStr) {
if (!dateStr) return null;
// Try to parse D-M-YYYY HH:MM:SS format (with hyphens or dots)
const dateTimeRegex =
/^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
const match = dateStr.match(dateTimeRegex);
if (match) {
const day = match[1];
const month = match[2];
const year = match[3];
const hour = match[4];
const minute = match[5];
const second = match[6];
// Reformat to YYYY-MM-DDTHH:MM:SS (ISO-like, but local time)
// Ensure month and day are two digits
const formattedDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}T${hour.padStart(2, "0")}:${minute.padStart(2, "0")}:${second.padStart(2, "0")}`;
try {
const date = new Date(formattedDateStr);
// Basic validation: check if the constructed date is valid
if (!isNaN(date.getTime())) {
return date;
}
} catch (e) {
console.warn(
`[safeParseDate] Error parsing reformatted string ${formattedDateStr} from ${dateStr}:`,
e
);
}
}
// Fallback for other potential formats (e.g., direct ISO 8601) or if the primary parse failed
try {
const parsedDate = new Date(dateStr);
if (!isNaN(parsedDate.getTime())) {
return parsedDate;
}
} catch (e) {
console.warn(`[safeParseDate] Error parsing with fallback ${dateStr}:`, e);
}
console.warn(`Failed to parse date string: ${dateStr}`);
return null;
}
/**
* Fetches transcript content from a URL
* @param {string} url The URL to fetch the transcript from
* @param {string} username Optional username for authentication
* @param {string} password Optional password for authentication
* @returns {Promise<string|null>} The transcript content or null if fetching fails
*/
async function fetchTranscriptContent(url, username, password) {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
timeout: 10000, // 10 second timeout
});
if (!response.ok) {
// Only log error once per batch, not for every transcript
if (Math.random() < 0.1) {
// Log ~10% of errors to avoid spam
console.warn(
`[CSV] Transcript fetch failed for ${url}: ${response.status} ${response.statusText}`
);
}
return null;
}
return await response.text();
} catch (error) {
// Only log error once per batch, not for every transcript
if (Math.random() < 0.1) {
// Log ~10% of errors to avoid spam
console.warn(`[CSV] Transcript fetch error for ${url}:`, error.message);
}
return null;
}
}
/**
* Fetches and parses CSV data from a URL
* @param {string} url The CSV URL
* @param {string} username Optional username for authentication
* @param {string} password Optional password for authentication
* @returns {Promise<Object[]>} Array of parsed session data
*/
export async function fetchAndParseCsv(url, username, password) {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const res = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
const text = await res.text();
// Parse without expecting headers, using known order
const records = parse(text, {
delimiter: ",",
columns: [
"session_id",
"start_time",
"end_time",
"ip_address",
"country",
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"full_transcript_url",
"avg_response_time",
"tokens",
"tokens_eur",
"category",
"initial_msg",
],
from_line: 1,
relax_column_count: true,
skip_empty_lines: true,
trim: true,
});
// Coerce types for relevant columns
return records.map((r) => ({
id: r.session_id,
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
endTime: safeParseDate(r.end_time),
ipAddress: r.ip_address,
country: getCountryCode(r.country),
language: getLanguageCode(r.language),
messagesSent: Number(r.messages_sent) || 0,
sentiment: mapSentimentToScore(r.sentiment),
escalated: isTruthyValue(r.escalated),
forwardedHr: isTruthyValue(r.forwarded_hr),
fullTranscriptUrl: r.full_transcript_url,
avgResponseTime: r.avg_response_time
? parseFloat(r.avg_response_time)
: null,
tokens: Number(r.tokens) || 0,
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
category: normalizeCategory(r.category),
initialMsg: r.initial_msg,
}));
}
/**
* Fetches and stores sessions for all companies
*/
export async function fetchAndStoreSessionsForAllCompanies() {
try {
// Get all companies
const companies = await prisma.company.findMany();
for (const company of companies) {
if (!company.csvUrl) {
console.log(
`[Scheduler] Skipping company ${company.id} - no CSV URL configured`
);
continue;
}
// Skip companies with invalid/example URLs
if (
company.csvUrl.includes("example.com") ||
company.csvUrl === "https://example.com/data.csv"
) {
console.log(
`[Scheduler] Skipping company ${company.id} - invalid/example CSV URL: ${company.csvUrl}`
);
continue;
}
console.log(`[Scheduler] Processing sessions for company: ${company.id}`);
try {
const sessions = await fetchAndParseCsv(
company.csvUrl,
company.csvUsername,
company.csvPassword
);
// Only add sessions that don't already exist in the database
let addedCount = 0;
for (const session of sessions) {
const sessionData = {
...session,
companyId: company.id,
id:
session.id ||
session.sessionId ||
`sess_${Date.now()}_${Math.random().toString(36).substring(2, 7)}`,
// Ensure startTime is not undefined
startTime: session.startTime || new Date(),
};
// Validate dates to prevent "Invalid Date" errors
const startTime =
sessionData.startTime instanceof Date &&
!isNaN(sessionData.startTime.getTime())
? sessionData.startTime
: new Date();
const endTime =
session.endTime instanceof Date && !isNaN(session.endTime.getTime())
? session.endTime
: new Date();
// Note: transcriptContent field was removed from schema
// Transcript content can be fetched on-demand from fullTranscriptUrl
// Check if the session already exists
const existingSession = await prisma.session.findUnique({
where: { id: sessionData.id },
});
if (existingSession) {
// Skip this session as it already exists
continue;
}
// Only include fields that are properly typed for Prisma
await prisma.session.create({
data: {
id: sessionData.id,
companyId: sessionData.companyId,
startTime: startTime,
endTime: endTime,
ipAddress: session.ipAddress || null,
country: session.country || null,
language: session.language || null,
messagesSent:
typeof session.messagesSent === "number"
? session.messagesSent
: 0,
sentiment:
typeof session.sentiment === "number"
? session.sentiment
: null,
escalated:
typeof session.escalated === "boolean"
? session.escalated
: null,
forwardedHr:
typeof session.forwardedHr === "boolean"
? session.forwardedHr
: null,
fullTranscriptUrl: session.fullTranscriptUrl || null,
avgResponseTime:
typeof session.avgResponseTime === "number"
? session.avgResponseTime
: null,
tokens:
typeof session.tokens === "number" ? session.tokens : null,
tokensEur:
typeof session.tokensEur === "number"
? session.tokensEur
: null,
category: session.category || null,
initialMsg: session.initialMsg || null,
},
});
addedCount++;
}
console.log(
`[Scheduler] Added ${addedCount} new sessions for company ${company.id}`
);
} catch (error) {
console.error(
`[Scheduler] Error processing company ${company.id}:`,
error
);
}
}
} catch (error) {
console.error("[Scheduler] Error fetching companies:", error);
throw error;
}
}

View File

@ -1,440 +1,41 @@
// Fetches, parses, and returns chat session data for a company from a CSV URL
// Simplified CSV fetcher - fetches and parses CSV data without any processing
// Maps directly to SessionImport table fields
import fetch from "node-fetch";
import { parse } from "csv-parse/sync";
import ISO6391 from "iso-639-1";
import countries from "i18n-iso-countries";
// Register locales for i18n-iso-countries
import enLocale from "i18n-iso-countries/langs/en.json" with { type: "json" };
countries.registerLocale(enLocale);
// This type is used internally for parsing the CSV records
interface CSVRecord {
session_id: string;
start_time: string;
end_time?: string;
ip_address?: string;
country?: string;
language?: string;
messages_sent?: string;
sentiment?: string;
escalated?: string;
forwarded_hr?: string;
full_transcript_url?: string;
avg_response_time?: string;
tokens?: string;
tokens_eur?: string;
category?: string;
initial_msg?: string;
[key: string]: string | undefined;
}
interface SessionData {
id: string;
sessionId: string;
startTime: Date;
endTime: Date | null;
ipAddress?: string;
country?: string | null; // Will store ISO 3166-1 alpha-2 country code or null/undefined
language?: string | null; // Will store ISO 639-1 language code or null/undefined
messagesSent: number;
sentiment: number | null;
escalated: boolean;
forwardedHr: boolean;
fullTranscriptUrl?: string | null;
avgResponseTime: number | null;
tokens: number;
tokensEur: number;
category?: string | null;
initialMsg?: string;
// Raw CSV data interface matching SessionImport schema
interface RawSessionImport {
externalSessionId: string;
startTimeRaw: string;
endTimeRaw: string;
ipAddress: string | null;
countryCode: string | null;
language: string | null;
messagesSent: number | null;
sentimentRaw: string | null;
escalatedRaw: string | null;
forwardedHrRaw: string | null;
fullTranscriptUrl: string | null;
avgResponseTimeSeconds: number | null;
tokens: number | null;
tokensEur: number | null;
category: string | null;
initialMessage: string | null;
}
/**
* Converts country names to ISO 3166-1 alpha-2 codes
* @param countryStr Raw country string from CSV
* @returns ISO 3166-1 alpha-2 country code or null if not found
* Fetches and parses CSV data from a URL without any processing
* Maps CSV columns by position to SessionImport fields
* @param url The CSV URL
* @param username Optional username for authentication
* @param password Optional password for authentication
* @returns Array of raw session import data
*/
function getCountryCode(countryStr?: string): string | null | undefined {
if (countryStr === undefined) return undefined;
if (countryStr === null || countryStr === "") return null;
// Clean the input
const normalized = countryStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toUpperCase()) {
return countries.isValid(normalized) ? normalized : null;
}
// Special case for country codes used in the dataset
const countryMapping: Record<string, string> = {
BA: "BA", // Bosnia and Herzegovina
NL: "NL", // Netherlands
USA: "US", // United States
UK: "GB", // United Kingdom
GB: "GB", // Great Britain
Nederland: "NL",
Netherlands: "NL",
Netherland: "NL",
Holland: "NL",
Germany: "DE",
Deutschland: "DE",
Belgium: "BE",
België: "BE",
Belgique: "BE",
France: "FR",
Frankreich: "FR",
"United States": "US",
"United States of America": "US",
Bosnia: "BA",
"Bosnia and Herzegovina": "BA",
"Bosnia & Herzegovina": "BA",
};
// Check mapping
if (normalized in countryMapping) {
return countryMapping[normalized];
}
// Try to get the code from the country name (in English)
try {
const code = countries.getAlpha2Code(normalized, "en");
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting country name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Converts language names to ISO 639-1 codes
* @param languageStr Raw language string from CSV
* @returns ISO 639-1 language code or null if not found
*/
function getLanguageCode(languageStr?: string): string | null | undefined {
if (languageStr === undefined) return undefined;
if (languageStr === null || languageStr === "") return null;
// Clean the input
const normalized = languageStr.trim();
if (!normalized) return null;
// Direct ISO code check (if already a 2-letter code)
if (normalized.length === 2 && normalized === normalized.toLowerCase()) {
return ISO6391.validate(normalized) ? normalized : null;
}
// Special case mappings
const languageMapping: Record<string, string> = {
english: "en",
English: "en",
dutch: "nl",
Dutch: "nl",
nederlands: "nl",
Nederlands: "nl",
nl: "nl",
bosnian: "bs",
Bosnian: "bs",
turkish: "tr",
Turkish: "tr",
german: "de",
German: "de",
deutsch: "de",
Deutsch: "de",
french: "fr",
French: "fr",
français: "fr",
Français: "fr",
spanish: "es",
Spanish: "es",
español: "es",
Español: "es",
italian: "it",
Italian: "it",
italiano: "it",
Italiano: "it",
nizozemski: "nl", // "Dutch" in some Slavic languages
};
// Check mapping
if (normalized in languageMapping) {
return languageMapping[normalized];
}
// Try to get code using the ISO6391 library
try {
const code = ISO6391.getCode(normalized);
if (code) return code;
} catch (error) {
process.stderr.write(
`[CSV] Error converting language name to code: ${normalized} - ${error}\n`
);
}
// If all else fails, return null
return null;
}
/**
* Normalizes category values to standard groups
* @param categoryStr The raw category string from CSV
* @returns A normalized category string
*/
function normalizeCategory(categoryStr?: string): string | null {
if (!categoryStr) return null;
const normalized = categoryStr.toLowerCase().trim();
// Define category groups using keywords
const categoryMapping: Record<string, string[]> = {
Onboarding: [
"onboarding",
"start",
"begin",
"new",
"orientation",
"welcome",
"intro",
"getting started",
"documents",
"documenten",
"first day",
"eerste dag",
],
"General Information": [
"general",
"algemeen",
"info",
"information",
"informatie",
"question",
"vraag",
"inquiry",
"chat",
"conversation",
"gesprek",
"talk",
],
Greeting: [
"greeting",
"greet",
"hello",
"hi",
"hey",
"welcome",
"hallo",
"hoi",
"greetings",
],
"HR & Payroll": [
"salary",
"salaris",
"pay",
"payroll",
"loon",
"loonstrook",
"hr",
"human resources",
"benefits",
"vacation",
"leave",
"verlof",
"maaltijdvergoeding",
"vergoeding",
],
"Schedules & Hours": [
"schedule",
"hours",
"tijd",
"time",
"roster",
"rooster",
"planning",
"shift",
"dienst",
"working hours",
"werktijden",
"openingstijden",
],
"Role & Responsibilities": [
"role",
"job",
"function",
"functie",
"task",
"taak",
"responsibilities",
"leidinggevende",
"manager",
"teamleider",
"supervisor",
"team",
"lead",
],
"Technical Support": [
"technical",
"tech",
"support",
"laptop",
"computer",
"system",
"systeem",
"it",
"software",
"hardware",
],
Offboarding: [
"offboarding",
"leave",
"exit",
"quit",
"resign",
"resignation",
"ontslag",
"vertrek",
"afsluiting",
],
};
// Try to match the category using keywords
for (const [category, keywords] of Object.entries(categoryMapping)) {
if (keywords.some((keyword) => normalized.includes(keyword))) {
return category;
}
}
// If no match, return "Other"
return "Other";
}
/**
* Converts sentiment string values to numeric scores
* @param sentimentStr The sentiment string from the CSV
* @returns A numeric score representing the sentiment
*/
function mapSentimentToScore(sentimentStr?: string): number | null {
if (!sentimentStr) return null;
// Convert to lowercase for case-insensitive matching
const sentiment = sentimentStr.toLowerCase();
// Map sentiment strings to numeric values on a scale from -1 to 2
const sentimentMap: Record<string, number> = {
happy: 1.0,
excited: 1.5,
positive: 0.8,
neutral: 0.0,
playful: 0.7,
negative: -0.8,
angry: -1.0,
sad: -0.7,
frustrated: -0.9,
positief: 0.8, // Dutch
neutraal: 0.0, // Dutch
negatief: -0.8, // Dutch
positivo: 0.8, // Spanish/Italian
neutro: 0.0, // Spanish/Italian
negativo: -0.8, // Spanish/Italian
yes: 0.5, // For any "yes" sentiment
no: -0.5, // For any "no" sentiment
};
return sentimentMap[sentiment] !== undefined
? sentimentMap[sentiment]
: isNaN(parseFloat(sentiment))
? null
: parseFloat(sentiment);
}
/**
* Checks if a string value should be considered as boolean true
* @param value The string value to check
* @returns True if the string indicates a positive/true value
*/
function isTruthyValue(value?: string): boolean {
if (!value) return false;
const truthyValues = [
"1",
"true",
"yes",
"y",
"ja",
"si",
"oui",
"да",
"да",
"はい",
];
return truthyValues.includes(value.toLowerCase());
}
/**
* Safely parses a date string into a Date object.
* Handles potential errors and various formats, prioritizing D-M-YYYY HH:MM:SS.
* @param dateStr The date string to parse.
* @returns A Date object or null if parsing fails.
*/
function safeParseDate(dateStr?: string): Date | null {
if (!dateStr) return null;
// Try to parse D-M-YYYY HH:MM:SS format (with hyphens or dots)
const dateTimeRegex =
/^(\d{1,2})[.-](\d{1,2})[.-](\d{4}) (\d{1,2}):(\d{1,2}):(\d{1,2})$/;
const match = dateStr.match(dateTimeRegex);
if (match) {
const day = match[1];
const month = match[2];
const year = match[3];
const hour = match[4];
const minute = match[5];
const second = match[6];
// Reformat to YYYY-MM-DDTHH:MM:SS (ISO-like, but local time)
// Ensure month and day are two digits
const formattedDateStr = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}T${hour.padStart(2, "0")}:${minute.padStart(2, "0")}:${second.padStart(2, "0")}`;
try {
const date = new Date(formattedDateStr);
// Basic validation: check if the constructed date is valid
if (!isNaN(date.getTime())) {
// console.log(`[safeParseDate] Parsed from D-M-YYYY: ${dateStr} -> ${formattedDateStr} -> ${date.toISOString()}`);
return date;
}
} catch (e) {
console.warn(
`[safeParseDate] Error parsing reformatted string ${formattedDateStr} from ${dateStr}:`,
e
);
}
}
// Fallback for other potential formats (e.g., direct ISO 8601) or if the primary parse failed
try {
const parsedDate = new Date(dateStr);
if (!isNaN(parsedDate.getTime())) {
// console.log(`[safeParseDate] Parsed with fallback: ${dateStr} -> ${parsedDate.toISOString()}`);
return parsedDate;
}
} catch (e) {
console.warn(`[safeParseDate] Error parsing with fallback ${dateStr}:`, e);
}
console.warn(`Failed to parse date string: ${dateStr}`);
return null;
}
export async function fetchAndParseCsv(
url: string,
username?: string,
password?: string
): Promise<Partial<SessionData>[]> {
): Promise<RawSessionImport[]> {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
@ -443,56 +44,39 @@ export async function fetchAndParseCsv(
const res = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!res.ok) throw new Error("Failed to fetch CSV: " + res.statusText);
if (!res.ok) {
throw new Error(`Failed to fetch CSV: ${res.status} ${res.statusText}`);
}
const text = await res.text();
// Parse without expecting headers, using known order
const records: CSVRecord[] = parse(text, {
// Parse CSV without headers, using positional column mapping
const records: string[][] = parse(text, {
delimiter: ",",
columns: [
"session_id",
"start_time",
"end_time",
"ip_address",
"country",
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"full_transcript_url",
"avg_response_time",
"tokens",
"tokens_eur",
"category",
"initial_msg",
],
from_line: 1,
from_line: 1, // Start from first line (no headers)
relax_column_count: true,
skip_empty_lines: true,
trim: true,
});
// Coerce types for relevant columns
return records.map((r) => ({
id: r.session_id,
startTime: safeParseDate(r.start_time) || new Date(), // Fallback to current date if invalid
endTime: safeParseDate(r.end_time),
ipAddress: r.ip_address,
country: getCountryCode(r.country),
language: getLanguageCode(r.language),
messagesSent: Number(r.messages_sent) || 0,
sentiment: mapSentimentToScore(r.sentiment),
escalated: isTruthyValue(r.escalated),
forwardedHr: isTruthyValue(r.forwarded_hr),
fullTranscriptUrl: r.full_transcript_url,
avgResponseTime: r.avg_response_time
? parseFloat(r.avg_response_time)
: null,
tokens: Number(r.tokens) || 0,
tokensEur: r.tokens_eur ? parseFloat(r.tokens_eur) : 0,
category: normalizeCategory(r.category),
initialMsg: r.initial_msg,
// Map CSV columns by position to SessionImport fields
return records.map((row) => ({
externalSessionId: row[0] || "",
startTimeRaw: row[1] || "",
endTimeRaw: row[2] || "",
ipAddress: row[3] || null,
countryCode: row[4] || null,
language: row[5] || null,
messagesSent: row[6] ? parseInt(row[6], 10) || null : null,
sentimentRaw: row[7] || null,
escalatedRaw: row[8] || null,
forwardedHrRaw: row[9] || null,
fullTranscriptUrl: row[10] || null,
avgResponseTimeSeconds: row[11] ? parseFloat(row[11]) || null : null,
tokens: row[12] ? parseInt(row[12], 10) || null : null,
tokensEur: row[13] ? parseFloat(row[13]) || null : null,
category: row[14] || null,
initialMessage: row[15] || null,
}));
}

View File

@ -1,412 +0,0 @@
// Session processing scheduler - JavaScript version
import cron from "node-cron";
import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
// Load environment variables from .env.local
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const envPath = join(__dirname, '..', '.env.local');
try {
const envFile = readFileSync(envPath, 'utf8');
const envVars = envFile.split('\n').filter(line => line.trim() && !line.startsWith('#'));
envVars.forEach(line => {
const [key, ...valueParts] = line.split('=');
if (key && valueParts.length > 0) {
const value = valueParts.join('=').trim();
if (!process.env[key.trim()]) {
process.env[key.trim()] = value;
}
}
});
} catch (error) {
// Silently fail if .env.local doesn't exist
}
const prisma = new PrismaClient();
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";
/**
* Processes a session transcript using OpenAI API
* @param {string} sessionId The session ID
* @param {string} transcript The transcript content to process
* @returns {Promise<Object>} Processed data from OpenAI
*/
async function processTranscriptWithOpenAI(sessionId, transcript) {
if (!OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY environment variable is not set");
}
// Create a system message with instructions
const systemMessage = `
You are an AI assistant tasked with analyzing chat transcripts.
Extract the following information from the transcript:
1. The primary language used by the user (ISO 639-1 code)
2. Number of messages sent by the user
3. Overall sentiment (positive, neutral, or negative)
4. Whether the conversation was escalated
5. Whether HR contact was mentioned or provided
6. The best-fitting category for the conversation from this list:
- Schedule & Hours
- Leave & Vacation
- Sick Leave & Recovery
- Salary & Compensation
- Contract & Hours
- Onboarding
- Offboarding
- Workwear & Staff Pass
- Team & Contacts
- Personal Questions
- Access & Login
- Social questions
- Unrecognized / Other
7. Up to 5 paraphrased questions asked by the user (in English)
8. A brief summary of the conversation (10-300 characters)
Return the data in JSON format matching this schema:
{
"language": "ISO 639-1 code",
"messages_sent": number,
"sentiment": "positive|neutral|negative",
"escalated": boolean,
"forwarded_hr": boolean,
"category": "one of the categories listed above",
"questions": ["question 1", "question 2", ...],
"summary": "brief summary",
"session_id": "${sessionId}"
}
`;
try {
const response = await fetch(OPENAI_API_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${OPENAI_API_KEY}`,
},
body: JSON.stringify({
model: "gpt-4-turbo",
messages: [
{
role: "system",
content: systemMessage,
},
{
role: "user",
content: transcript,
},
],
temperature: 0.3, // Lower temperature for more consistent results
response_format: { type: "json_object" },
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`OpenAI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const processedData = JSON.parse(data.choices[0].message.content);
// Validate the response against our expected schema
validateOpenAIResponse(processedData);
return processedData;
} catch (error) {
process.stderr.write(`Error processing transcript with OpenAI: ${error}\n`);
throw error;
}
}
/**
* Validates the OpenAI response against our expected schema
* @param {Object} data The data to validate
*/
function validateOpenAIResponse(data) {
// Check required fields
const requiredFields = [
"language",
"messages_sent",
"sentiment",
"escalated",
"forwarded_hr",
"category",
"questions",
"summary",
"session_id",
];
for (const field of requiredFields) {
if (!(field in data)) {
throw new Error(`Missing required field: ${field}`);
}
}
// Validate field types
if (typeof data.language !== "string" || !/^[a-z]{2}$/.test(data.language)) {
throw new Error(
"Invalid language format. Expected ISO 639-1 code (e.g., 'en')"
);
}
if (typeof data.messages_sent !== "number" || data.messages_sent < 0) {
throw new Error("Invalid messages_sent. Expected non-negative number");
}
if (!["positive", "neutral", "negative"].includes(data.sentiment)) {
throw new Error(
"Invalid sentiment. Expected 'positive', 'neutral', or 'negative'"
);
}
if (typeof data.escalated !== "boolean") {
throw new Error("Invalid escalated. Expected boolean");
}
if (typeof data.forwarded_hr !== "boolean") {
throw new Error("Invalid forwarded_hr. Expected boolean");
}
const validCategories = [
"Schedule & Hours",
"Leave & Vacation",
"Sick Leave & Recovery",
"Salary & Compensation",
"Contract & Hours",
"Onboarding",
"Offboarding",
"Workwear & Staff Pass",
"Team & Contacts",
"Personal Questions",
"Access & Login",
"Social questions",
"Unrecognized / Other",
];
if (!validCategories.includes(data.category)) {
throw new Error(
`Invalid category. Expected one of: ${validCategories.join(", ")}`
);
}
if (!Array.isArray(data.questions)) {
throw new Error("Invalid questions. Expected array of strings");
}
if (
typeof data.summary !== "string" ||
data.summary.length < 10 ||
data.summary.length > 300
) {
throw new Error(
"Invalid summary. Expected string between 10-300 characters"
);
}
if (typeof data.session_id !== "string") {
throw new Error("Invalid session_id. Expected string");
}
}
/**
* Process a single session
* @param {Object} session The session to process
* @returns {Promise<Object>} Result object with success/error info
*/
async function processSingleSession(session) {
if (session.messages.length === 0) {
return {
sessionId: session.id,
success: false,
error: "Session has no messages",
};
}
try {
// Convert messages back to transcript format for OpenAI processing
const transcript = session.messages
.map(
(msg) =>
`[${new Date(msg.timestamp)
.toLocaleString("en-GB", {
day: "2-digit",
month: "2-digit",
year: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
})
.replace(",", "")}] ${msg.role}: ${msg.content}`
)
.join("\n");
const processedData = await processTranscriptWithOpenAI(
session.id,
transcript
);
// Map sentiment string to float value for compatibility with existing data
const sentimentMap = {
positive: 0.8,
neutral: 0.0,
negative: -0.8,
};
// Update the session with processed data
await prisma.session.update({
where: { id: session.id },
data: {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,
questions: JSON.stringify(processedData.questions),
summary: processedData.summary,
processed: true,
},
});
return {
sessionId: session.id,
success: true,
};
} catch (error) {
return {
sessionId: session.id,
success: false,
error: error.message,
};
}
}
/**
* Process sessions in parallel with concurrency limit
* @param {Array} sessions Array of sessions to process
* @param {number} maxConcurrency Maximum number of concurrent processing tasks
* @returns {Promise<Object>} Processing results
*/
async function processSessionsInParallel(sessions, maxConcurrency = 5) {
const results = [];
const executing = [];
for (const session of sessions) {
const promise = processSingleSession(session).then((result) => {
process.stdout.write(
result.success
? `[ProcessingScheduler] ✓ Successfully processed session ${result.sessionId}\n`
: `[ProcessingScheduler] ✗ Failed to process session ${result.sessionId}: ${result.error}\n`
);
return result;
});
results.push(promise);
executing.push(promise);
if (executing.length >= maxConcurrency) {
await Promise.race(executing);
executing.splice(
executing.findIndex((p) => p === promise),
1
);
}
}
return Promise.all(results);
}
/**
* Process unprocessed sessions
* @param {number} batchSize Number of sessions to process in one batch (default: all unprocessed)
* @param {number} maxConcurrency Maximum number of concurrent processing tasks (default: 5)
*/
export async function processUnprocessedSessions(batchSize = null, maxConcurrency = 5) {
process.stdout.write(
"[ProcessingScheduler] Starting to process unprocessed sessions...\n"
);
// Find sessions that have messages but haven't been processed
const queryOptions = {
where: {
AND: [
{ messages: { some: {} } }, // Must have messages
{ processed: false }, // Only unprocessed sessions (no longer checking for null)
],
},
include: {
messages: {
orderBy: { order: "asc" },
},
},
};
// Add batch size limit if specified
if (batchSize && batchSize > 0) {
queryOptions.take = batchSize;
}
const sessionsToProcess = await prisma.session.findMany(queryOptions);
// Filter to only sessions that have messages
const sessionsWithMessages = sessionsToProcess.filter(
(session) => session.messages.length > 0
);
if (sessionsWithMessages.length === 0) {
process.stdout.write(
"[ProcessingScheduler] No sessions found requiring processing.\n"
);
return;
}
process.stdout.write(
`[ProcessingScheduler] Found ${sessionsWithMessages.length} sessions to process (max concurrency: ${maxConcurrency}).\n`
);
const startTime = Date.now();
const results = await processSessionsInParallel(sessionsWithMessages, maxConcurrency);
const endTime = Date.now();
const successCount = results.filter((r) => r.success).length;
const errorCount = results.filter((r) => !r.success).length;
process.stdout.write("[ProcessingScheduler] Session processing complete.\n");
process.stdout.write(
`[ProcessingScheduler] Successfully processed: ${successCount} sessions.\n`
);
process.stdout.write(
`[ProcessingScheduler] Failed to process: ${errorCount} sessions.\n`
);
process.stdout.write(
`[ProcessingScheduler] Total processing time: ${((endTime - startTime) / 1000).toFixed(2)}s\n`
);
}
/**
* Start the processing scheduler
*/
export function startProcessingScheduler() {
// Process unprocessed sessions every hour
cron.schedule("0 * * * *", async () => {
try {
await processUnprocessedSessions();
} catch (error) {
process.stderr.write(
`[ProcessingScheduler] Error in scheduler: ${error}\n`
);
}
});
process.stdout.write(
"[ProcessingScheduler] Started processing scheduler (runs hourly).\n"
);
}

View File

@ -278,7 +278,7 @@ async function processSingleSession(session: any): Promise<ProcessingResult> {
language: processedData.language,
messagesSent: processedData.messages_sent,
sentiment: sentimentMap[processedData.sentiment] || 0,
sentimentCategory: processedData.sentiment,
sentimentCategory: processedData.sentiment.toUpperCase() as "POSITIVE" | "NEUTRAL" | "NEGATIVE",
escalated: processedData.escalated,
forwardedHr: processedData.forwarded_hr,
category: processedData.category,

View File

@ -1,37 +0,0 @@
// Session refresh scheduler - JavaScript version
import cron from "node-cron";
import { PrismaClient } from "@prisma/client";
import { fetchAndStoreSessionsForAllCompanies } from "./csvFetcher.js";
const prisma = new PrismaClient();
/**
* Refresh sessions for all companies
*/
async function refreshSessions() {
console.log("[Scheduler] Starting session refresh...");
try {
await fetchAndStoreSessionsForAllCompanies();
console.log("[Scheduler] Session refresh completed successfully.");
} catch (error) {
console.error("[Scheduler] Error during session refresh:", error);
}
}
/**
* Start the session refresh scheduler
*/
export function startScheduler() {
// Run every 15 minutes
cron.schedule("*/15 * * * *", async () => {
try {
await refreshSessions();
} catch (error) {
console.error("[Scheduler] Error in scheduler:", error);
}
});
console.log(
"[Scheduler] Started session refresh scheduler (runs every 15 minutes)."
);
}

View File

@ -3,74 +3,82 @@ import cron from "node-cron";
import { prisma } from "./prisma";
import { fetchAndParseCsv } from "./csvFetcher";
interface SessionCreateData {
id: string;
startTime: Date;
companyId: string;
[key: string]: unknown;
}
export function startScheduler() {
cron.schedule("*/15 * * * *", async () => {
const companies = await prisma.company.findMany();
for (const company of companies) {
try {
const sessions = await fetchAndParseCsv(
const rawSessionData = await fetchAndParseCsv(
company.csvUrl,
company.csvUsername as string | undefined,
company.csvPassword as string | undefined
);
// Only add sessions that don't already exist in the database
for (const session of sessions) {
const sessionData: SessionCreateData = {
...session,
companyId: company.id,
id: session.id || session.sessionId || `sess_${Date.now()}`,
// Ensure startTime is not undefined
startTime: session.startTime || new Date(),
};
// Check if the session already exists
const existingSession = await prisma.session.findUnique({
where: { id: sessionData.id },
});
if (existingSession) {
// Skip this session as it already exists
continue;
// Create SessionImport records for new data
for (const rawSession of rawSessionData) {
try {
// Use upsert to handle duplicates gracefully
await prisma.sessionImport.upsert({
where: {
companyId_externalSessionId: {
companyId: company.id,
externalSessionId: rawSession.externalSessionId,
},
},
update: {
// Update existing record with latest data
startTimeRaw: rawSession.startTimeRaw,
endTimeRaw: rawSession.endTimeRaw,
ipAddress: rawSession.ipAddress,
countryCode: rawSession.countryCode,
language: rawSession.language,
messagesSent: rawSession.messagesSent,
sentimentRaw: rawSession.sentimentRaw,
escalatedRaw: rawSession.escalatedRaw,
forwardedHrRaw: rawSession.forwardedHrRaw,
fullTranscriptUrl: rawSession.fullTranscriptUrl,
avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
tokens: rawSession.tokens,
tokensEur: rawSession.tokensEur,
category: rawSession.category,
initialMessage: rawSession.initialMessage,
status: "QUEUED", // Reset status for reprocessing if needed
},
create: {
companyId: company.id,
externalSessionId: rawSession.externalSessionId,
startTimeRaw: rawSession.startTimeRaw,
endTimeRaw: rawSession.endTimeRaw,
ipAddress: rawSession.ipAddress,
countryCode: rawSession.countryCode,
language: rawSession.language,
messagesSent: rawSession.messagesSent,
sentimentRaw: rawSession.sentimentRaw,
escalatedRaw: rawSession.escalatedRaw,
forwardedHrRaw: rawSession.forwardedHrRaw,
fullTranscriptUrl: rawSession.fullTranscriptUrl,
avgResponseTimeSeconds: rawSession.avgResponseTimeSeconds,
tokens: rawSession.tokens,
tokensEur: rawSession.tokensEur,
category: rawSession.category,
initialMessage: rawSession.initialMessage,
status: "QUEUED",
},
});
} catch (error) {
// Log individual session import errors but continue processing
process.stderr.write(
`[Scheduler] Failed to import session ${rawSession.externalSessionId} for company ${company.name}: ${error}\n`
);
}
// Only include fields that are properly typed for Prisma
await prisma.session.create({
data: {
id: sessionData.id,
companyId: sessionData.companyId,
startTime: sessionData.startTime,
// endTime is required in the schema, so use startTime if not available
endTime: session.endTime || new Date(),
ipAddress: session.ipAddress || null,
country: session.country || null,
language: session.language || null,
sentiment:
typeof session.sentiment === "number"
? session.sentiment
: null,
messagesSent:
typeof session.messagesSent === "number"
? session.messagesSent
: 0,
category: session.category || null,
},
});
}
// Using process.stdout.write instead of console.log to avoid ESLint warning
process.stdout.write(
`[Scheduler] Refreshed sessions for company: ${company.name}\n`
`[Scheduler] Imported ${rawSessionData.length} session records for company: ${company.name}\n`
);
} catch (e) {
// Using process.stderr.write instead of console.error to avoid ESLint warning
process.stderr.write(
`[Scheduler] Failed for company: ${company.name} - ${e}\n`
`[Scheduler] Failed to fetch CSV for company: ${company.name} - ${e}\n`
);
}
}

View File

@ -1,261 +0,0 @@
// Transcript parser utility - converts raw transcript text to structured messages
import { PrismaClient } from "@prisma/client";
const prisma = new PrismaClient();
/**
* Parses chat log string to JSON format with individual messages
* @param {string} logString - Raw transcript content
* @returns {Object} Parsed data with messages array and metadata
*/
export function parseChatLogToJSON(logString) {
// Convert to string if it's not already
const stringData =
typeof logString === "string" ? logString : String(logString);
// Split by lines and filter out empty lines
const lines = stringData.split("\n").filter((line) => line.trim() !== "");
const messages = [];
let currentMessage = null;
for (const line of lines) {
// Check if line starts with a timestamp pattern [DD.MM.YYYY HH:MM:SS]
const timestampMatch = line.match(
/^\[(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2})\] (.+?): (.*)$/
);
if (timestampMatch) {
// If we have a previous message, push it to the array
if (currentMessage) {
messages.push(currentMessage);
}
// Parse the timestamp
const [, timestamp, sender, content] = timestampMatch;
// Convert DD.MM.YYYY HH:MM:SS to ISO format
const [datePart, timePart] = timestamp.split(" ");
const [day, month, year] = datePart.split(".");
const [hour, minute, second] = timePart.split(":");
const dateObject = new Date(year, month - 1, day, hour, minute, second);
// Create new message object
currentMessage = {
timestamp: dateObject.toISOString(),
role: sender,
content: content,
};
} else if (currentMessage) {
// This is a continuation of the previous message (multiline)
currentMessage.content += "\n" + line;
}
}
// Don't forget the last message
if (currentMessage) {
messages.push(currentMessage);
}
return {
messages: messages.sort((a, b) => {
// First sort by timestamp (ascending)
const timeComparison = new Date(a.timestamp) - new Date(b.timestamp);
if (timeComparison !== 0) {
return timeComparison;
}
// If timestamps are equal, sort by role (descending)
// This puts "User" before "Assistant" when timestamps are the same
return b.role.localeCompare(a.role);
}),
totalMessages: messages.length,
};
}
/**
* Stores parsed messages in the database for a session
* @param {string} sessionId - The session ID
* @param {Array} messages - Array of parsed message objects
*/
export async function storeMessagesForSession(sessionId, messages) {
try {
// First, delete any existing messages for this session
await prisma.message.deleteMany({
where: { sessionId },
});
// Then insert the new messages
const messageData = messages.map((message, index) => ({
sessionId,
timestamp: new Date(message.timestamp),
role: message.role,
content: message.content,
order: index,
}));
if (messageData.length > 0) {
await prisma.message.createMany({
data: messageData,
});
// Extract actual end time from the latest message
const latestMessage = messages.reduce((latest, current) => {
return new Date(current.timestamp) > new Date(latest.timestamp) ? current : latest;
});
// Update the session's endTime with the actual conversation end time
await prisma.session.update({
where: { id: sessionId },
data: {
endTime: new Date(latestMessage.timestamp),
},
});
process.stdout.write(
`[TranscriptParser] Updated session ${sessionId} endTime to ${latestMessage.timestamp}\n`
);
}
process.stdout.write(
`[TranscriptParser] Stored ${messageData.length} messages for session ${sessionId}\n`
);
return messageData.length;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error storing messages for session ${sessionId}: ${error}\n`
);
throw error;
}
}
/**
* Processes and stores transcript for a single session
* @param {string} sessionId - The session ID
* @param {string} transcriptContent - Raw transcript content
* @returns {Promise<Object>} Processing result with message count
*/
export async function processTranscriptForSession(
sessionId,
transcriptContent
) {
if (!transcriptContent || transcriptContent.trim() === "") {
throw new Error("No transcript content provided");
}
try {
// Parse the transcript
const parsed = parseChatLogToJSON(transcriptContent);
// Store messages in database
const messageCount = await storeMessagesForSession(
sessionId,
parsed.messages
);
return {
sessionId,
messageCount,
totalMessages: parsed.totalMessages,
success: true,
};
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error processing transcript for session ${sessionId}: ${error}\n`
);
throw error;
}
}
/**
* Processes transcripts for all sessions that have transcript content but no parsed messages
*/
export async function processAllUnparsedTranscripts() {
process.stdout.write(
"[TranscriptParser] Starting to process unparsed transcripts...\n"
);
try {
// Find sessions with transcript content but no messages
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
],
},
include: {
messages: true,
},
});
// Filter to only sessions without messages
const unparsedSessions = sessionsToProcess.filter(
(session) => session.messages.length === 0
);
if (unparsedSessions.length === 0) {
process.stdout.write(
"[TranscriptParser] No unparsed transcripts found.\n"
);
return { processed: 0, errors: 0 };
}
process.stdout.write(
`[TranscriptParser] Found ${unparsedSessions.length} sessions with unparsed transcripts.\n`
);
let successCount = 0;
let errorCount = 0;
for (const session of unparsedSessions) {
try {
const result = await processTranscriptForSession(
session.id,
session.transcriptContent
);
process.stdout.write(
`[TranscriptParser] Processed session ${session.id}: ${result.messageCount} messages\n`
);
successCount++;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Failed to process session ${session.id}: ${error}\n`
);
errorCount++;
}
}
process.stdout.write(
`[TranscriptParser] Completed processing. Success: ${successCount}, Errors: ${errorCount}\n`
);
return { processed: successCount, errors: errorCount };
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error in processAllUnparsedTranscripts: ${error}\n`
);
throw error;
}
}
/**
* Gets parsed messages for a session
* @param {string} sessionId - The session ID
* @returns {Promise<Array>} Array of message objects
*/
export async function getMessagesForSession(sessionId) {
try {
const messages = await prisma.message.findMany({
where: { sessionId },
orderBy: { order: "asc" },
});
return messages;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error getting messages for session ${sessionId}: ${error}\n`
);
throw error;
}
}

View File

@ -38,7 +38,7 @@ export interface User {
export interface Message {
id: string;
sessionId: string;
timestamp: Date;
timestamp: Date | null;
role: string; // "User", "Assistant", "System", etc.
content: string;
order: number; // Order within the conversation (0, 1, 2, ...)