mirror of
https://github.com/kjanat/livedash-node.git
synced 2026-01-16 16:12:10 +01:00
- Introduced a new function `fetchTranscriptContent` to handle fetching transcripts with optional authentication. - Enhanced error handling and logging for transcript fetching. - Updated the `parseTranscriptToMessages` function to improve message parsing logic. - Replaced the old session processing logic with a new approach that utilizes `SessionImport` records. - Removed obsolete scripts related to manual triggers and whitespace fixing. - Updated the server initialization to remove direct server handling, transitioning to a more modular approach. - Improved overall code structure and readability across various scripts.
187 lines
4.9 KiB
TypeScript
187 lines
4.9 KiB
TypeScript
import { PrismaClient } from "@prisma/client";
|
|
import fetch from "node-fetch";
|
|
|
|
const prisma = new PrismaClient();
|
|
|
|
/**
|
|
* Fetches transcript content from a URL with optional authentication
|
|
*/
|
|
async function fetchTranscriptContent(
|
|
url: string,
|
|
username?: string,
|
|
password?: string
|
|
): Promise<string | null> {
|
|
try {
|
|
const authHeader =
|
|
username && password
|
|
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
|
|
: undefined;
|
|
|
|
const response = await fetch(url, {
|
|
headers: authHeader ? { Authorization: authHeader } : {},
|
|
});
|
|
|
|
if (!response.ok) {
|
|
console.warn(`Failed to fetch transcript from ${url}: ${response.statusText}`);
|
|
return null;
|
|
}
|
|
|
|
return await response.text();
|
|
} catch (error) {
|
|
console.warn(`Error fetching transcript from ${url}:`, error);
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse transcript content into individual messages
|
|
*/
|
|
function parseTranscriptToMessages(transcriptContent: string): Array<{
|
|
timestamp: Date | null;
|
|
role: string;
|
|
content: string;
|
|
order: number;
|
|
}> {
|
|
const lines = transcriptContent.split('\n').filter(line => line.trim());
|
|
const messages: Array<{
|
|
timestamp: Date | null;
|
|
role: string;
|
|
content: string;
|
|
order: number;
|
|
}> = [];
|
|
|
|
let order = 0;
|
|
|
|
for (const line of lines) {
|
|
// Try to parse lines in format: [timestamp] role: content
|
|
const match = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
|
|
|
|
if (match) {
|
|
const [, timestampStr, role, content] = match;
|
|
|
|
// Try to parse the timestamp
|
|
let timestamp: Date | null = null;
|
|
try {
|
|
timestamp = new Date(timestampStr);
|
|
if (isNaN(timestamp.getTime())) {
|
|
timestamp = null;
|
|
}
|
|
} catch {
|
|
timestamp = null;
|
|
}
|
|
|
|
messages.push({
|
|
timestamp,
|
|
role: role.trim(),
|
|
content: content.trim(),
|
|
order: order++,
|
|
});
|
|
} else {
|
|
// If line doesn't match expected format, treat as content continuation
|
|
if (messages.length > 0) {
|
|
messages[messages.length - 1].content += '\n' + line;
|
|
} else {
|
|
// First line doesn't match format, create a generic message
|
|
messages.push({
|
|
timestamp: null,
|
|
role: 'unknown',
|
|
content: line,
|
|
order: order++,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
return messages;
|
|
}
|
|
|
|
/**
|
|
* Main function to fetch transcripts for sessions that don't have messages yet
|
|
*/
|
|
async function fetchTranscriptsForSessions() {
|
|
console.log("Starting to fetch transcripts for sessions without messages...");
|
|
|
|
// Find sessions that have transcript URLs but no messages
|
|
const sessionsNeedingTranscripts = await prisma.session.findMany({
|
|
where: {
|
|
AND: [
|
|
{ fullTranscriptUrl: { not: null } },
|
|
{ messages: { none: {} } }, // No messages yet
|
|
],
|
|
},
|
|
include: {
|
|
company: true,
|
|
messages: true,
|
|
},
|
|
});
|
|
|
|
if (sessionsNeedingTranscripts.length === 0) {
|
|
console.log("No sessions found that need transcript fetching.");
|
|
return;
|
|
}
|
|
|
|
console.log(`Found ${sessionsNeedingTranscripts.length} sessions that need transcript fetching.`);
|
|
let successCount = 0;
|
|
let errorCount = 0;
|
|
|
|
for (const session of sessionsNeedingTranscripts) {
|
|
if (!session.fullTranscriptUrl) {
|
|
console.warn(`Session ${session.id} has no transcript URL, skipping.`);
|
|
continue;
|
|
}
|
|
|
|
console.log(`Fetching transcript for session ${session.id}...`);
|
|
|
|
try {
|
|
// Fetch transcript content
|
|
const transcriptContent = await fetchTranscriptContent(
|
|
session.fullTranscriptUrl,
|
|
session.company.csvUsername || undefined,
|
|
session.company.csvPassword || undefined
|
|
);
|
|
|
|
if (!transcriptContent) {
|
|
throw new Error("Failed to fetch transcript content");
|
|
}
|
|
|
|
// Parse transcript into messages
|
|
const messages = parseTranscriptToMessages(transcriptContent);
|
|
|
|
if (messages.length === 0) {
|
|
throw new Error("No messages found in transcript");
|
|
}
|
|
|
|
// Create messages in database
|
|
await prisma.message.createMany({
|
|
data: messages.map(msg => ({
|
|
sessionId: session.id,
|
|
timestamp: msg.timestamp,
|
|
role: msg.role,
|
|
content: msg.content,
|
|
order: msg.order,
|
|
})),
|
|
});
|
|
|
|
console.log(`Successfully fetched transcript for session ${session.id} (${messages.length} messages)`);
|
|
successCount++;
|
|
} catch (error) {
|
|
console.error(`Error fetching transcript for session ${session.id}:`, error);
|
|
errorCount++;
|
|
}
|
|
}
|
|
|
|
console.log("Transcript fetching complete.");
|
|
console.log(`Successfully fetched: ${successCount} transcripts.`);
|
|
console.log(`Failed to fetch: ${errorCount} transcripts.`);
|
|
}
|
|
|
|
// Run the main function
|
|
fetchTranscriptsForSessions()
|
|
.catch((e) => {
|
|
console.error("An error occurred during the script execution:", e);
|
|
process.exitCode = 1;
|
|
})
|
|
.finally(async () => {
|
|
await prisma.$disconnect();
|
|
});
|