import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";

const prisma = new PrismaClient();

/** A single message extracted from a raw transcript. */
type ParsedTranscriptMessage = {
  /** Parsed timestamp, or `null` when absent or not a valid date. */
  timestamp: Date | null;
  /** Speaker label taken from the line, or "unknown" for unlabelled text. */
  role: string;
  /** Message text, including any continuation lines joined with "\n". */
  content: string;
  /** Zero-based position of the message within the transcript. */
  order: number;
};

/**
 * Matches a transcript line of the form `[timestamp] role: content`.
 * Hoisted to module scope so it is not rebuilt for every line.
 */
const TRANSCRIPT_LINE_PATTERN = /^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/;

/**
 * Fetches transcript content from a URL with optional authentication.
 *
 * An HTTP Basic `Authorization` header is sent only when both `username`
 * and `password` are provided; otherwise the request carries no credentials.
 *
 * @param url - Location of the transcript.
 * @param username - Optional Basic-auth user name.
 * @param password - Optional Basic-auth password.
 * @returns The response body as text, or `null` when the response status is
 *   not OK or the request throws. Both cases are logged as warnings.
 */
async function fetchTranscriptContent(
  url: string,
  username?: string,
  password?: string
): Promise<string | null> {
  try {
    const authHeader =
      username && password
        ? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
        : undefined;

    const response = await fetch(url, {
      headers: authHeader ? { Authorization: authHeader } : {},
    });

    if (!response.ok) {
      console.warn(
        `Failed to fetch transcript from ${url}: ${response.statusText}`
      );
      return null;
    }

    return await response.text();
  } catch (error) {
    // Best-effort fetch: network failures are reported, not propagated.
    console.warn(`Error fetching transcript from ${url}:`, error);
    return null;
  }
}

/**
 * Parse transcript content into individual messages.
 *
 * Each non-blank line shaped like `[timestamp] role: content` starts a new
 * message. Any other non-blank line is appended to the previous message as a
 * continuation; if there is no previous message yet, it becomes a message
 * with role "unknown" and no timestamp.
 *
 * @param transcriptContent - Raw transcript text.
 * @returns Messages in transcript order; empty when the input has no
 *   non-blank lines.
 */
function parseTranscriptToMessages(
  transcriptContent: string
): ParsedTranscriptMessage[] {
  // Split on LF and CRLF alike. With a plain "\n" split, CRLF input leaves a
  // trailing "\r" on every line; "." does not match "\r", so the pattern
  // would never match and the whole transcript would collapse into a single
  // "unknown" message.
  const lines = transcriptContent
    .split(/\r?\n/)
    .filter((line) => line.trim());

  const messages: ParsedTranscriptMessage[] = [];

  for (const line of lines) {
    const match = line.match(TRANSCRIPT_LINE_PATTERN);

    if (match) {
      const [, timestampStr, role, content] = match;

      // `new Date(string)` never throws; unparseable input produces an
      // Invalid Date whose getTime() is NaN.
      const parsed = new Date(timestampStr);
      const timestamp = isNaN(parsed.getTime()) ? null : parsed;

      messages.push({
        timestamp,
        role: role.trim(),
        content: content.trim(),
        // One message is pushed per order value, so the next order always
        // equals the current message count.
        order: messages.length,
      });
      continue;
    }

    const previous = messages[messages.length - 1];
    if (previous) {
      // Line doesn't match the expected format: treat as content continuation.
      previous.content += "\n" + line;
    } else {
      // First line doesn't match the format: create a generic message.
      messages.push({
        timestamp: null,
        role: "unknown",
        content: line,
        order: messages.length,
      });
    }
  }

  return messages;
}

/**
 * Main function to fetch transcripts for sessions that don't have messages yet.
 *
 * Queries sessions that have a transcript URL and no messages, then handles
 * them one at a time: download the transcript (using the company's CSV
 * credentials when set), parse it, and bulk-insert the resulting messages.
 * A failure on one session is logged and counted without stopping the run;
 * a summary of successes and failures is logged at the end.
 */
async function fetchTranscriptsForSessions() {
  console.log("Starting to fetch transcripts for sessions without messages...");

  // Find sessions that have transcript URLs but no messages
  const sessionsNeedingTranscripts = await prisma.session.findMany({
    where: {
      AND: [
        { fullTranscriptUrl: { not: null } },
        { messages: { none: {} } }, // No messages yet
      ],
    },
    include: {
      company: true,
      messages: true,
    },
  });

  if (sessionsNeedingTranscripts.length === 0) {
    console.log("No sessions found that need transcript fetching.");
    return;
  }

  console.log(
    `Found ${sessionsNeedingTranscripts.length} sessions that need transcript fetching.`
  );

  let successCount = 0;
  let errorCount = 0;

  for (const session of sessionsNeedingTranscripts) {
    // The query already excludes null URLs; this guard narrows the nullable
    // field for the compiler and protects against an empty value.
    if (!session.fullTranscriptUrl) {
      console.warn(`Session ${session.id} has no transcript URL, skipping.`);
      continue;
    }

    console.log(`Fetching transcript for session ${session.id}...`);

    try {
      // Fetch transcript content
      const transcriptContent = await fetchTranscriptContent(
        session.fullTranscriptUrl,
        session.company.csvUsername || undefined,
        session.company.csvPassword || undefined
      );

      // A null result (fetch failed) and an empty body are both treated as
      // a failure for this session.
      if (!transcriptContent) {
        throw new Error("Failed to fetch transcript content");
      }

      // Parse transcript into messages
      const messages = parseTranscriptToMessages(transcriptContent);

      if (messages.length === 0) {
        throw new Error("No messages found in transcript");
      }

      // Create messages in database
      await prisma.message.createMany({
        data: messages.map((msg) => ({
          sessionId: session.id,
          timestamp: msg.timestamp,
          role: msg.role,
          content: msg.content,
          order: msg.order,
        })),
      });

      console.log(
        `Successfully fetched transcript for session ${session.id} (${messages.length} messages)`
      );
      successCount++;
    } catch (error) {
      console.error(
        `Error fetching transcript for session ${session.id}:`,
        error
      );
      errorCount++;
    }
  }

  console.log("Transcript fetching complete.");
  console.log(`Successfully fetched: ${successCount} transcripts.`);
  console.log(`Failed to fetch: ${errorCount} transcripts.`);
}

// Run the main function
fetchTranscriptsForSessions()
  .catch((e) => {
    console.error("An error occurred during the script execution:", e);
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
  });