Files
livedash-node/scripts/fetch_transcripts.ts
Kaj Kowalski 7f48a085bf feat: comprehensive security and architecture improvements
- Add Zod validation schemas with strong password requirements (12+ chars, complexity)
- Implement rate limiting for authentication endpoints (registration, password reset)
- Remove duplicate MetricCard component, consolidate to ui/metric-card.tsx
- Update README.md to use pnpm commands consistently
- Enhance authentication security with 12-round bcrypt hashing
- Add comprehensive input validation for all API endpoints
- Fix security vulnerabilities in user registration and password reset flows

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-28 01:52:53 +02:00

196 lines
5.0 KiB
TypeScript

import { PrismaClient } from "@prisma/client";
import fetch from "node-fetch";
const prisma = new PrismaClient();
/**
* Fetches transcript content from a URL with optional authentication
*/
async function fetchTranscriptContent(
url: string,
username?: string,
password?: string
): Promise<string | null> {
try {
const authHeader =
username && password
? "Basic " + Buffer.from(`${username}:${password}`).toString("base64")
: undefined;
const response = await fetch(url, {
headers: authHeader ? { Authorization: authHeader } : {},
});
if (!response.ok) {
console.warn(
`Failed to fetch transcript from ${url}: ${response.statusText}`
);
return null;
}
return await response.text();
} catch (error) {
console.warn(`Error fetching transcript from ${url}:`, error);
return null;
}
}
/**
* Parse transcript content into individual messages
*/
function parseTranscriptToMessages(transcriptContent: string): Array<{
timestamp: Date | null;
role: string;
content: string;
order: number;
}> {
const lines = transcriptContent.split("\n").filter((line) => line.trim());
const messages: Array<{
timestamp: Date | null;
role: string;
content: string;
order: number;
}> = [];
let order = 0;
for (const line of lines) {
// Try to parse lines in format: [timestamp] role: content
const match = line.match(/^\[([^\]]+)\]\s*([^:]+):\s*(.+)$/);
if (match) {
const [, timestampStr, role, content] = match;
// Try to parse the timestamp
let timestamp: Date | null = null;
try {
timestamp = new Date(timestampStr);
if (isNaN(timestamp.getTime())) {
timestamp = null;
}
} catch {
timestamp = null;
}
messages.push({
timestamp,
role: role.trim(),
content: content.trim(),
order: order++,
});
} else {
// If line doesn't match expected format, treat as content continuation
if (messages.length > 0) {
messages[messages.length - 1].content += "\n" + line;
} else {
// First line doesn't match format, create a generic message
messages.push({
timestamp: null,
role: "unknown",
content: line,
order: order++,
});
}
}
}
return messages;
}
/**
* Main function to fetch transcripts for sessions that don't have messages yet
*/
async function fetchTranscriptsForSessions() {
console.log("Starting to fetch transcripts for sessions without messages...");
// Find sessions that have transcript URLs but no messages
const sessionsNeedingTranscripts = await prisma.session.findMany({
where: {
AND: [
{ fullTranscriptUrl: { not: null } },
{ messages: { none: {} } }, // No messages yet
],
},
include: {
company: true,
messages: true,
},
});
if (sessionsNeedingTranscripts.length === 0) {
console.log("No sessions found that need transcript fetching.");
return;
}
console.log(
`Found ${sessionsNeedingTranscripts.length} sessions that need transcript fetching.`
);
let successCount = 0;
let errorCount = 0;
for (const session of sessionsNeedingTranscripts) {
if (!session.fullTranscriptUrl) {
console.warn(`Session ${session.id} has no transcript URL, skipping.`);
continue;
}
console.log(`Fetching transcript for session ${session.id}...`);
try {
// Fetch transcript content
const transcriptContent = await fetchTranscriptContent(
session.fullTranscriptUrl,
session.company.csvUsername || undefined,
session.company.csvPassword || undefined
);
if (!transcriptContent) {
throw new Error("Failed to fetch transcript content");
}
// Parse transcript into messages
const messages = parseTranscriptToMessages(transcriptContent);
if (messages.length === 0) {
throw new Error("No messages found in transcript");
}
// Create messages in database
await prisma.message.createMany({
data: messages.map((msg) => ({
sessionId: session.id,
timestamp: msg.timestamp,
role: msg.role,
content: msg.content,
order: msg.order,
})),
});
console.log(
`Successfully fetched transcript for session ${session.id} (${messages.length} messages)`
);
successCount++;
} catch (error) {
console.error(
`Error fetching transcript for session ${session.id}:`,
error
);
errorCount++;
}
}
console.log("Transcript fetching complete.");
console.log(`Successfully fetched: ${successCount} transcripts.`);
console.log(`Failed to fetch: ${errorCount} transcripts.`);
}
// Run the main function
fetchTranscriptsForSessions()
.catch((e) => {
console.error("An error occurred during the script execution:", e);
process.exitCode = 1;
})
.finally(async () => {
await prisma.$disconnect();
});