Files
livedash-node/lib/transcriptParser.js
Max Kowalski 9e095e1a43 Refactor code for improved readability and consistency
- Updated formatting in SessionDetails component for better readability.
- Enhanced documentation in scheduler-fixes.md to clarify issues and solutions.
- Improved error handling and logging in csvFetcher.js and processingScheduler.js.
- Standardized code formatting across various scripts and components for consistency.
- Added validation checks for CSV URLs and transcript content to prevent processing errors.
- Enhanced logging messages for better tracking of processing status and errors.
2025-06-25 17:46:23 +02:00

245 lines
6.8 KiB
JavaScript

// Transcript parser utility - converts raw transcript text to structured messages
import { PrismaClient } from "@prisma/client";
const prisma = new PrismaClient();
/**
* Parses chat log string to JSON format with individual messages
* @param {string} logString - Raw transcript content
* @returns {Object} Parsed data with messages array and metadata
*/
export function parseChatLogToJSON(logString) {
// Convert to string if it's not already
const stringData =
typeof logString === "string" ? logString : String(logString);
// Split by lines and filter out empty lines
const lines = stringData.split("\n").filter((line) => line.trim() !== "");
const messages = [];
let currentMessage = null;
for (const line of lines) {
// Check if line starts with a timestamp pattern [DD.MM.YYYY HH:MM:SS]
const timestampMatch = line.match(
/^\[(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2})\] (.+?): (.*)$/
);
if (timestampMatch) {
// If we have a previous message, push it to the array
if (currentMessage) {
messages.push(currentMessage);
}
// Parse the timestamp
const [, timestamp, sender, content] = timestampMatch;
// Convert DD.MM.YYYY HH:MM:SS to ISO format
const [datePart, timePart] = timestamp.split(" ");
const [day, month, year] = datePart.split(".");
const [hour, minute, second] = timePart.split(":");
const dateObject = new Date(year, month - 1, day, hour, minute, second);
// Create new message object
currentMessage = {
timestamp: dateObject.toISOString(),
role: sender,
content: content,
};
} else if (currentMessage) {
// This is a continuation of the previous message (multiline)
currentMessage.content += "\n" + line;
}
}
// Don't forget the last message
if (currentMessage) {
messages.push(currentMessage);
}
return {
messages: messages.sort((a, b) => {
// First sort by timestamp (ascending)
const timeComparison = new Date(a.timestamp) - new Date(b.timestamp);
if (timeComparison !== 0) {
return timeComparison;
}
// If timestamps are equal, sort by role (descending)
// This puts "User" before "Assistant" when timestamps are the same
return b.role.localeCompare(a.role);
}),
totalMessages: messages.length,
};
}
/**
* Stores parsed messages in the database for a session
* @param {string} sessionId - The session ID
* @param {Array} messages - Array of parsed message objects
*/
export async function storeMessagesForSession(sessionId, messages) {
try {
// First, delete any existing messages for this session
await prisma.message.deleteMany({
where: { sessionId },
});
// Then insert the new messages
const messageData = messages.map((message, index) => ({
sessionId,
timestamp: new Date(message.timestamp),
role: message.role,
content: message.content,
order: index,
}));
if (messageData.length > 0) {
await prisma.message.createMany({
data: messageData,
});
}
process.stdout.write(
`[TranscriptParser] Stored ${messageData.length} messages for session ${sessionId}\n`
);
return messageData.length;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error storing messages for session ${sessionId}: ${error}\n`
);
throw error;
}
}
/**
* Processes and stores transcript for a single session
* @param {string} sessionId - The session ID
* @param {string} transcriptContent - Raw transcript content
* @returns {Promise<Object>} Processing result with message count
*/
export async function processTranscriptForSession(
sessionId,
transcriptContent
) {
if (!transcriptContent || transcriptContent.trim() === "") {
throw new Error("No transcript content provided");
}
try {
// Parse the transcript
const parsed = parseChatLogToJSON(transcriptContent);
// Store messages in database
const messageCount = await storeMessagesForSession(
sessionId,
parsed.messages
);
return {
sessionId,
messageCount,
totalMessages: parsed.totalMessages,
success: true,
};
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error processing transcript for session ${sessionId}: ${error}\n`
);
throw error;
}
}
/**
* Processes transcripts for all sessions that have transcript content but no parsed messages
*/
export async function processAllUnparsedTranscripts() {
process.stdout.write(
"[TranscriptParser] Starting to process unparsed transcripts...\n"
);
try {
// Find sessions with transcript content but no messages
const sessionsToProcess = await prisma.session.findMany({
where: {
AND: [
{ transcriptContent: { not: null } },
{ transcriptContent: { not: "" } },
],
},
include: {
messages: true,
},
});
// Filter to only sessions without messages
const unparsedSessions = sessionsToProcess.filter(
(session) => session.messages.length === 0
);
if (unparsedSessions.length === 0) {
process.stdout.write(
"[TranscriptParser] No unparsed transcripts found.\n"
);
return { processed: 0, errors: 0 };
}
process.stdout.write(
`[TranscriptParser] Found ${unparsedSessions.length} sessions with unparsed transcripts.\n`
);
let successCount = 0;
let errorCount = 0;
for (const session of unparsedSessions) {
try {
const result = await processTranscriptForSession(
session.id,
session.transcriptContent
);
process.stdout.write(
`[TranscriptParser] Processed session ${session.id}: ${result.messageCount} messages\n`
);
successCount++;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Failed to process session ${session.id}: ${error}\n`
);
errorCount++;
}
}
process.stdout.write(
`[TranscriptParser] Completed processing. Success: ${successCount}, Errors: ${errorCount}\n`
);
return { processed: successCount, errors: errorCount };
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error in processAllUnparsedTranscripts: ${error}\n`
);
throw error;
}
}
/**
* Gets parsed messages for a session
* @param {string} sessionId - The session ID
* @returns {Promise<Array>} Array of message objects
*/
export async function getMessagesForSession(sessionId) {
try {
const messages = await prisma.message.findMany({
where: { sessionId },
orderBy: { order: "asc" },
});
return messages;
} catch (error) {
process.stderr.write(
`[TranscriptParser] Error getting messages for session ${sessionId}: ${error}\n`
);
throw error;
}
}