Commit 6b19cbcb51 by Kaj Kowalski, 2025-05-17 20:18:21 +02:00
Add configuration and scripts for linting, testing, and dependency management

- Introduced .pre-commit-config.yaml for pre-commit hooks using uv-pre-commit.
- Created lint.sh script to run Ruff and Black for linting and formatting.
- Added test.sh script to execute tests with coverage reporting.
- Configured .uv file for uv settings, including lockfile management and dependency resolution.
- Updated Makefile with targets for virtual environment setup, dependency installation, linting, testing, formatting, and database migrations.
- Established requirements.txt with main and development dependencies for the project.


# dashboard/utils.py
import contextlib

import numpy as np
import pandas as pd
from django.db import models
from django.db.models.functions import TruncDate
from django.utils.timezone import make_aware

from .models import ChatSession

def process_csv_file(data_source):
    """
    Process the uploaded CSV file and create ChatSession objects

    Args:
        data_source: DataSource model instance containing the CSV file
    """
    try:
        # Read the CSV file
        file_path = data_source.file.path
        df = pd.read_csv(file_path)

        # Process each row and create ChatSession objects
        for _, row in df.iterrows():
            # Handle datetime fields
            start_time = None
            end_time = None
            if "start_time" in row and pd.notna(row["start_time"]):
                with contextlib.suppress(Exception):
                    start_time = make_aware(pd.to_datetime(row["start_time"]))
            if "end_time" in row and pd.notna(row["end_time"]):
                with contextlib.suppress(Exception):
                    end_time = make_aware(pd.to_datetime(row["end_time"]))

            # Convert boolean fields
            escalated = str(row.get("escalated", "")).lower() in [
                "true",
                "yes",
                "1",
                "t",
                "y",
            ]
            forwarded_hr = str(row.get("forwarded_hr", "")).lower() in [
                "true",
                "yes",
                "1",
                "t",
                "y",
            ]

            # Create ChatSession object
            session = ChatSession(
                data_source=data_source,
                session_id=str(row.get("session_id", "")),
                start_time=start_time,
                end_time=end_time,
                ip_address=row.get("ip_address") if pd.notna(row.get("ip_address", np.nan)) else None,
                country=str(row.get("country", "")),
                language=str(row.get("language", "")),
                messages_sent=int(row.get("messages_sent", 0)) if pd.notna(row.get("messages_sent", np.nan)) else 0,
                sentiment=str(row.get("sentiment", "")),
                escalated=escalated,
                forwarded_hr=forwarded_hr,
                full_transcript=str(row.get("full_transcript", "")),
                avg_response_time=float(row.get("avg_response_time", 0))
                if pd.notna(row.get("avg_response_time", np.nan))
                else None,
                tokens=int(row.get("tokens", 0)) if pd.notna(row.get("tokens", np.nan)) else 0,
                tokens_eur=float(row.get("tokens_eur", 0)) if pd.notna(row.get("tokens_eur", np.nan)) else None,
                category=str(row.get("category", "")),
                initial_msg=str(row.get("initial_msg", "")),
                user_rating=str(row.get("user_rating", "")),
            )
            session.save()

        return True, f"Successfully processed {len(df)} records."
    except Exception as e:
        return False, f"Error processing CSV file: {str(e)}"

def generate_dashboard_data(data_sources):
    """
    Generate aggregated data for dashboard visualization

    Args:
        data_sources: QuerySet of DataSource objects

    Returns:
        dict: Dictionary containing aggregated data for various charts
    """
    # Get all chat sessions for the selected data sources
    chat_sessions = ChatSession.objects.filter(data_source__in=data_sources)
    if not chat_sessions.exists():
        return {
            "total_sessions": 0,
            "avg_response_time": 0,
            "total_tokens": 0,
            "total_cost": 0,
            "sentiment_data": [],
            "country_data": [],
            "category_data": [],
            "time_series_data": [],
        }

    # Basic statistics
    total_sessions = chat_sessions.count()
    avg_response_time = (
        chat_sessions.filter(avg_response_time__isnull=False).aggregate(avg=models.Avg("avg_response_time"))["avg"]
        or 0
    )
    total_tokens = chat_sessions.aggregate(sum=models.Sum("tokens"))["sum"] or 0
    total_cost = chat_sessions.filter(tokens_eur__isnull=False).aggregate(sum=models.Sum("tokens_eur"))["sum"] or 0

    # Sentiment distribution
    sentiment_data = (
        chat_sessions.exclude(sentiment="").values("sentiment").annotate(count=models.Count("id")).order_by("-count")
    )

    # Country distribution
    country_data = (
        chat_sessions.exclude(country="")
        .values("country")
        .annotate(count=models.Count("id"))
        .order_by("-count")[:10]  # Top 10 countries
    )

    # Category distribution
    category_data = (
        chat_sessions.exclude(category="").values("category").annotate(count=models.Count("id")).order_by("-count")
    )

    # Time series data (sessions per day)
    time_series_query = (
        chat_sessions.filter(start_time__isnull=False)
        .annotate(date=TruncDate("start_time"))
        .values("date")
        .annotate(count=models.Count("id"))
        .order_by("date")
    )
    time_series_data = [
        {"date": entry["date"].strftime("%Y-%m-%d"), "count": entry["count"]} for entry in time_series_query
    ]

    return {
        "total_sessions": total_sessions,
        "avg_response_time": round(avg_response_time, 2),
        "total_tokens": total_tokens,
        "total_cost": round(total_cost, 2),
        "sentiment_data": list(sentiment_data),
        "country_data": list(country_data),
        "category_data": list(category_data),
        "time_series_data": time_series_data,
    }
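
# Example (hypothetical sketch): a dashboard view could pass the selected data
# sources to generate_dashboard_data and return the aggregates as JSON for the
# charts. The view name, the "owner" filter, and the URL wiring are assumptions
# for illustration only.
#
#     from django.http import JsonResponse
#     from .models import DataSource
#
#     def dashboard_data_view(request):
#         sources = DataSource.objects.filter(owner=request.user)
#         return JsonResponse(generate_dashboard_data(sources))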