# dashboard/utils.py
import contextlib

import numpy as np
import pandas as pd
from django.db import models
from django.db.models.functions import TruncDate
from django.utils.timezone import make_aware

from .models import ChatSession


def process_csv_file(data_source):
    """
    Process the uploaded CSV file and create ChatSession objects.

    Args:
        data_source: DataSource model instance containing the CSV file

    Returns:
        tuple of (success: bool, message: str)
    """
    try:
        # Read the CSV file from disk
        file_path = data_source.file.path
        df = pd.read_csv(file_path)

        # Process each row and create ChatSession objects
        for _, row in df.iterrows():
            # Parse datetime fields; leave them as None if parsing fails
            start_time = None
            end_time = None
            if "start_time" in row and pd.notna(row["start_time"]):
                with contextlib.suppress(Exception):
                    start_time = make_aware(pd.to_datetime(row["start_time"]))

            if "end_time" in row and pd.notna(row["end_time"]):
                with contextlib.suppress(Exception):
                    end_time = make_aware(pd.to_datetime(row["end_time"]))

            # Convert boolean-like CSV values to real booleans
            escalated = str(row.get("escalated", "")).lower() in ("true", "yes", "1", "t", "y")
            forwarded_hr = str(row.get("forwarded_hr", "")).lower() in ("true", "yes", "1", "t", "y")

            # Create ChatSession object
            session = ChatSession(
                data_source=data_source,
                session_id=str(row.get("session_id", "")),
                start_time=start_time,
                end_time=end_time,
                ip_address=row.get("ip_address") if pd.notna(row.get("ip_address", np.nan)) else None,
                country=str(row.get("country", "")),
                language=str(row.get("language", "")),
                messages_sent=int(row.get("messages_sent", 0)) if pd.notna(row.get("messages_sent", np.nan)) else 0,
                sentiment=str(row.get("sentiment", "")),
                escalated=escalated,
                forwarded_hr=forwarded_hr,
                full_transcript=str(row.get("full_transcript", "")),
                avg_response_time=float(row.get("avg_response_time", 0))
                if pd.notna(row.get("avg_response_time", np.nan))
                else None,
                tokens=int(row.get("tokens", 0)) if pd.notna(row.get("tokens", np.nan)) else 0,
                tokens_eur=float(row.get("tokens_eur", 0)) if pd.notna(row.get("tokens_eur", np.nan)) else None,
                category=str(row.get("category", "")),
                initial_msg=str(row.get("initial_msg", "")),
                user_rating=str(row.get("user_rating", "")),
            )
            session.save()

        return True, f"Successfully processed {len(df)} records."

    except Exception as e:
        return False, f"Error processing CSV file: {e}"
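

# Usage sketch (illustrative only, assuming a DataSource model with a
# FileField named `file`, per the docstring above — not confirmed API):
#
#     from dashboard.models import DataSource  # hypothetical import path
#
#     source = DataSource.objects.get(pk=1)
#     ok, message = process_csv_file(source)
#     if not ok:
#         print(message)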


def generate_dashboard_data(data_sources):
    """
    Generate aggregated data for dashboard visualization.

    Args:
        data_sources: QuerySet of DataSource objects

    Returns:
        dict: Dictionary containing aggregated data for various charts
    """
    # Get all chat sessions for the selected data sources
    chat_sessions = ChatSession.objects.filter(data_source__in=data_sources)

    if not chat_sessions.exists():
        return {
            "total_sessions": 0,
            "avg_response_time": 0,
            "total_tokens": 0,
            "total_cost": 0,
            "sentiment_data": [],
            "country_data": [],
            "category_data": [],
            "time_series_data": [],
        }

    # Basic statistics
    total_sessions = chat_sessions.count()
    avg_response_time = (
        chat_sessions.filter(avg_response_time__isnull=False).aggregate(avg=models.Avg("avg_response_time"))["avg"]
        or 0
    )
    total_tokens = chat_sessions.aggregate(sum=models.Sum("tokens"))["sum"] or 0
    total_cost = chat_sessions.filter(tokens_eur__isnull=False).aggregate(sum=models.Sum("tokens_eur"))["sum"] or 0

    # Sentiment distribution
    sentiment_data = (
        chat_sessions.exclude(sentiment="").values("sentiment").annotate(count=models.Count("id")).order_by("-count")
    )

    # Country distribution (top 10 countries by session count)
    country_data = (
        chat_sessions.exclude(country="")
        .values("country")
        .annotate(count=models.Count("id"))
        .order_by("-count")[:10]
    )

    # Category distribution
    category_data = (
        chat_sessions.exclude(category="").values("category").annotate(count=models.Count("id")).order_by("-count")
    )

    # Time series data (sessions per day)
    time_series_query = (
        chat_sessions.filter(start_time__isnull=False)
        .annotate(date=TruncDate("start_time"))
        .values("date")
        .annotate(count=models.Count("id"))
        .order_by("date")
    )

    time_series_data = [
        {"date": entry["date"].strftime("%Y-%m-%d"), "count": entry["count"]} for entry in time_series_query
    ]

    return {
        "total_sessions": total_sessions,
        "avg_response_time": round(avg_response_time, 2),
        "total_tokens": total_tokens,
        "total_cost": round(total_cost, 2),
        "sentiment_data": list(sentiment_data),
        "country_data": list(country_data),
        "category_data": list(category_data),
        "time_series_data": time_series_data,
    }
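

# Usage sketch (illustrative only): in a dashboard view the returned dict can
# be passed to a template or serialized as JSON. The DataSource queryset and
# template path below are assumptions, not confirmed parts of this project:
#
#     def dashboard(request):
#         sources = DataSource.objects.filter(owner=request.user)  # hypothetical
#         context = generate_dashboard_data(sources)
#         return render(request, "dashboard/index.html", context)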