mirror of
https://github.com/kjanat/livegraphs-django.git
synced 2026-01-16 08:42:07 +01:00
Initial commit
This commit is contained in:
161
dashboard_project/dashboard/utils.py
Normal file
161
dashboard_project/dashboard/utils.py
Normal file
@ -0,0 +1,161 @@
|
||||
# dashboard/utils.py
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from django.db import models
|
||||
from django.utils.timezone import make_aware
|
||||
|
||||
from .models import ChatSession
|
||||
|
||||
|
||||
# Strings (lower-cased) that a CSV cell may contain to mean "True".
_TRUTHY_VALUES = frozenset({"true", "yes", "1", "t", "y"})


def _parse_bool(value):
    """Interpret a CSV cell as a boolean: case-insensitive truthy strings only."""
    return str(value).lower() in _TRUTHY_VALUES


def _parse_datetime(value):
    """Parse a CSV cell into an aware datetime, or None when missing/unparseable."""
    if value is None or pd.isna(value):
        return None
    try:
        ts = pd.to_datetime(value)
        # make_aware() raises on an already-aware datetime, so only convert naive ones.
        return make_aware(ts) if ts.tzinfo is None else ts
    except Exception:
        # Best-effort import: a malformed timestamp becomes None rather than
        # aborting the whole file (matches the original silent-skip contract).
        return None


def _parse_number(value, cast, default):
    """Cast a CSV cell with `cast` (int/float); return `default` when missing/NaN."""
    if value is None or pd.isna(value):
        return default
    return cast(value)


def _clean_value(value):
    """Return the raw cell value, mapping missing/NaN to None."""
    return value if value is not None and pd.notna(value) else None


def process_csv_file(data_source):
    """
    Process the uploaded CSV file and create ChatSession objects.

    Args:
        data_source: DataSource model instance containing the CSV file

    Returns:
        tuple: (success: bool, message: str) — on failure the message carries
        the exception text; no partial-row cleanup is attempted.
    """
    try:
        # Read the CSV file referenced by the DataSource's FileField.
        df = pd.read_csv(data_source.file.path)

        # Create one ChatSession per CSV row. Per-row save() is kept (rather
        # than bulk_create) so model signals / overridden save() still fire.
        for _, row in df.iterrows():
            session = ChatSession(
                data_source=data_source,
                session_id=str(row.get("session_id", "")),
                start_time=_parse_datetime(row.get("start_time")),
                end_time=_parse_datetime(row.get("end_time")),
                ip_address=_clean_value(row.get("ip_address")),
                country=str(row.get("country", "")),
                language=str(row.get("language", "")),
                messages_sent=_parse_number(row.get("messages_sent"), int, default=0),
                sentiment=str(row.get("sentiment", "")),
                escalated=_parse_bool(row.get("escalated", "")),
                forwarded_hr=_parse_bool(row.get("forwarded_hr", "")),
                full_transcript=str(row.get("full_transcript", "")),
                avg_response_time=_parse_number(row.get("avg_response_time"), float, default=None),
                tokens=_parse_number(row.get("tokens"), int, default=0),
                tokens_eur=_parse_number(row.get("tokens_eur"), float, default=None),
                category=str(row.get("category", "")),
                initial_msg=str(row.get("initial_msg", "")),
                user_rating=str(row.get("user_rating", "")),
            )
            session.save()

        return True, f"Successfully processed {len(df)} records."

    except Exception as e:
        # Top-level boundary: report the failure to the caller as a message.
        return False, f"Error processing CSV file: {str(e)}"
|
||||
|
||||
|
||||
def generate_dashboard_data(data_sources):
    """
    Build the aggregated statistics that feed the dashboard charts.

    Args:
        data_sources: QuerySet of DataSource objects

    Returns:
        dict: Dictionary containing aggregated data for various charts
    """
    sessions = ChatSession.objects.filter(data_source__in=data_sources)

    # No sessions at all: hand back an all-zero/empty payload.
    if not sessions.exists():
        return {
            "total_sessions": 0,
            "avg_response_time": 0,
            "total_tokens": 0,
            "total_cost": 0,
            "sentiment_data": [],
            "country_data": [],
            "category_data": [],
            "time_series_data": [],
        }

    # Headline numbers; each aggregate falls back to 0 when NULL.
    session_count = sessions.count()
    mean_response = (
        sessions.filter(avg_response_time__isnull=False).aggregate(avg=models.Avg("avg_response_time"))["avg"] or 0
    )
    token_total = sessions.aggregate(sum=models.Sum("tokens"))["sum"] or 0
    cost_total = sessions.filter(tokens_eur__isnull=False).aggregate(sum=models.Sum("tokens_eur"))["sum"] or 0

    def counted(field):
        # Frequency of non-empty values for `field`, most common first.
        return sessions.exclude(**{field: ""}).values(field).annotate(count=models.Count("id")).order_by("-count")

    sentiments = counted("sentiment")
    countries = counted("country")[:10]  # Top 10 countries
    categories = counted("category")

    # Sessions per day, ordered chronologically.
    per_day = (
        sessions.filter(start_time__isnull=False)
        .annotate(date=models.functions.TruncDate("start_time"))
        .values("date")
        .annotate(count=models.Count("id"))
        .order_by("date")
    )
    daily_counts = [{"date": row["date"].strftime("%Y-%m-%d"), "count": row["count"]} for row in per_day]

    return {
        "total_sessions": session_count,
        "avg_response_time": round(mean_response, 2),
        "total_tokens": token_total,
        "total_cost": round(cost_total, 2),
        "sentiment_data": list(sentiments),
        "country_data": list(countries),
        "category_data": list(categories),
        "time_series_data": daily_counts,
    }
|
||||
Reference in New Issue
Block a user