Implement data integration tasks with Celery, including periodic fetching and manual refresh of chat data; add utility functions for data processing and transcript handling; create views and URLs for manual data refresh; establish Redis and Celery configuration; enhance error handling and logging; introduce scripts for data cleanup and fixing dashboard data; update documentation for Redis and Celery setup and troubleshooting.

This commit is contained in:
2025-05-18 13:33:11 +00:00
parent e8f2d2adc2
commit 8bbbb109bd
63 changed files with 4601 additions and 164 deletions

View File

@@ -0,0 +1,42 @@
# dashboard_project/scripts/cleanup_duplicates.py
# Standalone maintenance script: configures Django by hand (no manage.py),
# then exposes cleanup_duplicates() to remove duplicate ChatSession rows.
import os
import sys
# Add the project root to sys.path
# (the parent of this file's directory), presumably so that
# "dashboard_project.settings" and the "dashboard" app resolve when the
# script is launched directly.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
# setdefault() only fills in the value when DJANGO_SETTINGS_MODULE is not
# already present in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dashboard_project.settings")
import django # noqa: I001
# django.setup() has to run before the model import below, which is why the
# remaining imports sit after executable code (hence the noqa markers).
django.setup()
from dashboard.models import ChatSession # noqa: E402, I001
from django.db.models import Count # noqa: E402
def cleanup_duplicates():
    """Delete duplicate ChatSession rows, keeping one row per group.

    Two rows are duplicates when they share the same ``session_id`` and
    ``data_source``. Within each duplicate group the row with the smallest
    ``id`` is kept and every other row is deleted.

    Progress and the final total are printed to stdout; the function
    returns ``None``.
    """
    print("Looking for duplicate ChatSessions...")
    duplicates = (
        ChatSession.objects.values("session_id", "data_source")
        .annotate(count=Count("id"))
        .filter(count__gt=1)
    )
    # QuerySet.delete() returns (total, {model_label: count}). The total also
    # includes any related rows removed by cascade, so it can overstate how
    # many ChatSession duplicates were actually removed. Read the
    # ChatSession-specific count from the per-model breakdown instead.
    session_label = ChatSession._meta.label
    total_deleted = 0
    for dup in duplicates:
        session_id = dup["session_id"]
        data_source = dup["data_source"]
        # Get all ids for this duplicate group, ordered by id so that the
        # oldest primary key comes first.
        ids = list(
            ChatSession.objects.filter(session_id=session_id, data_source=data_source)
            .order_by("id")
            .values_list("id", flat=True)
        )
        # Keep the first, delete the rest
        to_delete = ids[1:]
        _, per_model = ChatSession.objects.filter(id__in=to_delete).delete()
        deleted = per_model.get(session_label, 0)
        total_deleted += deleted
        print(f"Removed {deleted} duplicates for session_id={session_id}, data_source={data_source}")
    print(f"Done. Total duplicates removed: {total_deleted}")
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    cleanup_duplicates()