mirror of
https://github.com/kjanat/livegraphs-django.git
synced 2026-01-16 08:42:07 +01:00
Implement data integration tasks with Celery, including periodic fetching and manual refresh of chat data; add utility functions for data processing and transcript handling; create views and URLs for manual data refresh; establish Redis and Celery configuration; enhance error handling and logging; introduce scripts for data cleanup and fixing dashboard data; update documentation for Redis and Celery setup and troubleshooting.
This commit is contained in:
0
dashboard_project/data_integration/__init__.py
Normal file
0
dashboard_project/data_integration/__init__.py
Normal file
125
dashboard_project/data_integration/admin.py
Normal file
125
dashboard_project/data_integration/admin.py
Normal file
@ -0,0 +1,125 @@
|
||||
from django.contrib import admin
|
||||
from django.utils.html import format_html
|
||||
|
||||
from .models import ChatMessage, ChatSession, ExternalDataSource
|
||||
from .tasks import refresh_specific_source
|
||||
|
||||
|
||||
@admin.register(ExternalDataSource)
class ExternalDataSourceAdmin(admin.ModelAdmin):
    """Admin for external chat-data sources.

    Adds a colored status badge, a per-row "Refresh Now" button, and a custom
    admin URL that queues the Celery refresh task for a single source.
    """

    list_display = (
        "name",
        "api_url",
        "is_active",
        "last_synced",
        "status_badge",
        "sync_interval",
        "refresh_action",
    )
    list_filter = ("is_active",)
    search_fields = ("name", "api_url")
    readonly_fields = ("last_synced", "error_count", "last_error")
    fieldsets = (
        (None, {"fields": ("name", "api_url", "is_active")}),
        (
            "Authentication",
            {
                "fields": ("auth_username", "auth_password"),
                "description": "Credentials can also be provided via environment variables.",
            },
        ),
        ("Sync Settings", {"fields": ("sync_interval", "timeout")}),
        ("Status", {"fields": ("last_synced", "error_count", "last_error")}),
    )

    @admin.display(description="Status")
    def status_badge(self, obj):
        """Display a colored status badge for the source's current status."""
        status = obj.get_status()
        # Map each status to a badge color; anything else (e.g. "Never synced")
        # falls back to orange. format_html escapes both arguments.
        if status == "Active":
            color = "green"
        elif status == "Inactive":
            color = "gray"
        elif "Error" in status:
            color = "red"
        else:
            color = "orange"
        return format_html(
            '<span style="color: white; background-color: {}; padding: 3px 8px; border-radius: 10px;">{}</span>',
            color,
            status,
        )

    @admin.display(description="Actions")
    def refresh_action(self, obj):
        """Button to manually refresh a data source."""
        if obj.is_active:
            url = f"/admin/data_integration/externaldatasource/refresh/{obj.id}/"
            return format_html('<a class="button" href="{}">Refresh Now</a>', url)
        return "Inactive"

    def refresh_source(self, request, source_id):
        """Queue the refresh task for one source and redirect to the changelist.

        Bug fix: a Django view must return an HttpResponse. The original
        returned None, so hitting the refresh URL raised
        "The view ... didn't return an HttpResponse object".
        """
        from django.http import HttpResponseRedirect
        from django.urls import reverse

        task = refresh_specific_source.delay(source_id)
        self.message_user(request, f"Data refresh task started (Task ID: {task.id})")
        return HttpResponseRedirect(reverse("admin:data_integration_externaldatasource_changelist"))

    def get_urls(self):
        """Register the custom refresh/<id>/ URL ahead of the default admin URLs."""
        from django.urls import path

        urls = super().get_urls()
        custom_urls = [
            path(
                "refresh/<int:source_id>/",
                self.admin_site.admin_view(self.refresh_source),
                name="data_integration_externaldatasource_refresh",
            ),
        ]
        return custom_urls + urls
|
||||
|
||||
|
||||
@admin.register(ChatSession)
class ChatSessionAdmin(admin.ModelAdmin):
    """Read-oriented admin for imported chat sessions."""

    # Columns shown on the changelist page.
    list_display = (
        "session_id",
        "start_time",
        "end_time",
        "country",
        "language",
        "messages_sent",
        "sentiment",
    )

    # Sidebar filters and full-text search configuration.
    list_filter = ("country", "language", "sentiment")
    search_fields = ("session_id", "country", "ip_address")

    # The upstream session identifier must never be edited by hand.
    readonly_fields = ("session_id",)
|
||||
|
||||
|
||||
@admin.register(ChatMessage)
class ChatMessageAdmin(admin.ModelAdmin):
    """Admin for individual chat messages with a truncated text preview."""

    list_display = ("session", "sender", "timestamp", "message_preview")
    list_filter = ("sender", "timestamp")
    search_fields = ("message", "session__session_id")
    readonly_fields = ("safe_html_display",)

    @admin.display(description="Message")
    def message_preview(self, obj):
        """Show a preview of the message"""
        text = obj.message
        # Truncate long messages to keep the changelist column compact.
        return text if len(text) <= 50 else text[:50] + "..."

    @admin.display(description="Sanitized HTML Preview")
    def safe_html_display(self, obj):
        """Display the sanitized HTML"""
        if not obj.safe_html_message:
            return "No HTML content"
        return format_html(
            '<div style="padding: 10px; border: 1px solid #ccc; background-color: #f9f9f9;">{}</div>',
            obj.safe_html_message,
        )
|
||||
6
dashboard_project/data_integration/apps.py
Normal file
6
dashboard_project/data_integration/apps.py
Normal file
@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class DataIntegrationConfig(AppConfig):
    """Django app configuration for the data_integration app."""

    default_auto_field = "django.db.models.BigAutoField"
    name = "data_integration"
|
||||
@ -0,0 +1,27 @@
|
||||
from data_integration.models import ExternalDataSource
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that seeds a single default ExternalDataSource row."""

    help = "Create default external data source configuration"

    def handle(self, *_args, **_options):
        # Only seed when no source exists at all; never overwrite user config.
        if ExternalDataSource.objects.exists():
            self.stdout.write(self.style.SUCCESS("External data source already exists, no action taken."))
            return

        source = ExternalDataSource.objects.create(  # nosec: B106
            name="Notso AI Chat API",
            api_url="https://HOST/COMPANY/chats",
            auth_username="DEFAULT_USERNAME",  # Will be set via environment variables
            auth_password="DEFAULT_PASSWORD",  # Will be set via environment variables
            is_active=True,
            sync_interval=int(self.get_env_var("CHAT_DATA_FETCH_INTERVAL", "3600")),
            timeout=int(self.get_env_var("FETCH_DATA_TIMEOUT", "300")),
        )
        self.stdout.write(self.style.SUCCESS(f"Created default external data source: {source.name}"))

    def get_env_var(self, name, default):
        """Get environment variable or return default"""
        import os

        return os.environ.get(name, default)
|
||||
@ -0,0 +1,11 @@
|
||||
from data_integration.utils import fetch_and_store_chat_data
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management-command wrapper around fetch_and_store_chat_data()."""

    help = "Fetches chat data from the external API and stores it in the database"

    def handle(self, *_args, **_options):  # Mark as unused
        self.stdout.write(self.style.SUCCESS("Starting data fetch..."))
        # Delegates all fetching/parsing/storage to the shared utility.
        fetch_and_store_chat_data()
        self.stdout.write(self.style.SUCCESS("Successfully fetched and stored chat data."))
|
||||
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Migration Fix Script for ExternalDataSource
|
||||
|
||||
This management command adds the missing fields to ExternalDataSource
|
||||
model directly using SQL, which is useful if Django migrations
|
||||
are having issues.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import connection
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Patch the ExternalDataSource table in place when migrations are stuck.

    Uses SQLite-specific SQL (PRAGMA table_info / ALTER TABLE ... ADD COLUMN);
    not portable to other database backends.
    """

    help = "Fix missing columns in ExternalDataSource table"

    def handle(self, *args, **options):  # noqa: ARG002
        self.stdout.write("Checking ExternalDataSource schema...")

        # Check if columns exist
        with connection.cursor() as cursor:
            # PRAGMA table_info returns one row per column; index 1 is the name.
            cursor.execute("PRAGMA table_info(data_integration_externaldatasource)")
            columns = [col[1] for col in cursor.fetchall()]

            missing_columns = []
            if "error_count" not in columns:
                missing_columns.append("error_count")
            if "last_error" not in columns:
                missing_columns.append("last_error")
            if "sync_interval" not in columns:
                missing_columns.append("sync_interval")
            if "timeout" not in columns:
                missing_columns.append("timeout")

            if not missing_columns:
                self.stdout.write(self.style.SUCCESS("✅ All columns exist in ExternalDataSource table"))
                return

            self.stdout.write(f"Missing columns: {', '.join(missing_columns)}")
            self.stdout.write("Adding missing columns...")

            try:
                # Add missing columns with SQLite
                for col in missing_columns:
                    if col == "error_count":
                        cursor.execute(
                            "ALTER TABLE data_integration_externaldatasource ADD COLUMN error_count integer DEFAULT 0"
                        )
                    elif col == "last_error":
                        cursor.execute(
                            "ALTER TABLE data_integration_externaldatasource ADD COLUMN last_error varchar(255) NULL"
                        )
                    elif col == "sync_interval":
                        cursor.execute(
                            "ALTER TABLE data_integration_externaldatasource ADD COLUMN sync_interval integer DEFAULT 3600"
                        )
                    elif col == "timeout":
                        cursor.execute(
                            "ALTER TABLE data_integration_externaldatasource ADD COLUMN timeout integer DEFAULT 300"
                        )

                self.stdout.write(
                    self.style.SUCCESS(f"✅ Successfully added missing columns: {', '.join(missing_columns)}")
                )

                # Verify columns were added
                cursor.execute("PRAGMA table_info(data_integration_externaldatasource)")
                updated_columns = [col[1] for col in cursor.fetchall()]
                self.stdout.write(f"Current columns: {', '.join(updated_columns)}")

            except Exception as e:
                self.stdout.write(self.style.ERROR(f"❌ Error adding columns: {e}"))
                self.stdout.write(self.style.WARNING("Consider running Django migrations instead:"))
                self.stdout.write(" python manage.py makemigrations data_integration")
                self.stdout.write(" python manage.py migrate data_integration")
|
||||
@ -0,0 +1,47 @@
|
||||
import logging
|
||||
|
||||
from data_integration.tasks import test_task
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.utils import timezone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Smoke-test the Celery setup by dispatching test_task and waiting briefly."""

    help = "Test Celery configuration by executing a simple task"

    def handle(self, *args, **options):  # noqa: ARG002
        self.stdout.write(f"Testing Celery configuration at {timezone.now()}")

        try:
            # Run the test task
            self.stdout.write("Submitting test task to Celery...")
            result = test_task.delay()
            task_id = result.id

            self.stdout.write(f"Task submitted with ID: {task_id}")
            self.stdout.write("Waiting for task result (this may take a few seconds)...")

            # Try to get the result with a timeout
            try:
                task_result = result.get(timeout=10)  # 10 second timeout
                self.stdout.write(self.style.SUCCESS(f"✅ Task completed successfully with result: {task_result}"))
                return
            except TimeoutError:
                # NOTE(review): Celery raises celery.exceptions.TimeoutError on
                # timeout, which may not be the builtin TimeoutError caught
                # here — confirm; otherwise timeouts fall through to the
                # generic Exception handler below.
                self.stdout.write(
                    self.style.WARNING(
                        "⚠️ Task did not complete within the timeout period. "
                        "This might be normal if Celery worker isn't running."
                    )
                )

            self.stdout.write(
                "To check task status, run Celery worker in another terminal with:\n"
                " make celery\n"
                f"And then check status of task {task_id}"
            )

        except Exception as e:
            self.stdout.write(self.style.ERROR(f"❌ Error testing Celery: {e}"))
            self.stdout.write("Make sure the Celery broker (Redis or SQLite) is properly configured.")
            self.stdout.write("To start Celery, run:\n make celery")
|
||||
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test the ExternalDataSource Model Schema
|
||||
|
||||
This management command tests if the ExternalDataSource schema has been correctly updated.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from data_integration.models import ExternalDataSource
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Verify that the ExternalDataSource table accepts the newer columns."""

    help = "Test ExternalDataSource model fields"

    def handle(self, *args, **options):  # noqa: ARG002
        self.stdout.write("Testing ExternalDataSource schema...")

        try:
            # Reuse (or lazily create) an inactive throwaway source row.
            source, created = ExternalDataSource.objects.get_or_create(
                name="Test Source",
                defaults={
                    "api_url": "https://example.com/api",
                    "is_active": False,
                },
            )

            prefix = "Created" if created else "Using existing"
            self.stdout.write(f"{prefix} test source with ID: {source.id}")

            # Each of the newer columns with a representative value to store.
            expected = {
                "error_count": 0,
                "last_error": "Test error message",
                "sync_interval": 7200,
                "timeout": 600,
            }

            for field, value in expected.items():
                try:
                    setattr(source, field, value)
                except AttributeError:
                    self.stdout.write(self.style.ERROR(f"❌ Field {field} doesn't exist on the model"))
                else:
                    self.stdout.write(self.style.SUCCESS(f"✅ Successfully set {field} = {value}"))

            try:
                source.save()
            except Exception as e:
                self.stdout.write(self.style.ERROR(f"❌ Error saving model: {e}"))
            else:
                self.stdout.write(self.style.SUCCESS("✅ Successfully saved with all fields"))

            # Reload from the database and confirm each value round-tripped.
            refreshed_source = ExternalDataSource.objects.get(id=source.id)
            self.stdout.write("\nVerifying saved values:")
            for field, expected_value in expected.items():
                actual_value = getattr(refreshed_source, field, "MISSING")
                if actual_value == expected_value:
                    self.stdout.write(self.style.SUCCESS(f"✅ {field} = {actual_value} (correct)"))
                else:
                    self.stdout.write(self.style.ERROR(f"❌ {field} = {actual_value} (expected: {expected_value})"))

        except Exception as e:
            self.stdout.write(self.style.ERROR(f"❌ Test failed: {e}"))
|
||||
@ -0,0 +1,117 @@
|
||||
import bleach
|
||||
from bleach.css_sanitizer import CSSSanitizer
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Exercise bleach + CSSSanitizer against a deliberately hostile HTML sample."""

    help = "Test the HTML sanitizer with CSS Sanitizer"

    def handle(self, *args, **options):  # noqa: ARG002
        # Hostile sample: script tag, javascript: URL, inline event handler,
        # disallowed tag (img) and a disallowed CSS property (transform).
        test_html = """
        <div style="color: red; background-color: yellow; transform: rotate(30deg);">
            <p style="font-size: 16px; margin: 10px;">
                This is a <span style="font-weight: bold; color: blue;">styled</span> paragraph.
            </p>
            <script>alert('XSS attack');</script>
            <a href="javascript:alert('Evil');" style="text-decoration: none;">Dangerous Link</a>
            <img src="x" onerror="alert('XSS')" style="border: 1px solid red;">
        </div>
        """

        # Whitelist of CSS properties the sanitizer may keep.
        css_sanitizer = CSSSanitizer(
            allowed_css_properties=[
                "color",
                "background-color",
                "font-family",
                "font-size",
                "font-weight",
                "font-style",
                "text-decoration",
                "text-align",
                "margin",
                "margin-left",
                "margin-right",
                "margin-top",
                "margin-bottom",
                "padding",
                "padding-left",
                "padding-right",
                "padding-top",
                "padding-bottom",
                "border",
                "border-radius",
                "width",
                "height",
                "line-height",
            ]
        )

        # strip=True removes disallowed tags entirely rather than escaping them.
        cleaned_html = bleach.clean(
            test_html,
            tags=[
                "b",
                "i",
                "u",
                "em",
                "strong",
                "a",
                "br",
                "p",
                "ul",
                "ol",
                "li",
                "span",
                "div",
                "pre",
                "code",
                "blockquote",
            ],
            attributes={
                "a": ["href", "title", "target"],
                "span": ["style", "class"],
                "div": ["style", "class"],
                "p": ["style", "class"],
                "pre": ["style", "class"],
            },
            css_sanitizer=css_sanitizer,
            strip=True,
        )

        # Show before/after for eyeballing.
        self.stdout.write(self.style.SUCCESS("Original HTML:"))
        self.stdout.write(test_html)
        self.stdout.write("\n\n")
        self.stdout.write(self.style.SUCCESS("Cleaned HTML:"))
        self.stdout.write(cleaned_html)
        self.stdout.write("\n\n")

        # Table-driven security checks: (substring that must be gone, label).
        self.stdout.write(self.style.SUCCESS("Security Checks:"))

        checks = (
            ("script", "Script tags"),
            ("javascript:", "JavaScript URLs"),
            ("onerror", "Event handlers"),
            ("transform", "Unsafe CSS properties"),
            ("img", "Unsupported tags"),
        )
        for needle, label in checks:
            if needle not in cleaned_html:
                self.stdout.write(self.style.SUCCESS(f"✓ {label} removed"))
            else:
                self.stdout.write(self.style.ERROR(f"✗ {label} found"))
|
||||
@ -0,0 +1,68 @@
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Check that the Redis broker used by Celery is reachable and usable."""

    help = "Test Redis connection for Celery"

    def handle(self, *args, **options):  # noqa: ARG002
        self.stdout.write("Testing Redis connection...")

        try:
            import redis

            # Get Redis configuration from settings
            redis_host = getattr(settings, "REDIS_HOST", "localhost")
            redis_port = int(getattr(settings, "REDIS_PORT", 6379))
            redis_db = int(getattr(settings, "REDIS_DB", 0))

            # Override from environment if set
            import os

            if "REDIS_URL" in os.environ:
                # NOTE(review): REDIS_URL is only reported, never parsed — the
                # connection below still uses the settings-derived host/port/db.
                self.stdout.write(f"REDIS_URL environment variable found: {os.environ['REDIS_URL']}")

            # Try to connect and ping
            redis_client = redis.Redis(host=redis_host, port=redis_port, db=redis_db, socket_connect_timeout=2)

            ping_result = redis_client.ping()

            if ping_result:
                self.stdout.write(
                    self.style.SUCCESS(
                        f"✅ Redis connection successful! Connected to {redis_host}:{redis_port}/{redis_db}"
                    )
                )
                self.stdout.write(f"Broker URL: {settings.CELERY_BROKER_URL}")
                self.stdout.write(f"Result backend: {settings.CELERY_RESULT_BACKEND}")

                # Try to set and get a value
                test_key = "test_redis_connection"
                test_value = "success"
                redis_client.set(test_key, test_value)
                retrieved_value = redis_client.get(test_key)

                # redis-py returns bytes; decode before comparing.
                if retrieved_value and retrieved_value.decode() == test_value:
                    self.stdout.write(self.style.SUCCESS("✅ Redis SET/GET test passed!"))
                else:
                    self.stdout.write(
                        self.style.WARNING(
                            f"⚠️ Redis SET/GET test failed: Got {retrieved_value} instead of {test_value}"
                        )
                    )

                # Clean up
                redis_client.delete(test_key)
            else:
                self.stdout.write(self.style.ERROR("❌ Redis ping failed!"))
        # NOTE(review): if `import redis` itself fails, evaluating this clause
        # raises NameError (local name `redis` is unbound), so the ImportError
        # branch below is likely unreachable — verify and reorder if so.
        except redis.exceptions.ConnectionError as e:
            self.stdout.write(self.style.ERROR(f"❌ Redis connection error: {e}"))
            self.stdout.write("Celery will use SQLite fallback if configured.")
        except ImportError:
            self.stdout.write(self.style.ERROR("❌ Redis package not installed. Install with: pip install redis"))
        except Exception as e:
            self.stdout.write(self.style.ERROR(f"❌ Error: {e}"))
|
||||
@ -0,0 +1,99 @@
|
||||
# Generated by Django 5.2.1 on 2025-05-17 21:14
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema for the data_integration app (ChatSession,
    ExternalDataSource, ChatMessage).

    Auto-generated by Django — do not edit by hand; create a new migration
    instead.
    """

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="ChatSession",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("session_id", models.CharField(max_length=255, unique=True)),
                ("start_time", models.DateTimeField()),
                ("end_time", models.DateTimeField()),
                ("ip_address", models.GenericIPAddressField(blank=True, null=True)),
                ("country", models.CharField(blank=True, max_length=255, null=True)),
                ("language", models.CharField(blank=True, max_length=255, null=True)),
                ("messages_sent", models.IntegerField(blank=True, null=True)),
                ("sentiment", models.CharField(blank=True, max_length=255, null=True)),
                ("escalated", models.BooleanField(blank=True, null=True)),
                ("forwarded_hr", models.BooleanField(blank=True, null=True)),
                (
                    "full_transcript_url",
                    models.URLField(blank=True, max_length=1024, null=True),
                ),
                ("avg_response_time", models.FloatField(blank=True, null=True)),
                ("tokens", models.IntegerField(blank=True, null=True)),
                ("tokens_eur", models.FloatField(blank=True, null=True)),
                ("category", models.CharField(blank=True, max_length=255, null=True)),
                ("initial_msg", models.TextField(blank=True, null=True)),
                ("user_rating", models.IntegerField(blank=True, null=True)),
            ],
        ),
        migrations.CreateModel(
            name="ExternalDataSource",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("name", models.CharField(default="External API", max_length=255)),
                ("api_url", models.URLField(default="https://proto.notso.ai/XY/chats")),
                (
                    "auth_username",
                    models.CharField(blank=True, max_length=255, null=True),
                ),
                (
                    "auth_password",
                    models.CharField(blank=True, max_length=255, null=True),
                ),
                ("last_synced", models.DateTimeField(blank=True, null=True)),
                ("is_active", models.BooleanField(default=True)),
            ],
        ),
        migrations.CreateModel(
            name="ChatMessage",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("timestamp", models.DateTimeField(auto_now_add=True)),
                ("sender", models.CharField(max_length=255)),
                ("message", models.TextField()),
                ("safe_html_message", models.TextField(blank=True, null=True)),
                (
                    "session",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="messages",
                        to="data_integration.chatsession",
                    ),
                ),
            ],
        ),
    ]
|
||||
@ -0,0 +1,43 @@
|
||||
# Generated by Django 5.2.1 on 2025-05-17 22:33
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add sync bookkeeping fields (error_count, last_error, sync_interval,
    timeout) to ExternalDataSource and change the api_url default.

    Auto-generated by Django — do not edit by hand; create a new migration
    instead.
    """

    dependencies = [
        ("data_integration", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="externaldatasource",
            name="error_count",
            field=models.IntegerField(default=0),
        ),
        migrations.AddField(
            model_name="externaldatasource",
            name="last_error",
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        migrations.AddField(
            model_name="externaldatasource",
            name="sync_interval",
            field=models.IntegerField(
                default=3600,
                help_text="Sync interval in seconds. Default is 3600 (1 hour)",
            ),
        ),
        migrations.AddField(
            model_name="externaldatasource",
            name="timeout",
            field=models.IntegerField(
                default=300,
                help_text="Timeout in seconds for each sync operation. Default is 300 (5 minutes)",
            ),
        ),
        migrations.AlterField(
            model_name="externaldatasource",
            name="api_url",
            field=models.URLField(default="https://proto.notso.ai/jumbo/chats"),
        ),
    ]
|
||||
78
dashboard_project/data_integration/models.py
Normal file
78
dashboard_project/data_integration/models.py
Normal file
@ -0,0 +1,78 @@
|
||||
import os
|
||||
|
||||
from django.db import models
|
||||
|
||||
|
||||
class ChatSession(models.Model):
    """A single chat conversation imported from the external chat API.

    Most fields are nullable because the upstream payload may omit them.
    """

    # Upstream identifier; unique so re-imports can match existing rows.
    session_id = models.CharField(max_length=255, unique=True)
    start_time = models.DateTimeField()
    end_time = models.DateTimeField()
    ip_address = models.GenericIPAddressField(null=True, blank=True)
    country = models.CharField(max_length=255, null=True, blank=True)
    language = models.CharField(max_length=255, null=True, blank=True)
    messages_sent = models.IntegerField(null=True, blank=True)
    sentiment = models.CharField(max_length=255, null=True, blank=True)
    escalated = models.BooleanField(null=True, blank=True)
    forwarded_hr = models.BooleanField(null=True, blank=True)
    full_transcript_url = models.URLField(max_length=1024, null=True, blank=True)
    avg_response_time = models.FloatField(null=True, blank=True)
    tokens = models.IntegerField(null=True, blank=True)
    tokens_eur = models.FloatField(null=True, blank=True)
    category = models.CharField(max_length=255, null=True, blank=True)
    initial_msg = models.TextField(null=True, blank=True)
    user_rating = models.IntegerField(null=True, blank=True)

    def __str__(self):
        return self.session_id
|
||||
|
||||
|
||||
class ChatMessage(models.Model):
    """One message within a ChatSession."""

    session = models.ForeignKey(ChatSession, related_name="messages", on_delete=models.CASCADE)
    timestamp = models.DateTimeField(auto_now_add=True)  # Changed to auto_now_add for simplicity
    sender = models.CharField(max_length=255)  # "User" or "Assistant"
    message = models.TextField()
    safe_html_message = models.TextField(blank=True, null=True)  # For storing sanitized HTML
    def __str__(self):
        return f"{self.session.session_id} - {self.sender} at {self.timestamp}"
|
||||
|
||||
|
||||
class ExternalDataSource(models.Model):
    """Configuration for one upstream chat API endpoint, plus sync bookkeeping."""

    name = models.CharField(max_length=255, default="External API")
    api_url = models.URLField(default="https://proto.notso.ai/jumbo/chats")
    auth_username = models.CharField(max_length=255, blank=True, null=True)
    auth_password = models.CharField(
        max_length=255, blank=True, null=True
    )  # Consider using a more secure way to store credentials
    last_synced = models.DateTimeField(null=True, blank=True)
    is_active = models.BooleanField(default=True)
    error_count = models.IntegerField(default=0)
    last_error = models.CharField(max_length=255, blank=True, null=True)
    sync_interval = models.IntegerField(default=3600, help_text="Sync interval in seconds. Default is 3600 (1 hour)")
    timeout = models.IntegerField(
        default=300,
        help_text="Timeout in seconds for each sync operation. Default is 300 (5 minutes)",
    )

    def get_auth_username(self):
        """Get username from environment variable if set, otherwise use stored value"""
        return os.environ.get("EXTERNAL_API_USERNAME") or self.auth_username

    def get_auth_password(self):
        """Get password from environment variable if set, otherwise use stored value"""
        return os.environ.get("EXTERNAL_API_PASSWORD") or self.auth_password

    def get_status(self):
        """Get the status of this data source"""
        # Order matters: inactive trumps sync history, which trumps errors.
        if not self.is_active:
            return "Inactive"
        if not self.last_synced:
            return "Never synced"
        return f"Error ({self.error_count})" if self.error_count > 0 else "Active"

    def __str__(self):
        return self.name
|
||||
116
dashboard_project/data_integration/tasks.py
Normal file
116
dashboard_project/data_integration/tasks.py
Normal file
@ -0,0 +1,116 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from celery import shared_task
|
||||
from django.db import utils as django_db_utils
|
||||
from django.utils import timezone
|
||||
|
||||
from .models import ExternalDataSource
|
||||
from .utils import fetch_and_store_chat_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@shared_task(name="data_integration.tasks.test_task", bind=True)
def test_task(self):
    """A simple test task to verify Celery is working without external dependencies."""
    # bind=True exposes the task instance as `self` (for self.request.id).
    logger.info("Test task executed at %s (task_id: %s)", timezone.now(), self.request.id)
    return "Test task completed successfully!"
|
||||
|
||||
|
||||
@shared_task(
    name="data_integration.tasks.periodic_fetch_chat_data",
    bind=True,
    autoretry_for=(Exception,),
    retry_kwargs={"max_retries": 3, "countdown": 60},
    soft_time_limit=int(os.environ.get("FETCH_DATA_TIMEOUT", 300)),  # 5 minutes default
)
def periodic_fetch_chat_data(self):
    """Periodically fetch and process chat data from external sources.

    This task:
    1. Fetches data from all active external data sources
    2. Processes and stores the data in the database
    3. Updates the last_synced timestamp on each source
    4. Handles errors with retries

    Returns:
        A short human-readable summary string.

    Raises:
        Exception: when every source fails (re-raised to trigger autoretry).
    """
    logger.info("Starting periodic chat data fetch (task_id: %s)...", self.request.id)
    try:
        # Get all active data sources
        active_sources = ExternalDataSource.objects.filter(is_active=True)

        if not active_sources.exists():
            logger.warning("No active external data sources found. Skipping fetch.")
            return "No active data sources found"

        successful_sources = []
        failed_sources = []

        for source in active_sources:
            try:
                logger.info(f"Processing source: {source.name} (ID: {source.id})")
                fetch_and_store_chat_data(source_id=source.id)
                source.last_synced = timezone.now()
                # Check if error_count field exists in the model
                update_fields = ["last_synced"]
                try:
                    # NOTE(review): plain attribute assignment on a model never
                    # raises AttributeError, so this except branch looks
                    # unreachable — the "missing column" failure would surface
                    # from save() instead. Confirm before relying on it.
                    source.error_count = 0
                    source.last_error = None
                    update_fields.extend(["error_count", "last_error"])
                except AttributeError:
                    # Fields might not exist yet if migrations haven't been applied
                    logger.warning("New fields not available. Run migrations to enable error tracking.")
                source.save(update_fields=update_fields)
                successful_sources.append(source.name)
            except Exception as e:
                logger.error(f"Error fetching data from source {source.name}: {e}", exc_info=True)
                try:
                    source.error_count = getattr(source, "error_count", 0) + 1
                    source.last_error = str(e)[:255]  # Truncate to fit in the field
                    source.save(update_fields=["error_count", "last_error"])
                except (AttributeError, django_db_utils.OperationalError):
                    # If fields don't exist, just update last_synced
                    logger.warning("Could not update error fields. Run migrations to enable error tracking.")
                    source.last_synced = timezone.now()
                    source.save(update_fields=["last_synced"])
                failed_sources.append(source.name)

        if failed_sources and not successful_sources:
            # If all sources failed, we should raise an exception to trigger retry
            raise Exception(f"All data sources failed: {', '.join(failed_sources)}")

        result_message = f"Completed: {len(successful_sources)} successful, {len(failed_sources)} failed"
        logger.info(result_message)
        return result_message

    except Exception as e:
        logger.error(f"Error during periodic chat data fetch: {e}", exc_info=True)
        raise  # Re-raise to trigger Celery retry
|
||||
|
||||
|
||||
@shared_task(name="data_integration.tasks.refresh_specific_source", bind=True)
def refresh_specific_source(self, source_id):
    """Manually refresh a specific data source.

    Mirrors the defensive error-field handling of ``periodic_fetch_chat_data``
    so the task also works before the ``error_count``/``last_error``
    migrations have been applied.

    Args:
        source_id: ID of the ExternalDataSource to refresh.

    Returns:
        str: Human-readable result message (success or error description).
    """
    logger.info(f"Starting manual refresh of data source ID: {source_id} (task_id: {self.request.id})")
    try:
        source = ExternalDataSource.objects.get(id=source_id)
        fetch_and_store_chat_data(source_id=source_id)

        source.last_synced = timezone.now()
        update_fields = ["last_synced"]
        try:
            # These fields may not exist yet if migrations haven't been applied
            # (same guard as in periodic_fetch_chat_data).
            source.error_count = 0
            source.last_error = None
            update_fields.extend(["error_count", "last_error"])
        except AttributeError:
            logger.warning("New fields not available. Run migrations to enable error tracking.")
        source.save(update_fields=update_fields)

        logger.info(f"Manual refresh of data source {source.name} completed successfully")
        return f"Successfully refreshed data source: {source.name}"
    except ExternalDataSource.DoesNotExist:
        logger.error(f"Data source with ID {source_id} does not exist")
        return f"Error: Data source with ID {source_id} does not exist"
    except Exception as e:
        logger.error(
            f"Error during manual refresh of data source {source_id}: {e}",
            exc_info=True,
        )
        return f"Error: {str(e)}"
|
||||
1
dashboard_project/data_integration/tests.py
Normal file
1
dashboard_project/data_integration/tests.py
Normal file
@ -0,0 +1 @@
|
||||
# Create your tests here.
|
||||
14
dashboard_project/data_integration/urls.py
Normal file
14
dashboard_project/data_integration/urls.py
Normal file
@ -0,0 +1,14 @@
|
||||
from django.urls import path

from . import views

# URL namespace for reversing, e.g. "data_integration:manual_data_refresh".
app_name = "data_integration"

urlpatterns = [
    # Trigger a refresh of all active data sources (superuser-only view).
    path("manual-refresh/", views.manual_data_refresh, name="manual_data_refresh"),
    # Refresh a single ExternalDataSource by primary key (staff-only view).
    path(
        "refresh/<int:source_id>/",
        views.refresh_specific_datasource,
        name="refresh_specific_datasource",
    ),
]
|
||||
340
dashboard_project/data_integration/utils.py
Normal file
340
dashboard_project/data_integration/utils.py
Normal file
@ -0,0 +1,340 @@
|
||||
import csv
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
import bleach
|
||||
import requests
|
||||
from bleach.css_sanitizer import CSSSanitizer
|
||||
from django.utils.timezone import make_aware
|
||||
|
||||
from .models import ChatMessage, ChatSession, ExternalDataSource
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Column order of the external CSV feed. The feed ships without a header
# row, so each data row is zipped against this fixed list (see
# fetch_and_store_chat_data below).
EXPECTED_HEADERS = [
    "session_id",
    "start_time",
    "end_time",
    "ip_address",
    "country",
    "language",
    "messages_sent",
    "sentiment",
    "escalated",
    "forwarded_hr",
    "full_transcript",
    "avg_response_time",
    "tokens",
    "tokens_eur",
    "category",
    "initial_msg",
    "user_rating",
]
|
||||
|
||||
|
||||
def _parse_feed_datetime(value):
    """Parse a feed timestamp into an aware datetime.

    Tries the European format (DD.MM.YYYY HH:MM:SS) first, then falls back
    to ISO (YYYY-MM-DD HH:MM:SS). Raises ValueError if neither matches.
    """
    try:
        return make_aware(datetime.strptime(value, "%d.%m.%Y %H:%M:%S"))
    except ValueError:
        return make_aware(datetime.strptime(value, "%Y-%m-%d %H:%M:%S"))


def fetch_and_store_chat_data(source_id=None):
    """Fetch chat data from an external API and store it in the database.

    Args:
        source_id: Optional ID of specific ExternalDataSource to use.
            If None, will use the first active source.

    Returns:
        dict: Stats about the operation (sessions created, updated, errors).
    """
    # Resolve which data source to pull from.
    if source_id:
        source = ExternalDataSource.objects.filter(id=source_id, is_active=True).first()
        if not source:
            logger.error(f"Data source with ID {source_id} not found or not active.")
            return {
                "success": False,
                "error": f"Data source with ID {source_id} not found or not active.",
            }
    else:
        source = ExternalDataSource.objects.filter(is_active=True).first()
        if not source:
            logger.warning("No active data source found.")
            return {"success": False, "error": "No active data source found."}

    stats = {
        "sessions_created": 0,
        "sessions_updated": 0,
        "transcripts_processed": 0,
        "errors": 0,
        "success": True,
    }

    try:
        # Fetch data from API with timeout from source settings or default.
        timeout = getattr(source, "timeout", 30)
        response = requests.get(
            source.api_url,
            auth=((source.get_auth_username(), source.get_auth_password()) if source.get_auth_username() else None),
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        error_msg = f"Error fetching data from API {source.api_url}: {e}"
        logger.error(error_msg)
        return {"success": False, "error": error_msg}

    # Process CSV data. The feed is known to ship without a header row, so
    # rows are mapped onto the fixed EXPECTED_HEADERS list instead of
    # reading a header from the stream.
    csv_data = response.content.decode("utf-8").splitlines()
    reader = csv.reader(csv_data)
    header = EXPECTED_HEADERS

    for row in reader:
        if not row:  # Skip empty rows
            continue
        try:
            # Pad short rows with empty strings so zip() covers every header.
            padded_row = row + [""] * (len(header) - len(row))
            data = dict(zip(header, padded_row, strict=False))

            start_time = _parse_feed_datetime(data["start_time"])
            end_time = _parse_feed_datetime(data["end_time"])

            # Empty CSV fields become None rather than 0/False so missing
            # data is distinguishable from real values.
            messages_sent = int(data["messages_sent"]) if data["messages_sent"] else None
            escalated = data["escalated"].lower() == "true" if data["escalated"] else None
            forwarded_hr = data["forwarded_hr"].lower() == "true" if data["forwarded_hr"] else None
            avg_response_time = float(data["avg_response_time"]) if data["avg_response_time"] else None
            tokens = int(data["tokens"]) if data["tokens"] else None
            tokens_eur = float(data["tokens_eur"]) if data["tokens_eur"] else None
            user_rating = int(data["user_rating"]) if data["user_rating"] and data["user_rating"].isdigit() else None

            session, created = ChatSession.objects.update_or_create(
                session_id=data["session_id"],
                defaults={
                    "start_time": start_time,
                    "end_time": end_time,
                    "ip_address": data.get("ip_address"),
                    "country": data.get("country"),
                    "language": data.get("language"),
                    "messages_sent": messages_sent,
                    "sentiment": data.get("sentiment"),
                    "escalated": escalated,
                    "forwarded_hr": forwarded_hr,
                    "full_transcript_url": data.get("full_transcript"),
                    "avg_response_time": avg_response_time,
                    "tokens": tokens,
                    "tokens_eur": tokens_eur,
                    "category": data.get("category"),
                    "initial_msg": data.get("initial_msg"),
                    "user_rating": user_rating,
                },
            )

            if created:
                stats["sessions_created"] += 1
                logger.info(f"Created session: {session.session_id}")
            else:
                stats["sessions_updated"] += 1
                logger.info(f"Updated session: {session.session_id}")

            # Fetch and process transcript if URL is present.
            if session.full_transcript_url:
                transcript_result = fetch_and_store_transcript(session, timeout)
                if transcript_result["success"]:
                    stats["transcripts_processed"] += 1

        except Exception as e:
            # One bad row should not abort the whole sync.
            logger.error(f"Error processing row: {row}. Error: {e}", exc_info=True)
            stats["errors"] += 1
            continue

    source.last_synced = make_aware(datetime.now())
    source.save()
    # Fix: original lacked the f-prefix and logged the literal "{stats}".
    logger.info(f"Data sync complete. Stats: {stats}")

    return stats
|
||||
|
||||
|
||||
def fetch_and_store_transcript(session, timeout=30):
    """Download a session's transcript and store its parsed messages.

    Args:
        session: The ChatSession whose ``full_transcript_url`` is fetched.
        timeout: Timeout in seconds for the HTTP request.

    Returns:
        dict: ``{"success": bool, "messages_created": int, "error": str | None}``.
    """
    outcome = {"success": False, "messages_created": 0, "error": None}

    try:
        http_response = requests.get(session.full_transcript_url, timeout=timeout)
        http_response.raise_for_status()
        raw_text = http_response.content.decode("utf-8")
        stored = parse_and_store_transcript_messages(session, raw_text)
        outcome["success"] = True
        outcome["messages_created"] = stored
    except requests.RequestException as exc:
        # Network/HTTP-level failure while retrieving the transcript.
        failure = f"Error fetching transcript for session {session.session_id}: {exc}"
        logger.error(failure)
        outcome["error"] = failure
    except Exception as exc:
        # Anything else (decoding, parsing, persistence) is a processing error.
        failure = f"Error processing transcript for session {session.session_id}: {exc}"
        logger.error(failure, exc_info=True)
        outcome["error"] = failure

    return outcome
|
||||
|
||||
|
||||
def parse_and_store_transcript_messages(session, transcript_content):
    """Parse and store messages from a transcript.

    The transcript is a plain-text log where each message begins with a
    "User:" or "Assistant:" marker line; subsequent lines belong to the
    current message until the next marker.

    Args:
        session: The ChatSession object.
        transcript_content: The raw transcript content.

    Returns:
        int: Number of messages created.
    """
    lines = transcript_content.splitlines()
    current_sender = None
    current_message_lines = []
    messages_created = 0

    # First, delete existing messages for this session to avoid duplicates.
    existing_count = ChatMessage.objects.filter(session=session).count()
    if existing_count > 0:
        logger.info(f"Deleting {existing_count} existing messages for session {session.session_id}")
        ChatMessage.objects.filter(session=session).delete()

    def _flush():
        """Persist the buffered message; return 1 if one was stored, else 0."""
        if current_sender and current_message_lines and save_message(
            session, current_sender, "\n".join(current_message_lines)
        ):
            return 1
        return 0

    for line in lines:
        if line.startswith("User:"):
            messages_created += _flush()
            current_sender = "User"
            # Fix: slice off only the leading marker. The previous
            # str.replace("User:", "") also removed "User:" occurring
            # inside the message text itself.
            current_message_lines = [line[len("User:"):].strip()]
        elif line.startswith("Assistant:"):
            messages_created += _flush()
            current_sender = "Assistant"
            current_message_lines = [line[len("Assistant:"):].strip()]
        elif current_sender:
            # Continuation line of the current message.
            current_message_lines.append(line.strip())

    # Save the last buffered message.
    messages_created += _flush()

    logger.info(f"Created {messages_created} messages for session {session.session_id}")
    return messages_created
|
||||
|
||||
|
||||
# Sanitization policy for transcript HTML. Built once at import time: the
# previous implementation reconstructed the CSSSanitizer and the tag/attr
# lists on every single message, which was pure per-call overhead.
_ALLOWED_CSS_PROPERTIES = [
    "color",
    "background-color",
    "font-family",
    "font-size",
    "font-weight",
    "font-style",
    "text-decoration",
    "text-align",
    "margin",
    "margin-left",
    "margin-right",
    "margin-top",
    "margin-bottom",
    "padding",
    "padding-left",
    "padding-right",
    "padding-top",
    "padding-bottom",
    "border",
    "border-radius",
    "width",
    "height",
    "line-height",
]
_ALLOWED_TAGS = [
    "b",
    "i",
    "u",
    "em",
    "strong",
    "a",
    "br",
    "p",
    "ul",
    "ol",
    "li",
    "span",
    "div",
    "pre",
    "code",
    "blockquote",
]
_ALLOWED_ATTRIBUTES = {
    "a": ["href", "title", "target"],
    "span": ["style", "class"],
    "div": ["style", "class"],
    "p": ["style", "class"],
    "pre": ["style", "class"],
}
_CSS_SANITIZER = CSSSanitizer(allowed_css_properties=_ALLOWED_CSS_PROPERTIES)


def save_message(session, sender, message_text):
    """Save a message for a chat session.

    Stores both the raw text and a bleach-sanitized HTML copy
    (``safe_html_message``) suitable for rendering.

    Args:
        session: The ChatSession object.
        sender: The sender of the message ("User" or "Assistant").
        message_text: The message text, which may contain HTML.

    Returns:
        bool: True if the message was created, False otherwise
        (blank text or a persistence error).
    """
    if not message_text.strip():
        return False

    try:
        # Sanitize HTML content before saving; disallowed tags are stripped.
        safe_html = bleach.clean(
            message_text,
            tags=_ALLOWED_TAGS,
            attributes=_ALLOWED_ATTRIBUTES,
            css_sanitizer=_CSS_SANITIZER,
            strip=True,
        )

        ChatMessage.objects.create(
            session=session,
            sender=sender,
            message=message_text,
            safe_html_message=safe_html,
        )
        logger.debug(f"Stored message for session {session.session_id} from {sender}")
        return True
    except Exception as e:
        # Best-effort persistence: log and report failure to the caller.
        logger.error(f"Error saving message for session {session.session_id}: {e}", exc_info=True)
        return False
|
||||
54
dashboard_project/data_integration/views.py
Normal file
54
dashboard_project/data_integration/views.py
Normal file
@ -0,0 +1,54 @@
|
||||
from django.contrib import messages
|
||||
from django.contrib.admin.views.decorators import staff_member_required
|
||||
from django.contrib.auth.decorators import login_required, user_passes_test
|
||||
from django.shortcuts import get_object_or_404, redirect
|
||||
|
||||
from .models import ExternalDataSource
|
||||
from .tasks import periodic_fetch_chat_data, refresh_specific_source
|
||||
from .utils import fetch_and_store_chat_data
|
||||
|
||||
# Create your views here.
|
||||
|
||||
|
||||
def is_superuser(user):
    """Predicate for ``user_passes_test``: allow only Django superusers."""
    return user.is_superuser
|
||||
|
||||
|
||||
@login_required
@user_passes_test(is_superuser)
def manual_data_refresh(request):
    """Trigger a refresh of chat data (superusers only, POST).

    Prefers queueing the Celery task; if that fails (e.g. broker down),
    falls back to running the fetch synchronously inside the request.
    Always redirects back to the referring page, or the dashboard.
    """
    if request.method == "POST":
        try:
            _queue_or_run_refresh(request)
        except Exception as e:
            messages.error(request, f"Failed to refresh data: {e}")
    # Redirect to previous page or dashboard.
    return redirect(request.headers.get("referer", "dashboard"))


def _queue_or_run_refresh(request):
    """Queue the Celery refresh task, falling back to a synchronous run."""
    try:
        # Asynchronous with Celery.
        periodic_fetch_chat_data.delay()
        messages.success(
            request,
            "Manual data refresh triggered successfully. The data will be updated shortly.",
        )
    except Exception:
        # Fall back to synchronous if Celery is not available.
        fetch_and_store_chat_data()
        messages.success(
            request,
            "Manual data refresh completed successfully (synchronous mode).",
        )
|
||||
|
||||
|
||||
@staff_member_required
def refresh_specific_datasource(request, source_id):
    """Trigger a refresh of one data source. Used as a backup for admin URLs."""
    source = get_object_or_404(ExternalDataSource, pk=source_id)

    try:
        # Queue the Celery task for this single source.
        queued = refresh_specific_source.delay(source_id)
        messages.success(
            request,
            f"Data refresh task started for {source.name} (Task ID: {queued.id})",
        )
    except Exception as exc:
        messages.error(request, f"Failed to refresh data source {source.name}: {exc}")

    fallback_url = "/admin/data_integration/externaldatasource/"
    return redirect(request.headers.get("referer", fallback_url))
|
||||
Reference in New Issue
Block a user