Implement data integration tasks with Celery, including periodic fetching and manual refresh of chat data; add utility functions for data processing and transcript handling; create views and URLs for manual data refresh; establish Redis and Celery configuration; enhance error handling and logging; introduce scripts for data cleanup and fixing dashboard data; update documentation for Redis and Celery setup and troubleshooting.

This commit is contained in:
2025-05-18 13:33:11 +00:00
parent e8f2d2adc2
commit 8bbbb109bd
63 changed files with 4601 additions and 164 deletions

View File

@ -0,0 +1,27 @@
from data_integration.models import ExternalDataSource
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Management command that seeds a single default ``ExternalDataSource`` row.

    The row is only created when the table holds no ``ExternalDataSource`` at all;
    otherwise the command reports that nothing was done.
    """

    help = "Create default external data source configuration"

    def handle(self, *_args, **_options):
        """Create the default data source, or report that one is already present.

        Positional arguments and options are accepted but not used.
        """
        write = self.stdout.write
        success = self.style.SUCCESS

        # Guard clause: never touch an existing configuration.
        if ExternalDataSource.objects.exists():
            write(success("External data source already exists, no action taken."))
            return

        # Both numeric settings come from the environment, with string defaults
        # that are converted to int before being stored.
        sync_interval = int(self.get_env_var("CHAT_DATA_FETCH_INTERVAL", "3600"))
        timeout = int(self.get_env_var("FETCH_DATA_TIMEOUT", "300"))

        source = ExternalDataSource.objects.create(  # nosec: B106
            name="Notso AI Chat API",
            api_url="https://HOST/COMPANY/chats",
            # Placeholder credentials: the original note says the real values
            # "will be set via environment variables"; this command itself
            # does not read them from the environment.
            auth_username="DEFAULT_USERNAME",
            auth_password="DEFAULT_PASSWORD",
            is_active=True,
            sync_interval=sync_interval,
            timeout=timeout,
        )
        write(success(f"Created default external data source: {source.name}"))

    def get_env_var(self, name, default):
        """Return environment variable ``name``, or ``default`` when it is not set."""
        import os

        return os.getenv(name, default)

View File

@ -0,0 +1,11 @@
from data_integration.utils import fetch_and_store_chat_data
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Management command that runs ``fetch_and_store_chat_data`` once."""

    help = "Fetches chat data from the external API and stores it in the database"

    def handle(self, *_args, **_options):
        """Announce the fetch, run it, then announce completion.

        Positional arguments and options are accepted but not used.
        """
        emit = self.stdout.write
        as_success = self.style.SUCCESS

        emit(as_success("Starting data fetch..."))
        fetch_and_store_chat_data()
        # Only reached when the call above returned without raising.
        emit(as_success("Successfully fetched and stored chat data."))

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
"""
Migration Fix Script for ExternalDataSource
This management command adds the missing fields to ExternalDataSource
model directly using SQL, which is useful if Django migrations
are having issues.
"""
import logging
from django.core.management.base import BaseCommand
from django.db import connection
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Add missing columns to the ``ExternalDataSource`` table with raw SQL.

    The command inspects ``data_integration_externaldatasource`` and issues one
    ``ALTER TABLE ... ADD COLUMN`` statement per missing column among
    ``error_count``, ``last_error``, ``sync_interval`` and ``timeout``.
    It relies on SQLite's ``PRAGMA table_info`` and therefore refuses to run
    against any other database backend.
    """

    help = "Fix missing columns in ExternalDataSource table"

    # Column name -> statement that adds it. Insertion order is the order in
    # which columns are checked and added.
    _ADD_COLUMN_SQL = {
        "error_count": (
            "ALTER TABLE data_integration_externaldatasource ADD COLUMN error_count integer DEFAULT 0"
        ),
        "last_error": (
            "ALTER TABLE data_integration_externaldatasource ADD COLUMN last_error varchar(255) NULL"
        ),
        "sync_interval": (
            "ALTER TABLE data_integration_externaldatasource ADD COLUMN sync_interval integer DEFAULT 3600"
        ),
        "timeout": (
            "ALTER TABLE data_integration_externaldatasource ADD COLUMN timeout integer DEFAULT 300"
        ),
    }

    def handle(self, *args, **options):  # noqa: ARG002
        """Check the table schema and add whichever expected columns are absent.

        Positional arguments and options are accepted but not used. All
        outcomes are reported on ``self.stdout``; nothing is returned.
        """
        self.stdout.write("Checking ExternalDataSource schema...")

        # PRAGMA is SQLite-specific. On another backend the very first query
        # would fail with a raw database error before any guidance is shown.
        if connection.vendor != "sqlite":
            self.stdout.write(
                self.style.ERROR(
                    f"❌ This command only supports SQLite (current backend: {connection.vendor})"
                )
            )
            self._suggest_migrations()
            return

        with connection.cursor() as cursor:
            columns = self._column_names(cursor)

            # table_info yields no rows for a table that does not exist, which
            # would otherwise be misreported as "all four columns missing".
            if not columns:
                self.stdout.write(
                    self.style.ERROR("❌ Table data_integration_externaldatasource does not exist")
                )
                self._suggest_migrations()
                return

            missing_columns = [name for name in self._ADD_COLUMN_SQL if name not in columns]
            if not missing_columns:
                self.stdout.write(self.style.SUCCESS("✅ All columns exist in ExternalDataSource table"))
                return

            self.stdout.write(f"Missing columns: {', '.join(missing_columns)}")
            self.stdout.write("Adding missing columns...")
            try:
                # Add missing columns with SQLite
                for col in missing_columns:
                    cursor.execute(self._ADD_COLUMN_SQL[col])
                self.stdout.write(
                    self.style.SUCCESS(f"✅ Successfully added missing columns: {', '.join(missing_columns)}")
                )
                # Verify columns were added
                updated_columns = self._column_names(cursor)
                self.stdout.write(f"Current columns: {', '.join(updated_columns)}")
            except Exception as e:
                # Top-level CLI boundary: report the failure and point the
                # operator at the regular migration workflow.
                self.stdout.write(self.style.ERROR(f"❌ Error adding columns: {e}"))
                self._suggest_migrations()

    @staticmethod
    def _column_names(cursor):
        """Return the column names of the ExternalDataSource table (second field of each PRAGMA row)."""
        cursor.execute("PRAGMA table_info(data_integration_externaldatasource)")
        return [row[1] for row in cursor.fetchall()]

    def _suggest_migrations(self):
        """Print the hint to use Django migrations instead of this command."""
        self.stdout.write(self.style.WARNING("Consider running Django migrations instead:"))
        self.stdout.write(" python manage.py makemigrations data_integration")
        self.stdout.write(" python manage.py migrate data_integration")

View File

@ -0,0 +1,47 @@
import logging
from data_integration.tasks import test_task
from django.core.management.base import BaseCommand
from django.utils import timezone
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Smoke-test the Celery setup by submitting ``test_task`` and waiting for its result."""

    help = "Test Celery configuration by executing a simple task"

    def handle(self, *args, **options):  # noqa: ARG002
        """Submit ``test_task`` and report whether a result arrives within 10 seconds.

        Positional arguments and options are accepted but not used. All
        outcomes are reported on ``self.stdout``; nothing is returned.
        """
        # Celery documents AsyncResult.get() as raising its own
        # celery.exceptions.TimeoutError, which the builtin TimeoutError does
        # not cover. Catching only the builtin sent every timeout to the
        # generic error handler below instead of the "worker not running" hint.
        from celery.exceptions import TimeoutError as CeleryTimeoutError

        self.stdout.write(f"Testing Celery configuration at {timezone.now()}")
        try:
            # Run the test task
            self.stdout.write("Submitting test task to Celery...")
            result = test_task.delay()
            task_id = result.id
            self.stdout.write(f"Task submitted with ID: {task_id}")
            self.stdout.write("Waiting for task result (this may take a few seconds)...")

            # Try to get the result with a timeout
            try:
                task_result = result.get(timeout=10)  # 10 second timeout
            except (CeleryTimeoutError, TimeoutError):
                # The builtin is kept in the tuple in case a backend raises it.
                self.stdout.write(
                    self.style.WARNING(
                        "⚠️ Task did not complete within the timeout period. "
                        "This might be normal if Celery worker isn't running."
                    )
                )
                self.stdout.write(
                    "To check task status, run Celery worker in another terminal with:\n"
                    " make celery\n"
                    f"And then check status of task {task_id}"
                )
            else:
                self.stdout.write(self.style.SUCCESS(f"✅ Task completed successfully with result: {task_result}"))
        except Exception as e:
            # Top-level CLI boundary: broker/backend problems and task failures
            # are reported together with a hint on how to start Celery.
            self.stdout.write(self.style.ERROR(f"❌ Error testing Celery: {e}"))
            self.stdout.write("Make sure the Celery broker (Redis or SQLite) is properly configured.")
            self.stdout.write("To start Celery, run:\n make celery")

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
"""
Test the ExternalDataSource Model Schema
This management command tests if the ExternalDataSource schema has been correctly updated.
"""
import logging
from data_integration.models import ExternalDataSource
from django.core.management.base import BaseCommand
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Check that ``ExternalDataSource`` accepts, saves and reloads its newer fields.

    The command works on a row named ``"Test Source"`` (created inactive if it
    does not exist), writes sample values to ``error_count``, ``last_error``,
    ``sync_interval`` and ``timeout``, saves, then reloads the row and compares.
    The test row is left in the database afterwards.
    """

    help = "Test ExternalDataSource model fields"

    def handle(self, *args, **options):  # noqa: ARG002
        """Run the set / save / reload / compare cycle and report each step.

        Positional arguments and options are accepted but not used. All
        outcomes are reported on ``self.stdout``; nothing is returned.
        """
        from django.core.exceptions import FieldDoesNotExist

        self.stdout.write("Testing ExternalDataSource schema...")
        try:
            # Get or create a test source
            source, created = ExternalDataSource.objects.get_or_create(
                name="Test Source",
                defaults={
                    "api_url": "https://example.com/api",
                    "is_active": False,
                },
            )
            if created:
                self.stdout.write(f"Created test source with ID: {source.id}")
            else:
                self.stdout.write(f"Using existing test source with ID: {source.id}")

            # Test setting each field
            fields_to_test = {
                "error_count": 0,
                "last_error": "Test error message",
                "sync_interval": 7200,
                "timeout": 600,
            }
            for field, value in fields_to_test.items():
                # setattr() on a model instance succeeds for any name, so it
                # cannot detect a missing field; ask the model metadata instead.
                try:
                    ExternalDataSource._meta.get_field(field)
                except FieldDoesNotExist:
                    self.stdout.write(self.style.ERROR(f"❌ Field {field} doesn't exist on the model"))
                    continue
                setattr(source, field, value)
                self.stdout.write(self.style.SUCCESS(f"✅ Successfully set {field} = {value}"))

            try:
                source.save()
                self.stdout.write(self.style.SUCCESS("✅ Successfully saved with all fields"))
            except Exception as e:
                # Reported but not fatal: the verification below still runs
                # and shows what is actually stored.
                self.stdout.write(self.style.ERROR(f"❌ Error saving model: {e}"))

            # Read back the values to verify
            refreshed_source = ExternalDataSource.objects.get(id=source.id)
            self.stdout.write("\nVerifying saved values:")
            for field, expected_value in fields_to_test.items():
                # "MISSING" marks an attribute the reloaded instance does not have.
                actual_value = getattr(refreshed_source, field, "MISSING")
                if actual_value == expected_value:
                    self.stdout.write(self.style.SUCCESS(f"{field} = {actual_value} (correct)"))
                else:
                    self.stdout.write(self.style.ERROR(f"{field} = {actual_value} (expected: {expected_value})"))
        except Exception as e:
            # Top-level CLI boundary for database or model errors.
            self.stdout.write(self.style.ERROR(f"❌ Test failed: {e}"))

View File

@ -0,0 +1,117 @@
import bleach
from bleach.css_sanitizer import CSSSanitizer
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Demonstrate ``bleach.clean`` with a ``CSSSanitizer`` on a fixed HTML sample.

    The sample contains a script tag, a ``javascript:`` link, an ``onerror``
    handler, a ``transform`` style and an ``img`` tag. The command prints the
    sample, the cleaned result, and one pass/fail line per substring check.
    """

    help = "Test the HTML sanitizer with CSS Sanitizer"

    def handle(self, *args, **options):  # noqa: ARG002
        """Sanitize the sample HTML and report which unsafe fragments remain.

        Positional arguments and options are accepted but not used.
        """
        write = self.stdout.write
        passed = self.style.SUCCESS
        failed = self.style.ERROR

        # Sample input. The lines are kept flush-left on purpose so that the
        # content of the string literal stays exactly as it was.
        test_html = """
<div style="color: red; background-color: yellow; transform: rotate(30deg);">
<p style="font-size: 16px; margin: 10px;">
This is a <span style="font-weight: bold; color: blue;">styled</span> paragraph.
</p>
<script>alert('XSS attack');</script>
<a href="javascript:alert('Evil');" style="text-decoration: none;">Dangerous Link</a>
<img src="x" onerror="alert('XSS')" style="border: 1px solid red;">
</div>
"""

        # CSS properties that may survive inside a style attribute.
        allowed_properties = [
            "color", "background-color",
            "font-family", "font-size", "font-weight", "font-style",
            "text-decoration", "text-align",
            "margin", "margin-left", "margin-right", "margin-top", "margin-bottom",
            "padding", "padding-left", "padding-right", "padding-top", "padding-bottom",
            "border", "border-radius",
            "width", "height", "line-height",
        ]
        # Tags that are kept; everything else is stripped (strip=True below).
        allowed_tags = [
            "b", "i", "u", "em", "strong", "a", "br", "p",
            "ul", "ol", "li", "span", "div", "pre", "code", "blockquote",
        ]
        # Per-tag attribute allow-list.
        allowed_attributes = {
            "a": ["href", "title", "target"],
            "span": ["style", "class"],
            "div": ["style", "class"],
            "p": ["style", "class"],
            "pre": ["style", "class"],
        }

        cleaned_html = bleach.clean(
            test_html,
            tags=allowed_tags,
            attributes=allowed_attributes,
            css_sanitizer=CSSSanitizer(allowed_css_properties=allowed_properties),
            strip=True,
        )

        # Show input and output.
        write(passed("Original HTML:"))
        write(test_html)
        write("\n\n")
        write(passed("Cleaned HTML:"))
        write(cleaned_html)
        write("\n\n")

        # Each check is a plain substring search on the cleaned output:
        # (substring that must be absent, message if absent, message if present).
        write(passed("Security Checks:"))
        checks = (
            ("script", "✓ Script tags removed", "✗ Script tags found"),
            ("javascript:", "✓ JavaScript URLs removed", "✗ JavaScript URLs found"),
            ("onerror", "✓ Event handlers removed", "✗ Event handlers found"),
            ("transform", "✓ Unsafe CSS properties removed", "✗ Unsafe CSS properties found"),
            ("img", "✓ Unsupported tags removed", "✗ Unsupported tags found"),
        )
        for needle, ok_message, bad_message in checks:
            if needle in cleaned_html:
                write(failed(bad_message))
            else:
                write(passed(ok_message))

View File

@ -0,0 +1,68 @@
import logging
from django.conf import settings
from django.core.management.base import BaseCommand
logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Check that the Redis server configured for Celery is reachable and usable.

    Connection parameters are read from the Django settings ``REDIS_HOST``,
    ``REDIS_PORT`` and ``REDIS_DB`` (defaults ``localhost``, ``6379``, ``0``).
    The command pings the server and then round-trips one test key.
    """

    help = "Test Redis connection for Celery"

    def handle(self, *args, **options):  # noqa: ARG002
        """Ping Redis, run a SET/GET round trip and report every step.

        Positional arguments and options are accepted but not used. All
        outcomes are reported on ``self.stdout``; nothing is returned.
        """
        self.stdout.write("Testing Redis connection...")

        # The import gets its own try block. Inside the main try, a failed
        # import would leave the local name ``redis`` unbound, and evaluating
        # the ``except redis.exceptions.ConnectionError`` clause would itself
        # raise, so the "package not installed" message was never printed.
        try:
            import redis
        except ImportError:
            self.stdout.write(self.style.ERROR("❌ Redis package not installed. Install with: pip install redis"))
            return

        try:
            import os

            # Get Redis configuration from settings
            redis_host = getattr(settings, "REDIS_HOST", "localhost")
            redis_port = int(getattr(settings, "REDIS_PORT", 6379))
            redis_db = int(getattr(settings, "REDIS_DB", 0))

            # REDIS_URL is only reported here; the connection below still uses
            # the host/port/db values taken from settings.
            if "REDIS_URL" in os.environ:
                self.stdout.write(f"REDIS_URL environment variable found: {os.environ['REDIS_URL']}")

            # Try to connect and ping
            redis_client = redis.Redis(host=redis_host, port=redis_port, db=redis_db, socket_connect_timeout=2)
            if not redis_client.ping():
                self.stdout.write(self.style.ERROR("❌ Redis ping failed!"))
                return

            self.stdout.write(
                self.style.SUCCESS(
                    f"✅ Redis connection successful! Connected to {redis_host}:{redis_port}/{redis_db}"
                )
            )
            self.stdout.write(f"Broker URL: {settings.CELERY_BROKER_URL}")
            self.stdout.write(f"Result backend: {settings.CELERY_RESULT_BACKEND}")

            # Try to set and get a value
            test_key = "test_redis_connection"
            test_value = "success"
            try:
                redis_client.set(test_key, test_value)
                retrieved_value = redis_client.get(test_key)
                # The client is created without decode_responses, so GET
                # returns bytes and must be decoded before comparing.
                if retrieved_value and retrieved_value.decode() == test_value:
                    self.stdout.write(self.style.SUCCESS("✅ Redis SET/GET test passed!"))
                else:
                    self.stdout.write(
                        self.style.WARNING(
                            f"⚠️ Redis SET/GET test failed: Got {retrieved_value} instead of {test_value}"
                        )
                    )
            finally:
                # Clean up, even when the round trip above raised.
                redis_client.delete(test_key)
        except redis.exceptions.ConnectionError as e:
            self.stdout.write(self.style.ERROR(f"❌ Redis connection error: {e}"))
            self.stdout.write("Celery will use SQLite fallback if configured.")
        except Exception as e:
            # Top-level CLI boundary for anything else (e.g. missing settings).
            self.stdout.write(self.style.ERROR(f"❌ Error: {e}"))