mirror of
https://github.com/kjanat/livegraphs-django.git
synced 2026-01-16 12:12:10 +01:00
Implement data integration tasks with Celery, including periodic fetching and manual refresh of chat data; add utility functions for data processing and transcript handling; create views and URLs for manual data refresh; establish Redis and Celery configuration; enhance error handling and logging; introduce scripts for data cleanup and fixing dashboard data; update documentation for Redis and Celery setup and troubleshooting.
This commit is contained in:
@ -0,0 +1,27 @@
|
||||
from data_integration.models import ExternalDataSource
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Seed the database with a placeholder external data source when none exists."""

    help = "Create default external data source configuration"

    def handle(self, *_args, **_options):
        """Create the default ``ExternalDataSource`` unless one is already stored.

        The sync interval and timeout are read from the
        ``CHAT_DATA_FETCH_INTERVAL`` and ``FETCH_DATA_TIMEOUT`` environment
        variables (falling back to ``"3600"`` and ``"300"``) and converted to
        ``int``. Positional and keyword arguments are ignored.
        """
        report = self.stdout.write

        # Guard clause: any existing row means there is nothing to seed.
        if ExternalDataSource.objects.exists():
            report(self.style.SUCCESS("External data source already exists, no action taken."))
            return

        fetch_interval = int(self.get_env_var("CHAT_DATA_FETCH_INTERVAL", "3600"))
        request_timeout = int(self.get_env_var("FETCH_DATA_TIMEOUT", "300"))

        # The URL and credentials below are literal placeholders; this command
        # does not read them from the environment. The original inline notes
        # say they "will be set via environment variables" -- presumably that
        # happens elsewhere; verify before relying on it.
        created_source = ExternalDataSource.objects.create(  # nosec: B106
            name="Notso AI Chat API",
            api_url="https://HOST/COMPANY/chats",
            auth_username="DEFAULT_USERNAME",
            auth_password="DEFAULT_PASSWORD",
            is_active=True,
            sync_interval=fetch_interval,
            timeout=request_timeout,
        )
        report(self.style.SUCCESS(f"Created default external data source: {created_source.name}"))

    def get_env_var(self, name, default):
        """Return the value of environment variable *name*, or *default* if unset."""
        import os

        return os.getenv(name, default)
|
||||
@ -0,0 +1,11 @@
|
||||
from data_integration.utils import fetch_and_store_chat_data
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that runs a single chat-data fetch in the foreground."""

    help = "Fetches chat data from the external API and stores it in the database"

    def handle(self, *_args, **_options):
        """Announce the fetch, call ``fetch_and_store_chat_data``, then report completion.

        Positional and keyword arguments are accepted for the management
        command interface but are not used.
        """
        emit = self.stdout.write
        as_success = self.style.SUCCESS

        emit(as_success("Starting data fetch..."))
        fetch_and_store_chat_data()
        emit(as_success("Successfully fetched and stored chat data."))
|
||||
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Migration Fix Script for ExternalDataSource
|
||||
|
||||
This management command adds the missing fields to ExternalDataSource
|
||||
model directly using SQL, which is useful if Django migrations
|
||||
are having issues.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import connection
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Patch the ExternalDataSource table by adding any columns it lacks.

    The schema is inspected with SQLite's ``PRAGMA table_info`` and repaired
    with ``ALTER TABLE ... ADD COLUMN`` statements, bypassing Django
    migrations. Because both statements are SQLite-specific, the command
    refuses to run against any other database backend.
    """

    help = "Fix missing columns in ExternalDataSource table"

    # Table inspected and altered by this command.
    _TABLE = "data_integration_externaldatasource"

    # Column name -> SQL type/default clause appended after ``ADD COLUMN <name>``.
    # Insertion order is the order in which columns are checked and added.
    _COLUMN_SQL = {
        "error_count": "integer DEFAULT 0",
        "last_error": "varchar(255) NULL",
        "sync_interval": "integer DEFAULT 3600",
        "timeout": "integer DEFAULT 300",
    }

    def _column_names(self, cursor):
        """Return the column names reported by ``PRAGMA table_info`` for the table."""
        cursor.execute(f"PRAGMA table_info({self._TABLE})")
        # Index 1 of each returned row is read as the column name.
        return [row[1] for row in cursor.fetchall()]

    def _suggest_migrations(self):
        """Print the regular Django migration commands as the recommended alternative."""
        self.stdout.write(self.style.WARNING("Consider running Django migrations instead:"))
        self.stdout.write("  python manage.py makemigrations data_integration")
        self.stdout.write("  python manage.py migrate data_integration")

    def handle(self, *args, **options):  # noqa: ARG002
        """Detect and add the missing ExternalDataSource columns.

        Prints the missing columns, adds each one, and then prints the
        resulting column list. Any exception raised while altering the table
        is reported on stdout together with a hint to use migrations; it is
        not re-raised.
        """
        self.stdout.write("Checking ExternalDataSource schema...")

        # PRAGMA and this ALTER TABLE dialect only work on SQLite; bail out
        # with a readable message instead of an uncaught database error.
        if connection.vendor != "sqlite":
            self.stdout.write(
                self.style.ERROR(f"❌ This command only supports SQLite (current backend: {connection.vendor})")
            )
            self._suggest_migrations()
            return

        # All cursor usage stays inside the ``with`` block so the cursor is
        # guaranteed to be open for both the check and the repair.
        with connection.cursor() as cursor:
            existing = self._column_names(cursor)
            missing_columns = [name for name in self._COLUMN_SQL if name not in existing]

            if not missing_columns:
                self.stdout.write(self.style.SUCCESS("✅ All columns exist in ExternalDataSource table"))
                return

            self.stdout.write(f"Missing columns: {', '.join(missing_columns)}")
            self.stdout.write("Adding missing columns...")

            try:
                # Names and clauses come from the constant table above, never
                # from user input, so interpolating them into the SQL is safe.
                for name in missing_columns:
                    cursor.execute(f"ALTER TABLE {self._TABLE} ADD COLUMN {name} {self._COLUMN_SQL[name]}")

                self.stdout.write(
                    self.style.SUCCESS(f"✅ Successfully added missing columns: {', '.join(missing_columns)}")
                )

                # Verify columns were added
                updated_columns = self._column_names(cursor)
                self.stdout.write(f"Current columns: {', '.join(updated_columns)}")

            except Exception as e:
                # Deliberately broad: this is a best-effort repair tool that
                # reports the failure rather than aborting with a traceback.
                self.stdout.write(self.style.ERROR(f"❌ Error adding columns: {e}"))
                self._suggest_migrations()
|
||||
@ -0,0 +1,47 @@
|
||||
import logging
|
||||
|
||||
from data_integration.tasks import test_task
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.utils import timezone
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Smoke-test the Celery setup by queueing ``test_task`` and awaiting its result."""

    help = "Test Celery configuration by executing a simple task"

    def handle(self, *args, **options):  # noqa: ARG002
        """Submit ``test_task`` and wait up to 10 seconds for its result.

        On success the result is printed. On timeout a warning is printed,
        followed by instructions for starting a worker and checking the task.
        Any other exception is reported on stdout with broker hints; nothing
        is re-raised.
        """
        self.stdout.write(f"Testing Celery configuration at {timezone.now()}")

        try:
            # Run the test task
            self.stdout.write("Submitting test task to Celery...")
            result = test_task.delay()
            task_id = result.id

            self.stdout.write(f"Task submitted with ID: {task_id}")
            self.stdout.write("Waiting for task result (this may take a few seconds)...")

            # Celery raises its own TimeoutError class, which is not the
            # builtin, so the builtin alone would never match here. The result
            # object exposes the class it raises; fall back to the builtin if
            # the attribute is absent.
            timeout_errors = (TimeoutError, getattr(result, "TimeoutError", TimeoutError))

            # Try to get the result with a timeout
            try:
                task_result = result.get(timeout=10)  # 10 second timeout
            except timeout_errors:
                self.stdout.write(
                    self.style.WARNING(
                        "⚠️ Task did not complete within the timeout period. "
                        "This might be normal if Celery worker isn't running."
                    )
                )
            else:
                self.stdout.write(self.style.SUCCESS(f"✅ Task completed successfully with result: {task_result}"))
                return

            # Only reached after a timeout: explain how to follow up.
            self.stdout.write(
                "To check task status, run Celery worker in another terminal with:\n"
                "    make celery\n"
                f"And then check status of task {task_id}"
            )

        except Exception as e:
            # Deliberately broad: this is a diagnostic command, so broker or
            # task failures are reported rather than raised.
            self.stdout.write(self.style.ERROR(f"❌ Error testing Celery: {e}"))
            self.stdout.write("Make sure the Celery broker (Redis or SQLite) is properly configured.")
            self.stdout.write("To start Celery, run:\n    make celery")
|
||||
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Test the ExternalDataSource Model Schema
|
||||
|
||||
This management command tests if the ExternalDataSource schema has been correctly updated.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from data_integration.models import ExternalDataSource
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Check that ExternalDataSource exposes and persists the expected fields."""

    help = "Test ExternalDataSource model fields"

    def handle(self, *args, **options):  # noqa: ARG002
        """Write test values to a "Test Source" row and read them back.

        A row named ``"Test Source"`` is fetched or created, each field under
        test is set (if the model declares it), the row is saved, and the
        stored values are compared against the expected ones. The row is left
        in the database afterwards. All failures are reported on stdout;
        nothing is re-raised.
        """
        # Local import keeps this block self-contained; Django is already a
        # dependency of this module.
        from django.core.exceptions import FieldDoesNotExist

        self.stdout.write("Testing ExternalDataSource schema...")

        try:
            # Get or create a test source
            source, created = ExternalDataSource.objects.get_or_create(
                name="Test Source",
                defaults={
                    "api_url": "https://example.com/api",
                    "is_active": False,
                },
            )

            if created:
                self.stdout.write(f"Created test source with ID: {source.id}")
            else:
                self.stdout.write(f"Using existing test source with ID: {source.id}")

            # Test setting each field
            fields_to_test = {
                "error_count": 0,
                "last_error": "Test error message",
                "sync_interval": 7200,
                "timeout": 600,
            }

            for field, value in fields_to_test.items():
                # ``setattr`` succeeds for any attribute name on a model
                # instance, so it cannot detect a missing field. Ask the model
                # metadata whether the field is actually declared.
                try:
                    ExternalDataSource._meta.get_field(field)
                except FieldDoesNotExist:
                    self.stdout.write(self.style.ERROR(f"❌ Field {field} doesn't exist on the model"))
                    continue

                setattr(source, field, value)
                self.stdout.write(self.style.SUCCESS(f"✅ Successfully set {field} = {value}"))

            try:
                source.save()
                self.stdout.write(self.style.SUCCESS("✅ Successfully saved with all fields"))
            except Exception as e:
                # Keep going so the verification below shows which values
                # did not persist.
                self.stdout.write(self.style.ERROR(f"❌ Error saving model: {e}"))

            # Read back the values to verify
            refreshed_source = ExternalDataSource.objects.get(id=source.id)
            self.stdout.write("\nVerifying saved values:")
            for field, expected_value in fields_to_test.items():
                # "MISSING" stands in for attributes the reloaded row lacks.
                actual_value = getattr(refreshed_source, field, "MISSING")
                if actual_value == expected_value:
                    self.stdout.write(self.style.SUCCESS(f"✅ {field} = {actual_value} (correct)"))
                else:
                    self.stdout.write(self.style.ERROR(f"❌ {field} = {actual_value} (expected: {expected_value})"))

        except Exception as e:
            # Deliberately broad: this is a diagnostic command, so database
            # errors are reported rather than raised.
            self.stdout.write(self.style.ERROR(f"❌ Test failed: {e}"))
|
||||
@ -0,0 +1,117 @@
|
||||
import bleach
|
||||
from bleach.css_sanitizer import CSSSanitizer
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Run a fixed HTML sample through bleach and report what was stripped."""

    help = "Test the HTML sanitizer with CSS Sanitizer"

    def handle(self, *args, **options):  # noqa: ARG002
        """Sanitize a sample document and print the before/after plus five checks.

        Each check looks for a marker substring in the cleaned output and
        prints a success line when it is absent and an error line when it is
        still present.
        """
        emit = self.stdout.write
        ok = self.style.SUCCESS
        bad = self.style.ERROR

        # Sample markup mixing allowed styling with script, javascript: URL,
        # event-handler, disallowed-CSS and disallowed-tag content.
        test_html = """
        <div style="color: red; background-color: yellow; transform: rotate(30deg);">
            <p style="font-size: 16px; margin: 10px;">
                This is a <span style="font-weight: bold; color: blue;">styled</span> paragraph.
            </p>
            <script>alert('XSS attack');</script>
            <a href="javascript:alert('Evil');" style="text-decoration: none;">Dangerous Link</a>
            <img src="x" onerror="alert('XSS')" style="border: 1px solid red;">
        </div>
        """

        # CSS properties that may survive inside ``style`` attributes.
        permitted_css = [
            "color",
            "background-color",
            "font-family",
            "font-size",
            "font-weight",
            "font-style",
            "text-decoration",
            "text-align",
            "margin",
            "margin-left",
            "margin-right",
            "margin-top",
            "margin-bottom",
            "padding",
            "padding-left",
            "padding-right",
            "padding-top",
            "padding-bottom",
            "border",
            "border-radius",
            "width",
            "height",
            "line-height",
        ]
        css_sanitizer = CSSSanitizer(allowed_css_properties=permitted_css)

        # Tags that may survive, and the attributes allowed on each of them.
        permitted_tags = [
            "b",
            "i",
            "u",
            "em",
            "strong",
            "a",
            "br",
            "p",
            "ul",
            "ol",
            "li",
            "span",
            "div",
            "pre",
            "code",
            "blockquote",
        ]
        permitted_attributes = {
            "a": ["href", "title", "target"],
            "span": ["style", "class"],
            "div": ["style", "class"],
            "p": ["style", "class"],
            "pre": ["style", "class"],
        }

        cleaned_html = bleach.clean(
            test_html,
            tags=permitted_tags,
            attributes=permitted_attributes,
            css_sanitizer=css_sanitizer,
            strip=True,
        )

        # Show the input and the sanitized output.
        emit(ok("Original HTML:"))
        emit(test_html)
        emit("\n\n")
        emit(ok("Cleaned HTML:"))
        emit(cleaned_html)
        emit("\n\n")

        # (marker substring, label) pairs: the marker must be absent from the
        # cleaned output for the check to pass.
        emit(ok("Security Checks:"))
        security_checks = (
            ("script", "Script tags"),
            ("javascript:", "JavaScript URLs"),
            ("onerror", "Event handlers"),
            ("transform", "Unsafe CSS properties"),
            ("img", "Unsupported tags"),
        )
        for marker, label in security_checks:
            if marker in cleaned_html:
                emit(bad(f"✗ {label} found"))
            else:
                emit(ok(f"✓ {label} removed"))
|
||||
@ -0,0 +1,68 @@
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Check that the Redis server configured for Celery is reachable and usable."""

    help = "Test Redis connection for Celery"

    def handle(self, *args, **options):  # noqa: ARG002
        """Ping Redis and run a SET/GET/DELETE round trip on a test key.

        Host, port and database come from the ``REDIS_HOST``, ``REDIS_PORT``
        and ``REDIS_DB`` settings (defaulting to ``localhost``, ``6379`` and
        ``0``). All failures are reported on stdout; nothing is re-raised.
        """
        import os

        self.stdout.write("Testing Redis connection...")

        # Import separately from the connection logic. If this import lived in
        # the ``try`` below and failed, evaluating the
        # ``except redis.exceptions.ConnectionError`` clause would itself raise
        # UnboundLocalError and the install hint would never be shown.
        try:
            import redis
        except ImportError:
            self.stdout.write(self.style.ERROR("❌ Redis package not installed. Install with: pip install redis"))
            return

        try:
            # Get Redis configuration from settings
            redis_host = getattr(settings, "REDIS_HOST", "localhost")
            redis_port = int(getattr(settings, "REDIS_PORT", 6379))
            redis_db = int(getattr(settings, "REDIS_DB", 0))

            # REDIS_URL is only reported, not used: the connection below is
            # always built from the settings-derived host/port/db.
            # NOTE(review): the URL is printed verbatim and may embed
            # credentials -- confirm that is acceptable for this diagnostic.
            if "REDIS_URL" in os.environ:
                self.stdout.write(f"REDIS_URL environment variable found: {os.environ['REDIS_URL']}")

            # Try to connect and ping
            redis_client = redis.Redis(host=redis_host, port=redis_port, db=redis_db, socket_connect_timeout=2)

            ping_result = redis_client.ping()

            if ping_result:
                self.stdout.write(
                    self.style.SUCCESS(
                        f"✅ Redis connection successful! Connected to {redis_host}:{redis_port}/{redis_db}"
                    )
                )
                self.stdout.write(f"Broker URL: {settings.CELERY_BROKER_URL}")
                self.stdout.write(f"Result backend: {settings.CELERY_RESULT_BACKEND}")

                # Try to set and get a value
                test_key = "test_redis_connection"
                test_value = "success"
                redis_client.set(test_key, test_value)
                retrieved_value = redis_client.get(test_key)

                # The stored value is decoded before comparing it with the
                # original string.
                if retrieved_value and retrieved_value.decode() == test_value:
                    self.stdout.write(self.style.SUCCESS("✅ Redis SET/GET test passed!"))
                else:
                    self.stdout.write(
                        self.style.WARNING(
                            f"⚠️ Redis SET/GET test failed: Got {retrieved_value} instead of {test_value}"
                        )
                    )

                # Clean up
                redis_client.delete(test_key)
            else:
                self.stdout.write(self.style.ERROR("❌ Redis ping failed!"))
        except redis.exceptions.ConnectionError as e:
            self.stdout.write(self.style.ERROR(f"❌ Redis connection error: {e}"))
            self.stdout.write("Celery will use SQLite fallback if configured.")
        except Exception as e:
            # Deliberately broad: this is a diagnostic command, so missing
            # settings or other Redis errors are reported rather than raised.
            self.stdout.write(self.style.ERROR(f"❌ Error: {e}"))
|
||||
Reference in New Issue
Block a user