diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 464af2f..d69f99e 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -61,20 +61,58 @@
   "customizations": {
     "vscode": {
       "extensions": [
+        "bierner.github-markdown-preview",
+        "bierner.markdown-mermaid",
+        "bierner.markdown-preview-github-styles",
         "charliermarsh.ruff",
+        "CS50.ddb50",
+        "DavidAnson.vscode-markdownlint",
         "esbenp.prettier-vscode",
         "GitHub.copilot-chat",
         "GitHub.copilot-workspace",
+        "GitHub.remotehub",
+        "github.vscode-github-actions",
         "ms-vscode.copilot-mermaid-diagram",
         "ms-vscode.vscode-copilot-data-analysis",
         "ms-vscode.vscode-copilot-vision",
+        "ms-vscode.vscode-github-issue-notebooks",
         "ms-vscode.vscode-websearchforcopilot",
         "PyCQA.bandit-pycqa",
+        "samuelcolvin.jinjahtml",
+        "shd101wyy.markdown-preview-enhanced",
         "tamasfe.even-better-toml",
         "timonwong.shellcheck",
-        "trunk.io"
+        "trunk.io",
+        "VisualStudioExptTeam.intellicode-api-usage-examples",
+        "yzhang.markdown-all-in-one"
       ],
       "settings": {
+        "github.copilot.chat.codeGeneration.instructions": [
+          {
+            "text": "This dev container includes an up-to-date version of Git, built from source as needed, pre-installed and available on the `PATH`."
+          },
+          {
+            "text": "This dev container includes the Docker CLI (`docker`) pre-installed and available on the `PATH` for running and managing containers using a dedicated Docker daemon running inside the dev container."
+          },
+          {
+            "text": "This dev container includes Go and common Go utilities pre-installed and available on the `PATH`, along with the Go language extension for Go development."
+          },
+          {
+            "text": "This dev container includes `node`, `npm` and `eslint` pre-installed and available on the `PATH` for Node.js and JavaScript development."
+          },
+          {
+            "text": "This dev container includes `python3` and `pip3` pre-installed and available on the `PATH`, along with the Python language extensions for Python development."
+          },
+          {
+            "text": "This dev container includes an SSH server so that you can use an external terminal, sftp, or SSHFS to interact with it. The first time you've started the container, you will want to set a password for your user. With each connection to the container, you'll want to forward the SSH port to your local machine and use a local terminal or other tool to connect using the password you set."
+          },
+          {
+            "text": "This dev container includes the GitHub CLI (`gh`), which is pre-installed and available on the `PATH`. IMPORTANT: `gh api -f` does not support object values, use multiple `-f` flags with hierarchical keys and string values instead. When using GitHub actions `actions/upload-artifact` or `actions/download-artifact` use v4 or later."
+ }, + { + "text": "This workspace is in a dev container running on \"Ubuntu 22.04.5 LTS\".\n\nUse `\"$BROWSER\" ` to open a webpage in the host's default browser.\n\nSome of the command line tools available on the `PATH`: `apt`, `dpkg`, `docker`, `git`, `gh`, `curl`, `wget`, `ssh`, `scp`, `rsync`, `gpg`, `ps`, `lsof`, `netstat`, `top`, `tree`, `find`, `grep`, `zip`, `unzip`, `tar`, `gzip`, `bzip2`, `xz`" + } + ], "[css]": { "editor.defaultFormatter": "esbenp.prettier-vscode", "editor.formatOnSave": true @@ -87,6 +131,10 @@ "editor.defaultFormatter": "esbenp.prettier-vscode", "editor.formatOnSave": true }, + "[markdown]": { + "editor.defaultFormatter": "DavidAnson.vscode-markdownlint", + "editor.formatOnSave": true + }, "[python]": { "editor.codeActionsOnSave": { "source.fixAll": "explicit", @@ -130,14 +178,19 @@ "fileMatch": ["*/devcontainer.json"], "url": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json" } - ] + ], + "markdownlint.config": { + "MD007": { + "indent": 4 + } + } } } }, // Use 'forwardPorts' to make a list of ports inside the container available locally. "forwardPorts": [6379, 8001], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "sudo apt update && sudo apt full-upgrade -y && sudo apt autoremove -y; sudo apt install -y ruby-foreman; npm i -g prettier prettier-plugin-jinja-template; redis-server --daemonize yes; uname -a; export UV_LINK_MODE=copy; uv python install; uv pip install -Ur pyproject.toml" + "postCreateCommand": "bash .devcontainer/postCreateCommand.sh" // Configure tool-specific properties. // "customizations": {}, // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh new file mode 100644 index 0000000..fde5088 --- /dev/null +++ b/.devcontainer/postCreateCommand.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +sudo apt update +sudo apt full-upgrade -y +sudo apt autoremove -y; + +sudo apt install -y ruby-foreman; +npm install --save-dev prettier prettier-plugin-jinja-template markdownlint-cli2; + +# Install ohmyposh +curl -s https://ohmyposh.dev/install.sh | bash -s + +if ! command -v oh-my-posh &> /dev/null; then + echo "Failed to install oh-my-posh. Please check the installation script." +else + echo "oh-my-posh installed successfully." + echo "Setting up oh-my-posh theme..." + # Install fonts using proper bash array syntax + fonts=("Hack" "AnonymousPro" "Noto") + for font in "${fonts[@]}"; do + oh-my-posh font install "$font" + done +fi + +# Setup shell completions +echo "Setting up shell completions..." + +# if uv, uvx, ruff, or oh-my-posh are not available, don't add shell completions for that tool +if ! command -v uv &> /dev/null; then + echo "uv not found, skipping shell completion setup for uv" +else + echo "uv found, setting up shell completion" + uv generate-shell-completion bash > ~/.cache/uv-completion.bash +fi + +if ! command -v uvx &> /dev/null; then + echo "uvx not found, skipping shell completion setup for uvx" +else + echo "uvx found, setting up shell completion" + uvx --generate-shell-completion bash > ~/.cache/uvx-completion.bash +fi + +if ! command -v ruff &> /dev/null; then + echo "ruff not found, skipping shell completion setup for ruff" +else + echo "ruff found, setting up shell completion" + ruff generate-shell-completion bash > ~/.cache/ruff-completion.bash +fi + +if ! 
command -v oh-my-posh &> /dev/null; then
+    echo "oh-my-posh not found, skipping shell completion setup for oh-my-posh"
+else
+    echo "oh-my-posh found, setting up the prompt init script"
+    oh-my-posh init bash --config ~/.cache/oh-my-posh/themes/paradox.omp.json > ~/.cache/oh-my-posh-completion.bash
+fi
+
+# Check if ~/.bashrc already contains the completion setup (the marker line written by the heredoc below)
+if ! grep -q '# Shell completions' ~/.bashrc; then
+    echo "Adding shell completions to ~/.bashrc"
+    cat << EOF >> ~/.bashrc
+
+# Shell completions
+if [ -f ~/.cache/uv-completion.bash ]; then
+    source ~/.cache/uv-completion.bash
+fi
+if [ -f ~/.cache/uvx-completion.bash ]; then
+    source ~/.cache/uvx-completion.bash
+fi
+if [ -f ~/.cache/ruff-completion.bash ]; then
+    source ~/.cache/ruff-completion.bash
+fi
+if [ -f ~/.cache/oh-my-posh-completion.bash ]; then
+    source ~/.cache/oh-my-posh-completion.bash
+fi
+export UV_LINK_MODE=copy
+
+EOF
+    echo "Shell completions added to ~/.bashrc"
+else
+    echo "Shell completions already present in ~/.bashrc"
+fi
+
+uv python install
+uv pip install -Ur pyproject.toml --group dev
+
+redis-server --daemonize yes;
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..3a1b621
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,281 @@
+# Instructions for Copilot
+
+## General Instructions
+
+-   Use clear and concise language.
+-   Provide code examples where applicable.
+-   Write clean code with Django best practices.
+-   Use comments to explain complex logic.
+-   Use packages and libraries where appropriate and possible to avoid reinventing the wheel.
+-   Update [TODO](TODO.md) and [README](README.md) as appropriate.
+
+## uv
+
+uv is a fast Python package and project manager written in Rust. Use uv to manage dependencies and virtual environments, and to run Python scripts with improved performance.
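+
+The script-related commands below (`uv add --script`, `uv remove --script`) manage PEP 723 inline metadata at the top of the script itself. As a minimal sketch (the `requests` dependency and the HTTP call are hypothetical examples, not part of this project), a script managed this way starts with a block like:
+
+```python
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "requests",
+# ]
+# ///
+import requests
+
+print(requests.get("https://example.com").status_code)
+```
+
+Running such a script with `uv run` resolves the declared dependencies automatically in an isolated environment.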
+
+### Running Python Scripts
+
+-   Execute a Python script with uv:
+
+    ```bash
+    uv run python ${FILE}.py
+    ```
+
+-   Run a script with a specific Python version:
+
+    ```bash
+    uv run --python 3.8 ${FILE}.py
+    ```
+
+-   Run a script with arguments:
+
+    ```bash
+    uv run python ${FILE}.py --arg1 value1 --arg2 value2
+    ```
+
+-   Add a dependency to a standalone script:
+
+    ```bash
+    uv add --script ${FILE}.py <package>
+    ```
+
+-   Remove a dependency from a script:
+
+    ```bash
+    uv remove --script ${FILE}.py <package>
+    ```
+
+### Package Management
+
+-   Install a package:
+
+    ```bash
+    uv pip install <package>
+    ```
+
+-   Install from a requirements file:
+
+    ```bash
+    uv pip install -r requirements.txt
+    ```
+
+-   Add a package to the current project:
+
+    ```bash
+    uv add <package>
+    ```
+
+-   Remove a package:
+
+    ```bash
+    uv remove <package>
+    ```
+
+### Virtual Environment Management
+
+-   Create and activate a virtual environment:
+
+    ```bash
+    uv venv .venv
+    source .venv/bin/activate  # Linux/macOS
+    ```
+
+-   Install project dependencies into an environment:
+
+    ```bash
+    uv pip sync requirements.txt
+    ```
+
+-   Lock dependencies for reproducible environments:
+
+    ```bash
+    uv lock
+    ```
+
+### Project Management
+
+-   Create a new Python project:
+
+    ```bash
+    uv init
+    ```
+
+-   Build a project into distribution archives:
+
+    ```bash
+    uv build
+    ```
+
+-   View the dependency tree:
+
+    ```bash
+    uv tree
+    ```
+
+-   Publish a package to PyPI:
+
+    ```bash
+    uv publish
+    ```
+
+### Python Version Management
+
+-   Install a specific Python version:
+
+    ```bash
+    uv python install 3.11
+    ```
+
+-   List available Python versions:
+
+    ```bash
+    uv python list
+    ```
+
+-   Find installed Python versions:
+
+    ```bash
+    uv python find
+    ```
+
+-   Pin the project to a specific Python version:
+
+    ```bash
+    uv python pin 3.11
+    ```
+
+### Performance Benefits
+
+-   uv offers significantly faster package installation than pip
+-   Built-in caching improves repeated operations
+-   Compatible with the existing Python tooling ecosystem
+-   Reliable dependency resolution to avoid conflicts
+
+## Project Structure
+
+This section provides a comprehensive overview of the LiveGraphsDjango project structure and the function of each key file. Please update this section whenever there are noteworthy changes to the structure or to a file's function.
+
+ +```tree +LiveGraphsDjango/ +├── dashboard_project/ # Main Django project directory +│ ├── __init__.py # Python package declaration +│ ├── __main__.py # Entry point for running module as script +│ ├── asgi.py # ASGI configuration for async web servers +│ ├── manage.py # Django command-line utility +│ ├── wsgi.py # WSGI configuration for web servers +│ ├── accounts/ # User authentication and company management +│ │ ├── admin.py # Admin interface for accounts +│ │ ├── forms.py # User registration and login forms +│ │ ├── models.py # User and Company models +│ │ ├── urls.py # URL routing for accounts +│ │ └── views.py # View functions for user authentication +│ ├── dashboard/ # Core dashboard functionality +│ │ ├── admin.py # Admin interface for dashboard components +│ │ ├── forms.py # Dashboard configuration forms +│ │ ├── models.py # Dashboard, DataSource models +│ │ ├── signals.py # Signal handlers for dashboard events +│ │ ├── urls.py # URL routing for dashboard +│ │ ├── utils.py # Utility functions for dashboard +│ │ ├── views.py # Main dashboard view functions +│ │ ├── views_export.py # Data export views (CSV, JSON, Excel) +│ │ ├── management/ # Custom management commands +│ │ │ └── commands/ # Django management commands +│ │ ├── migrations/ # Database migrations +│ │ └── templatetags/ # Custom template tags +│ ├── dashboard_project/ # Project settings and configuration +│ │ ├── settings.py # Django settings +│ │ ├── urls.py # Main URL configuration +│ │ └── celery.py # Celery configuration for async tasks +│ ├── data_integration/ # External data integration +│ │ ├── admin.py # Admin interface for data sources +│ │ ├── models.py # ExternalDataSource, ChatSession models +│ │ ├── tasks.py # Celery tasks for data fetching +│ │ ├── urls.py # URL routing for data integration +│ │ ├── utils.py # Data fetching and transformation utilities +│ │ └── views.py # Views for data source management +│ ├── media/ # User-uploaded files +│ │ └── data_sources/ # Uploaded CSV data sources +│ ├── scripts/ # Utility scripts +│ │ ├── cleanup_duplicates.py # Script to remove duplicate data +│ │ └── fix_dashboard_data.py # Script to fix corrupt dashboard data +│ ├── static/ # Static assets (CSS, JS, images) +│ │ ├── css/ # Stylesheets +│ │ ├── img/ # Images +│ │ └── js/ # JavaScript files +│ └── templates/ # HTML templates +│ ├── base.html # Base template with common layout +│ ├── accounts/ # Account-related templates +│ └── dashboard/ # Dashboard-related templates +├── docs/ # Project documentation +│ ├── CELERY_REDIS.md # Celery and Redis setup guide +│ └── TROUBLESHOOTING.md # Common issues and solutions +├── examples/ # Example data files +│ ├── 132f3a8c-3ba5-4d89-ae04-cd83f1bc5272.txt # Sample transcript +│ ├── jumbo.csv # Sample chat data +│ ├── sample.csv # Generic sample data +│ └── sessions.csv # Sample session data +├── nginx/ # Nginx configuration +│ └── conf.d/ # Nginx site configs +├── .github/ # GitHub-specific files +│ └── copilot-instructions.md # Instructions for GitHub Copilot (this file) +├── dev.sh # Development environment setup script +├── docker-compose.yml # Docker Compose configuration +├── Dockerfile # Docker image definition +├── IMPLEMENTATION_SUMMARY.md # Implementation details and status +├── Makefile # Common commands for the project +├── Procfile # Heroku deployment configuration +├── PROJECT_OVERVIEW.md # Project overview and architecture +├── pyproject.toml # Python project configuration +├── QUICK_START_GUIDE.md # Getting started guide +├── README.md # Project introduction 
and overview +├── requirements.txt # Python dependencies +├── start.sh # Production startup script +└── TODO.md # Pending tasks and features +``` + +### Key Component Relationships + +1. **Multi-Tenant Architecture**: + +- Companies are the top-level organizational unit +- Users belong to Companies and have different permission levels +- DataSources are owned by Companies +- Dashboards display analytics based on DataSources + +2. **Data Integration Flow**: + +- External APIs are configured via ExternalDataSource models +- Data is fetched, parsed, and stored as ChatSessions and ChatMessages +- Dashboard views aggregate and visualize this data + +3. **Export Functionality**: + +- Export available in CSV, JSON, and Excel formats +- Filtering options to customize exported data + +### Important Note + +**Please update this section whenever:** + +1. New files or directories are added to the project +2. The function of existing files changes significantly +3. New relationships between components are established +4. The architecture of the application changes + +This ensures that anyone working with GitHub Copilot has an up-to-date understanding of the project structure. diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..8b742bc --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,17 @@ +{ + "default": true, + "MD007": { + "indent": 4, + "start_indented": false, + "start_indent": 4 + }, + "MD013": false, + "MD029": false, + "MD030": { + "ul_single": 3, + "ol_single": 2, + "ul_multi": 3, + "ol_multi": 2 + }, + "MD033": false +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a844df..131e73d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,6 +57,12 @@ repos: # types_or: [javascript, jsx, ts, tsx, css, scss, json, yaml, markdown] # exclude: '.*\.html$' + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.18.1 + hooks: + - id: markdownlint-cli2 + args: [--fix] + # Ruff for linting and formatting - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.11.10 diff --git a/.prettierignore b/.prettierignore index e2e7e83..35562c7 100644 --- a/.prettierignore +++ b/.prettierignore @@ -48,5 +48,9 @@ docker-compose.override.yml *.swp *.swo -# Ignore all SQLite3 files: -**/*.sqlite3 +# Ignore all SQLite files: +*.sqlite3 +*.sqlite + +# Ignore markdown +*.md diff --git a/.prettierrc b/.prettierrc index 6aec78f..98b51e3 100644 --- a/.prettierrc +++ b/.prettierrc @@ -18,6 +18,15 @@ "options": { "parser": "jinja-template" } + }, + { + "files": ["*.md", "*.markdown"], + "options": { + "tabWidth": 2, + "useTabs": false, + "proseWrap": "preserve", + "printWidth": 100 + } } ], "plugins": ["prettier-plugin-jinja-template"] diff --git a/.uv b/.uv index 647b153..04e0726 100644 --- a/.uv +++ b/.uv @@ -16,3 +16,6 @@ environment-checks = ["python", "dependencies"] # How to resolve dependencies not specified with exact versions dependency-resolution = "strict" + +# If the cache and target directories are on different filesystems, hardlinking may not be supported. 
+link-mode = "copy" diff --git a/.vscode/settings.json b/.vscode/settings.json index 8c4fed7..2657d61 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -43,5 +43,12 @@ "notebook.source.organizeImports": "explicit" }, "notebook.formatOnSave.enabled": true, - "prettier.requireConfig": true + "prettier.requireConfig": true, + "markdownlint.config": { + "default": true, + }, + "[markdown]": { + "editor.defaultFormatter": "DavidAnson.vscode-markdownlint", + "editor.formatOnSave": true + } } diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md deleted file mode 100644 index 5a4028c..0000000 --- a/IMPLEMENTATION_SUMMARY.md +++ /dev/null @@ -1,134 +0,0 @@ -# Chat Analytics Dashboard: Implementation Summary - -## Core Features Implemented - -1. **Multi-Tenant Architecture**: - - - Companies have isolated data and user access - - Users belong to specific companies - - Role-based permissions (admin, company admin, regular user) - -2. **Data Management**: - - - CSV file upload and processing - - Data source management - - Chat session records with comprehensive metadata - -3. **Dashboard Visualization**: - - - Interactive charts using Plotly.js - - Key metrics and KPIs - - Time-series analysis - - Geographic distribution - - Sentiment analysis - - Category distribution - -4. **Search and Analysis**: - - - Full-text search across chat sessions - - Filtering by various attributes - - Detailed view of individual chat sessions - - Transcript viewing - -5. **User Management**: - - - User registration and authentication - - Profile management - - Password change functionality - - Role assignment - -6. **Admin Interface**: - - - Company management - - User administration - - Data source oversight - - System-wide configuration - -7. **Responsive Design**: - - Mobile-friendly interface using Bootstrap 5 - - Consistent layout and navigation - - Accessible UI components - -## Technical Implementation - -### Backend (Django) - -- **Custom User Model**: Extended for company association and roles -- **Database Models**: Structured for efficient data storage and queries -- **View Logic**: Separation of concerns with dedicated view functions -- **Form Handling**: Validated data input and file uploads -- **Data Processing**: CSV parsing and structured storage -- **Template Context**: Prepared data for frontend rendering -- **URL Routing**: Clean URL structure -- **Access Control**: Permission checks throughout - -### Frontend - -- **Bootstrap 5**: For responsive layout and UI components -- **Plotly.js**: For interactive charts and visualizations -- **jQuery**: For AJAX functionality -- **Font Awesome**: For icons -- **Custom CSS**: For styling enhancements - -### Data Flow - -1. **Upload Process**: - - - File validation - - CSV parsing - - Data normalization - - Record creation - - Association with company - -2. **Dashboard Generation**: - - - Data aggregation - - Statistical calculations - - Chart data preparation - - JSON serialization for frontend - -3. 
**User Authentication**: - - Login/registration handling - - Session management - - Permission checks - - Access control based on company - -### Deployment Configuration - -- **Docker**: Containerization for consistent deployment -- **Docker Compose**: Multi-container orchestration -- **Nginx**: Web server and static file serving -- **PostgreSQL**: Production-ready database -- **Gunicorn**: WSGI HTTP server - -## API Structure - -While the current implementation does not have a formal REST API, the foundation is in place for adding one in the future: - -1. **Dashboard API**: Already implemented for chart data (JSON responses) -2. **Data Source API**: Potential endpoint for uploading data programmatically -3. **Chat Session API**: Could expose data for external integration - -## Testing and Development - -- **Sample Data Generation**: Management command to create test data -- **Local Development Setup**: Easy configuration with sqlite -- **Production Deployment**: Docker-based for scalability - -## Security Considerations - -- **Authentication**: Django's secure authentication system -- **Data Isolation**: Company-specific queries prevent data leakage -- **Password Management**: Secure password handling -- **CSRF Protection**: Django's built-in CSRF protection -- **Input Validation**: Form validation for all user inputs - -## Future Extensions - -The architecture supports easy extension for: - -1. **API Integration**: Direct connection to chat platforms -2. **Real-time Updates**: WebSockets for live dashboard updates -3. **Advanced Analytics**: Machine learning integration -4. **Customizable Reports**: Report generation and scheduling -5. **Enhanced Visualization**: More chart types and interactive features diff --git a/PRETTIER_SETUP.md b/PRETTIER_SETUP.md deleted file mode 100644 index 60fc5ff..0000000 --- a/PRETTIER_SETUP.md +++ /dev/null @@ -1,97 +0,0 @@ -# Prettier for Django Templates - -This project uses Prettier with the `prettier-plugin-django-annotations` plugin to format HTML templates with Django template syntax. - -## Setup - -The project is already configured with Prettier integration in pre-commit hooks. The configuration includes: - -1. `.prettierrc` - Configuration file with Django HTML support -2. `.prettierignore` - Files to exclude from formatting -3. Pre-commit hook for automatic formatting on commits - -### Manual Installation - -To use Prettier locally (outside of pre-commit hooks), you'll need to install the dependencies: - -```bash -# Using npm -npm install - -# Or install just the required packages -npm install --save-dev prettier prettier-plugin-django-annotations -``` - -## Usage - -### With Pre-commit - -Prettier will automatically run as part of the pre-commit hooks when you commit changes. 
- -To manually run the pre-commit hooks on all files: - -```bash -pre-commit run prettier --all-files -``` - -### Using npm Scripts - -The package.json includes npm scripts for formatting: - -```bash -# Format all static files -npm run format - -# Check formatting without modifying files -npm run format:check -``` - -### Command Line - -You can also run Prettier directly: - -```bash -# Format a specific file -npx prettier --write path/to/template.html - -# Format all HTML files -npx prettier --write "dashboard_project/templates/**/*.html" -``` - -## VSCode Integration - -For VSCode users, install the Prettier extension and add these settings to your `.vscode/settings.json`: - -```json -{ - "editor.defaultFormatter": "esbenp.prettier-vscode", - "[html]": { - "editor.defaultFormatter": "esbenp.prettier-vscode", - "editor.formatOnSave": true - }, - "prettier.requireConfig": true -} -``` - -## Ignoring Parts of Templates - -If you need to prevent Prettier from formatting a section of your template: - -```html -{# prettier-ignore #} -
-<div>This section will not be formatted by Prettier.</div>
-
-<!-- prettier-ignore -->
-<div>
-  This works too.
-</div>
-``` - -## Django Template Support - -The `prettier-plugin-django-annotations` plugin provides special handling for Django templates, including: - -- Proper formatting of Django template tags (`{% %}`) -- Support for Django template comments (`{# #}`) -- Preservation of Django template variable output (`{{ }}`) -- Special handling for Django template syntax inside HTML attributes diff --git a/PROJECT_OVERVIEW.md b/PROJECT_OVERVIEW.md deleted file mode 100644 index 274c061..0000000 --- a/PROJECT_OVERVIEW.md +++ /dev/null @@ -1,128 +0,0 @@ -# Chat Analytics Dashboard Project - -## Overview - -This Django project creates a multi-tenant dashboard application for analyzing chat session data. Companies can upload their chat data (in CSV format) and view analytics and metrics through an interactive dashboard. The application supports user authentication, role-based access control, and separate data isolation for different companies. - -## Project Structure - -The project consists of two main Django apps: - -1. **accounts**: Handles user authentication, company management, and user roles -2. **dashboard**: Manages data sources, chat sessions, and dashboard visualization - -## Key Features - -- **Multi-company Support**: Each company has their own private dashboards and data -- **User Management**: Different user roles (admin, company admin, regular user) -- **CSV File Upload**: Upload and process CSV files containing chat session data -- **Interactive Dashboard**: Visualize chat data with charts and metrics -- **Search Functionality**: Find specific chat sessions based on various criteria -- **Data Exploration**: Drill down into individual chat sessions for detailed analysis - -## Setup and Installation - -### Requirements - -- Python 3.8+ -- Django 4.0+ -- Other dependencies listed in `requirements.txt` - -### Installation Steps - -1. Clone the repository -2. Set up a virtual environment -3. Install dependencies with `pip install -r requirements.txt` -4. Run database migrations with `python manage.py migrate` -5. Create a superuser with `python manage.py createsuperuser` -6. Start the development server with `python manage.py runserver` - -### Creating Sample Data - -To quickly populate the application with sample data, run: - -```sh -python manage.py create_sample_data -``` - -This will create: - -- An admin user (username: admin, password: admin123) -- Three sample companies -- Company admin users for each company -- Regular users for each company -- Sample chat data for each company -- Default dashboards for each company - -## Models - -### Accounts App - -- **CustomUser**: Extends Django's User model with company association and role -- **Company**: Represents a company with users and data sources - -### Dashboard App - -- **DataSource**: Represents an uploaded CSV file with chat data -- **ChatSession**: Stores individual chat session data parsed from CSV -- **Dashboard**: Allows configuration of custom dashboards with selected data sources - -## Usage Flow - -1. **Admin Setup**: - - - Admin creates companies - - Admin creates users and assigns them to companies - -2. **Company Admin**: - - - Uploads CSV files with chat data - - Creates and configures dashboards - - Manages company users - -3. 
**Regular Users**: - - View dashboards - - Search and explore chat data - - Analyze chat metrics - -## CSV Format - -The application expects CSV files with the following columns: - -- **session_id**: Unique identifier for each chat session -- **start_time**: When the chat session started -- **end_time**: When the chat session ended -- **ip_address**: User's IP address -- **country**: User's country -- **language**: Language used in the chat -- **messages_sent**: Number of messages in the conversation -- **sentiment**: Sentiment analysis result (Positive, Neutral, Negative) -- **escalated**: Whether the chat was escalated -- **forwarded_hr**: Whether the chat was forwarded to HR -- **full_transcript**: Complete chat transcript -- **avg_response_time**: Average response time in seconds -- **tokens**: Number of tokens used (for AI chat systems) -- **tokens_eur**: Cost of tokens in EUR -- **category**: Chat category or topic -- **initial_msg**: First message from the user -- **user_rating**: User satisfaction rating - -## Deployment - -For production deployment, the project includes: - -- **Dockerfile**: For containerizing the application -- **docker-compose.yml**: For orchestrating the application with PostgreSQL and Nginx -- **Nginx Configuration**: For serving the application and static files - -## Future Enhancements - -- **API Integration**: Direct integration with chat systems -- **Real-time Updates**: Live dashboard updates as new chats occur -- **Advanced Analytics**: More detailed and customizable metrics -- **Export Functionality**: Export reports and analysis -- **Customizable Themes**: Company-specific branding - -## Support - -For any issues or questions, please create an issue in the repository or contact the project maintainers. diff --git a/QUICK_START_GUIDE.md b/QUICK_START_GUIDE.md deleted file mode 100644 index d62a4a0..0000000 --- a/QUICK_START_GUIDE.md +++ /dev/null @@ -1,249 +0,0 @@ -# Chat Analytics Dashboard: Quick Start Guide - -## Getting Started - -This guide will help you quickly set up and start using the Chat Analytics Dashboard. - -### Installation - -#### Option 1: Local Development - -1. **Clone the repository**: - - ```sh - git clone - cd dashboard_project - ``` - -2. **Set up a virtual environment**: - - ```sh - uv venv - source .venv/bin/activate # On Windows: .venv\Scripts\activate - ``` - -3. **Install dependencies**: # from pyproject.toml - - ```sh - uv pip install -r requirements.txt - ``` - -4. **Set up the database**: - - ```sh - python manage.py migrate - ``` - -5. **Create admin user**: - - ```sh - python manage.py createsuperuser - ``` - -6. **Start the development server**: - - ```sh - python manage.py runserver - ``` - -7. **Access the application**: - Open your browser and go to - -#### Option 2: Docker Deployment - -1. **Clone the repository**: - - ```sh - git clone - cd dashboard_project - ``` - -2. **Build and start the containers**: - - ```sh - docker-compose up -d --build - ``` - -3. **Create admin user**: - - ```sh - docker-compose exec web python manage.py createsuperuser - ``` - -4. **Access the application**: - Open your browser and go to - -### Creating Sample Data (Optional) - -To quickly populate the system with sample data: - -```sh -python manage.py create_sample_data -``` - -This will create: - -- Admin user (username: admin, password: admin123) -- Three companies with users -- Sample chat data and dashboards - -## Basic Usage - -### Admin Tasks - -1. **Access Admin Panel**: - - - Go to - - Login with your admin credentials - -2. 
**Create a Company**: - - - Go to Companies > Add Company - - Fill in the company details and save - -3. **Create Users**: - - Go to Users > Add User - - Fill in user details - - Assign the user to a company - - Set appropriate permissions (staff status, company admin) - -### Company Admin Tasks - -1. **Login to Dashboard**: - - - Go to - - Login with your company admin credentials - -2. **Upload Chat Data**: - - - Click on "Upload Data" in the sidebar - - Fill in the data source details - - Select a CSV file containing chat data - - Click "Upload" - -3. **Create a Dashboard**: - - Click on "New Dashboard" in the sidebar - - Fill in the dashboard details - - Select data sources to include - - Click "Create Dashboard" - -### Regular User Tasks - -1. **View Dashboard**: - - - Login with your user credentials - - The dashboard will show automatically - - Select different dashboards from the sidebar - -2. **Search Chat Sessions**: - - - Click on "Search" in the top navigation - - Enter search terms - - Use filters to refine results - -3. **View Session Details**: - - In search results, click the eye icon for a session - - View complete session information and transcript - -## CSV Format - -Your CSV files should include the following columns: - -| Column | Description | Type | -| ------------------- | ------------------------------- | -------- | -| `session_id` | Unique ID for the chat | String | -| `start_time` | Session start time | Datetime | -| `end_time` | Session end time | Datetime | -| `ip_address` | User's IP address | String | -| `country` | User's country | String | -| `language` | Chat language | String | -| `messages_sent` | Number of messages | Integer | -| `sentiment` | Sentiment analysis result | String | -| `escalated` | Whether chat was escalated | Boolean | -| `forwarded_hr` | Whether chat was sent to HR | Boolean | -| `full_transcript` | Complete chat text | Text | -| `avg_response_time` | Average response time (seconds) | Float | -| `tokens` | Number of tokens used | Integer | -| `tokens_eur` | Cost in EUR | Float | -| `category` | Chat category | String | -| `initial_msg` | First user message | Text | -| `user_rating` | User satisfaction rating | String | - -Example CSV row: - -```csv -acme_1,2023-05-01 10:30:00,2023-05-01 10:45:00,192.168.1.1,USA,English,10,Positive,FALSE,FALSE,"User: Hello\nAgent: Hi there!",2.5,500,0.01,Support,Hello I need help,Good -``` - -## Dashboard Features - -### Overview Panel - -The main dashboard shows: - -- Total chat sessions -- Average response time -- Total tokens used -- Total cost - -### Charts - -The dashboard includes: - -- **Sessions Over Time**: Line chart showing chat volume trends -- **Sentiment Analysis**: Pie chart of positive/negative/neutral chats -- **Top Countries**: Bar chart of user countries -- **Categories**: Distribution of chat categories - -### Data Source Details - -View details for each data source: - -- Upload date and time -- Total sessions -- Source description -- List of all chat sessions from the source - -### Session Details - -For each chat session, you can view: - -- Session metadata (time, location, etc.) 
-- Full chat transcript -- Performance metrics -- User sentiment and rating - -## Troubleshooting - -### CSV Upload Issues - -If your CSV upload fails: - -- Ensure all required columns are present -- Check date formats (should be YYYY-MM-DD HH:MM:SS) -- Verify boolean values (TRUE/FALSE, Yes/No, 1/0) -- Check for special characters in text fields - -### Access Issues - -If you can't access certain features: - -- Verify your user role (admin, company admin, or regular user) -- Ensure you're assigned to the correct company -- Check if you're trying to access another company's data - -### Empty Dashboard - -If your dashboard is empty: - -- Verify that data sources have been uploaded -- Check that the dashboard is configured to use those data sources -- Ensure the CSV was processed successfully - -## Getting Help - -If you encounter any issues: - -- Check the documentation -- Contact your system administrator -- File an issue in the project repository diff --git a/README.md b/README.md index 2e2f4d5..5ccea97 100644 --- a/README.md +++ b/README.md @@ -2,203 +2,433 @@ A Django application that creates an analytics dashboard for chat session data. The application allows different companies to have their own dashboards and view their own data. -## Features +## Project Overview -- Multi-company support with user authentication -- CSV file upload and processing -- Interactive dashboard with charts and visualizations -- Detailed data views for chat sessions -- Search functionality to find specific chat sessions -- Admin interface for managing users and companies -- Responsive design using Bootstrap 5 +This Django project creates a multi-tenant dashboard application for analyzing chat session data. Companies can upload their chat data (in CSV format) and view analytics and metrics through an interactive dashboard. The application supports user authentication, role-based access control, and separate data isolation for different companies. + +### Project Structure + +The project consists of two main Django apps: + +1. **accounts**: Handles user authentication, company management, and user roles +2. **dashboard**: Manages data sources, chat sessions, and dashboard visualization +3. **data_integration**: Handles external API data integration + +### Key Features + +- **Multi-company Support**: Each company has their own private dashboards and data +- **User Management**: Different user roles (admin, company admin, regular user) +- **CSV File Upload**: Upload and process CSV files containing chat session data +- **Interactive Dashboard**: Visualize chat data with charts and metrics +- **Search Functionality**: Find specific chat sessions based on various criteria +- **Data Export**: Export data in CSV, JSON, and Excel formats +- **Data Exploration**: Drill down into individual chat sessions for detailed analysis +- **Responsive Design**: Mobile-friendly interface using Bootstrap 5 ## Requirements -- Python 3.13+ -- Django 5.2+ -- UV package manager (recommended) -- Other dependencies listed in [`pyproject.toml`](./pyproject.toml) +- Python 3.13+ +- Django 5.2+ +- UV package manager (recommended) +- Other dependencies listed in [`pyproject.toml`](./pyproject.toml) ## Setup ### Local Development -1. Clone the repository: +1. Clone the repository: - ```sh - git clone - cd LiveGraphsDjango - ``` + ```sh + git clone + cd LiveGraphsDjango + ``` -2. Install uv if you don't have it yet: +2. 
Install uv if you don't have it yet:
 
-   ```sh
-   # Install using pip
-   pip install uv
-
-   # Or with curl (Unix/macOS)
-   curl -sSf https://install.pypa.io/get-uv.py | python3 -
-
-   # Or on Windows with PowerShell
-   irm https://install.pypa.io/get-uv.ps1 | iex
-   ```
+    ```sh
+    # Install using pip
+    pip install uv
+
+    # Or with curl (Unix/macOS)
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+
+    # Or on Windows with PowerShell
+    irm https://astral.sh/uv/install.ps1 | iex
+    ```
 
-3. Create a virtual environment and activate it:
+3.  Create a virtual environment and activate it:
 
-   ```sh
-   uv venv
-   source .venv/bin/activate # On Windows: .venv\Scripts\activate
-   ```
+    ```sh
+    uv venv
+    source .venv/bin/activate # On Windows: .venv\Scripts\activate
+    ```
 
-4. Install dependencies:
+4.  Install dependencies:
 
-   ```sh
-   # Install all dependencies including dev dependencies
-   uv pip install -e ".[dev]"
+    ```sh
+    # Install all dependencies including dev dependencies
+    uv pip install -e ".[dev]"
 
-   # Or just runtime dependencies
-   uv pip install -e .
-   ```
+    # Or just runtime dependencies
+    uv pip install -e .
+    ```
 
-5. Run migrations:
+5.  Run migrations:
 
-   ```sh
-   cd dashboard_project
-   python manage.py makemigrations
-   python manage.py migrate
-   ```
+    ```sh
+    cd dashboard_project
+    python manage.py makemigrations
+    python manage.py migrate
+    ```
 
-6. Create a superuser:
+6.  Create a superuser:
 
-   ```sh
-   python manage.py createsuperuser
-   ```
+    ```sh
+    python manage.py createsuperuser
+    ```
 
-7. Set up environment variables:
+7.  Set up environment variables:
 
-   ```sh
-   # Copy the sample .env file
-   cp .env.sample .env
+    ```sh
+    # Copy the sample .env file
+    cp .env.sample .env
 
-   # Edit the .env file with your credentials
-   nano .env
-   ```
+    # Edit the .env file with your credentials
+    nano .env
+    ```
 
-   Be sure to update:
+    Be sure to update:
 
-   - `EXTERNAL_API_USERNAME` and `EXTERNAL_API_PASSWORD` for the data integration API
-   - `DJANGO_SECRET_KEY` for production environments
-   - Redis URL if using a different configuration for Celery
+    -   `EXTERNAL_API_USERNAME` and `EXTERNAL_API_PASSWORD` for the data integration API
+    -   `DJANGO_SECRET_KEY` for production environments
+    -   Redis URL if using a different configuration for Celery
 
-8. Start Celery for background tasks:
+8. 
Start Celery for background tasks: - ```sh - # In a separate terminal - cd dashboard_project - celery -A dashboard_project worker --loglevel=info + ```sh + # In a separate terminal + cd dashboard_project + celery -A dashboard_project worker --loglevel=info - # Start the Celery Beat scheduler in another terminal - cd dashboard_project - celery -A dashboard_project beat --scheduler django_celery_beat.schedulers:DatabaseScheduler - ``` + # Start the Celery Beat scheduler in another terminal + cd dashboard_project + celery -A dashboard_project beat --scheduler django_celery_beat.schedulers:DatabaseScheduler + ``` - Alternative without Redis (using SQLite): + Alternative without Redis (using SQLite): - ```sh - # Set environment variables to use SQLite instead of Redis - export CELERY_BROKER_URL=sqla+sqlite:///celery.sqlite - export CELERY_RESULT_BACKEND=db+sqlite:///results.sqlite + ```sh + # Set environment variables to use SQLite instead of Redis + export CELERY_BROKER_URL=sqla+sqlite:///celery.sqlite + export CELERY_RESULT_BACKEND=db+sqlite:///results.sqlite - # In a separate terminal - cd dashboard_project - celery -A dashboard_project worker --loglevel=info + # In a separate terminal + cd dashboard_project + celery -A dashboard_project worker --loglevel=info - # Start the Celery Beat scheduler in another terminal with the same env vars - cd dashboard_project - celery -A dashboard_project beat --scheduler django_celery_beat.schedulers:DatabaseScheduler - ``` + # Start the Celery Beat scheduler in another terminal with the same env vars + cd dashboard_project + celery -A dashboard_project beat --scheduler django_celery_beat.schedulers:DatabaseScheduler + ``` -9. Run the development server: +9. Run the development server: - ```sh - python manage.py runserver - ``` + ```sh + python manage.py runserver + ``` -10. Access the application at +10. Access the application at ### Development Workflow with UV UV offers several advantages over traditional pip, including faster dependency resolution and installation: -1. Running linting and formatting: +1. Running linting and formatting: - ```sh - # Using the convenience script - ./.scripts/lint.sh + ```sh + # Using the convenience script + ./.scripts/lint.sh - # Or directly - uv run -m ruff check dashboard_project - uv run -m ruff format dashboard_project - uv run -m black dashboard_project - ``` + # Or directly + uv run -m ruff check dashboard_project + uv run -m ruff format dashboard_project + uv run -m black dashboard_project + ``` -2. Running tests: +2. Running tests: - ```sh - # Using the convenience script - ./.scripts/test.sh + ```sh + # Using the convenience script + ./.scripts/test.sh - # Or directly - uv run -m pytest - ``` + # Or directly + uv run -m pytest + ``` -3. Adding new dependencies: +3. Adding new dependencies: - ```sh - # Add to project - uv pip install package_name + ```sh + # Add to project + uv pip install package_name - # Then update pyproject.toml manually - # And update the lockfile - uv pip freeze > requirements.lock - ``` + # Then update pyproject.toml manually + # And update the lockfile + uv pip freeze > requirements.lock + ``` -4. Updating the lockfile: +4. Updating the lockfile: - ```sh - uv pip compile pyproject.toml -o uv.lock - ``` + ```sh + uv pip compile pyproject.toml -o uv.lock + ``` ### Using Docker -1. Clone the repository: +1. Clone the repository: - ```sh - git clone - cd dashboard_project - ``` + ```sh + git clone + cd dashboard_project + ``` -2. Build and run with Docker Compose: +2. 
Build and run with Docker Compose:
 
-   ```sh
-   docker-compose up -d --build
-   ```
+    ```sh
+    docker-compose up -d --build
+    ```
 
-3. Create a superuser:
+3.  Create a superuser:
 
-   ```sh
-   docker-compose exec web python manage.py createsuperuser
-   ```
+    ```sh
+    docker-compose exec web python manage.py createsuperuser
+    ```
 
-4. Access the application at
+4.  Access the application at
 
-## Usage
+## Development Tools
 
-1. Login as the superuser you created.
-2. Go to the admin interface () and create companies and users.
-3. Assign users to companies.
-4. Upload CSV files for each company.
-5. View the analytics dashboard.
+### Prettier for Django Templates
+
+This project uses Prettier with the `prettier-plugin-jinja-template` plugin to format HTML templates with Django template syntax.
+
+#### Prettier Configuration
+
+The project is already configured with Prettier integration in pre-commit hooks. The configuration includes:
+
+1.  `.prettierrc` - Configuration file with Django HTML support
+2.  `.prettierignore` - Files to exclude from formatting
+3.  Pre-commit hook for automatic formatting on commits
+
+#### Manual Installation
+
+To use Prettier locally (outside of pre-commit hooks), you'll need to install the dependencies:
+
+```bash
+# Using npm
+npm install
+
+# Or install just the required packages
+npm install --save-dev prettier prettier-plugin-jinja-template
+```
+
+#### Usage
+
+##### With Pre-commit
+
+Prettier will automatically run as part of the pre-commit hooks when you commit changes.
+
+To manually run the pre-commit hooks on all files:
+
+```bash
+pre-commit run prettier --all-files
+```
+
+##### Using npm Scripts
+
+The package.json includes npm scripts for formatting:
+
+```bash
+# Format all static files
+npm run format
+
+# Check formatting without modifying files
+npm run format:check
+```
+
+##### Command Line
+
+You can also run Prettier directly:
+
+```bash
+# Format a specific file
+npx prettier --write path/to/template.html
+
+# Format all HTML files
+npx prettier --write "dashboard_project/templates/**/*.html"
+```
+
+#### VSCode Integration
+
+For VSCode users, install the Prettier extension and add these settings to your `.vscode/settings.json`:
+
+```json
+{
+  "editor.defaultFormatter": "esbenp.prettier-vscode",
+  "[html]": {
+    "editor.defaultFormatter": "esbenp.prettier-vscode",
+    "editor.formatOnSave": true
+  },
+  "prettier.requireConfig": true
+}
+```
+
+#### Ignoring Parts of Templates
+
+If you need to prevent Prettier from formatting a section of your template:
+
+```html
+{# prettier-ignore #}
+<div>This section will not be formatted by Prettier.</div>
+
+<!-- prettier-ignore -->
+<div>
+  This works too.
+</div>
+```
+
+#### Django Template Support
+
+The `prettier-plugin-jinja-template` plugin provides special handling for Django templates, including:
+
+-   Proper formatting of Django template tags (`{% %}`)
+-   Support for Django template comments (`{# #}`)
+-   Preservation of Django template variable output (`{{ }}`)
+-   Special handling for Django template syntax inside HTML attributes
+
+## Basic Usage Instructions
+
+1.  Login as the superuser you created.
+2.  Go to the admin interface (<http://127.0.0.1:8000/admin/>) and create companies and users.
+3.  Assign users to companies.
+4.  Upload CSV files for each company.
+5.  View the analytics dashboard.
+
+## Quick Start Guide
+
+### Creating Sample Data (Optional)
+
+To quickly populate the system with sample data:
+
+```sh
+python manage.py create_sample_data
+```
+
+This will create:
+
+-   Admin user (username: admin, password: admin123)
+-   Three companies with users
+-   Sample chat data and dashboards
+
+### Admin Tasks
+
+1.  **Access Admin Panel**:
+
+    -   Go to <http://127.0.0.1:8000/admin/>
+    -   Login with your admin credentials
+
+2.  **Create a Company**:
+
+    -   Go to Companies > Add Company
+    -   Fill in the company details and save
+
+3.  **Create Users**:
+    -   Go to Users > Add User
+    -   Fill in user details
+    -   Assign the user to a company
+    -   Set appropriate permissions (staff status, company admin)
+
+### Company Admin Tasks
+
+1.  **Login to Dashboard**:
+
+    -   Go to <http://127.0.0.1:8000/>
+    -   Login with your company admin credentials
+
+2.  **Upload Chat Data**:
+
+    -   Click on "Upload Data" in the sidebar
+    -   Fill in the data source details
+    -   Select a CSV file containing chat data
+    -   Click "Upload"
+
+3.  **Create a Dashboard**:
+    -   Click on "New Dashboard" in the sidebar
+    -   Fill in the dashboard details
+    -   Select data sources to include
+    -   Click "Create Dashboard"
+
+### Regular User Tasks
+
+1.  **View Dashboard**:
+
+    -   Login with your user credentials
+    -   The dashboard will show automatically
+    -   Select different dashboards from the sidebar
+
+2.  **Search Chat Sessions**:
+
+    -   Click on "Search" in the top navigation
+    -   Enter search terms
+    -   Use filters to refine results
+
+3. 
**View Session Details**: + - In search results, click the eye icon for a session + - View complete session information and transcript + +### Dashboard Features + +The dashboard includes: + +- **Sessions Over Time**: Line chart showing chat volume trends +- **Sentiment Analysis**: Pie chart of positive/negative/neutral chats +- **Top Countries**: Bar chart of user countries +- **Categories**: Distribution of chat categories + +### Data Source Details + +View details for each data source: + +- Upload date and time +- Total sessions +- Source description +- List of all chat sessions from the source + +### Troubleshooting + +#### CSV Upload Issues + +If your CSV upload fails: + +- Ensure all required columns are present +- Check date formats (should be YYYY-MM-DD HH:MM:SS) +- Verify boolean values (TRUE/FALSE, Yes/No, 1/0) +- Check for special characters in text fields + +#### Access Issues + +If you can't access certain features: + +- Verify your user role (admin, company admin, or regular user) +- Ensure you're assigned to the correct company +- Check if you're trying to access another company's data + +#### Empty Dashboard + +If your dashboard is empty: + +- Verify that data sources have been uploaded +- Check that the dashboard is configured to use those data sources +- Ensure the CSV was processed successfully ## CSV File Format @@ -224,14 +454,159 @@ The CSV file should contain the following columns: | `initial_msg` | First message from the user (text) | | `user_rating` | User rating of the conversation (string) | +Example CSV row: + +```csv +acme_1,2023-05-01 10:30:00,2023-05-01 10:45:00,192.168.1.1,USA,English,10,Positive,FALSE,FALSE,"User: Hello\nAgent: Hi there!",2.5,500,0.01,Support,Hello I need help,Good +``` + +## Implementation Details + +### Core Features Implemented + +1. **Multi-Tenant Architecture**: + + - Companies have isolated data and user access + - Users belong to specific companies + - Role-based permissions (admin, company admin, regular user) + +2. **Data Management**: + + - CSV file upload and processing + - Data source management + - Chat session records with comprehensive metadata + +3. **Dashboard Visualization**: + + - Interactive charts using Plotly.js + - Key metrics and KPIs + - Time-series analysis + - Geographic distribution + - Sentiment analysis + - Category distribution + +4. **Search and Analysis**: + + - Full-text search across chat sessions + - Filtering by various attributes + - Detailed view of individual chat sessions + - Transcript viewing + +5. **User Management**: + + - User registration and authentication + - Profile management + - Password change functionality + - Role assignment + +6. **Admin Interface**: + + - Company management + - User administration + - Data source oversight + - System-wide configuration + +7. 
**Responsive Design**: + - Mobile-friendly interface using Bootstrap 5 + - Consistent layout and navigation + - Accessible UI components + +### Technical Implementation + +#### Backend (Django) + +- **Custom User Model**: Extended for company association and roles +- **Database Models**: Structured for efficient data storage and queries +- **View Logic**: Separation of concerns with dedicated view functions +- **Form Handling**: Validated data input and file uploads +- **Data Processing**: CSV parsing and structured storage +- **Template Context**: Prepared data for frontend rendering +- **URL Routing**: Clean URL structure +- **Access Control**: Permission checks throughout + +#### Frontend + +- **Bootstrap 5**: For responsive layout and UI components +- **Plotly.js**: For interactive charts and visualizations +- **jQuery**: For AJAX functionality +- **Font Awesome**: For icons +- **Custom CSS**: For styling enhancements + +#### Data Flow + +1. **Upload Process**: + + - File validation + - CSV parsing + - Data normalization + - Record creation + - Association with company + +2. **Dashboard Generation**: + + - Data aggregation + - Statistical calculations + - Chart data preparation + - JSON serialization for frontend + +3. **User Authentication**: + - Login/registration handling + - Session management + - Permission checks + - Access control based on company + +#### Deployment Configuration + +- **Docker**: Containerization for consistent deployment +- **Docker Compose**: Multi-container orchestration +- **Nginx**: Web server and static file serving +- **PostgreSQL**: Production-ready database +- **Gunicorn**: WSGI HTTP server + +### Models + +#### Accounts App + +- **CustomUser**: Extends Django's User model with company association and role +- **Company**: Represents a company with users and data sources + +#### Dashboard App + +- **DataSource**: Represents an uploaded CSV file with chat data +- **ChatSession**: Stores individual chat session data parsed from CSV +- **Dashboard**: Allows configuration of custom dashboards with selected data sources + +### Usage Flow + +1. **Admin Setup**: + + - Admin creates companies + - Admin creates users and assigns them to companies + +2. **Company Admin**: + + - Uploads CSV files with chat data + - Creates and configures dashboards + - Manages company users + +3. 
**Regular Users**: + - View dashboards + - Search and explore chat data + - Analyze chat metrics + ## Future Enhancements -- API integration for real-time data -- More advanced visualizations -- Custom reports -- Export functionality -- Theme customization -- User access control with more granular permissions +- API integration for real-time data +- More advanced visualizations +- Custom reports +- Export to additional formats (XML, HTML, PDF) +- Theme customization +- User access control with more granular permissions +- Direct integration with chat platforms via API +- Real-time dashboard updates using WebSockets +- Advanced analytics with machine learning +- Customizable reports and scheduling +- Enhanced visualization options ## License diff --git a/TODO.md b/TODO.md index 03f5b1a..3e17c62 100644 --- a/TODO.md +++ b/TODO.md @@ -4,74 +4,74 @@ ### Responsiveness -- [ ] Fix dashboard graphs scaling/adjustment when zooming (currently requires page refresh) +- [ ] Fix dashboard graphs scaling/adjustment when zooming (currently requires page refresh) ### Theming -- [x] Add dark mode/light mode toggle -- [x] Fix dark mode implementation issues: - - [x] Make charts display properly in dark mode - - [x] Fix the footer not changing color in dark mode - - [x] Adjust the sidebar nav-link styling for dark mode - - [x] Make the navbar have a different background color from the body in dark mode - - [x] Make theme toggle automatically detect and respect the user's system preference - - [x] Fix inconsistency between system dark mode preference and manual toggle - - [x] Ensure charts properly update in both scenarios (system preference and manual toggle) -- [x] Implement smooth theme transitions -- [ ] Add Notso AI branding elements -- [ ] Implement responsive table design (reduce rows to fit screen) +- [x] Add dark mode/light mode toggle +- [x] Fix dark mode implementation issues: + - [x] Make charts display properly in dark mode + - [x] Fix the footer not changing color in dark mode + - [x] Adjust the sidebar nav-link styling for dark mode + - [x] Make the navbar have a different background color from the body in dark mode + - [x] Make theme toggle automatically detect and respect the user's system preference + - [x] Fix inconsistency between system dark mode preference and manual toggle + - [x] Ensure charts properly update in both scenarios (system preference and manual toggle) +- [x] Implement smooth theme transitions +- [ ] Add Notso AI branding elements +- [ ] Implement responsive table design (reduce rows to fit screen) ### Data Export -- [x] Implement multi-format export functionality - - [x] CSV format - - [ ] Excel format - - [x] JSON format - - [ ] XML format - - [ ] HTML format - - [ ] PDF format -- [ ] Create dropdown menu for export options -- [x] Make export data section collapsible (folded by default) -- [x] Add company name, date and timestamp to exported filenames -- [ ] Update [data view](dashboard_project/templates/dashboard/partials/data_table.html) to show maximum 10 rows by default, with a "Show more" button to expand to 50 rows, or "Show all" to display all rows +- [x] Implement multi-format export functionality + - [x] CSV format + - [x] Excel format + - [x] JSON format + - [ ] XML format + - [ ] HTML format + - [ ] PDF format +- [ ] Create dropdown menu for export options +- [x] Make export data section collapsible (folded by default) +- [x] Add company name, date and timestamp to exported filenames +- [ ] Update [data 
view](dashboard_project/templates/dashboard/partials/data_table.html) to show maximum 10 rows by default, with a "Show more" button to expand to 50 rows, or "Show all" to display all rows ## Admin Interface Enhancements ### Company Management -- [ ] Add company logo upload functionality -- [ ] Add direct CSV download button for each company (superusers only) - - [ ] Include company name, date and timestamp in filename -- [ ] Add UI for customizing CSV column names +- [ ] Add company logo upload functionality +- [ ] Add direct CSV download button for each company (superusers only) +- [ ] Include company name, date and timestamp in filename +- [ ] Add UI for customizing CSV column names ## Data Integration ### External Data Sources -- [ ] Implement periodic data download from external API - - Source: - - Authentication: Basic Auth - - Credentials: [stored securely] - - An example of the data structure can be found in [jumbo.csv](examples/jumbo.csv) - - The file that the endpoint returns is a CSV file, but the file is not a standard CSV file. It has a different structure and format: - - The header row is missing, it is supposed to be `session_id,start_time,end_time,ip_address,country,language,messages_sent,sentiment,escalated,forwarded_hr,full_transcript,avg_response_time,tokens,tokens_eur,category,initial_msg,user_rating` - - [ ] The coupling of endpoint to the company and the authentication method should be handled in the backend and the superuser should be able to change it. - - [ ] The data should be stored in the database and the dashboard should be updated with the new data. - - [ ] The csv also contains a column with full_transcript, which is a uri to a txt file, encoded in utf-8. The txt file is a raw transcript of the chat. - - [ ] The txt file should be downloaded, parsed and stored in the database. - - An example of such txt file can be found in [132f3a8c-3ba5-4d89-ae04-cd83f1bc5272.txt](examples/132f3a8c-3ba5-4d89-ae04-cd83f1bc5272.txt) - - Note that the User and Assistant messages can be multiline and can contain html, which should be safely handled, and if safe, rendered in the frontend. -- [ ] Add scheduling options for data refresh -- [ ] Add UI button to trigger manual data refresh +- [ ] Implement periodic data download from external API + - Source: + - Authentication: Basic Auth + - Credentials: [stored securely] + - An example of the data structure can be found in [jumbo.csv](examples/jumbo.csv) + - The file that the endpoint returns is a CSV file, but the file is not a standard CSV file. It has a different structure and format: + - The header row is missing, it is supposed to be `session_id,start_time,end_time,ip_address,country,language,messages_sent,sentiment,escalated,forwarded_hr,full_transcript,avg_response_time,tokens,tokens_eur,category,initial_msg,user_rating` + - [ ] The coupling of endpoint to the company and the authentication method should be handled in the backend and the superuser should be able to change it. + - [ ] The data should be stored in the database and the dashboard should be updated with the new data. + - [ ] The csv also contains a column with full_transcript, which is a uri to a txt file, encoded in utf-8. The txt file is a raw transcript of the chat. + - [ ] The txt file should be downloaded, parsed and stored in the database. 
+ - An example of such a TXT file can be found in [132f3a8c-3ba5-4d89-ae04-cd83f1bc5272.txt](examples/132f3a8c-3ba5-4d89-ae04-cd83f1bc5272.txt) + - Note that the User and Assistant messages can be multiline and can contain HTML, which should be sanitized and, if safe, rendered in the frontend. +- [ ] Add scheduling options for data refresh +- [ ] Add UI button to trigger manual data refresh ## Technical Debt ### Performance Optimization -- [ ] Profile and optimize dashboard rendering -- [ ] Implement lazy loading for dashboard elements +- [ ] Profile and optimize dashboard rendering +- [ ] Implement lazy loading for dashboard elements ### Testing -- [ ] Add unit tests for export functionality -- [ ] Add integration tests for data import process +- [ ] Add unit tests for export functionality +- [ ] Add integration tests for data import process diff --git a/dashboard_project/dashboard/views_export.py b/dashboard_project/dashboard/views_export.py index cca6e81..f43510c 100644 --- a/dashboard_project/dashboard/views_export.py +++ b/dashboard_project/dashboard/views_export.py @@ -1,9 +1,11 @@ # dashboard/views_export.py import csv +import io import json from datetime import timedelta +import xlsxwriter from django.contrib.auth.decorators import login_required from django.db.models import Q from django.http import HttpResponse @@ -207,6 +209,11 @@ def export_chats_json(request): data_source = DataSource.objects.get(id=data_source_id) filename = f"{data_source.name.replace(' ', '_').lower()}_chat_sessions" + # Add company name, date, and timestamp to the filename + current_time = timezone.now().strftime("%Y%m%d_%H%M%S") + company_name = company.name.replace(" ", "_").lower() + filename = f"{company_name}_{filename}_{current_time}" + # Prepare the data for JSON export using list comprehension data = [ { @@ -248,3 +255,188 @@ json.dump(export_data, response, indent=2) return response + + +@login_required +def export_chats_excel(request): + """Export chat sessions to Excel with filtering options""" + user = request.user + company = user.company + + if not company: + return HttpResponse("You are not associated with any company.", status=403) + + # Get and apply filters + data_source_id = request.GET.get("data_source_id") + dashboard_id = request.GET.get("dashboard_id") + view = request.GET.get("view", "all") + start_date = request.GET.get("start_date") + end_date = request.GET.get("end_date") + country = request.GET.get("country") + sentiment = request.GET.get("sentiment") + escalated = request.GET.get("escalated") + + # Base queryset + sessions = ChatSession.objects.filter(data_source__company=company) + + # Apply data source filter if selected + if data_source_id: + data_source = get_object_or_404(DataSource, id=data_source_id, company=company) + sessions = sessions.filter(data_source=data_source) + + # Apply dashboard filter if selected + if dashboard_id: + dashboard = get_object_or_404(Dashboard, id=dashboard_id, company=company) + data_sources = dashboard.data_sources.all() + sessions = sessions.filter(data_source__in=data_sources) + + # Apply view filter + if view == "recent": + seven_days_ago = timezone.now() - timedelta(days=7) + sessions = sessions.filter(start_time__gte=seven_days_ago) + elif view == "positive": + sessions = sessions.filter(Q(sentiment__icontains="positive")) + elif view == "negative": + sessions = sessions.filter(Q(sentiment__icontains="negative")) + elif view == "escalated": + sessions = sessions.filter(escalated=True) + + # Apply 
additional filters + if start_date: + sessions = sessions.filter(start_time__date__gte=start_date) + if end_date: + sessions = sessions.filter(start_time__date__lte=end_date) + if country: + sessions = sessions.filter(country__icontains=country) + if sentiment: + sessions = sessions.filter(sentiment__icontains=sentiment) + if escalated: + escalated_val = escalated.lower() == "true" + sessions = sessions.filter(escalated=escalated_val) + + # Order by most recent first + sessions = sessions.order_by("-start_time") + + # Create the filename + filename = "chat_sessions" + if dashboard_id: + dashboard = Dashboard.objects.get(id=dashboard_id) + filename = f"{dashboard.name.replace(' ', '_').lower()}_chat_sessions" + elif data_source_id: + data_source = DataSource.objects.get(id=data_source_id) + filename = f"{data_source.name.replace(' ', '_').lower()}_chat_sessions" + + # Add company name, date, and timestamp to the filename + current_time = timezone.now().strftime("%Y%m%d_%H%M%S") + company_name = company.name.replace(" ", "_").lower() + filename = f"{company_name}_{filename}_{current_time}" + + # Create in-memory output file + output = io.BytesIO() + + # Create Excel workbook and worksheet; remove_timezone strips tzinfo from aware Django datetimes, which Excel cannot represent + workbook = xlsxwriter.Workbook(output, {"remove_timezone": True}) + worksheet = workbook.add_worksheet("Chat Sessions") + + # Add a bold format to use to highlight cells + bold = workbook.add_format({"bold": True, "bg_color": "#D9EAD3"}) + date_format = workbook.add_format({"num_format": "yyyy-mm-dd hh:mm:ss"}) + + # Write header row with formatting + headers = [ + "Session ID", + "Start Time", + "End Time", + "IP Address", + "Country", + "Language", + "Messages Sent", + "Sentiment", + "Escalated", + "Forwarded HR", + "Full Transcript", + "Avg Response Time (s)", + "Tokens", + "Tokens EUR", + "Category", + "Initial Message", + "User Rating", + ] + + for col, header in enumerate(headers): + worksheet.write(0, col, header, bold) + + # Write data rows + for row_num, session in enumerate(sessions, 1): + worksheet.write(row_num, 0, session.session_id) + # Write dates with proper formatting if not None + if session.start_time: + worksheet.write_datetime(row_num, 1, session.start_time, date_format) + else: + worksheet.write(row_num, 1, None) + + if session.end_time: + worksheet.write_datetime(row_num, 2, session.end_time, date_format) + else: + worksheet.write(row_num, 2, None) + + worksheet.write(row_num, 3, session.ip_address) + worksheet.write(row_num, 4, session.country) + worksheet.write(row_num, 5, session.language) + worksheet.write(row_num, 6, session.messages_sent) + worksheet.write(row_num, 7, session.sentiment) + worksheet.write(row_num, 8, "Yes" if session.escalated else "No") + worksheet.write(row_num, 9, "Yes" if session.forwarded_hr else "No") + worksheet.write(row_num, 10, session.full_transcript) + worksheet.write(row_num, 11, session.avg_response_time) + worksheet.write(row_num, 12, session.tokens) + worksheet.write(row_num, 13, session.tokens_eur) + worksheet.write(row_num, 14, session.category) + worksheet.write(row_num, 15, session.initial_msg) + worksheet.write(row_num, 16, session.user_rating) + + # Add summary sheet with metadata + summary = workbook.add_worksheet("Summary") + summary.write(0, 0, "Export Information", bold) + summary.write(1, 0, "Company:", bold) + summary.write(1, 1, company.name) + summary.write(2, 0, "Export Date:", bold) + summary.write(2, 1, timezone.now().strftime("%Y-%m-%d %H:%M:%S")) + summary.write(3, 0, "Total Records:", bold) + summary.write(3, 1, len(sessions)) + + # Add filters if used 
+ filter_row = 5 + summary.write(filter_row, 0, "Filters Applied:", bold) + filter_row += 1 + + if data_source_id: + data_source = DataSource.objects.get(id=data_source_id) + summary.write(filter_row, 0, "Data Source:") + summary.write(filter_row, 1, data_source.name) + filter_row += 1 + + if dashboard_id: + dashboard = Dashboard.objects.get(id=dashboard_id) + summary.write(filter_row, 0, "Dashboard:") + summary.write(filter_row, 1, dashboard.name) + filter_row += 1 + + if view != "all": + summary.write(filter_row, 0, "View:") + summary.write(filter_row, 1, view.title()) + filter_row += 1 + + # Set fixed column widths for better readability + for i, width in enumerate([20, 20, 20, 15, 15, 10, 12, 15, 10, 12, 30, 15, 10, 10, 20, 50, 10]): + worksheet.set_column(i, i, width) + + # Close the workbook + workbook.close() + + # Set up the response + output.seek(0) + response = HttpResponse(output, content_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") + response["Content-Disposition"] = f'attachment; filename="{filename}.xlsx"' + + return response diff --git a/dashboard_project/data_integration/utils.py b/dashboard_project/data_integration/utils.py index c5b45da..3f1a871 100644 --- a/dashboard_project/data_integration/utils.py +++ b/dashboard_project/data_integration/utils.py @@ -94,19 +94,41 @@ def fetch_and_store_chat_data(source_id=None): padded_row = row + [""] * (len(header) - len(row)) data = dict(zip(header, padded_row, strict=False)) - try: - # Try European date format (DD.MM.YYYY) first - start_time = make_aware(datetime.strptime(data["start_time"], "%d.%m.%Y %H:%M:%S")) - except ValueError: - # Fallback to ISO format (YYYY-MM-DD) - start_time = make_aware(datetime.strptime(data["start_time"], "%Y-%m-%d %H:%M:%S")) + # Parse date fields with multiple format support + start_time = None + end_time = None - try: - # Try European date format (DD.MM.YYYY) first - end_time = make_aware(datetime.strptime(data["end_time"], "%d.%m.%Y %H:%M:%S")) - except ValueError: - # Fallback to ISO format (YYYY-MM-DD) - end_time = make_aware(datetime.strptime(data["end_time"], "%Y-%m-%d %H:%M:%S")) + # List of date formats to try + date_formats = [ + "%d.%m.%Y %H:%M:%S", # European format: DD.MM.YYYY HH:MM:SS + "%Y-%m-%d %H:%M:%S", # ISO format: YYYY-MM-DD HH:MM:SS + "%m/%d/%Y %H:%M:%S", # US format: MM/DD/YYYY HH:MM:SS + "%Y-%m-%dT%H:%M:%S", # ISO format with T separator + "%Y-%m-%dT%H:%M:%S.%fZ", # ISO format with fractional seconds and Z + ] + + # Try to parse start_time with multiple formats + for date_format in date_formats: + try: + start_time = make_aware(datetime.strptime(data["start_time"], date_format)) + break + except (ValueError, TypeError): + continue + + # Try to parse end_time with multiple formats + for date_format in date_formats: + try: + end_time = make_aware(datetime.strptime(data["end_time"], date_format)) + break + except (ValueError, TypeError): + continue + + # If we couldn't parse the dates, log an error and skip this row + if not start_time or not end_time: + error_msg = f"Could not parse date fields for session {data['session_id']}: start_time={data['start_time']}, end_time={data['end_time']}" + logger.error(error_msg) + stats["errors"] += 1 + continue messages_sent = int(data["messages_sent"]) if data["messages_sent"] else None escalated = data["escalated"].lower() == "true" if data["escalated"] else None @@ -199,6 +221,10 @@ def fetch_and_store_transcript(session, timeout=30): def parse_and_store_transcript_messages(session, transcript_content): """Parse 
and store messages from a transcript. + This function parses a chat transcript that contains messages from both User and Assistant. + It identifies message boundaries by looking for lines that start with common sender patterns, + and groups all following lines until the next sender change as part of that message. + Args: session: The ChatSession object transcript_content: The raw transcript content @@ -206,6 +232,11 @@ def parse_and_store_transcript_messages(session, transcript_content): Returns: int: Number of messages created """ + # Handle empty transcripts + if not transcript_content or transcript_content.strip() == "": + logger.warning(f"Empty transcript received for session {session.session_id}") + return 0 + lines = transcript_content.splitlines() current_sender = None current_message_lines = [] @@ -217,35 +248,285 @@ def parse_and_store_transcript_messages(session, transcript_content): logger.info(f"Deleting {existing_count} existing messages for session {session.session_id}") ChatMessage.objects.filter(session=session).delete() + # Define common message patterns to detect - expanded to include more variations + user_patterns = [ + "User:", + "[User]:", + "Customer:", + "[Customer]:", + "Client:", + "[Client]:", + "Human:", + "[Human]:", + "Me:", + "[Me]:", + "Question:", + "User >", + "Customer >", + "User said:", + "Customer said:", + "User writes:", + "User asked:", + "User message:", + "From user:", + "Client message:", + "Q:", + "Input:", + "Query:", + "Person:", + "Visitor:", + "Guest:", + "User input:", + "User query:", + ] + assistant_patterns = [ + "Assistant:", + "[Assistant]:", + "Agent:", + "[Agent]:", + "Bot:", + "[Bot]:", + "AI:", + "[AI]:", + "ChatGPT:", + "[ChatGPT]:", + "System:", + "[System]:", + "Support:", + "[Support]:", + "Answer:", + "Assistant >", + "Bot >", + "Assistant said:", + "Assistant writes:", + "AI responded:", + "LLM:", + "[LLM]:", + "Response:", + "A:", + "Output:", + "AI output:", + "Model:", + "[Model]:", + "Assistant message:", + "From assistant:", + "Bot response:", + "AI says:", + "NotsoAI:", + "[NotsoAI]:", + "Notso:", + "[Notso]:", + ] + + # Function to save current message before starting a new one + def save_current_message(): + nonlocal current_sender, current_message_lines, messages_created + if current_sender and current_message_lines: + message_text = "\n".join(current_message_lines) + # Only save if there's actual content (not just whitespace) + if message_text.strip() and save_message(session, current_sender, message_text): + messages_created += 1 + logger.debug(f"Saved {current_sender} message with {len(current_message_lines)} lines") + + # Initial scan to detect format type and potential message boundaries + has_recognized_patterns = False + potential_timestamps = [] + timestamp_pattern_count = 0 + + # Regex patterns for common timestamp formats + import re + + timestamp_patterns = [ + r"^\[\d{2}:\d{2}:\d{2}\]", # [HH:MM:SS] + r"^\[\d{2}:\d{2}\]", # [HH:MM] + r"^\(\d{2}:\d{2}:\d{2}\)", # (HH:MM:SS) + r"^\(\d{2}:\d{2}\)", # (HH:MM) + r"^\d{2}:\d{2}:\d{2} -", # HH:MM:SS - + r"^\d{2}:\d{2} -", # HH:MM - + r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", # YYYY-MM-DD HH:MM:SS + ] + + # First pass: detect format and message boundaries + for i, line in enumerate(lines): + line_stripped = line.strip() + + # Check for standard message patterns + if any(line_stripped.startswith(pattern) for pattern in user_patterns + assistant_patterns): + has_recognized_patterns = True + + # Check for timestamp patterns that might indicate message boundaries + 
for pattern in timestamp_patterns: + if re.match(pattern, line_stripped): + timestamp_pattern_count += 1 + potential_timestamps.append(i) + break + + # If no recognized patterns are found, try to intelligently split the transcript + if not has_recognized_patterns and len(lines) > 0: + logger.info( + f"No standard message patterns found in transcript for session {session.session_id}. Attempting intelligent split." + ) + + # Try timestamp-based parsing if we have enough consistent timestamps + if timestamp_pattern_count > 3 and timestamp_pattern_count > 0.2 * len(lines): + logger.info(f"Attempting timestamp-based parsing with {timestamp_pattern_count} detected timestamps") + + # Add the end of file as a boundary + potential_timestamps.append(len(lines)) + + # Process messages between timestamps + for i in range(len(potential_timestamps) - 1): + start_idx = potential_timestamps[i] + end_idx = potential_timestamps[i + 1] + + message_content = "\n".join(lines[start_idx:end_idx]) + first_line = lines[start_idx].lower() + + # Simple heuristic to identify sender + is_user = any( + user_word in first_line + for user_word in ["user", "customer", "client", "human", "question", "query"] + ) + is_assistant = any( + assistant_word in first_line + for assistant_word in ["assistant", "agent", "bot", "ai", "system", "support", "answer", "response"] + ) + + sender = "User" if (is_user or (not is_assistant and i % 2 == 0)) else "Assistant" + + if save_message(session, sender, message_content): + messages_created += 1 + + logger.info(f"Created {messages_created} messages using timestamp-based parsing") + return messages_created + + # Simple heuristic: alternate between user and assistant, with first message from user + # Start with paragraphs (blank line separations) as message boundaries + paragraphs = [] + current_paragraph = [] + + for line in lines: + if line.strip(): + current_paragraph.append(line) + elif current_paragraph: # Empty line and we have a paragraph + paragraphs.append("\n".join(current_paragraph)) + current_paragraph = [] + + # Add the last paragraph if it's not empty + if current_paragraph: + paragraphs.append("\n".join(current_paragraph)) + + # If we have just one paragraph, try to split by sentence boundaries for very long transcripts + if len(paragraphs) == 1 and len(paragraphs[0].split()) > 100: + # Note: `re` was already imported above for the timestamp scan + + # Try to split by sentence boundaries + text = paragraphs[0] + # Define sentence ending patterns + sentence_endings = r"(?<=[.!?])\s+" + sentences = re.split(sentence_endings, text) + # Group sentences into logical chunks (assuming alternating speakers) + chunks = [] + current_chunk = [] + + for i, sentence in enumerate(sentences): + current_chunk.append(sentence) + # Every 2-3 sentences or on a natural break like a question mark + if (i % 2 == 1 and sentence.endswith("?")) or len(current_chunk) >= 3: + chunks.append(" ".join(current_chunk)) + current_chunk = [] + + # Add any remaining sentences + if current_chunk: + chunks.append(" ".join(current_chunk)) + + # Save the chunks alternating between user and assistant + for i, chunk in enumerate(chunks): + if chunk.strip(): + sender = "User" if i % 2 == 0 else "Assistant" + if save_message(session, sender, chunk): + messages_created += 1 + + logger.info(f"Created {messages_created} messages by splitting single paragraph into sentences") + return messages_created + + # Save messages alternating between user and assistant + for i, paragraph in enumerate(paragraphs): + if paragraph.strip(): # Only save non-empty paragraphs + sender = "User" if i % 2 == 0 else "Assistant" + if save_message(session, sender, paragraph): + messages_created += 1 + + logger.info(f"Created {messages_created} messages using intelligent split for session {session.session_id}") + return messages_created + + # Standard processing with recognized patterns for line in lines: - if line.startswith("User:"): - if ( - current_sender - and current_message_lines - and save_message(session, current_sender, "\n".join(current_message_lines)) - ): - messages_created += 1 + line_stripped = line.strip() + + # Skip empty lines at the beginning + if not line_stripped and not current_sender: + continue + + # Check if this line indicates a new sender + is_user_message = any(line_stripped.startswith(pattern) for pattern in user_patterns) + is_assistant_message = any(line_stripped.startswith(pattern) for pattern in assistant_patterns) + + if is_user_message: + # Save previous message if any + save_current_message() + + # Start new user message current_sender = "User" - current_message_lines = [line.replace("User:", "").strip()] - elif line.startswith("Assistant:"): - if ( - current_sender - and current_message_lines - and save_message(session, current_sender, "\n".join(current_message_lines)) - ): - messages_created += 1 + # Remove the prefix from the stripped line so leading whitespace cannot misalign the slice + for pattern in user_patterns: + if line_stripped.startswith(pattern): + line = line_stripped[len(pattern) :].strip() + break + current_message_lines = [line] if line.strip() else [] + elif is_assistant_message: + # Save previous message if any + save_current_message() + + # Start new assistant message current_sender = "Assistant" - current_message_lines = [line.replace("Assistant:", "").strip()] + # Remove the prefix from the stripped line so leading whitespace cannot misalign the slice + for pattern in assistant_patterns: + if line_stripped.startswith(pattern): + line = line_stripped[len(pattern) :].strip() + break + current_message_lines = [line] if line.strip() else [] elif current_sender: - current_message_lines.append(line.strip()) + # Continue adding to current message + current_message_lines.append(line) + else: + # If we get here with no current_sender, assume it's the start of a user message + logger.warning(f"Found line without sender prefix: '{line}'. Assuming User message.") + current_sender = "User" + current_message_lines = [line] # Save the last message - if ( - current_sender - and current_message_lines - and save_message(session, current_sender, "\n".join(current_message_lines)) - ): - messages_created += 1 + save_current_message() + + # Handle case with no messages parsed (possibly incorrectly formatted transcript) + if messages_created == 0 and lines: + logger.warning( + f"No messages were parsed from transcript for session {session.session_id}. Using fallback parsing." 
+ ) + + # Fallback: Just split the transcript in half, first part user, second part assistant + mid_point = len(lines) // 2 + user_content = "\n".join(lines[:mid_point]) + assistant_content = "\n".join(lines[mid_point:]) + + # Save the split messages if they have content + if user_content.strip() and save_message(session, "User", user_content): + messages_created += 1 + + if assistant_content.strip() and save_message(session, "Assistant", assistant_content): + messages_created += 1 + + logger.info(f"Created {messages_created} messages using fallback parsing") logger.info(f"Created {messages_created} messages for session {session.session_id}") return messages_created diff --git a/docs/CELERY_REDIS.md b/docs/CELERY_REDIS.md index c76a19c..cad465f 100644 --- a/docs/CELERY_REDIS.md +++ b/docs/CELERY_REDIS.md @@ -6,10 +6,10 @@ This document explains how to set up and use Redis and Celery for background tas The data integration module uses Celery to handle: -- Periodic data fetching from external APIs -- Processing and storing CSV data -- Downloading and parsing transcript files -- Manual data refresh triggered by users +- Periodic data fetching from external APIs +- Processing and storing CSV data +- Downloading and parsing transcript files +- Manual data refresh triggered by users ## Installation @@ -31,32 +31,33 @@ redis-cli ping # Should output PONG After installation, check if Redis is properly configured: -1. Open Redis configuration file: +1. Open Redis configuration file: - ```bash - sudo nano /etc/redis/redis.conf - ``` + ```bash + sudo nano /etc/redis/redis.conf + ``` -2. Ensure the following settings: +2. Ensure the following settings: - ```bash - # For development (localhost only) - bind 127.0.0.1 + ```bash + # For development (localhost only) + bind 127.0.0.1 - # For production (accept connections from specific IP) - # bind 127.0.0.1 your.server.ip.address + # For production (accept connections from specific IP) + # bind 127.0.0.1 your.server.ip.address - # Protected mode (recommended) - protected-mode yes + # Protected mode (recommended) + protected-mode yes - # Port - port 6379 - ``` + # Port + port 6379 + ``` -3. Restart Redis after any changes: - ```bash - sudo systemctl restart redis-server - ``` +3. Restart Redis after any changes: + + ```bash + sudo systemctl restart redis-server + ``` #### macOS @@ -79,7 +80,7 @@ If Redis is not available, the application will automatically fall back to using Set these environment variables in your `.env` file or deployment environment: -```env +```sh # Redis Configuration REDIS_HOST=localhost REDIS_PORT=6379 @@ -126,28 +127,29 @@ docker-compose up -d Development requires multiple terminal windows: -1. **Django Development Server**: +1. **Django Development Server**: - ```bash - make run - ``` + ```bash + make run + ``` -2. **Redis Server** (if needed): +2. **Redis Server** (if needed): - ```bash - make run-redis - ``` + ```bash + make run-redis + ``` -3. **Celery Worker**: +3. **Celery Worker**: - ```bash - make celery - ``` + ```bash + make celery + ``` -4. **Celery Beat** (for scheduled tasks): - ```bash - make celery-beat - ``` +4. **Celery Beat** (for scheduled tasks): + + ```bash + make celery-beat + ``` Or use the combined command: @@ -161,12 +163,12 @@ make run-all If you see connection errors: -1. Check that Redis is running: `redis-cli ping` should return `PONG` -2. Verify firewall settings are not blocking port 6379 -3. Check Redis binding in `/etc/redis/redis.conf` (should be `bind 127.0.0.1` for local dev) +1. 
Check that Redis is running: `redis-cli ping` should return `PONG` +2. Verify firewall settings are not blocking port 6379 +3. Check Redis binding in `/etc/redis/redis.conf` (should be `bind 127.0.0.1` for local dev) ### Celery Workers Not Processing Tasks -1. Ensure the worker is running with the correct app name: `celery -A dashboard_project worker` -2. Check the Celery logs for errors -3. Verify broker URL settings in both code and environment variables +1. Ensure the worker is running with the correct app name: `celery -A dashboard_project worker` +2. Check the Celery logs for errors +3. Verify broker URL settings in both code and environment variables diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index b460dfa..ad59e64 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -25,39 +25,40 @@ python manage.py test_redis If this fails, check the following: -1. Redis might not be running. Start it with: +1. Redis might not be running. Start it with: - ```bash - sudo systemctl start redis-server - ``` + ```bash + sudo systemctl start redis-server + ``` -2. Connection credentials may be incorrect. Check your environment variables: +2. Connection credentials may be incorrect. Check your environment variables: - ```bash - echo $REDIS_URL - echo $CELERY_BROKER_URL - echo $CELERY_RESULT_BACKEND - ``` + ```bash + echo $REDIS_URL + echo $CELERY_BROKER_URL + echo $CELERY_RESULT_BACKEND + ``` -3. Redis might be binding only to a specific interface. Check `/etc/redis/redis.conf`: +3. Redis might be binding only to a specific interface. Check `/etc/redis/redis.conf`: - ```bash - grep "bind" /etc/redis/redis.conf - ``` + ```bash + grep "bind" /etc/redis/redis.conf + ``` -4. Firewall rules might be blocking Redis. If you're connecting remotely: - ```bash - sudo ufw status # Check if firewall is enabled - sudo ufw allow 6379/tcp # Allow Redis port if needed - ``` +4. Firewall rules might be blocking Redis. If you're connecting remotely: + + ```bash + sudo ufw status # Check if firewall is enabled + sudo ufw allow 6379/tcp # Allow Redis port if needed + ``` ## Fixing CSV Data Processing Issues If you see the error `zip() argument 2 is shorter than argument 1`, it means the data format doesn't match the expected headers. We've implemented a fix that: -1. Pads shorter rows with empty strings -2. Uses more flexible date format parsing -3. Provides better error handling +1. Pads shorter rows with empty strings +2. Uses more flexible date format parsing +3. Provides better error handling After these changes, your data should be processed correctly regardless of format variations. @@ -77,15 +78,18 @@ python manage.py test_celery If the task isn't completing, check: -1. Look for errors in the Celery worker terminal -2. Verify broker URL settings match in both terminals: - ```bash - echo $CELERY_BROKER_URL - ``` -3. Check if Redis is accessible from both terminals: - ```bash - redis-cli ping - ``` +1. Look for errors in the Celery worker terminal +2. Verify broker URL settings match in both terminals: + + ```bash + echo $CELERY_BROKER_URL + ``` + +3. Check if Redis is accessible from both terminals: + + ```bash + redis-cli ping + ``` ## Checking Scheduled Tasks @@ -99,36 +103,36 @@ python manage.py celery inspect scheduled Common issues with scheduled tasks: -1. **Celery Beat not running**: Start it with: +1. 
**Celery Beat not running**: Start it with: - ```bash - cd dashboard_project - celery -A dashboard_project beat - ``` + ```bash + cd dashboard_project + celery -A dashboard_project beat + ``` -2. **Task registered but not running**: Check worker logs for any errors +2. **Task registered but not running**: Check worker logs for any errors -3. **Wrong schedule**: Check the interval in settings.py and CELERY_BEAT_SCHEDULE +3. **Wrong schedule**: Check the interval in settings.py and CELERY_BEAT_SCHEDULE ## Data Source Configuration If data sources aren't being processed correctly: -1. Verify active data sources exist: +1. Verify active data sources exist: - ```bash - cd dashboard_project - python manage.py shell -c "from data_integration.models import ExternalDataSource; print(ExternalDataSource.objects.filter(is_active=True).count())" - ``` + ```bash + cd dashboard_project + python manage.py shell -c "from data_integration.models import ExternalDataSource; print(ExternalDataSource.objects.filter(is_active=True).count())" + ``` -2. Create a default data source if needed: +2. Create a default data source if needed: - ```bash - cd dashboard_project - python manage.py create_default_datasource - ``` + ```bash + cd dashboard_project + python manage.py create_default_datasource + ``` -3. Check source URLs and credentials in the admin interface or environment variables. +3. Check source URLs and credentials in the admin interface or environment variables. ## Manually Triggering Data Refresh diff --git a/nginx/conf.d/default.conf b/nginx/conf.d/default.conf index 5a079f7..8a0f11e 100644 --- a/nginx/conf.d/default.conf +++ b/nginx/conf.d/default.conf @@ -1,7 +1,7 @@ # nginx/conf.d/default.conf upstream dashboard { - server web:8000; + server web:8001; } server { diff --git a/package.json b/package.json index 57679ee..48d481e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,35 @@ { "devDependencies": { + "markdownlint-cli2": "^0.18.1", "prettier": "^3.5.3", "prettier-plugin-jinja-template": "^2.1.0" + }, + "scripts": { + "format": "prettier --write .", + "format:check": "prettier --check .", + "lint:md": "markdownlint-cli2 \"**/*.md\" \"!.trunk/**\" \"!.venv/**\" \"!node_modules/**\"", + "lint:md:fix": "markdownlint-cli2 --fix \"**/*.md\" \"!.trunk/**\" \"!.venv/**\" \"!node_modules/**\"" + }, + "markdownlint-cli2": { + "config": { + "MD007": { + "indent": 4, + "start_indented": false, + "start_indent": 4 + }, + "MD013": false, + "MD030": { + "ul_single": 3, + "ol_single": 2, + "ul_multi": 3, + "ol_multi": 2 + }, + "MD033": false + }, + "ignores": [ + "node_modules", + ".git", + "*.json" + ] } } diff --git a/pyproject.toml b/pyproject.toml index 798e452..383a16a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "sqlalchemy>=2.0.41", "tinycss2>=1.4.0", "whitenoise>=6.9.0", + "xlsxwriter>=3.2.3", ] [dependency-groups] diff --git a/requirements.txt b/requirements.txt index c469a06..f1e4023 100644 --- a/requirements.txt +++ b/requirements.txt @@ -479,3 +479,7 @@ whitenoise==6.9.0 \ --hash=sha256:8c4a7c9d384694990c26f3047e118c691557481d624f069b7f7752a2f735d609 \ --hash=sha256:c8a489049b7ee9889617bb4c274a153f3d979e8f51d2efd0f5b403caf41c57df # via livegraphsdjango +xlsxwriter==3.2.3 \ + --hash=sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d \ + --hash=sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5 + # via livegraphsdjango diff --git a/uv.lock b/uv.lock index 21dbbfc..f5a4a92 100644 --- a/uv.lock +++ b/uv.lock @@ -482,6 
+482,7 @@ dependencies = [ { name = "sqlalchemy" }, { name = "tinycss2" }, { name = "whitenoise" }, + { name = "xlsxwriter" }, ] [package.dev-dependencies] @@ -517,6 +518,7 @@ requires-dist = [ { name = "sqlalchemy", specifier = ">=2.0.41" }, { name = "tinycss2", specifier = ">=1.4.0" }, { name = "whitenoise", specifier = ">=6.9.0" }, + { name = "xlsxwriter", specifier = ">=3.2.3" }, ] [package.metadata.requires-dev] @@ -1058,3 +1060,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/b9/cf/c15c2f21aee6b22a9 wheels = [ { url = "https://files.pythonhosted.org/packages/64/b2/2ce9263149fbde9701d352bda24ea1362c154e196d2fda2201f18fc585d7/whitenoise-6.9.0-py3-none-any.whl", hash = "sha256:c8a489049b7ee9889617bb4c274a153f3d979e8f51d2efd0f5b403caf41c57df", size = 20161, upload-time = "2025-02-06T22:16:32.589Z" }, ] + +[[package]] +name = "xlsxwriter" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/d1/e026d33dd5d552e5bf3a873dee54dad66b550230df8290d79394f09b2315/xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5", size = 209135, upload-time = "2025-04-17T10:11:23.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/b1/a252d499f2760b314fcf264d2b36fcc4343a1ecdb25492b210cb0db70a68/XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d", size = 169433, upload-time = "2025-04-17T10:11:21.329Z" }, +]
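
As context for the `remove_timezone` workbook option used in `export_chats_excel` above: Django stores timezone-aware datetimes when `USE_TZ` is enabled, but Excel cells cannot carry timezone information, so xlsxwriter must strip the tzinfo on write. A minimal standalone sketch, not part of the patch, assuming only the `xlsxwriter` dependency pinned in `pyproject.toml`:

```python
import io
from datetime import datetime, timezone

import xlsxwriter

output = io.BytesIO()
# Without {"remove_timezone": True}, xlsxwriter refuses timezone-aware
# datetimes, since Excel has no way to represent the tzinfo.
workbook = xlsxwriter.Workbook(output, {"remove_timezone": True})
worksheet = workbook.add_worksheet("Demo")
date_format = workbook.add_format({"num_format": "yyyy-mm-dd hh:mm:ss"})

aware = datetime(2025, 5, 1, 12, 30, 0, tzinfo=timezone.utc)
worksheet.write_datetime(0, 0, aware, date_format)  # tzinfo stripped on write

workbook.close()
print(f"wrote {len(output.getvalue())} bytes of xlsx data")
```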
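
The multi-format date parsing added in `data_integration/utils.py` boils down to trying a fixed list of `strptime` formats in order and keeping the first match. A sketch of that logic as a pure helper (`parse_flexible` is an illustrative name, not part of the patch; the real code additionally wraps each result in Django's `make_aware`):

```python
from datetime import datetime

# Same format list as the patch
DATE_FORMATS = [
    "%d.%m.%Y %H:%M:%S",      # European: DD.MM.YYYY HH:MM:SS
    "%Y-%m-%d %H:%M:%S",      # ISO: YYYY-MM-DD HH:MM:SS
    "%m/%d/%Y %H:%M:%S",      # US: MM/DD/YYYY HH:MM:SS
    "%Y-%m-%dT%H:%M:%S",      # ISO with T separator
    "%Y-%m-%dT%H:%M:%S.%fZ",  # ISO with fractional seconds and Z
]


def parse_flexible(value: str) -> datetime | None:
    """Return the first successful parse of value, or None if no format fits."""
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(value, fmt)
        except (ValueError, TypeError):
            continue
    return None


assert parse_flexible("01.05.2025 12:30:00") == datetime(2025, 5, 1, 12, 30)
assert parse_flexible("2025-05-01T12:30:00.123Z") is not None
assert parse_flexible("not a date") is None  # caller logs and skips the row
```

Because `strptime` requires the whole string to match, the plain `%Y-%m-%dT%H:%M:%S` format rejects a trailing `.123Z`, so the fractional-seconds variant is still reached.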
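
Likewise, the rewritten transcript parser in `parse_and_store_transcript_messages` is, at its core, a sender-prefix state machine: a line starting with a known prefix opens a new message, and subsequent lines are treated as continuations. A simplified, self-contained sketch of that core loop (`split_transcript` and the trimmed prefix tuples are illustrative only; the real function also handles timestamps, paragraph fallbacks, and persistence via `save_message`):

```python
USER_PREFIXES = ("User:", "Customer:", "Q:")
ASSISTANT_PREFIXES = ("Assistant:", "Bot:", "A:")


def split_transcript(text: str) -> list[tuple[str, str]]:
    messages: list[tuple[str, str]] = []
    sender, buffer = None, []

    def flush() -> None:
        # Persist the message accumulated so far, skipping pure whitespace
        if sender and buffer:
            body = "\n".join(buffer).strip()
            if body:
                messages.append((sender, body))

    for line in text.splitlines():
        stripped = line.strip()
        prefix = next(
            (p for p in USER_PREFIXES + ASSISTANT_PREFIXES if stripped.startswith(p)),
            None,
        )
        if prefix:
            flush()  # a new prefix closes the previous message
            sender = "User" if prefix in USER_PREFIXES else "Assistant"
            buffer = [stripped[len(prefix):].strip()]
        elif sender:
            buffer.append(line)  # continuation line of a multiline message
    flush()
    return messages


demo = "User: Hello\nsecond line\nAssistant: Hi there"
assert split_transcript(demo) == [
    ("User", "Hello\nsecond line"),
    ("Assistant", "Hi there"),
]
```

The real implementation layers timestamp detection and paragraph or sentence fallbacks on top of this loop for transcripts that use no recognizable prefixes.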