Enhance data integration and transcript parsing

- Improved date parsing in `fetch_and_store_chat_data` to support multiple formats and added error logging for unparseable dates.
- Enhanced `parse_and_store_transcript_messages` to handle empty transcripts and expanded message pattern recognition for both User and Assistant.
- Implemented intelligent splitting of transcripts based on detected patterns and timestamps, with fallback mechanisms for unrecognized formats.
- Updated documentation for Celery and Redis setup, troubleshooting, and project structure.
- Added markdown linting configuration and scripts for code formatting.
- Updated Nginx configuration to change the web server port.
- Added the `xlsxwriter` dependency for Excel export to the project requirements.
commit f0ae061fa7 (parent 8bbbb109bd)
2025-05-18 19:18:31 +00:00
24 changed files with 1672 additions and 931 deletions

View File

@@ -61,20 +61,64 @@
"customizations": {
"vscode": {
"extensions": [
"bierner.github-markdown-preview",
"bierner.markdown-mermaid",
"bierner.markdown-preview-github-styles",
"charliermarsh.ruff",
"CS50.ddb50",
"DavidAnson.vscode-markdownlint",
"esbenp.prettier-vscode",
"GitHub.copilot-chat",
"GitHub.copilot-workspace",
"GitHub.remotehub",
"github.vscode-github-actions",
"ms-vscode.copilot-mermaid-diagram",
"ms-vscode.vscode-copilot-data-analysis",
"ms-vscode.vscode-copilot-vision",
"ms-vscode.vscode-github-issue-notebooks",
"ms-vscode.vscode-websearchforcopilot",
"PyCQA.bandit-pycqa",
"samuelcolvin.jinjahtml",
"shd101wyy.markdown-preview-enhanced",
"tamasfe.even-better-toml",
"timonwong.shellcheck",
"trunk.io"
"trunk.io",
"VisualStudioExptTeam.intellicode-api-usage-examples",
"yzhang.markdown-all-in-one"
],
"settings": {
"github.copilot.chat.codeGeneration.instructions": [
{
"text": "This dev container includes an up-to-date version of Git, built from source as needed, pre-installed and available on the `PATH`."
},
{
"text": "This dev container includes the Docker CLI (`docker`) pre-installed and available on the `PATH` for running and managing containers using a dedicated Docker daemon running inside the dev container."
},
{
"text": "This dev container includes Go and common Go utilities pre-installed and available on the `PATH`, along with the Go language extension for Go development."
},
{
"text": "This dev container includes `node`, `npm` and `eslint` pre-installed and available on the `PATH` for Node.js and JavaScript development."
},
{
"text": "This dev container includes `python3` and `pip3` pre-installed and available on the `PATH`, along with the Python language extensions for Python development."
},
{
"text": "This dev container includes an SSH server so that you can use an external terminal, sftp, or SSHFS to interact with it. The first time you've started the container, you will want to set a password for your user. With each connection to the container, you'll want to forward the SSH port to your local machine and use a local terminal or other tool to connect using the password you set."
},
{
"text": "This dev container includes the GitHub CLI (`gh`), which is pre-installed and available on the `PATH`. IMPORTANT: `gh api -f` does not support object values, use multiple `-f` flags with hierarchical keys and string values instead. When using GitHub actions `actions/upload-artifact` or `actions/download-artifact` use v4 or later."
},
{
"text": "This workspace is in a dev container running on \"Ubuntu 22.04.5 LTS\".\n\nUse `\"$BROWSER\" <url>` to open a webpage in the host's default browser.\n\nSome of the command line tools available on the `PATH`: `apt`, `dpkg`, `docker`, `git`, `gh`, `curl`, `wget`, `ssh`, `scp`, `rsync`, `gpg`, `ps`, `lsof`, `netstat`, `top`, `tree`, `find`, `grep`, `zip`, `unzip`, `tar`, `gzip`, `bzip2`, `xz`"
}
],
"[css]": {
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
@@ -87,6 +131,10 @@
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
},
"[markdown]": {
"editor.defaultFormatter": "DavidAnson.vscode-markdownlint",
"editor.formatOnSave": true
},
"[python]": {
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
@@ -130,14 +178,19 @@
"fileMatch": ["*/devcontainer.json"],
"url": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json"
}
]
],
"markdownlint.config": {
"MD007": {
"indent": 4
}
}
}
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
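// 6379 is Redis; 8001 is assumed to be the app's web server port (cf. the Nginx port change in this commit).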
"forwardPorts": [6379, 8001],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "sudo apt update && sudo apt full-upgrade -y && sudo apt autoremove -y; sudo apt install -y ruby-foreman; npm i -g prettier prettier-plugin-jinja-template; redis-server --daemonize yes; uname -a; export UV_LINK_MODE=copy; uv python install; uv pip install -Ur pyproject.toml"
"postCreateCommand": "bash .devcontainer/postCreateCommand.sh"
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env bash
sudo apt update
sudo apt full-upgrade -y
sudo apt autoremove -y;
sudo apt install -y ruby-foreman;
npm install --save-dev prettier prettier-plugin-jinja-template markdownlint-cli2;
# Install ohmyposh
curl -s https://ohmyposh.dev/install.sh | bash -s
if ! command -v oh-my-posh &> /dev/null; then
echo "Failed to install oh-my-posh. Please check the installation script."
else
echo "oh-my-posh installed successfully."
echo "Setting up oh-my-posh theme..."
# Install fonts using proper bash array syntax
fonts=("Hack" "AnonymousPro" "Noto")
for font in "${fonts[@]}"; do
oh-my-posh font install "$font"
done
fi
# Setup shell completions
echo "Setting up shell completions..."
# if uv, uvx, ruff, or oh-my-posh are not available, don't add shell completions for that tool
if ! command -v uv &> /dev/null; then
echo "uv not found, skipping shell completion setup for uv"
else
echo "uv found, setting up shell completion"
uv generate-shell-completion bash > ~/.cache/uv-completion.bash
fi
if ! command -v uvx &> /dev/null; then
echo "uvx not found, skipping shell completion setup for uvx"
else
echo "uvx found, setting up shell completion"
uvx --generate-shell-completion bash > ~/.cache/uvx-completion.bash
fi
if ! command -v ruff &> /dev/null; then
echo "ruff not found, skipping shell completion setup for ruff"
else
echo "ruff found, setting up shell completion"
ruff generate-shell-completion bash > ~/.cache/ruff-completion.bash
fi
if ! command -v oh-my-posh &> /dev/null; then
echo "oh-my-posh not found, skipping shell completion setup for oh-my-posh"
else
echo "oh-my-posh found, setting up shell completion"
oh-my-posh init bash --config ~/.cache/oh-my-posh/themes/paradox.omp.json > ~/.cache/oh-my-posh-completion.bash
fi
# Check if ~/.bashrc already contains the completion setup
if ! grep -q 'uv-completion.bash' ~/.bashrc; then
echo "Adding shell completions to ~/.bashrc"
cat << EOF >> ~/.bashrc
# Shell completions
if [ -f ~/.cache/uv-completion.bash ]; then
source ~/.cache/uv-completion.bash
fi
if [ -f ~/.cache/uvx-completion.bash ]; then
source ~/.cache/uvx-completion.bash
fi
if [ -f ~/.cache/ruff-completion.bash ]; then
source ~/.cache/ruff-completion.bash
fi
if [ -f ~/.cache/oh-my-posh-completion.bash ]; then
source ~/.cache/oh-my-posh-completion.bash
fi
export UV_LINK_MODE=copy;
EOF
echo "Shell completions added to ~/.bashrc"
else
echo "Shell completions already present in ~/.bashrc"
fi
uv python install
uv pip install -Ur pyproject.toml --group dev
redis-server --daemonize yes;

.github/copilot-instructions.md
View File

@@ -0,0 +1,265 @@
# Instructions for Copilot
## General Instructions
- Use clear and concise language.
- Provide code examples where applicable.
- Write clean code with Django best practices.
- Use comments to explain complex logic.
- Use packages and libraries where appropriate and possible to avoid reinventing the wheel.
- Update [TODO](TODO.md) and [README](README.md) as appropriate.
## uv
uv is a fast Python package and project manager written in Rust. Use it to manage dependencies, virtual environments, and Python versions, and to run Python scripts with improved performance.
### Running Python Scripts
- Execute a Python script with uv:
```bash
uv run python ${FILE}.py
```
- Run a script with a specific Python version:
```bash
uv run --python 3.8 ${FILE}.py
```
- Run a script with arguments:
```bash
uv run python ${FILE}.py --arg1 value1 --arg2 value2
```
- Add dependencies to standalone scripts:
```bash
uv add --script ${FILE}.py <package-name>
```
- Remove dependencies from a script:
```bash
uv remove --script ${FILE}.py <package-name>
```
### Package Management
- Install packages:
```bash
uv pip install <package-name>
```
- Install from requirements file:
```bash
uv pip install -r requirements.txt
```
- Add a package to current project:
```bash
uv add <package-name>
```
- Remove a package:
```bash
uv remove <package-name>
```
### Virtual Environment Management
- Create and activate a virtual environment:
```bash
uv venv .venv
source .venv/bin/activate # Linux/macOS
```
- Install project dependencies into an environment:
```bash
uv pip sync requirements.txt
```
- Lock dependencies for reproducible environments:
```bash
uv lock
```
### Project Management
- Create a new Python project:
```bash
uv init <project-name>
```
- Build a project into distribution archives:
```bash
uv build
```
- View dependency tree:
```bash
uv tree
```
- Publish package to PyPI:
```bash
uv publish
```
### Python Version Management
- Install specific Python version:
```bash
uv python install 3.11
```
- List available Python versions:
```bash
uv python list
```
- Find installed Python version:
```bash
uv python find
```
- Pin project to specific Python version:
```bash
uv python pin 3.11
```
### Performance Benefits
- uv offers significantly faster package installation than pip
- Built-in caching speeds up repeated operations
- Compatible with the existing Python tooling ecosystem
- Reliable dependency resolution avoids conflicts
## Project Structure
This section provides a comprehensive overview of the LiveGraphsDjango project structure and the function of each key file. Please update this section whenever there are noteworthy changes to the structure or to a file's function.
```tree
LiveGraphsDjango/
├── dashboard_project/ # Main Django project directory
│ ├── __init__.py # Python package declaration
│ ├── __main__.py # Entry point for running module as script
│ ├── asgi.py # ASGI configuration for async web servers
│ ├── manage.py # Django command-line utility
│ ├── wsgi.py # WSGI configuration for web servers
│ ├── accounts/ # User authentication and company management
│ │ ├── admin.py # Admin interface for accounts
│ │ ├── forms.py # User registration and login forms
│ │ ├── models.py # User and Company models
│ │ ├── urls.py # URL routing for accounts
│ │ └── views.py # View functions for user authentication
│ ├── dashboard/ # Core dashboard functionality
│ │ ├── admin.py # Admin interface for dashboard components
│ │ ├── forms.py # Dashboard configuration forms
│ │ ├── models.py # Dashboard, DataSource models
│ │ ├── signals.py # Signal handlers for dashboard events
│ │ ├── urls.py # URL routing for dashboard
│ │ ├── utils.py # Utility functions for dashboard
│ │ ├── views.py # Main dashboard view functions
│ │ ├── views_export.py # Data export views (CSV, JSON, Excel)
│ │ ├── management/ # Custom management commands
│ │ │ └── commands/ # Django management commands
│ │ ├── migrations/ # Database migrations
│ │ └── templatetags/ # Custom template tags
│ ├── dashboard_project/ # Project settings and configuration
│ │ ├── settings.py # Django settings
│ │ ├── urls.py # Main URL configuration
│ │ └── celery.py # Celery configuration for async tasks
│ ├── data_integration/ # External data integration
│ │ ├── admin.py # Admin interface for data sources
│ │ ├── models.py # ExternalDataSource, ChatSession models
│ │ ├── tasks.py # Celery tasks for data fetching
│ │ ├── urls.py # URL routing for data integration
│ │ ├── utils.py # Data fetching and transformation utilities
│ │ └── views.py # Views for data source management
│ ├── media/ # User-uploaded files
│ │ └── data_sources/ # Uploaded CSV data sources
│ ├── scripts/ # Utility scripts
│ │ ├── cleanup_duplicates.py # Script to remove duplicate data
│ │ └── fix_dashboard_data.py # Script to fix corrupt dashboard data
│ ├── static/ # Static assets (CSS, JS, images)
│ │ ├── css/ # Stylesheets
│ │ ├── img/ # Images
│ │ └── js/ # JavaScript files
│ └── templates/ # HTML templates
│ ├── base.html # Base template with common layout
│ ├── accounts/ # Account-related templates
│ └── dashboard/ # Dashboard-related templates
├── docs/ # Project documentation
│ ├── CELERY_REDIS.md # Celery and Redis setup guide
│ └── TROUBLESHOOTING.md # Common issues and solutions
├── examples/ # Example data files
│ ├── 132f3a8c-3ba5-4d89-ae04-cd83f1bc5272.txt # Sample transcript
│ ├── jumbo.csv # Sample chat data
│ ├── sample.csv # Generic sample data
│ └── sessions.csv # Sample session data
├── nginx/ # Nginx configuration
│ └── conf.d/ # Nginx site configs
├── .github/ # GitHub-specific files
│ └── copilot-instructions.md # Instructions for GitHub Copilot (this file)
├── dev.sh # Development environment setup script
├── docker-compose.yml # Docker Compose configuration
├── Dockerfile # Docker image definition
├── IMPLEMENTATION_SUMMARY.md # Implementation details and status
├── Makefile # Common commands for the project
├── Procfile # Heroku deployment configuration
├── PROJECT_OVERVIEW.md # Project overview and architecture
├── pyproject.toml # Python project configuration
├── QUICK_START_GUIDE.md # Getting started guide
├── README.md # Project introduction and overview
├── requirements.txt # Python dependencies
├── start.sh # Production startup script
└── TODO.md # Pending tasks and features
```
### Key Component Relationships
1. **Multi-Tenant Architecture**:
- Companies are the top-level organizational unit
- Users belong to Companies and have different permission levels
- DataSources are owned by Companies
- Dashboards display analytics based on DataSources (see the model sketch after this list)
2. **Data Integration Flow**:
- External APIs are configured via ExternalDataSource models
- Data is fetched, parsed, and stored as ChatSessions and ChatMessages
- Dashboard views aggregate and visualize this data
3. **Export Functionality**:
- Export available in CSV, JSON, and Excel formats
- Filtering options to customize exported data
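A minimal, illustrative Django sketch of these relationships follows; the real definitions live in `accounts/models.py` and `dashboard/models.py`, and field names there may differ:
```python
from django.contrib.auth.models import AbstractUser
from django.db import models


class Company(models.Model):
    """Top-level tenant: owns users, data sources, and dashboards."""
    name = models.CharField(max_length=255)


class CustomUser(AbstractUser):
    """Each user belongs to one company; a flag gates company-admin features."""
    company = models.ForeignKey(Company, null=True, on_delete=models.SET_NULL, related_name="users")
    is_company_admin = models.BooleanField(default=False)


class DataSource(models.Model):
    """Uploaded or fetched chat data, always scoped to a company."""
    company = models.ForeignKey(Company, on_delete=models.CASCADE, related_name="data_sources")
    name = models.CharField(max_length=255)


class Dashboard(models.Model):
    """A dashboard visualizes one or more of its company's data sources."""
    company = models.ForeignKey(Company, on_delete=models.CASCADE, related_name="dashboards")
    name = models.CharField(max_length=255)
    data_sources = models.ManyToManyField(DataSource)
```
Because every `DataSource` and `Dashboard` carries a `company` foreign key, tenant isolation reduces to filtering querysets by `request.user.company`, which is what the dashboard and export views do.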
### Important Note
**Please update this section whenever:**
1. New files or directories are added to the project
2. The function of existing files changes significantly
3. New relationships between components are established
4. The architecture of the application changes
This ensures that anyone working with GitHub Copilot has an up-to-date understanding of the project structure.

.markdownlint.json
View File

@@ -0,0 +1,17 @@
{
"default": true,
"MD007": {
"indent": 4,
"start_indented": false,
"start_indent": 4
},
"MD013": false,
"MD029": false,
"MD030": {
"ul_single": 3,
"ol_single": 2,
"ul_multi": 3,
"ol_multi": 2
},
"MD033": false
}

View File

@@ -57,6 +57,12 @@ repos:
# types_or: [javascript, jsx, ts, tsx, css, scss, json, yaml, markdown]
# exclude: '.*\.html$'
- repo: https://github.com/DavidAnson/markdownlint-cli2
rev: v0.18.1
hooks:
- id: markdownlint-cli2
args: [--fix]
# Ruff for linting and formatting
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.10

View File

@@ -48,5 +48,9 @@ docker-compose.override.yml
*.swp
*.swo
# Ignore all SQLite3 files:
**/*.sqlite3
# Ignore all SQLite files:
*.sqlite3
*.sqlite
# Ignore markdown
*.md

View File

@@ -18,6 +18,15 @@
"options": {
"parser": "jinja-template"
}
},
{
"files": ["*.md", "*.markdown"],
"options": {
"tabWidth": 2,
"useTabs": false,
"proseWrap": "preserve",
"printWidth": 100
}
}
],
"plugins": ["prettier-plugin-jinja-template"]

.uv
View File

@@ -16,3 +16,6 @@ environment-checks = ["python", "dependencies"]
# How to resolve dependencies not specified with exact versions
dependency-resolution = "strict"
# If the cache and target directories are on different filesystems, hardlinking may not be supported.
link-mode = "copy"

View File

@@ -43,5 +43,12 @@
"notebook.source.organizeImports": "explicit"
},
"notebook.formatOnSave.enabled": true,
"prettier.requireConfig": true
"prettier.requireConfig": true,
"markdownlint.config": {
"default": true,
},
"[markdown]": {
"editor.defaultFormatter": "DavidAnson.vscode-markdownlint",
"editor.formatOnSave": true
}
}

View File

@@ -1,134 +0,0 @@
# Chat Analytics Dashboard: Implementation Summary
## Core Features Implemented
1. **Multi-Tenant Architecture**:
- Companies have isolated data and user access
- Users belong to specific companies
- Role-based permissions (admin, company admin, regular user)
2. **Data Management**:
- CSV file upload and processing
- Data source management
- Chat session records with comprehensive metadata
3. **Dashboard Visualization**:
- Interactive charts using Plotly.js
- Key metrics and KPIs
- Time-series analysis
- Geographic distribution
- Sentiment analysis
- Category distribution
4. **Search and Analysis**:
- Full-text search across chat sessions
- Filtering by various attributes
- Detailed view of individual chat sessions
- Transcript viewing
5. **User Management**:
- User registration and authentication
- Profile management
- Password change functionality
- Role assignment
6. **Admin Interface**:
- Company management
- User administration
- Data source oversight
- System-wide configuration
7. **Responsive Design**:
- Mobile-friendly interface using Bootstrap 5
- Consistent layout and navigation
- Accessible UI components
## Technical Implementation
### Backend (Django)
- **Custom User Model**: Extended for company association and roles
- **Database Models**: Structured for efficient data storage and queries
- **View Logic**: Separation of concerns with dedicated view functions
- **Form Handling**: Validated data input and file uploads
- **Data Processing**: CSV parsing and structured storage
- **Template Context**: Prepared data for frontend rendering
- **URL Routing**: Clean URL structure
- **Access Control**: Permission checks throughout
### Frontend
- **Bootstrap 5**: For responsive layout and UI components
- **Plotly.js**: For interactive charts and visualizations
- **jQuery**: For AJAX functionality
- **Font Awesome**: For icons
- **Custom CSS**: For styling enhancements
### Data Flow
1. **Upload Process**:
- File validation
- CSV parsing
- Data normalization
- Record creation
- Association with company
2. **Dashboard Generation**:
- Data aggregation
- Statistical calculations
- Chart data preparation
- JSON serialization for frontend
3. **User Authentication**:
- Login/registration handling
- Session management
- Permission checks
- Access control based on company
### Deployment Configuration
- **Docker**: Containerization for consistent deployment
- **Docker Compose**: Multi-container orchestration
- **Nginx**: Web server and static file serving
- **PostgreSQL**: Production-ready database
- **Gunicorn**: WSGI HTTP server
## API Structure
While the current implementation does not have a formal REST API, the foundation is in place for adding one in the future:
1. **Dashboard API**: Already implemented for chart data (JSON responses)
2. **Data Source API**: Potential endpoint for uploading data programmatically
3. **Chat Session API**: Could expose data for external integration
## Testing and Development
- **Sample Data Generation**: Management command to create test data
- **Local Development Setup**: Easy configuration with sqlite
- **Production Deployment**: Docker-based for scalability
## Security Considerations
- **Authentication**: Django's secure authentication system
- **Data Isolation**: Company-specific queries prevent data leakage
- **Password Management**: Secure password handling
- **CSRF Protection**: Django's built-in CSRF protection
- **Input Validation**: Form validation for all user inputs
## Future Extensions
The architecture supports easy extension for:
1. **API Integration**: Direct connection to chat platforms
2. **Real-time Updates**: WebSockets for live dashboard updates
3. **Advanced Analytics**: Machine learning integration
4. **Customizable Reports**: Report generation and scheduling
5. **Enhanced Visualization**: More chart types and interactive features

View File

@@ -1,97 +0,0 @@
# Prettier for Django Templates
This project uses Prettier with the `prettier-plugin-django-annotations` plugin to format HTML templates with Django template syntax.
## Setup
The project is already configured with Prettier integration in pre-commit hooks. The configuration includes:
1. `.prettierrc` - Configuration file with Django HTML support
2. `.prettierignore` - Files to exclude from formatting
3. Pre-commit hook for automatic formatting on commits
### Manual Installation
To use Prettier locally (outside of pre-commit hooks), you'll need to install the dependencies:
```bash
# Using npm
npm install
# Or install just the required packages
npm install --save-dev prettier prettier-plugin-django-annotations
```
## Usage
### With Pre-commit
Prettier will automatically run as part of the pre-commit hooks when you commit changes.
To manually run the pre-commit hooks on all files:
```bash
pre-commit run prettier --all-files
```
### Using npm Scripts
The package.json includes npm scripts for formatting:
```bash
# Format all static files
npm run format
# Check formatting without modifying files
npm run format:check
```
### Command Line
You can also run Prettier directly:
```bash
# Format a specific file
npx prettier --write path/to/template.html
# Format all HTML files
npx prettier --write "dashboard_project/templates/**/*.html"
```
## VSCode Integration
For VSCode users, install the Prettier extension and add these settings to your `.vscode/settings.json`:
```json
{
"editor.defaultFormatter": "esbenp.prettier-vscode",
"[html]": {
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
},
"prettier.requireConfig": true
}
```
## Ignoring Parts of Templates
If you need to prevent Prettier from formatting a section of your template:
```html
{# prettier-ignore #}
<div>This section will not be formatted by Prettier.</div>
<!-- prettier-ignore -->
<div>
This works too.
</div>
```
## Django Template Support
The `prettier-plugin-django-annotations` plugin provides special handling for Django templates, including:
- Proper formatting of Django template tags (`{% %}`)
- Support for Django template comments (`{# #}`)
- Preservation of Django template variable output (`{{ }}`)
- Special handling for Django template syntax inside HTML attributes

View File

@@ -1,128 +0,0 @@
# Chat Analytics Dashboard Project
## Overview
This Django project creates a multi-tenant dashboard application for analyzing chat session data. Companies can upload their chat data (in CSV format) and view analytics and metrics through an interactive dashboard. The application supports user authentication, role-based access control, and separate data isolation for different companies.
## Project Structure
The project consists of two main Django apps:
1. **accounts**: Handles user authentication, company management, and user roles
2. **dashboard**: Manages data sources, chat sessions, and dashboard visualization
## Key Features
- **Multi-company Support**: Each company has their own private dashboards and data
- **User Management**: Different user roles (admin, company admin, regular user)
- **CSV File Upload**: Upload and process CSV files containing chat session data
- **Interactive Dashboard**: Visualize chat data with charts and metrics
- **Search Functionality**: Find specific chat sessions based on various criteria
- **Data Exploration**: Drill down into individual chat sessions for detailed analysis
## Setup and Installation
### Requirements
- Python 3.8+
- Django 4.0+
- Other dependencies listed in `requirements.txt`
### Installation Steps
1. Clone the repository
2. Set up a virtual environment
3. Install dependencies with `pip install -r requirements.txt`
4. Run database migrations with `python manage.py migrate`
5. Create a superuser with `python manage.py createsuperuser`
6. Start the development server with `python manage.py runserver`
### Creating Sample Data
To quickly populate the application with sample data, run:
```sh
python manage.py create_sample_data
```
This will create:
- An admin user (username: admin, password: admin123)
- Three sample companies
- Company admin users for each company
- Regular users for each company
- Sample chat data for each company
- Default dashboards for each company
## Models
### Accounts App
- **CustomUser**: Extends Django's User model with company association and role
- **Company**: Represents a company with users and data sources
### Dashboard App
- **DataSource**: Represents an uploaded CSV file with chat data
- **ChatSession**: Stores individual chat session data parsed from CSV
- **Dashboard**: Allows configuration of custom dashboards with selected data sources
## Usage Flow
1. **Admin Setup**:
- Admin creates companies
- Admin creates users and assigns them to companies
2. **Company Admin**:
- Uploads CSV files with chat data
- Creates and configures dashboards
- Manages company users
3. **Regular Users**:
- View dashboards
- Search and explore chat data
- Analyze chat metrics
## CSV Format
The application expects CSV files with the following columns:
- **session_id**: Unique identifier for each chat session
- **start_time**: When the chat session started
- **end_time**: When the chat session ended
- **ip_address**: User's IP address
- **country**: User's country
- **language**: Language used in the chat
- **messages_sent**: Number of messages in the conversation
- **sentiment**: Sentiment analysis result (Positive, Neutral, Negative)
- **escalated**: Whether the chat was escalated
- **forwarded_hr**: Whether the chat was forwarded to HR
- **full_transcript**: Complete chat transcript
- **avg_response_time**: Average response time in seconds
- **tokens**: Number of tokens used (for AI chat systems)
- **tokens_eur**: Cost of tokens in EUR
- **category**: Chat category or topic
- **initial_msg**: First message from the user
- **user_rating**: User satisfaction rating
## Deployment
For production deployment, the project includes:
- **Dockerfile**: For containerizing the application
- **docker-compose.yml**: For orchestrating the application with PostgreSQL and Nginx
- **Nginx Configuration**: For serving the application and static files
## Future Enhancements
- **API Integration**: Direct integration with chat systems
- **Real-time Updates**: Live dashboard updates as new chats occur
- **Advanced Analytics**: More detailed and customizable metrics
- **Export Functionality**: Export reports and analysis
- **Customizable Themes**: Company-specific branding
## Support
For any issues or questions, please create an issue in the repository or contact the project maintainers.

View File

@@ -1,249 +0,0 @@
# Chat Analytics Dashboard: Quick Start Guide
## Getting Started
This guide will help you quickly set up and start using the Chat Analytics Dashboard.
### Installation
#### Option 1: Local Development
1. **Clone the repository**:
```sh
git clone <repository-url>
cd dashboard_project
```
2. **Set up a virtual environment**:
```sh
uv venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
```
3. **Install dependencies**: # from pyproject.toml
```sh
uv pip install -r requirements.txt
```
4. **Set up the database**:
```sh
python manage.py migrate
```
5. **Create admin user**:
```sh
python manage.py createsuperuser
```
6. **Start the development server**:
```sh
python manage.py runserver
```
7. **Access the application**:
Open your browser and go to <http://127.0.0.1:8000/>
#### Option 2: Docker Deployment
1. **Clone the repository**:
```sh
git clone <repository-url>
cd dashboard_project
```
2. **Build and start the containers**:
```sh
docker-compose up -d --build
```
3. **Create admin user**:
```sh
docker-compose exec web python manage.py createsuperuser
```
4. **Access the application**:
Open your browser and go to <http://localhost/>
### Creating Sample Data (Optional)
To quickly populate the system with sample data:
```sh
python manage.py create_sample_data
```
This will create:
- Admin user (username: admin, password: admin123)
- Three companies with users
- Sample chat data and dashboards
## Basic Usage
### Admin Tasks
1. **Access Admin Panel**:
- Go to <http://localhost/admin/>
- Login with your admin credentials
2. **Create a Company**:
- Go to Companies > Add Company
- Fill in the company details and save
3. **Create Users**:
- Go to Users > Add User
- Fill in user details
- Assign the user to a company
- Set appropriate permissions (staff status, company admin)
### Company Admin Tasks
1. **Login to Dashboard**:
- Go to <http://localhost/>
- Login with your company admin credentials
2. **Upload Chat Data**:
- Click on "Upload Data" in the sidebar
- Fill in the data source details
- Select a CSV file containing chat data
- Click "Upload"
3. **Create a Dashboard**:
- Click on "New Dashboard" in the sidebar
- Fill in the dashboard details
- Select data sources to include
- Click "Create Dashboard"
### Regular User Tasks
1. **View Dashboard**:
- Login with your user credentials
- The dashboard will show automatically
- Select different dashboards from the sidebar
2. **Search Chat Sessions**:
- Click on "Search" in the top navigation
- Enter search terms
- Use filters to refine results
3. **View Session Details**:
- In search results, click the eye icon for a session
- View complete session information and transcript
## CSV Format
Your CSV files should include the following columns:
| Column | Description | Type |
| ------------------- | ------------------------------- | -------- |
| `session_id` | Unique ID for the chat | String |
| `start_time` | Session start time | Datetime |
| `end_time` | Session end time | Datetime |
| `ip_address` | User's IP address | String |
| `country` | User's country | String |
| `language` | Chat language | String |
| `messages_sent` | Number of messages | Integer |
| `sentiment` | Sentiment analysis result | String |
| `escalated` | Whether chat was escalated | Boolean |
| `forwarded_hr` | Whether chat was sent to HR | Boolean |
| `full_transcript` | Complete chat text | Text |
| `avg_response_time` | Average response time (seconds) | Float |
| `tokens` | Number of tokens used | Integer |
| `tokens_eur` | Cost in EUR | Float |
| `category` | Chat category | String |
| `initial_msg` | First user message | Text |
| `user_rating` | User satisfaction rating | String |
Example CSV row:
```csv
acme_1,2023-05-01 10:30:00,2023-05-01 10:45:00,192.168.1.1,USA,English,10,Positive,FALSE,FALSE,"User: Hello\nAgent: Hi there!",2.5,500,0.01,Support,Hello I need help,Good
```
## Dashboard Features
### Overview Panel
The main dashboard shows:
- Total chat sessions
- Average response time
- Total tokens used
- Total cost
### Charts
The dashboard includes:
- **Sessions Over Time**: Line chart showing chat volume trends
- **Sentiment Analysis**: Pie chart of positive/negative/neutral chats
- **Top Countries**: Bar chart of user countries
- **Categories**: Distribution of chat categories
### Data Source Details
View details for each data source:
- Upload date and time
- Total sessions
- Source description
- List of all chat sessions from the source
### Session Details
For each chat session, you can view:
- Session metadata (time, location, etc.)
- Full chat transcript
- Performance metrics
- User sentiment and rating
## Troubleshooting
### CSV Upload Issues
If your CSV upload fails:
- Ensure all required columns are present
- Check date formats (should be YYYY-MM-DD HH:MM:SS)
- Verify boolean values (TRUE/FALSE, Yes/No, 1/0)
- Check for special characters in text fields
### Access Issues
If you can't access certain features:
- Verify your user role (admin, company admin, or regular user)
- Ensure you're assigned to the correct company
- Check if you're trying to access another company's data
### Empty Dashboard
If your dashboard is empty:
- Verify that data sources have been uploaded
- Check that the dashboard is configured to use those data sources
- Ensure the CSV was processed successfully
## Getting Help
If you encounter any issues:
- Check the documentation
- Contact your system administrator
- File an issue in the project repository

README.md
View File

@@ -2,15 +2,28 @@
A Django application that creates an analytics dashboard for chat session data. The application allows different companies to have their own dashboards and view their own data.
## Features
## Project Overview
- Multi-company support with user authentication
- CSV file upload and processing
- Interactive dashboard with charts and visualizations
- Detailed data views for chat sessions
- Search functionality to find specific chat sessions
- Admin interface for managing users and companies
- Responsive design using Bootstrap 5
This Django project creates a multi-tenant dashboard application for analyzing chat session data. Companies can upload their chat data (in CSV format) and view analytics and metrics through an interactive dashboard. The application supports user authentication, role-based access control, and data isolation between companies.
### Project Structure
The project consists of two main Django apps:
1. **accounts**: Handles user authentication, company management, and user roles
2. **dashboard**: Manages data sources, chat sessions, and dashboard visualization
3. **data_integration**: Handles external API data integration
### Key Features
- **Multi-company Support**: Each company has its own private dashboards and data
- **User Management**: Different user roles (admin, company admin, regular user)
- **CSV File Upload**: Upload and process CSV files containing chat session data
- **Interactive Dashboard**: Visualize chat data with charts and metrics
- **Search Functionality**: Find specific chat sessions based on various criteria
- **Data Export**: Export data in CSV, JSON, and Excel formats
- **Data Exploration**: Drill down into individual chat sessions for detailed analysis
- **Responsive Design**: Mobile-friendly interface using Bootstrap 5
## Requirements
@@ -192,7 +205,107 @@ UV offers several advantages over traditional pip, including faster dependency r
4. Access the application at <http://localhost/>
## Usage
## Development Tools
### Prettier for Django Templates
This project uses Prettier with the `prettier-plugin-jinja-template` plugin to format HTML templates with Django template syntax.
#### Prettier Configuration
The project is already configured with Prettier integration in pre-commit hooks. The configuration includes:
1. `.prettierrc` - Configuration file with Django HTML support
2. `.prettierignore` - Files to exclude from formatting
3. Pre-commit hook for automatic formatting on commits
#### Manual Installation
To use Prettier locally (outside of pre-commit hooks), you'll need to install the dependencies:
```bash
# Using npm
npm install
# Or install just the required packages
npm install --save-dev prettier prettier-plugin-jinja-template
```
#### Usage
##### With Pre-commit
Prettier will automatically run as part of the pre-commit hooks when you commit changes.
To manually run the pre-commit hooks on all files:
```bash
pre-commit run prettier --all-files
```
##### Using npm Scripts
The package.json includes npm scripts for formatting:
```bash
# Format all static files
npm run format
# Check formatting without modifying files
npm run format:check
```
##### Command Line
You can also run Prettier directly:
```bash
# Format a specific file
npx prettier --write path/to/template.html
# Format all HTML files
npx prettier --write "dashboard_project/templates/**/*.html"
```
#### VSCode Integration
For VSCode users, install the Prettier extension and add these settings to your `.vscode/settings.json`:
```json
{
"editor.defaultFormatter": "esbenp.prettier-vscode",
"[html]": {
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
},
"prettier.requireConfig": true
}
```
#### Ignoring Parts of Templates
If you need to prevent Prettier from formatting a section of your template:
```html
{# prettier-ignore #}
<div>This section will not be formatted by Prettier.</div>
<!-- prettier-ignore -->
<div>
This works too.
</div>
```
#### Django Template Support
The `prettier-plugin-jinja-template` plugin provides special handling for Django templates, including:
- Proper formatting of Django template tags (`{% %}`)
- Support for Django template comments (`{# #}`)
- Preservation of Django template variable output (`{{ }}`)
- Special handling for Django template syntax inside HTML attributes
## Basic Usage Instructions
1. Log in as the superuser you created.
2. Go to the admin interface (<http://localhost/admin/>) and create companies and users.
@@ -200,6 +313,123 @@ UV offers several advantages over traditional pip, including faster dependency r
4. Upload CSV files for each company.
5. View the analytics dashboard.
## Quick Start Guide
### Creating Sample Data (Optional)
To quickly populate the system with sample data:
```sh
python manage.py create_sample_data
```
This will create:
- Admin user (username: admin, password: admin123)
- Three companies with users
- Sample chat data and dashboards
### Admin Tasks
1. **Access Admin Panel**:
- Go to <http://localhost/admin/>
- Log in with your admin credentials
2. **Create a Company**:
- Go to Companies > Add Company
- Fill in the company details and save
3. **Create Users**:
- Go to Users > Add User
- Fill in user details
- Assign the user to a company
- Set appropriate permissions (staff status, company admin)
### Company Admin Tasks
1. **Log in to the Dashboard**:
- Go to <http://localhost/>
- Log in with your company admin credentials
2. **Upload Chat Data**:
- Click on "Upload Data" in the sidebar
- Fill in the data source details
- Select a CSV file containing chat data
- Click "Upload"
3. **Create a Dashboard**:
- Click on "New Dashboard" in the sidebar
- Fill in the dashboard details
- Select data sources to include
- Click "Create Dashboard"
### Regular User Tasks
1. **View Dashboard**:
- Login with your user credentials
- The dashboard will show automatically
- Select different dashboards from the sidebar
2. **Search Chat Sessions**:
- Click on "Search" in the top navigation
- Enter search terms
- Use filters to refine results
3. **View Session Details**:
- In search results, click the eye icon for a session
- View complete session information and transcript
### Dashboard Features
The dashboard includes:
- **Sessions Over Time**: Line chart showing chat volume trends
- **Sentiment Analysis**: Pie chart of positive/negative/neutral chats
- **Top Countries**: Bar chart of user countries
- **Categories**: Distribution of chat categories
### Data Source Details
View details for each data source:
- Upload date and time
- Total sessions
- Source description
- List of all chat sessions from the source
### Troubleshooting
#### CSV Upload Issues
If your CSV upload fails:
- Ensure all required columns are present
- Check date formats (should be YYYY-MM-DD HH:MM:SS)
- Verify boolean values (TRUE/FALSE, Yes/No, 1/0)
- Check for special characters in text fields
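If an upload keeps failing, a quick offline pass over the file can pinpoint the offending rows. A minimal sketch, assuming the column names from the CSV format table below and a file such as `examples/sessions.csv`:
```python
import csv
from datetime import datetime

DATE_FORMAT = "%Y-%m-%d %H:%M:%S"  # the format the checklist above asks for
BOOLEAN_VALUES = {"true", "false", "yes", "no", "1", "0"}


def is_valid_date(value: str) -> bool:
    try:
        datetime.strptime(value, DATE_FORMAT)
        return True
    except ValueError:
        return False


with open("examples/sessions.csv", newline="", encoding="utf-8") as f:
    for line_no, row in enumerate(csv.DictReader(f), start=2):  # line 1 is the header
        if not is_valid_date(row["start_time"]):
            print(f"line {line_no}: unparseable start_time {row['start_time']!r}")
        if row["escalated"].strip().lower() not in BOOLEAN_VALUES:
            print(f"line {line_no}: unexpected escalated value {row['escalated']!r}")
```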
#### Access Issues
If you can't access certain features:
- Verify your user role (admin, company admin, or regular user)
- Ensure you're assigned to the correct company
- Check if you're trying to access another company's data
#### Empty Dashboard
If your dashboard is empty:
- Verify that data sources have been uploaded
- Check that the dashboard is configured to use those data sources
- Ensure the CSV was processed successfully
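You can also confirm from `python manage.py shell` that sessions were actually created for each data source (a sketch assuming the `DataSource` and `ChatSession` models from the dashboard app):
```python
# Zero counts here explain an empty dashboard: the upload created no sessions.
from dashboard.models import ChatSession, DataSource

for ds in DataSource.objects.all():
    count = ChatSession.objects.filter(data_source=ds).count()
    print(f"{ds.name}: {count} sessions")
```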
## CSV File Format
The CSV file should contain the following columns:
@@ -224,14 +454,159 @@ The CSV file should contain the following columns:
| `initial_msg` | First message from the user (text) |
| `user_rating` | User rating of the conversation (string) |
Example CSV row:
```csv
acme_1,2023-05-01 10:30:00,2023-05-01 10:45:00,192.168.1.1,USA,English,10,Positive,FALSE,FALSE,"User: Hello\nAgent: Hi there!",2.5,500,0.01,Support,Hello I need help,Good
```
## Implementation Details
### Core Features Implemented
1. **Multi-Tenant Architecture**:
- Companies have isolated data and user access
- Users belong to specific companies
- Role-based permissions (admin, company admin, regular user)
2. **Data Management**:
- CSV file upload and processing
- Data source management
- Chat session records with comprehensive metadata
3. **Dashboard Visualization**:
- Interactive charts using Plotly.js
- Key metrics and KPIs
- Time-series analysis
- Geographic distribution
- Sentiment analysis
- Category distribution
4. **Search and Analysis**:
- Full-text search across chat sessions
- Filtering by various attributes
- Detailed view of individual chat sessions
- Transcript viewing
5. **User Management**:
- User registration and authentication
- Profile management
- Password change functionality
- Role assignment
6. **Admin Interface**:
- Company management
- User administration
- Data source oversight
- System-wide configuration
7. **Responsive Design**:
- Mobile-friendly interface using Bootstrap 5
- Consistent layout and navigation
- Accessible UI components
### Technical Implementation
#### Backend (Django)
- **Custom User Model**: Extended for company association and roles
- **Database Models**: Structured for efficient data storage and queries
- **View Logic**: Separation of concerns with dedicated view functions
- **Form Handling**: Validated data input and file uploads
- **Data Processing**: CSV parsing and structured storage
- **Template Context**: Prepared data for frontend rendering
- **URL Routing**: Clean URL structure
- **Access Control**: Permission checks throughout
#### Frontend
- **Bootstrap 5**: For responsive layout and UI components
- **Plotly.js**: For interactive charts and visualizations
- **jQuery**: For AJAX functionality
- **Font Awesome**: For icons
- **Custom CSS**: For styling enhancements
#### Data Flow
1. **Upload Process**:
- File validation
- CSV parsing
- Data normalization
- Record creation
- Association with company
2. **Dashboard Generation**:
- Data aggregation
- Statistical calculations
- Chart data preparation
- JSON serialization for frontend
3. **User Authentication**:
- Login/registration handling
- Session management
- Permission checks
- Access control based on company
#### Deployment Configuration
- **Docker**: Containerization for consistent deployment
- **Docker Compose**: Multi-container orchestration
- **Nginx**: Web server and static file serving
- **PostgreSQL**: Production-ready database
- **Gunicorn**: WSGI HTTP server
### Models
#### Accounts App
- **CustomUser**: Extends Django's User model with company association and role
- **Company**: Represents a company with users and data sources
#### Dashboard App
- **DataSource**: Represents an uploaded CSV file with chat data
- **ChatSession**: Stores individual chat session data parsed from CSV
- **Dashboard**: Allows configuration of custom dashboards with selected data sources
### Usage Flow
1. **Admin Setup**:
- Admin creates companies
- Admin creates users and assigns them to companies
2. **Company Admin**:
- Uploads CSV files with chat data
- Creates and configures dashboards
- Manages company users
3. **Regular Users**:
- View dashboards
- Search and explore chat data
- Analyze chat metrics
## Future Enhancements
- API integration for real-time data
- More advanced visualizations
- Custom reports
- Export functionality
- Export to additional formats (XML, HTML, PDF)
- Theme customization
- User access control with more granular permissions
- Direct integration with chat platforms via API
- Real-time dashboard updates using WebSockets
- Advanced analytics with machine learning
- Customizable reports and scheduling
- Enhanced visualization options
## License

View File

@@ -25,7 +25,7 @@
- [x] Implement multi-format export functionality
- [x] CSV format
- [ ] Excel format
- [x] Excel format
- [x] JSON format
- [ ] XML format
- [ ] HTML format

View File

@@ -1,9 +1,11 @@
# dashboard/views_export.py
import csv
import io
import json
from datetime import timedelta
import xlsxwriter
from django.contrib.auth.decorators import login_required
from django.db.models import Q
from django.http import HttpResponse
@@ -207,6 +209,11 @@ def export_chats_json(request):
data_source = DataSource.objects.get(id=data_source_id)
filename = f"{data_source.name.replace(' ', '_').lower()}_chat_sessions"
# Add company name, date, and timestamp to the filename
current_time = timezone.now().strftime("%Y%m%d_%H%M%S")
company_name = company.name.replace(" ", "_").lower()
filename = f"{company_name}_{filename}_{current_time}"
# Prepare the data for JSON export using list comprehension
data = [
{
@@ -248,3 +255,188 @@
json.dump(export_data, response, indent=2)
return response
@login_required
def export_chats_excel(request):
"""Export chat sessions to Excel with filtering options"""
user = request.user
company = user.company
if not company:
return HttpResponse("You are not associated with any company.", status=403)
# Get and apply filters
data_source_id = request.GET.get("data_source_id")
dashboard_id = request.GET.get("dashboard_id")
view = request.GET.get("view", "all")
start_date = request.GET.get("start_date")
end_date = request.GET.get("end_date")
country = request.GET.get("country")
sentiment = request.GET.get("sentiment")
escalated = request.GET.get("escalated")
# Base queryset
sessions = ChatSession.objects.filter(data_source__company=company)
# Apply data source filter if selected
if data_source_id:
data_source = get_object_or_404(DataSource, id=data_source_id, company=company)
sessions = sessions.filter(data_source=data_source)
# Apply dashboard filter if selected
if dashboard_id:
dashboard = get_object_or_404(Dashboard, id=dashboard_id, company=company)
data_sources = dashboard.data_sources.all()
sessions = sessions.filter(data_source__in=data_sources)
# Apply view filter
if view == "recent":
seven_days_ago = timezone.now() - timedelta(days=7)
sessions = sessions.filter(start_time__gte=seven_days_ago)
elif view == "positive":
sessions = sessions.filter(Q(sentiment__icontains="positive"))
elif view == "negative":
sessions = sessions.filter(Q(sentiment__icontains="negative"))
elif view == "escalated":
sessions = sessions.filter(escalated=True)
# Apply additional filters
if start_date:
sessions = sessions.filter(start_time__date__gte=start_date)
if end_date:
sessions = sessions.filter(start_time__date__lte=end_date)
if country:
sessions = sessions.filter(country__icontains=country)
if sentiment:
sessions = sessions.filter(sentiment__icontains=sentiment)
if escalated:
escalated_val = escalated.lower() == "true"
sessions = sessions.filter(escalated=escalated_val)
# Order by most recent first
sessions = sessions.order_by("-start_time")
# Create the filename
filename = "chat_sessions"
if dashboard_id:
dashboard = Dashboard.objects.get(id=dashboard_id)
filename = f"{dashboard.name.replace(' ', '_').lower()}_chat_sessions"
elif data_source_id:
data_source = DataSource.objects.get(id=data_source_id)
filename = f"{data_source.name.replace(' ', '_').lower()}_chat_sessions"
# Add company name, date, and timestamp to the filename
current_time = timezone.now().strftime("%Y%m%d_%H%M%S")
company_name = company.name.replace(" ", "_").lower()
filename = f"{company_name}_{filename}_{current_time}"
# Create in-memory output file
output = io.BytesIO()
# Create Excel workbook and worksheet
workbook = xlsxwriter.Workbook(output)
worksheet = workbook.add_worksheet("Chat Sessions")
# Add a bold format to use to highlight cells
bold = workbook.add_format({"bold": True, "bg_color": "#D9EAD3"})
date_format = workbook.add_format({"num_format": "yyyy-mm-dd hh:mm:ss"})
# Write header row with formatting
headers = [
"Session ID",
"Start Time",
"End Time",
"IP Address",
"Country",
"Language",
"Messages Sent",
"Sentiment",
"Escalated",
"Forwarded HR",
"Full Transcript",
"Avg Response Time (s)",
"Tokens",
"Tokens EUR",
"Category",
"Initial Message",
"User Rating",
]
for col, header in enumerate(headers):
worksheet.write(0, col, header, bold)
# Write data rows
for row_num, session in enumerate(sessions, 1):
worksheet.write(row_num, 0, session.session_id)
# Write dates with proper formatting if not None
if session.start_time:
worksheet.write_datetime(row_num, 1, session.start_time, date_format)
else:
worksheet.write(row_num, 1, None)
if session.end_time:
worksheet.write_datetime(row_num, 2, session.end_time, date_format)
else:
worksheet.write(row_num, 2, None)
worksheet.write(row_num, 3, session.ip_address)
worksheet.write(row_num, 4, session.country)
worksheet.write(row_num, 5, session.language)
worksheet.write(row_num, 6, session.messages_sent)
worksheet.write(row_num, 7, session.sentiment)
worksheet.write(row_num, 8, "Yes" if session.escalated else "No")
worksheet.write(row_num, 9, "Yes" if session.forwarded_hr else "No")
worksheet.write(row_num, 10, session.full_transcript)
worksheet.write(row_num, 11, session.avg_response_time)
worksheet.write(row_num, 12, session.tokens)
worksheet.write(row_num, 13, session.tokens_eur)
worksheet.write(row_num, 14, session.category)
worksheet.write(row_num, 15, session.initial_msg)
worksheet.write(row_num, 16, session.user_rating)
# Add summary sheet with metadata
summary = workbook.add_worksheet("Summary")
summary.write(0, 0, "Export Information", bold)
summary.write(1, 0, "Company:", bold)
summary.write(1, 1, company.name)
summary.write(2, 0, "Export Date:", bold)
summary.write(2, 1, timezone.now().strftime("%Y-%m-%d %H:%M:%S"))
summary.write(3, 0, "Total Records:", bold)
summary.write(3, 1, len(sessions))
# Add filters if used
filter_row = 5
summary.write(filter_row, 0, "Filters Applied:", bold)
filter_row += 1
if data_source_id:
data_source = DataSource.objects.get(id=data_source_id)
summary.write(filter_row, 0, "Data Source:")
summary.write(filter_row, 1, data_source.name)
filter_row += 1
if dashboard_id:
dashboard = Dashboard.objects.get(id=dashboard_id)
summary.write(filter_row, 0, "Dashboard:")
summary.write(filter_row, 1, dashboard.name)
filter_row += 1
if view != "all":
summary.write(filter_row, 0, "View:")
summary.write(filter_row, 1, view.title())
filter_row += 1
# Set fixed column widths for better readability
for i, width in enumerate([20, 20, 20, 15, 15, 10, 12, 15, 10, 12, 30, 15, 10, 10, 20, 50, 10]):
worksheet.set_column(i, i, width)
# Close the workbook
workbook.close()
# Set up the response
output.seek(0)
response = HttpResponse(output, content_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
response["Content-Disposition"] = f'attachment; filename="{filename}.xlsx"'
return response

View File

@@ -94,19 +94,41 @@ def fetch_and_store_chat_data(source_id=None):
padded_row = row + [""] * (len(header) - len(row))
data = dict(zip(header, padded_row, strict=False))
try:
# Try European date format (DD.MM.YYYY) first
start_time = make_aware(datetime.strptime(data["start_time"], "%d.%m.%Y %H:%M:%S"))
except ValueError:
# Fallback to ISO format (YYYY-MM-DD)
start_time = make_aware(datetime.strptime(data["start_time"], "%Y-%m-%d %H:%M:%S"))
# Parse date fields with multiple format support
start_time = None
end_time = None
# List of date formats to try
date_formats = [
"%d.%m.%Y %H:%M:%S", # European format: DD.MM.YYYY HH:MM:SS
"%Y-%m-%d %H:%M:%S", # ISO format: YYYY-MM-DD HH:MM:SS
"%m/%d/%Y %H:%M:%S", # US format: MM/DD/YYYY HH:MM:SS
"%Y-%m-%dT%H:%M:%S", # ISO format with T separator
"%Y-%m-%dT%H:%M:%S.%fZ", # ISO format with milliseconds and Z
]
# Try to parse start_time with multiple formats
for date_format in date_formats:
try:
start_time = make_aware(datetime.strptime(data["start_time"], date_format))
break
except (ValueError, TypeError):
continue
# Try to parse end_time with multiple formats
for date_format in date_formats:
try:
end_time = make_aware(datetime.strptime(data["end_time"], date_format))
break
except (ValueError, TypeError):
continue
# If we couldn't parse the dates, log an error and skip this row
if not start_time or not end_time:
error_msg = f"Could not parse date fields for session {data['session_id']}: start_time={data['start_time']}, end_time={data['end_time']}"
logger.error(error_msg)
stats["errors"] += 1
continue
messages_sent = int(data["messages_sent"]) if data["messages_sent"] else None
escalated = data["escalated"].lower() == "true" if data["escalated"] else None
@@ -199,6 +221,10 @@ def fetch_and_store_transcript(session, timeout=30):
def parse_and_store_transcript_messages(session, transcript_content):
"""Parse and store messages from a transcript.
This function parses a chat transcript that contains messages from both User and Assistant.
It identifies message boundaries by looking for lines that start with common sender patterns,
and groups all following lines until the next sender change as part of that message.
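For example, in an illustrative transcript such as:
    User: Hi, I ordered a laptop
    and it has not arrived yet.
    Assistant: Sorry to hear that, let me check.
the second line is grouped into the preceding "User:" message.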
Args:
session: The ChatSession object
transcript_content: The raw transcript content
@@ -206,6 +232,11 @@ def parse_and_store_transcript_messages(session, transcript_content):
Returns:
int: Number of messages created
"""
# Handle empty transcripts
if not transcript_content or transcript_content.strip() == "":
logger.warning(f"Empty transcript received for session {session.session_id}")
return 0
lines = transcript_content.splitlines()
current_sender = None
current_message_lines = []
@@ -217,36 +248,286 @@
logger.info(f"Deleting {existing_count} existing messages for session {session.session_id}")
ChatMessage.objects.filter(session=session).delete()
# Define common message patterns to detect - expanded to include more variations
user_patterns = [
"User:",
"[User]:",
"Customer:",
"[Customer]:",
"Client:",
"[Client]:",
"Human:",
"[Human]:",
"Me:",
"[Me]:",
"Question:",
"User >",
"Customer >",
"User said:",
"Customer said:",
"User writes:",
"User asked:",
"User message:",
"From user:",
"Client message:",
"Q:",
"Input:",
"Query:",
"Person:",
"Visitor:",
"Guest:",
"User input:",
"User query:",
]
assistant_patterns = [
"Assistant:",
"[Assistant]:",
"Agent:",
"[Agent]:",
"Bot:",
"[Bot]:",
"AI:",
"[AI]:",
"ChatGPT:",
"[ChatGPT]:",
"System:",
"[System]:",
"Support:",
"[Support]:",
"Answer:",
"Assistant >",
"Bot >",
"Assistant said:",
"Assistant writes:",
"AI responded:",
"LLM:",
"[LLM]:",
"Response:",
"A:",
"Output:",
"AI output:",
"Model:",
"[Model]:",
"Assistant message:",
"From assistant:",
"Bot response:",
"AI says:",
"NotsoAI:",
"[NotsoAI]:",
"Notso:",
"[Notso]:",
]
# Function to save current message before starting a new one
def save_current_message():
nonlocal current_sender, current_message_lines, messages_created
if current_sender and current_message_lines:
message_text = "\n".join(current_message_lines)
# Only save if there's actual content (not just whitespace)
if message_text.strip() and save_message(session, current_sender, message_text):
messages_created += 1
logger.debug(f"Saved {current_sender} message with {len(current_message_lines)} lines")
# Initial scan to detect format type and potential message boundaries
has_recognized_patterns = False
potential_timestamps = []
timestamp_pattern_count = 0
# Regex patterns for common timestamp formats
import re
timestamp_patterns = [
r"^\[\d{2}:\d{2}:\d{2}\]", # [HH:MM:SS]
r"^\[\d{2}:\d{2}\]", # [HH:MM]
r"^\(\d{2}:\d{2}:\d{2}\)", # (HH:MM:SS)
r"^\(\d{2}:\d{2}\)", # (HH:MM)
r"^\d{2}:\d{2}:\d{2} -", # HH:MM:SS -
r"^\d{2}:\d{2} -", # HH:MM -
r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", # YYYY-MM-DD HH:MM:SS
]
# First pass: detect format and message boundaries
for i, line in enumerate(lines):
line_stripped = line.strip()
# Check for standard message patterns
if any(line_stripped.startswith(pattern) for pattern in user_patterns + assistant_patterns):
has_recognized_patterns = True
# Check for timestamp patterns that might indicate message boundaries
for pattern in timestamp_patterns:
if re.match(pattern, line_stripped):
timestamp_pattern_count += 1
potential_timestamps.append(i)
break
# If no recognized patterns are found, try to intelligently split the transcript
if not has_recognized_patterns and len(lines) > 0:
logger.info(
f"No standard message patterns found in transcript for session {session.session_id}. Attempting intelligent split."
)
# Try timestamp-based parsing if we have enough consistent timestamps
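# Heuristic thresholds: at least 4 timestamped lines, covering more than 20% of the transcript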
if timestamp_pattern_count > 3 and timestamp_pattern_count > 0.2 * len(lines):
logger.info(f"Attempting timestamp-based parsing with {timestamp_pattern_count} detected timestamps")
# Add the end of file as a boundary
potential_timestamps.append(len(lines))
# Process messages between timestamps
for i in range(len(potential_timestamps) - 1):
start_idx = potential_timestamps[i]
end_idx = potential_timestamps[i + 1]
message_content = "\n".join(lines[start_idx:end_idx])
first_line = lines[start_idx].lower()
# Simple heuristic to identify sender
is_user = any(
user_word in first_line
for user_word in ["user", "customer", "client", "human", "question", "query"]
)
is_assistant = any(
assistant_word in first_line
for assistant_word in ["assistant", "agent", "bot", "ai", "system", "support", "answer", "response"]
)
sender = "User" if (is_user or (not is_assistant and i % 2 == 0)) else "Assistant"
if save_message(session, sender, message_content):
messages_created += 1
logger.info(f"Created {messages_created} messages using timestamp-based parsing")
return messages_created
# Simple heuristic: alternate between user and assistant, with first message from user
# Start with paragraphs (blank line separations) as message boundaries
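# Illustrative example (hypothetical transcript): the three blank-line-separated blocks in
#   "Hi, I need help\n\nSure, what's the issue?\n\nMy login fails"
# become three paragraphs, saved alternately as User, Assistant, User below.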
paragraphs = []
current_paragraph = []
for line in lines:
if line.startswith("User:"):
if (
current_sender
and current_message_lines
and save_message(session, current_sender, "\n".join(current_message_lines))
):
if line.strip():
current_paragraph.append(line)
elif current_paragraph: # Empty line and we have a paragraph
paragraphs.append("\n".join(current_paragraph))
current_paragraph = []
# Add the last paragraph if it's not empty
if current_paragraph:
paragraphs.append("\n".join(current_paragraph))
# If we have just one paragraph, try to split by sentence boundaries for very long transcripts
if len(paragraphs) == 1 and len(paragraphs[0].split()) > 100:
# Try to split by sentence boundaries
text = paragraphs[0]
# Define sentence ending patterns
sentence_endings = r"(?<=[.!?])\s+"
sentences = re.split(sentence_endings, text)
# Group sentences into logical chunks (assuming alternating speakers)
chunks = []
current_chunk = []
for i, sentence in enumerate(sentences):
current_chunk.append(sentence)
# Every 2-3 sentences or on a natural break like a question mark
if (i % 2 == 1 and sentence.endswith("?")) or len(current_chunk) >= 3:
chunks.append(" ".join(current_chunk))
current_chunk = []
# Add any remaining sentences
if current_chunk:
chunks.append(" ".join(current_chunk))
# Save the chunks alternating between user and assistant
for i, chunk in enumerate(chunks):
if chunk.strip():
sender = "User" if i % 2 == 0 else "Assistant"
if save_message(session, sender, chunk):
messages_created += 1
logger.info(f"Created {messages_created} messages by splitting single paragraph into sentences")
return messages_created
# Save messages alternating between user and assistant
for i, paragraph in enumerate(paragraphs):
if paragraph.strip(): # Only save non-empty paragraphs
sender = "User" if i % 2 == 0 else "Assistant"
if save_message(session, sender, paragraph):
messages_created += 1
logger.info(f"Created {messages_created} messages using intelligent split for session {session.session_id}")
return messages_created
# Standard processing with recognized patterns
for line in lines:
line_stripped = line.strip()
# Skip empty lines at the beginning
if not line_stripped and not current_sender:
continue
# Check if this line indicates a new sender
is_user_message = any(line_stripped.startswith(pattern) for pattern in user_patterns)
is_assistant_message = any(line_stripped.startswith(pattern) for pattern in assistant_patterns)
if is_user_message:
# Save previous message if any
save_current_message()
# Start new user message
current_sender = "User"
# Remove the prefix from the line
for pattern in user_patterns:
if line_stripped.startswith(pattern):
line = line_stripped[len(pattern) :].strip()
break
current_message_lines = [line] if line.strip() else []
elif is_assistant_message:
# Save previous message if any
save_current_message()
# Start new assistant message
current_sender = "Assistant"
# Remove the prefix from the line
for pattern in assistant_patterns:
if line_stripped.startswith(pattern):
line = line_stripped[len(pattern) :].strip()
break
current_message_lines = [line] if line.strip() else []
elif current_sender:
# Continue adding to current message
current_message_lines.append(line)
else:
# If we get here with no current_sender, assume it's the start of a user message
logger.warning(f"Found line without sender prefix: '{line}'. Assuming User message.")
current_sender = "User"
current_message_lines = [line]
# Save the last message
save_current_message()
# Handle case with no messages parsed (possibly incorrectly formatted transcript)
if messages_created == 0 and lines:
logger.warning(
f"No messages were parsed from transcript for session {session.session_id}. Using fallback parsing."
)
# Fallback: Just split the transcript in half, first part user, second part assistant
mid_point = len(lines) // 2
user_content = "\n".join(lines[:mid_point])
assistant_content = "\n".join(lines[mid_point:])
# Save the split messages if they have content
if user_content.strip() and save_message(session, "User", user_content):
messages_created += 1
if assistant_content.strip() and save_message(session, "Assistant", assistant_content):
messages_created += 1
logger.info(f"Created {messages_created} messages using fallback parsing")
logger.info(f"Created {messages_created} messages for session {session.session_id}")
return messages_created

View File

@ -54,6 +54,7 @@ After installation, check if Redis is properly configured:
```
3. Restart Redis after any changes:
```bash
sudo systemctl restart redis-server
```
@ -79,7 +80,7 @@ If Redis is not available, the application will automatically fall back to using
Set these environment variables in your `.env` file or deployment environment:
```sh
# Redis Configuration
REDIS_HOST=localhost
REDIS_PORT=6379
@ -145,6 +146,7 @@ Development requires multiple terminal windows:
```
4. **Celery Beat** (for scheduled tasks):
```bash
make celery-beat
```

View File

@ -46,6 +46,7 @@ If this fails, check the following:
```
4. Firewall rules might be blocking Redis. If you're connecting remotely:
```bash
sudo ufw status # Check if firewall is enabled
sudo ufw allow 6379/tcp # Allow Redis port if needed
@ -79,10 +80,13 @@ If the task isn't completing, check:
1. Look for errors in the Celery worker terminal
2. Verify broker URL settings match in both terminals:
```bash
echo $CELERY_BROKER_URL
```
3. Check if Redis is accessible from both terminals:
```bash
redis-cli ping
```

View File

@ -1,7 +1,7 @@
# nginx/conf.d/default.conf
upstream dashboard {
server web:8001;
}
server {

View File

@ -1,6 +1,35 @@
{
"devDependencies": {
"markdownlint-cli2": "^0.18.1",
"prettier": "^3.5.3",
"prettier-plugin-jinja-template": "^2.1.0"
},
"scripts": {
"format": "prettier --write .",
"format:check": "prettier --check .",
"lint:md": "markdownlint-cli2 \"**/*.md\" \"!.trunk/**\" \"!.venv/**\" \"!node_modules/**\"",
"lint:md:fix": "markdownlint-cli2 --fix \"**/*.md\" \"!.trunk/**\" \"!.venv/**\" \"!node_modules/**\""
},
"markdownlint-cli2": {
"config": {
"MD007": {
"indent": 4,
"start_indented": false,
"start_indent": 4
},
"MD013": false,
"MD030": {
"ul_single": 3,
"ol_single": 2,
"ul_multi": 3,
"ol_multi": 2
},
"MD033": false
},
"ignores": [
"node_modules",
".git",
"*.json"
]
}
}

View File

@ -34,6 +34,7 @@ dependencies = [
"sqlalchemy>=2.0.41",
"tinycss2>=1.4.0",
"whitenoise>=6.9.0",
"xlsxwriter>=3.2.3",
]
[dependency-groups]

View File

@ -479,3 +479,7 @@ whitenoise==6.9.0 \
--hash=sha256:8c4a7c9d384694990c26f3047e118c691557481d624f069b7f7752a2f735d609 \
--hash=sha256:c8a489049b7ee9889617bb4c274a153f3d979e8f51d2efd0f5b403caf41c57df
# via livegraphsdjango
xlsxwriter==3.2.3 \
--hash=sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d \
--hash=sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5
# via livegraphsdjango

uv.lock (generated)
View File

@ -482,6 +482,7 @@ dependencies = [
{ name = "sqlalchemy" },
{ name = "tinycss2" },
{ name = "whitenoise" },
{ name = "xlsxwriter" },
]
[package.dev-dependencies]
@ -517,6 +518,7 @@ requires-dist = [
{ name = "sqlalchemy", specifier = ">=2.0.41" },
{ name = "tinycss2", specifier = ">=1.4.0" },
{ name = "whitenoise", specifier = ">=6.9.0" },
{ name = "xlsxwriter", specifier = ">=3.2.3" },
]
[package.metadata.requires-dev]
@ -1058,3 +1060,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/b9/cf/c15c2f21aee6b22a9
wheels = [
{ url = "https://files.pythonhosted.org/packages/64/b2/2ce9263149fbde9701d352bda24ea1362c154e196d2fda2201f18fc585d7/whitenoise-6.9.0-py3-none-any.whl", hash = "sha256:c8a489049b7ee9889617bb4c274a153f3d979e8f51d2efd0f5b403caf41c57df", size = 20161, upload-time = "2025-02-06T22:16:32.589Z" },
]
[[package]]
name = "xlsxwriter"
version = "3.2.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a7/d1/e026d33dd5d552e5bf3a873dee54dad66b550230df8290d79394f09b2315/xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5", size = 209135, upload-time = "2025-04-17T10:11:23.481Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/37/b1/a252d499f2760b314fcf264d2b36fcc4343a1ecdb25492b210cb0db70a68/XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d", size = 169433, upload-time = "2025-04-17T10:11:21.329Z" },
]