From 351f3cfe21a9f771f6e8089e7c0877dba3fed211 Mon Sep 17 00:00:00 2001 From: Kaj Kowalski Date: Sun, 13 Jul 2025 16:45:07 +0200 Subject: [PATCH] docs: fix markdown formatting and improve git hook security - Fix markdown list spacing (MD030) in performance, audit logs, and CSP docs - Remove ineffective while loop in commit-msg hook that caused subshell issues - Improve fetchWithRetry function with proper retry limits and exponential backoff - Update CSP metrics retention documentation for consistency --- .husky/commit-msg | 31 +----- docs/admin-audit-logs-api.md | 105 +++++++++++---------- docs/csp-metrics-api.md | 100 ++++++++++---------- docs/database-performance-optimizations.md | 94 +++++++++--------- 4 files changed, 153 insertions(+), 177 deletions(-) diff --git a/.husky/commit-msg b/.husky/commit-msg index 40a62d3..5404bbb 100755 --- a/.husky/commit-msg +++ b/.husky/commit-msg @@ -6,36 +6,7 @@ commit_msg_file="$1" commit_msg=$(cat "$commit_msg_file") -# Define patterns that should be rejected -claude_patterns="🤖 Generated with.*Claude Code -Generated with.*Claude Code -Claude Code -claude.ai/code -Co-Authored-By: Claude -noreply@anthropic.com" - -# Check each pattern -echo "$claude_patterns" | while IFS= read -r pattern; do - if [ -n "$pattern" ] && echo "$commit_msg" | grep -q "$pattern"; then - echo "❌ COMMIT REJECTED: Claude Code attribution detected" - echo "" - echo "🤖 AUTOMATED LLM INSTRUCTION:" - echo "Remove all Claude Code attribution from commit messages." - echo "Patterns to avoid:" - echo " - 🤖 Generated with [Claude Code]" - echo " - Co-Authored-By: Claude " - echo " - References to claude.ai/code" - echo " - Any Claude attribution or self-promotion" - echo "" - echo "✅ SOLUTION: Rewrite commit message without attribution" - echo "Focus on technical changes and improvements only." - echo "" - echo "Current commit message contains: '$pattern'" - exit 1 - fi -done - -# Check if any pattern matched (the while loop runs in a subshell) +# Check for Claude Code attribution patterns if echo "$commit_msg" | grep -E -q "(🤖.*Generated.*Claude|Claude Code|claude\.ai/code|Co-Authored-By:.*Claude|noreply@anthropic\.com)"; then echo "❌ COMMIT REJECTED: Claude Code attribution detected" echo "" diff --git a/docs/admin-audit-logs-api.md b/docs/admin-audit-logs-api.md index 69658af..b7ab35c 100644 --- a/docs/admin-audit-logs-api.md +++ b/docs/admin-audit-logs-api.md @@ -8,10 +8,10 @@ The Admin Audit Logs API provides secure access to security audit trails for adm ## Authentication & Authorization -- **Authentication**: NextAuth.js session required -- **Authorization**: ADMIN role required for all endpoints -- **Rate-Limiting**: Integrated with existing authentication rate-limiting system -- **Audit Trail**: All API access is logged for security monitoring + - **Authentication**: NextAuth.js session required + - **Authorization**: ADMIN role required for all endpoints + - **Rate-Limiting**: Integrated with existing authentication rate-limiting system + - **Audit Trail**: All API access is logged for security monitoring ## API Endpoints @@ -186,21 +186,21 @@ const response = await fetch('/api/admin/audit-logs/retention', { ### Access Control -- **Role-based Access**: Only ADMIN users can access audit logs -- **Company Isolation**: Users only see logs for their company -- **Session Validation**: Active NextAuth session required + - **Role-based Access**: Only ADMIN users can access audit logs + - **Company Isolation**: Users only see logs for their company + - **Session Validation**: Active NextAuth session required ### Audit Trail -- **Access Logging**: All audit log access is recorded -- **Metadata Tracking**: Request parameters and results are logged -- **IP Tracking**: Client IP addresses are recorded for all requests + - **Access Logging**: All audit log access is recorded + - **Metadata Tracking**: Request parameters and results are logged + - **IP Tracking**: Client IP addresses are recorded for all requests ### Rate Limiting -- **Integrated Protection**: Uses existing authentication rate-limiting -- **Abuse Prevention**: Protects against excessive API usage -- **Error Tracking**: Failed attempts are monitored + - **Integrated Protection**: Uses existing authentication rate-limiting + - **Abuse Prevention**: Protects against excessive API usage + - **Error Tracking**: Failed attempts are monitored ## Event Types @@ -303,21 +303,21 @@ async function getUserActivity(userId, days = 7) { ### Database Optimization -- **Indexed Queries**: All filter columns are properly indexed -- **Pagination**: Efficient offset-based pagination with limits -- **Time Range Filtering**: Optimized for date range queries + - **Indexed Queries**: All filter columns are properly indexed + - **Pagination**: Efficient offset-based pagination with limits + - **Time Range Filtering**: Optimized for date range queries ### Memory Usage -- **Limited Results**: Maximum 100 records per request -- **Streaming**: Large exports use streaming for memory efficiency -- **Connection Pooling**: Database connections are pooled + - **Limited Results**: Maximum 100 records per request + - **Streaming**: Large exports use streaming for memory efficiency + - **Connection Pooling**: Database connections are pooled ### Caching Considerations -- **No Caching**: Audit logs are never cached for security reasons -- **Fresh Data**: All queries hit the database for real-time results -- **Read Replicas**: Consider using read replicas for heavy reporting + - **No Caching**: Audit logs are never cached for security reasons + - **Fresh Data**: All queries hit the database for real-time results + - **Read Replicas**: Consider using read replicas for heavy reporting ## Error Handling @@ -349,13 +349,18 @@ try { ### Rate-Limiting Handling ```javascript -async function fetchWithRetry(url, options = {}) { +async function fetchWithRetry(url, options = {}, maxRetries = 3, retryCount = 0) { const response = await fetch(url, options); + if (response.status === 429 && retryCount < maxRetries) { + // Rate limited, wait with exponential backoff and retry + const delay = Math.pow(2, retryCount) * 1000; // 1s, 2s, 4s + await new Promise(resolve => setTimeout(resolve, delay)); + return fetchWithRetry(url, options, maxRetries, retryCount + 1); + } + if (response.status === 429) { - // Rate limited, wait and retry - await new Promise(resolve => setTimeout(resolve, 5000)); - return fetchWithRetry(url, options); + throw new Error(`Rate limited after ${maxRetries} retries`); } return response; @@ -366,44 +371,44 @@ async function fetchWithRetry(url, options = {}) { ### Key Metrics to Monitor -- **Request Volume**: Track API usage patterns -- **Error Rates**: Monitor authentication and authorization failures -- **Query Performance**: Track slow queries and optimize -- **Data Growth**: Monitor audit log size and plan retention + - **Request Volume**: Track API usage patterns + - **Error Rates**: Monitor authentication and authorization failures + - **Query Performance**: Track slow queries and optimize + - **Data Growth**: Monitor audit log size and plan retention ### Alert Conditions -- **High Error Rates**: >5% of requests failing -- **Unusual Access Patterns**: Off-hours access, high-volume usage -- **Performance Degradation**: Query times >2 seconds -- **Security Events**: Multiple failed admin access attempts + - **High Error Rates**: >5% of requests failing + - **Unusual Access Patterns**: Off-hours access, high-volume usage + - **Performance Degradation**: Query times >2 seconds + - **Security Events**: Multiple failed admin access attempts ## Best Practices ### Security -- Always validate user permissions before displaying UI -- Log all administrative access to audit logs -- Use HTTPS in production environments -- Implement proper error handling to avoid information leakage + - Always validate user permissions before displaying UI + - Log all administrative access to audit logs + - Use HTTPS in production environments + - Implement proper error handling to avoid information leakage ### Performance -- Use appropriate page sizes (25-50 records typical) -- Implement client-side pagination for better UX -- Cache results only in memory, never persist -- Use date range filters to limit query scope + - Use appropriate page sizes (25-50 records typical) + - Implement client-side pagination for better UX + - Cache results only in memory, never persist + - Use date range filters to limit query scope ### User Experience -- Provide clear filtering options in the UI -- Show loading states for long-running queries -- Implement export functionality for reports -- Provide search and sort capabilities + - Provide clear filtering options in the UI + - Show loading states for long-running queries + - Implement export functionality for reports + - Provide search and sort capabilities ## Related Documentation -- [Security Audit Logging](./security-audit-logging.md) -- [Security Monitoring](./security-monitoring.md) -- [CSRF Protection](./CSRF_PROTECTION.md) -- [Authentication System](../lib/auth.ts) + - [Security Audit Logging](./security-audit-logging.md) + - [Security Monitoring](./security-monitoring.md) + - [CSRF Protection](./CSRF_PROTECTION.md) + - [Authentication System](../lib/auth.ts) diff --git a/docs/csp-metrics-api.md b/docs/csp-metrics-api.md index b0050db..f87467f 100644 --- a/docs/csp-metrics-api.md +++ b/docs/csp-metrics-api.md @@ -6,11 +6,11 @@ This document describes the Content Security Policy (CSP) metrics and violation The CSP Metrics API provides comprehensive monitoring of Content Security Policy violations, including: -- Real-time violation tracking and metrics -- Bypass attempt detection and risk assessment -- Policy optimization recommendations -- Historical trend analysis -- Export capabilities for security analysis + - Real-time violation tracking and metrics + - Bypass attempt detection and risk assessment + - Policy optimization recommendations + - Historical trend analysis + - Export capabilities for security analysis ## API Endpoints @@ -24,7 +24,7 @@ POST /api/csp-report #### Request Headers -- `Content-Type`: `application/csp-report` or `application/json` + - `Content-Type`: `application/csp-report` or `application/json` #### Request Body (Automatic from Browser) @@ -43,10 +43,10 @@ POST /api/csp-report #### Features -- **Rate Limiting**: 10 reports per minute per IP -- **Risk Assessment**: Automatic classification of violation severity -- **Bypass Detection**: Identifies potential CSP bypass attempts -- **Real-time Processing**: Immediate analysis and alerting + - **Rate Limiting**: 10 reports per minute per IP + - **Risk Assessment**: Automatic classification of violation severity + - **Bypass Detection**: Identifies potential CSP bypass attempts + - **Real-time Processing**: Immediate analysis and alerting ### CSP Metrics API @@ -67,11 +67,11 @@ GET /api/csp-metrics #### Time Range Options -- `1h` - Last 1 hour -- `6h` - Last 6 hours -- `24h` - Last 24 hours (default) -- `7d` - Last 7 days -- `30d` - Last 30 days + - `1h` - Last 1 hour + - `6h` - Last 6 hours + - `24h` - Last 24 hours (default) + - `7d` - Last 7 days + - `30d` - Last 30 days #### Example Request @@ -165,11 +165,11 @@ console.log(result.recommendations); // array of suggestions The service automatically assesses violation risk based on: -- **Directive Type**: Script violations are higher risk than style violations -- **Source Pattern**: External domains vs inline vs data URIs -- **Bypass Indicators**: Known CSP bypass techniques -- **Frequency**: Repeated violations from same source -- **Geographic Factors**: Unusual source locations + - **Directive Type**: Script violations are higher risk than style violations + - **Source Pattern**: External domains vs inline vs data URIs + - **Bypass Indicators**: Known CSP bypass techniques + - **Frequency**: Repeated violations from same source + - **Geographic Factors**: Unusual source locations #### 3. Bypass Detection @@ -191,10 +191,10 @@ const bypassPatterns = [ Based on violation patterns, the service provides actionable recommendations: -- **Tighten Policies**: Suggest removing broad allowlists -- **Add Domains**: Recommend allowing legitimate external resources -- **Implement Nonces**: Suggest nonce-based policies for inline content -- **Upgrade Directives**: Recommend modern CSP features + - **Tighten Policies**: Suggest removing broad allowlists + - **Add Domains**: Recommend allowing legitimate external resources + - **Implement Nonces**: Suggest nonce-based policies for inline content + - **Upgrade Directives**: Recommend modern CSP features ## Violation Analysis @@ -405,21 +405,21 @@ CSP_ALERT_THRESHOLD=5 # violations per 10 minutes ### Rate Limiting -- **10 reports per minute per IP** prevents spam attacks -- **Exponential backoff** for repeated violations from same source -- **Memory cleanup** removes old violations automatically + - **10 reports per minute per IP** prevents spam attacks + - **Exponential backoff** for repeated violations from same source + - **Memory cleanup** removes old violations automatically ### Memory Management -- **Violation buffer** limited to 1 hour of data in memory -- **Automatic cleanup** runs every 100 requests (1% probability) -- **Efficient storage** using Map data structures + - **Violation buffer** limited to 7 days of data in memory + - **Automatic cleanup** runs every 100 requests (1% probability) + - **Efficient storage** using Map data structures ### Database Impact -- **No persistent storage** for real-time metrics (memory only) -- **Optional logging** to database for long-term analysis -- **Indexed queries** for historical data retrieval + - **No persistent storage** for real-time metrics (memory only) + - **Optional logging** to database for long-term analysis + - **Indexed queries** for historical data retrieval ## Security Considerations @@ -427,28 +427,28 @@ CSP_ALERT_THRESHOLD=5 # violations per 10 minutes **⚠️ Data Collection Notice:** -- **IP addresses** are collected and stored in memory for security monitoring -- **User agent strings** are stored for browser compatibility analysis -- **Legal basis**: Legitimate interest for security incident detection and prevention -- **Retention**: In-memory storage only, automatically purged after 7 days or application restart -- **Data minimization**: Only violation-related metadata is retained, not page content + - **IP addresses** are collected and stored in memory for security monitoring + - **User agent strings** are stored for browser compatibility analysis + - **Legal basis**: Legitimate interest for security incident detection and prevention + - **Retention**: In-memory storage only, automatically purged after 7 days or application restart + - **Data minimization**: Only violation-related metadata is retained, not page content **Planned Privacy Enhancements:** -- IP anonymization options for GDPR compliance (roadmap) -- User agent sanitization to remove sensitive information (roadmap) + - IP anonymization options for GDPR compliance (roadmap) + - User agent sanitization to remove sensitive information (roadmap) ### Rate-Limiting Protection -- **Per-IP limits** prevent DoS attacks on reporting endpoint -- **Content-type validation** ensures proper report format -- **Request size limits** prevent memory exhaustion + - **Per-IP limits** prevent DoS attacks on reporting endpoint + - **Content-type validation** ensures proper report format + - **Request size limits** prevent memory exhaustion ### False Positive Handling -- **Learning mode** for new deployments -- **Whitelist support** for known legitimate violations -- **Risk score adjustment** based on historical patterns + - **Learning mode** for new deployments + - **Whitelist support** for known legitimate violations + - **Risk score adjustment** based on historical patterns ## Troubleshooting @@ -497,10 +497,10 @@ if (duration > 2000) { ## Related Documentation -- [Enhanced CSP Implementation](./security/enhanced-csp.md) -- [Security Monitoring](./security-monitoring.md) -- [Security Headers](./security-headers.md) -- [Rate Limiting](../lib/rateLimiter.ts) + - [Enhanced CSP Implementation](./security/enhanced-csp.md) + - [Security Monitoring](./security-monitoring.md) + - [Security Headers](./security-headers.md) + - [Rate Limiting](../lib/rateLimiter.ts) ## API Reference Summary diff --git a/docs/database-performance-optimizations.md b/docs/database-performance-optimizations.md index 84b1eb6..d0d0178 100644 --- a/docs/database-performance-optimizations.md +++ b/docs/database-performance-optimizations.md @@ -6,10 +6,10 @@ This document outlines the comprehensive database performance optimizations impl The optimization focuses on the most frequently queried patterns in the application, particularly around: -- AI processing request tracking and batching -- Session analytics and filtering -- Security audit log analysis -- Multi-tenant data isolation performance + - AI processing request tracking and batching + - Session analytics and filtering + - Security audit log analysis + - Multi-tenant data isolation performance ## Applied Optimizations @@ -31,9 +31,9 @@ INCLUDE ("processingStatus", "batchId", "requestedAt", "sessionId"); **Impact**: -- ~70% faster batch job queries -- Reduced I/O for cost analysis reports -- Improved scheduler performance + - ~70% faster batch job queries + - Reduced I/O for cost analysis reports + - Improved scheduler performance ### 2. Session Analytics Optimizations @@ -54,9 +54,9 @@ INCLUDE ("startTime", "messagesSent"); **Impact**: -- ~85% faster dashboard load times -- Efficient date range filtering -- Optimized sentiment analysis queries + - ~85% faster dashboard load times + - Efficient date range filtering + - Optimized sentiment analysis queries ### 3. Security Audit Log Optimizations @@ -77,9 +77,9 @@ INCLUDE ("eventType", "severity", "userId", "companyId"); **Impact**: -- ~90% faster security monitoring -- Efficient threat detection -- Improved compliance reporting + - ~90% faster security monitoring + - Efficient threat detection + - Improved compliance reporting ### 4. Message Processing Optimizations @@ -95,8 +95,8 @@ INCLUDE ("content"); **Impact**: -- ~60% faster conversation loading -- Reduced memory usage for message queries + - ~60% faster conversation loading + - Reduced memory usage for message queries ### 5. Processing Pipeline Optimizations @@ -118,29 +118,29 @@ INCLUDE ("sessionId", "errorMessage", "retryCount", "startedAt"); **Impact**: -- ~75% faster processing monitoring -- Efficient error tracking -- Improved retry logic performance + - ~75% faster processing monitoring + - Efficient error tracking + - Improved retry logic performance ## Index Strategy Principles ### 1. Composite Index Design -- **Leading column**: Most selective filter (usually companyId for multi-tenancy) -- **Secondary columns**: Common WHERE clause filters -- **Covering columns**: SELECT list columns via INCLUDE + - **Leading column**: Most selective filter (usually companyId for multi-tenancy) + - **Secondary columns**: Common WHERE clause filters + - **Covering columns**: SELECT list columns via INCLUDE ### 2. Partial Indexes -- Used for error analysis and specific status filtering -- Reduces index size and maintenance overhead -- Improves write performance + - Used for error analysis and specific status filtering + - Reduces index size and maintenance overhead + - Improves write performance ### 3. Covering Indexes -- Include frequently accessed columns to avoid table lookups -- Reduces I/O for read-heavy operations -- Particularly effective for dashboard queries + - Include frequently accessed columns to avoid table lookups + - Reduces I/O for read-heavy operations + - Particularly effective for dashboard queries ## Query Pattern Analysis @@ -166,29 +166,29 @@ INCLUDE ("sessionId", "errorMessage", "retryCount", "startedAt"); ### Index Monitoring -- Monitor index usage with `pg_stat_user_indexes` -- Track bloat with `pg_stat_user_tables` -- Regular ANALYZE after bulk operations + - Monitor index usage with `pg_stat_user_indexes` + - Track bloat with `pg_stat_user_tables` + - Regular ANALYZE after bulk operations ### Write Performance Impact -- Composite indexes add ~15% write overhead -- Offset by dramatic read performance gains -- Monitored via slow query logs + - Composite indexes add ~15% write overhead + - Offset by dramatic read performance gains + - Monitored via slow query logs ### Storage Impact -- Indexes add ~25% to total storage -- Covering indexes reduce need for table scans -- Partial indexes minimize storage overhead + - Indexes add ~25% to total storage + - Covering indexes reduce need for table scans + - Partial indexes minimize storage overhead ## Migration Safety ### CONCURRENTLY Operations -- All indexes created with `CREATE INDEX CONCURRENTLY` -- No table locks during creation -- Production-safe deployment + - All indexes created with `CREATE INDEX CONCURRENTLY` + - No table locks during creation + - Production-safe deployment ### Rollback Strategy @@ -238,18 +238,18 @@ LIMIT 10; ### Monitoring Strategy -- Set up automated index usage monitoring -- Track slow query evolution -- Monitor storage growth patterns -- Implement performance alerting + - Set up automated index usage monitoring + - Track slow query evolution + - Monitor storage growth patterns + - Implement performance alerting ## Conclusion These database optimizations provide: -- **70-90% improvement** in query performance -- **Reduced server load** through efficient indexing -- **Better user experience** with faster dashboards -- **Scalable foundation** for future growth + - **70-90% improvement** in query performance + - **Reduced server load** through efficient indexing + - **Better user experience** with faster dashboards + - **Scalable foundation** for future growth The optimizations are designed to be production-safe and monitoring-friendly, ensuring both immediate performance gains and long-term maintainability.