Initial commit: Telegram Management System
Some checks failed
Deploy / deploy (push) Has been cancelled
Full-stack web application for Telegram management
- Frontend: Vue 3 + Vben Admin
- Backend: NestJS
- Features: User management, group broadcast, statistics

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
39
marketing-agent/services/logging/.env.example
Normal file
@@ -0,0 +1,39 @@
# Logging Service Configuration Example

# Service Port
PORT=3010

# Elasticsearch Configuration
ELASTICSEARCH_NODE=http://localhost:9200
ELASTICSEARCH_USERNAME=elastic
ELASTICSEARCH_PASSWORD=changeme

# Redis Configuration
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=
REDIS_DB=2

# Alert Configuration
# Email Alerts
ALERT_EMAIL_ENABLED=false
SMTP_HOST=smtp.gmail.com
SMTP_PORT=587
SMTP_USER=your-email@gmail.com
SMTP_PASS=your-app-password
ALERT_EMAIL_FROM=alerts@telegram-marketing.com
ALERT_EMAIL_TO=admin@example.com,devops@example.com

# Slack Alerts
ALERT_SLACK_ENABLED=false
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/YOUR/WEBHOOK/URL

# Webhook Alerts
ALERT_WEBHOOK_ENABLED=false
ALERT_WEBHOOK_URL=https://your-webhook-endpoint.com/alerts

# Log Level (debug, info, warn, error)
LOG_LEVEL=info

# Node Environment (development, production)
NODE_ENV=development
53
marketing-agent/services/logging/Dockerfile
Normal file
@@ -0,0 +1,53 @@
# Build stage
FROM node:18-alpine AS builder

WORKDIR /app

# Copy package files
COPY package*.json ./

# Install all dependencies (including dev) for building
RUN npm ci

# Copy source code
COPY . .

# Production stage
FROM node:18-alpine

WORKDIR /app

# Install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init

# Create non-root user
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nodejs -u 1001

# Copy package files and install production dependencies only
COPY package*.json ./
RUN npm ci --only=production && \
    npm cache clean --force

# Copy application code
COPY --chown=nodejs:nodejs . .

# Create necessary directories with proper permissions
RUN mkdir -p logs data && \
    chown -R nodejs:nodejs logs data

# Switch to non-root user
USER nodejs

# Expose port
EXPOSE 3010

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=40s --retries=3 \
    CMD node healthcheck.js || exit 1

# Use dumb-init to handle signals properly
ENTRYPOINT ["dumb-init", "--"]

# Start application
CMD ["node", "src/index.js"]
334
marketing-agent/services/logging/README.md
Normal file
@@ -0,0 +1,334 @@
# Logging Service

Centralized logging, monitoring, and alerting service for the Telegram Marketing Agent System.

## Features

- **Log Collection**: Centralized log collection from all microservices
- **Real-time Analysis**: Pattern detection and anomaly analysis
- **Alert Management**: Multi-channel alerting (Email, Slack, Webhook)
- **Dashboard**: Real-time monitoring dashboard
- **Log Storage**: Elasticsearch-based storage with retention policies
- **Performance Metrics**: System and application performance tracking

## Architecture

```
┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
│  Microservices  │────▶│  Log Collector  │────▶│  Elasticsearch  │
└─────────────────┘     └─────────────────┘     └─────────────────┘
                                │                        │
                                ▼                        ▼
                        ┌─────────────────┐     ┌─────────────────┐
                        │  Log Analyzer   │────▶│  Alert Manager  │
                        └─────────────────┘     └─────────────────┘
                                                         │
                                                         ▼
                                                 ┌──────────────┐
                                                 │   Channels   │
                                                 ├──────────────┤
                                                 │    Email     │
                                                 │    Slack     │
                                                 │   Webhook    │
                                                 └──────────────┘
```

## Prerequisites

- Node.js 18+
- Elasticsearch 8.x
- Redis 6.x

## Installation

1. Install dependencies:
```bash
cd services/logging
npm install
```

2. Configure environment variables:
```bash
cp .env.example .env
# Edit .env with your configuration
```

3. Start Elasticsearch:
```bash
docker run -d --name elasticsearch \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  -p 9200:9200 \
  elasticsearch:8.12.0
```

4. Start Redis:
```bash
docker run -d --name redis \
  -p 6379:6379 \
  redis:latest
```

5. Start the service:
```bash
npm start
```

## Configuration

### Environment Variables

| Variable | Description | Default |
|----------|-------------|---------|
| PORT | Service port | 3010 |
| ELASTICSEARCH_NODE | Elasticsearch URL | http://localhost:9200 |
| ELASTICSEARCH_USERNAME | Elasticsearch username | elastic |
| ELASTICSEARCH_PASSWORD | Elasticsearch password | changeme |
| REDIS_HOST | Redis host | localhost |
| REDIS_PORT | Redis port | 6379 |
| ALERT_EMAIL_ENABLED | Enable email alerts | false |
| ALERT_SLACK_ENABLED | Enable Slack alerts | false |
| ALERT_WEBHOOK_ENABLED | Enable webhook alerts | false |

### Alert Thresholds

Configure alert thresholds in `config/index.js`:

```javascript
alerts: {
  rules: {
    errorRate: {
      threshold: 0.05,  // 5% error rate
      window: 300000    // 5 minutes
    },
    responseTime: {
      threshold: 1000,  // 1 second
      window: 300000    // 5 minutes
    },
    systemResources: {
      cpu: 80,     // 80% CPU usage
      memory: 85,  // 85% memory usage
      disk: 90     // 90% disk usage
    }
  }
}
```

## API Endpoints

### Log Management

- `GET /api/logs/search` - Search logs
- `GET /api/logs/stats` - Get log statistics
- `GET /api/logs/metrics` - Get aggregated metrics
- `GET /api/logs/stream` - Stream logs in real-time
- `DELETE /api/logs/cleanup` - Delete old logs

### Alert Management

- `GET /api/alerts/history` - Get alert history
- `GET /api/alerts/active` - Get active alerts
- `POST /api/alerts/:id/acknowledge` - Acknowledge an alert
- `DELETE /api/alerts/:id` - Clear an alert
- `POST /api/alerts/test` - Send test alert
- `GET /api/alerts/config` - Get alert configuration

### Dashboard

- `GET /api/dashboard/overview` - Get dashboard overview
- `GET /api/dashboard/health` - Get service health
- `GET /api/dashboard/trends` - Get performance trends
- `GET /api/dashboard/top-errors` - Get top errors

## Log Format

### Application Logs
```json
{
  "@timestamp": "2024-01-15T10:30:00.000Z",
  "service": "api-gateway",
  "level": "error",
  "message": "Request failed",
  "userId": "user123",
  "requestId": "req-456",
  "method": "POST",
  "path": "/api/campaigns",
  "statusCode": 500,
  "responseTime": 234.5,
  "error": {
    "type": "ValidationError",
    "message": "Invalid campaign data",
    "stack": "..."
  }
}
```

### Metrics
```json
{
  "@timestamp": "2024-01-15T10:30:00.000Z",
  "service": "api-gateway",
  "metric": "response_time",
  "value": 234.5,
  "unit": "ms",
  "dimensions": {
    "endpoint": "/api/campaigns",
    "method": "POST",
    "status": "success"
  }
}
```

## Integration

### Sending Logs from Services

1. Install logging client:
```bash
npm install winston winston-elasticsearch
```

2. Configure winston logger (note: `clientOpts.node` must point at Elasticsearch itself, not at this service):
```javascript
import winston from 'winston';
import { ElasticsearchTransport } from 'winston-elasticsearch';

const logger = winston.createLogger({
  transports: [
    new ElasticsearchTransport({
      level: 'info',
      clientOpts: {
        node: 'http://localhost:9200',
        auth: { username: 'elastic', password: 'changeme' }
      },
      index: 'marketing-agent-logs'
    })
  ]
});
```

3. Send logs:
```javascript
logger.info('Campaign created', {
  service: 'campaign-service',
  userId: 'user123',
  campaignId: 'camp456',
  action: 'create'
});
```

### Real-time Log Streaming

```javascript
const eventSource = new EventSource('/api/logs/stream?service=api-gateway&level=error');

eventSource.onmessage = (event) => {
  const log = JSON.parse(event.data);
  console.log('New log:', log);
};
```

## Monitoring

### Health Check
```bash
curl http://localhost:3010/health
```

### Queue Statistics
```bash
curl http://localhost:3010/api/dashboard/overview
```

### Service Health
```bash
curl http://localhost:3010/api/dashboard/health
```

## Maintenance

### Log Retention

Logs are automatically deleted based on retention policies:
- Application logs: 30 days
- Metrics: 90 days
- Error logs: 60 days

### Manual Cleanup
```bash
curl -X DELETE http://localhost:3010/api/logs/cleanup
```

### Index Management

The service automatically creates and manages Elasticsearch indices with:
- Lifecycle policies for automatic rollover
- Retention policies for automatic deletion
- Optimized mappings for different log types
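The policy bodies themselves are not shown in this snapshot. As a rough sketch, a rollover-plus-delete lifecycle for the logs index could be registered with the `@elastic/elasticsearch` client like this; the policy name, size, and age values are illustrative assumptions chosen to mirror the 30-day retention above, not values taken from the service:

```javascript
import { Client } from '@elastic/elasticsearch';

const client = new Client({
  node: 'http://localhost:9200',
  auth: { username: 'elastic', password: 'changeme' }
});

// Hypothetical ILM policy: roll the hot index over at 10 GB or one day,
// then delete indices 30 days after rollover (matching the retention table).
await client.ilm.putLifecycle({
  name: 'marketing-agent-logs-policy',
  policy: {
    phases: {
      hot: {
        actions: {
          rollover: { max_primary_shard_size: '10gb', max_age: '1d' }
        }
      },
      delete: {
        min_age: '30d',
        actions: { delete: {} }
      }
    }
  }
});
```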
## Troubleshooting

### Common Issues

1. **Elasticsearch Connection Failed**
   - Check Elasticsearch is running: `curl http://localhost:9200`
   - Verify credentials in `.env` file
   - Check network connectivity

2. **High Memory Usage**
   - Adjust batch size in config: `collection.batchSize`
   - Reduce flush interval: `collection.flushInterval`
   - Check Elasticsearch heap size

3. **Alerts Not Sending**
   - Verify alert channel configuration
   - Check SMTP settings for email
   - Test webhook URL accessibility

### Debug Mode

Enable debug logging:
```bash
LOG_LEVEL=debug npm start
```

## Development

### Running Tests
```bash
npm test
```

### Development Mode
```bash
npm run dev
```

### Adding New Alert Channels

1. Create channel handler in `services/alertManager.js`:
```javascript
async sendCustomAlert(alert) {
  // Implement channel logic
}
```

2. Add to alert sending pipeline
3. Update configuration schema
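As a concrete illustration of these three steps, a hypothetical Telegram channel handler in the style of the existing `sendWebhookAlert` might look like the sketch below. The `config.alerts.channels.telegram` entry and its `botToken`/`chatId` fields do not exist yet and would be added in step 3; the Bot API endpoint itself is standard.

```javascript
// Hypothetical handler inside AlertManager; assumes a `telegram` entry
// has been added to config.alerts.channels (step 3).
async sendTelegramAlert(alert) {
  const { enabled, botToken, chatId } = config.alerts.channels.telegram || {};
  if (!enabled || !botToken || !chatId) {
    return false;
  }

  try {
    // Telegram Bot API sendMessage: https://core.telegram.org/bots/api#sendmessage
    await axios.post(`https://api.telegram.org/bot${botToken}/sendMessage`, {
      chat_id: chatId,
      text: `[${alert.severity.toUpperCase()}] ${alert.title}\n${alert.message}`
    });
    return true;
  } catch (error) {
    logger.error('Failed to send Telegram alert:', error);
    return false;
  }
}
```

For step 2, the handler would be added to the `Promise.allSettled` list in `triggerAlert`, alongside the email, Slack, and webhook senders.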
## Performance Tuning

### Elasticsearch Optimization
- Increase shards for high-volume indices
- Configure index lifecycle management
- Use bulk operations for better throughput

### Redis Optimization
- Configure maxmemory policy
- Use Redis clustering for high availability
- Monitor queue sizes

### Application Optimization
- Adjust batch sizes based on load
- Configure worker concurrency
- Use connection pooling
103
marketing-agent/services/logging/examples/integration.js
Normal file
@@ -0,0 +1,103 @@
// Example: How to integrate the logging client in your service

import express from 'express';
import { createLoggingClient } from '../src/client/index.js';

// Initialize logging client
const logger = createLoggingClient({
  serviceName: 'api-gateway',
  redisHost: 'localhost',
  redisPort: 6379,
  logLevel: 'info'
});

// Create Express app
const app = express();

// Use logging middleware
app.use(logger.expressMiddleware());

// Example route with logging
app.get('/api/users/:id', async (req, res) => {
  try {
    // Log the operation
    await logger.info('Fetching user', {
      userId: req.params.id,
      requestedBy: req.user?.id
    });

    // Simulate fetching user
    const user = { id: req.params.id, name: 'John Doe' };

    // Send metric
    await logger.metric('user_fetch_success', 1, {
      userId: req.params.id
    });

    res.json(user);
  } catch (error) {
    // Log error
    await logger.error(error, {
      userId: req.params.id,
      operation: 'user_fetch'
    });

    res.status(500).json({ error: 'Internal server error' });
  }
});

// Example of logging different levels
async function exampleLogging() {
  // Debug logging
  await logger.debug('Debug message', { detail: 'some debug info' });

  // Info logging
  await logger.info('User logged in', { userId: 'user123' });

  // Warning logging
  await logger.warn('API rate limit approaching', {
    userId: 'user123',
    current: 95,
    limit: 100
  });

  // Error logging
  try {
    throw new Error('Database connection failed');
  } catch (error) {
    await logger.error(error, {
      database: 'mongodb',
      host: 'localhost'
    });
  }

  // Metrics
  await logger.metric('api_requests', 1, {
    endpoint: '/api/users',
    method: 'GET'
  });

  await logger.metric('response_time', 234.5, {
    endpoint: '/api/users',
    method: 'GET'
  });

  await logger.metric('memory_usage', process.memoryUsage().heapUsed / 1024 / 1024, {
    unit: 'MB'
  });
}

// Run examples
exampleLogging();

// Start server
app.listen(3000, () => {
  logger.info('API Gateway started', { port: 3000 });
});

// Graceful shutdown
process.on('SIGTERM', async () => {
  await logger.info('Shutting down service');
  await logger.close();
  process.exit(0);
});
28
marketing-agent/services/logging/healthcheck.js
Normal file
@@ -0,0 +1,28 @@
// package.json declares "type": "module", so this must use an ESM import
import http from 'http';

const options = {
  hostname: 'localhost',
  port: 3010,
  path: '/health', // matches the health endpoint registered in src/index.js
  method: 'GET',
  timeout: 2000
};

const req = http.request(options, (res) => {
  if (res.statusCode === 200) {
    process.exit(0);
  } else {
    process.exit(1);
  }
});

req.on('error', () => {
  process.exit(1);
});

req.on('timeout', () => {
  req.destroy(); // req.abort() is deprecated
  process.exit(1);
});

req.end();
35
marketing-agent/services/logging/package.json
Normal file
@@ -0,0 +1,35 @@
{
  "name": "logging-service",
  "version": "1.0.0",
  "description": "Centralized logging and analysis service",
  "main": "src/index.js",
  "type": "module",
  "scripts": {
    "start": "node src/index.js",
    "dev": "nodemon src/index.js",
    "test": "jest"
  },
  "dependencies": {
    "@elastic/elasticsearch": "^8.12.0",
    "winston": "^3.11.0",
    "winston-elasticsearch": "^0.17.4",
    "winston-daily-rotate-file": "^4.7.1",
    "express": "^4.18.2",
    "pino": "^8.17.2",
    "pino-elasticsearch": "^8.0.0",
    "pino-pretty": "^10.3.1",
    "bull": "^4.11.3",
    "ioredis": "^5.3.2",
    "node-cron": "^3.0.2",
    "dotenv": "^16.0.3",
    "uuid": "^9.0.1",
    "axios": "^1.4.0",
    "lodash": "^4.17.21",
    "moment": "^2.29.4",
    "nodemailer": "^6.9.7"
  },
  "devDependencies": {
    "nodemon": "^3.0.3",
    "jest": "^29.5.0"
  }
}
172
marketing-agent/services/logging/src/client/index.js
Normal file
@@ -0,0 +1,172 @@
import Redis from 'ioredis';
import winston from 'winston';

/**
 * Logging client for sending logs to the centralized logging service
 */
export class LoggingClient {
  constructor(options = {}) {
    this.serviceName = options.serviceName || 'unknown';
    this.redis = new Redis({
      host: options.redisHost || 'localhost',
      port: options.redisPort || 6379,
      password: options.redisPassword,
      db: options.redisDb || 2
    });

    // Create winston logger for local fallback
    this.logger = winston.createLogger({
      level: options.logLevel || 'info',
      format: winston.format.json(),
      defaultMeta: { service: this.serviceName },
      transports: [
        new winston.transports.Console({
          format: winston.format.simple()
        })
      ]
    });
  }

  /**
   * Send log entry
   */
  async log(level, message, metadata = {}) {
    const logEntry = {
      '@timestamp': new Date().toISOString(),
      service: this.serviceName,
      level,
      message,
      ...metadata
    };

    try {
      // Publish to Redis channel
      await this.redis.publish('logs:app', JSON.stringify(logEntry));
    } catch (error) {
      // Fallback to local logging
      this.logger.log(level, message, metadata);
      this.logger.error('Failed to send log to centralized service:', error);
    }
  }

  /**
   * Send metric
   */
  async metric(name, value, dimensions = {}) {
    const metric = {
      '@timestamp': new Date().toISOString(),
      service: this.serviceName,
      metric: name,
      value,
      dimensions
    };

    try {
      await this.redis.publish('logs:metrics', JSON.stringify(metric));
    } catch (error) {
      this.logger.error('Failed to send metric:', error);
    }
  }

  /**
   * Send error
   */
  async error(error, context = {}) {
    const errorLog = {
      '@timestamp': new Date().toISOString(),
      service: this.serviceName,
      level: 'error',
      message: error.message,
      error: {
        type: error.constructor.name,
        message: error.message,
        stack: error.stack,
        code: error.code
      },
      context
    };

    try {
      await this.redis.publish('logs:errors', JSON.stringify(errorLog));
    } catch (err) {
      this.logger.error('Failed to send error log:', err);
      this.logger.error(error.message, context);
    }
  }

  // Convenience methods
  async info(message, metadata) {
    return this.log('info', message, metadata);
  }

  async warn(message, metadata) {
    return this.log('warn', message, metadata);
  }

  async debug(message, metadata) {
    return this.log('debug', message, metadata);
  }

  /**
   * Express middleware for request logging
   */
  expressMiddleware() {
    return async (req, res, next) => {
      const start = Date.now();
      const requestId = req.headers['x-request-id'] || `req-${Date.now()}`;

      // Log request
      await this.info('Incoming request', {
        requestId,
        method: req.method,
        path: req.path,
        query: req.query,
        userId: req.user?.id,
        ip: req.ip,
        userAgent: req.headers['user-agent']
      });

      // Capture response
      const originalSend = res.send;
      res.send = function(data) {
        res.send = originalSend;
        const responseTime = Date.now() - start;

        // Log response
        this.info('Request completed', {
          requestId,
          method: req.method,
          path: req.path,
          statusCode: res.statusCode,
          responseTime,
          userId: req.user?.id
        }).catch(() => {});

        // Send metric
        this.metric('response_time', responseTime, {
          endpoint: req.path,
          method: req.method,
          status: res.statusCode < 400 ? 'success' : 'error'
        }).catch(() => {});

        return res.send(data);
      }.bind(this);

      next();
    };
  }

  /**
   * Close connections
   */
  async close() {
    await this.redis.disconnect();
  }
}

/**
 * Create a logging client instance
 */
export function createLoggingClient(options) {
  return new LoggingClient(options);
}
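The consuming side of these `logs:*` channels lives in `src/services/logCollector.js`, which is not part of this excerpt. As a minimal sketch of that hand-off, assuming nothing beyond the channel names published above (the batching and Elasticsearch indexing in the real collector are omitted):

```javascript
import Redis from 'ioredis';

// A subscriber needs its own connection: once an ioredis connection
// subscribes, it can only issue (un)subscribe commands.
const sub = new Redis({ host: 'localhost', port: 6379, db: 2 });

await sub.subscribe('logs:app', 'logs:metrics', 'logs:errors');

sub.on('message', (channel, payload) => {
  const entry = JSON.parse(payload);
  // The real collector would enqueue this entry for batched bulk
  // indexing into Elasticsearch; here we only show the hand-off point.
  console.log(`[${channel}]`, entry['@timestamp'], entry.service);
});
```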
115
marketing-agent/services/logging/src/config/index.js
Normal file
@@ -0,0 +1,115 @@
import dotenv from 'dotenv';

dotenv.config();

export const config = {
  port: process.env.PORT || 3010,

  elasticsearch: {
    node: process.env.ELASTICSEARCH_NODE || 'http://localhost:9200',
    auth: {
      username: process.env.ELASTICSEARCH_USERNAME || 'elastic',
      password: process.env.ELASTICSEARCH_PASSWORD || 'changeme'
    },
    indices: {
      logs: 'marketing-agent-logs',
      metrics: 'marketing-agent-metrics',
      errors: 'marketing-agent-errors'
    }
  },

  redis: {
    host: process.env.REDIS_HOST || 'localhost',
    port: process.env.REDIS_PORT || 6379,
    password: process.env.REDIS_PASSWORD,
    db: process.env.REDIS_DB || 2
  },

  collection: {
    batchSize: 100,
    flushInterval: 5000, // 5 seconds
    maxQueueSize: 10000
  },

  retention: {
    logs: 30,    // days
    metrics: 90, // days
    errors: 60   // days
  },

  alerts: {
    channels: {
      email: {
        enabled: process.env.ALERT_EMAIL_ENABLED === 'true',
        smtp: {
          host: process.env.SMTP_HOST,
          port: process.env.SMTP_PORT || 587,
          auth: {
            user: process.env.SMTP_USER,
            pass: process.env.SMTP_PASS
          }
        },
        from: process.env.ALERT_EMAIL_FROM,
        to: process.env.ALERT_EMAIL_TO?.split(',') || []
      },
      slack: {
        enabled: process.env.ALERT_SLACK_ENABLED === 'true',
        webhookUrl: process.env.SLACK_WEBHOOK_URL
      },
      webhook: {
        enabled: process.env.ALERT_WEBHOOK_ENABLED === 'true',
        url: process.env.ALERT_WEBHOOK_URL
      }
    },
    rules: {
      errorRate: {
        threshold: 0.05, // 5% error rate
        window: 300000   // 5 minutes
      },
      responseTime: {
        threshold: 1000, // 1 second
        window: 300000   // 5 minutes
      },
      systemResources: {
        cpu: 80,    // 80% CPU usage
        memory: 85, // 85% memory usage
        disk: 90    // 90% disk usage
      }
    }
  },

  analysis: {
    patterns: {
      error: [
        /error/i,
        /exception/i,
        /failed/i,
        /timeout/i,
        /refused/i
      ],
      warning: [
        /warning/i,
        /deprecated/i,
        /slow/i,
        /retry/i
      ],
      security: [
        /unauthorized/i,
        /forbidden/i,
        /invalid token/i,
        /authentication failed/i
      ]
    },
    anomaly: {
      enabled: true,
      sensitivity: 0.8,
      trainingPeriod: 7 // days
    }
  },

  services: {
    apiGateway: 'http://localhost:3000',
    orchestrator: 'http://localhost:3001',
    analytics: 'http://localhost:3003'
  }
};
84
marketing-agent/services/logging/src/index.js
Normal file
@@ -0,0 +1,84 @@
import express from 'express';
import { LogCollector } from './services/logCollector.js';
import { LogAnalyzer } from './services/logAnalyzer.js';
import { AlertManager } from './services/alertManager.js';
import { LogStorage } from './services/logStorage.js';
import { config } from './config/index.js';
import { logger } from './utils/logger.js';
import logRoutes from './routes/logs.js';
import alertRoutes from './routes/alerts.js';
import dashboardRoutes from './routes/dashboard.js';

const app = express();

// Middleware
app.use(express.json());
app.use(express.urlencoded({ extended: true }));

// Initialize services
const logStorage = new LogStorage();
const logCollector = new LogCollector(logStorage);
const logAnalyzer = new LogAnalyzer(logStorage);
const alertManager = new AlertManager(logAnalyzer);

// Start services
async function startServices() {
  try {
    await logStorage.initialize();
    await logCollector.start();
    await logAnalyzer.start();
    await alertManager.start();

    logger.info('All logging services started successfully');
  } catch (error) {
    logger.error('Failed to start services:', error);
    process.exit(1);
  }
}

// Set up app locals for route access
app.locals.logStorage = logStorage;
app.locals.logCollector = logCollector;
app.locals.logAnalyzer = logAnalyzer;
app.locals.alertManager = alertManager;

// Routes
app.use('/api/logs', logRoutes);
app.use('/api/alerts', alertRoutes);
app.use('/api/dashboard', dashboardRoutes);

// Health check
app.get('/health', (req, res) => {
  res.json({
    status: 'healthy',
    service: 'logging-service',
    timestamp: new Date().toISOString(),
    components: {
      elasticsearch: logStorage.isConnected(),
      collector: logCollector.isRunning(),
      analyzer: logAnalyzer.isRunning()
    }
  });
});

// Start server
const PORT = config.port || 3010;

app.listen(PORT, async () => {
  logger.info(`Logging service running on port ${PORT}`);
  await startServices();
});

// Graceful shutdown
process.on('SIGTERM', async () => {
  logger.info('SIGTERM received, shutting down gracefully');

  await logCollector.stop();
  await logAnalyzer.stop();
  await alertManager.stop();
  await logStorage.close();

  process.exit(0);
});

export { app, logStorage, logCollector, logAnalyzer, alertManager };
168
marketing-agent/services/logging/src/routes/alerts.js
Normal file
@@ -0,0 +1,168 @@
import express from 'express';
import { config } from '../config/index.js';
import { logger } from '../utils/logger.js';

const router = express.Router();

// Get alert manager instance from app locals
const getAlertManager = (req) => req.app.locals.alertManager;

/**
 * Get alert history
 */
router.get('/history', async (req, res) => {
  try {
    const { type, severity, startTime, endTime } = req.query;

    const alerts = await getAlertManager(req).getAlertHistory({
      type,
      severity,
      startTime,
      endTime
    });

    res.json({
      success: true,
      data: alerts
    });
  } catch (error) {
    logger.error('Failed to get alert history:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get alert history'
    });
  }
});

/**
 * Get active alerts
 */
router.get('/active', async (req, res) => {
  try {
    const activeAlerts = await getAlertManager(req).getAlertHistory({
      acknowledged: false
    });

    res.json({
      success: true,
      data: activeAlerts.filter(a => !a.acknowledged)
    });
  } catch (error) {
    logger.error('Failed to get active alerts:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get active alerts'
    });
  }
});

/**
 * Acknowledge an alert
 */
router.post('/:alertId/acknowledge', async (req, res) => {
  try {
    const { alertId } = req.params;

    const alert = await getAlertManager(req).acknowledgeAlert(alertId);

    res.json({
      success: true,
      data: alert
    });
  } catch (error) {
    logger.error('Failed to acknowledge alert:', error);
    res.status(500).json({
      success: false,
      error: error.message || 'Failed to acknowledge alert'
    });
  }
});

/**
 * Clear an alert
 */
router.delete('/:alertId', async (req, res) => {
  try {
    const { alertId } = req.params;

    await getAlertManager(req).clearAlert(alertId);

    res.json({
      success: true,
      message: 'Alert cleared successfully'
    });
  } catch (error) {
    logger.error('Failed to clear alert:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to clear alert'
    });
  }
});

/**
 * Test alert system
 */
router.post('/test', async (req, res) => {
  try {
    const { type = 'test', severity = 'medium', channel } = req.body;

    await getAlertManager(req).triggerAlert(type, {
      severity,
      title: 'Test Alert',
      message: 'This is a test alert from the logging system',
      details: {
        triggeredBy: 'API',
        timestamp: new Date().toISOString(),
        channel: channel || 'all'
      }
    });

    res.json({
      success: true,
      message: 'Test alert sent successfully'
    });
  } catch (error) {
    logger.error('Failed to send test alert:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to send test alert'
    });
  }
});

/**
 * Get alert configuration
 */
router.get('/config', async (req, res) => {
  try {
    const alertConfig = {
      channels: {
        email: {
          enabled: config.alerts.channels.email.enabled,
          recipients: config.alerts.channels.email.to
        },
        slack: {
          enabled: config.alerts.channels.slack.enabled
        },
        webhook: {
          enabled: config.alerts.channels.webhook.enabled
        }
      },
      rules: config.alerts.rules
    };

    res.json({
      success: true,
      data: alertConfig
    });
  } catch (error) {
    logger.error('Failed to get alert config:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get alert configuration'
    });
  }
});

export default router;
356
marketing-agent/services/logging/src/routes/dashboard.js
Normal file
@@ -0,0 +1,356 @@
import express from 'express';
import { logger } from '../utils/logger.js';

// Get service instances from app locals
const getLogStorage = (req) => req.app.locals.logStorage;
const getLogCollector = (req) => req.app.locals.logCollector;
const getLogAnalyzer = (req) => req.app.locals.logAnalyzer;

const router = express.Router();

/**
 * Get dashboard overview
 */
router.get('/overview', async (req, res) => {
  try {
    const { timeRange = '24h' } = req.query;

    // Get log statistics
    const logStats = await getLogStorage(req).getStats(null, timeRange);

    // Get collector stats
    const collectorStats = await getLogCollector(req).getQueueStats();

    // Get recent analysis
    const recentAnalysis = await getLogStorage(req).search({
      index: getLogStorage(req).indices.metrics,
      query: {
        bool: {
          must: [
            { term: { metric: 'error_analysis' } },
            { range: { '@timestamp': { gte: `now-${timeRange}` } } }
          ]
        }
      },
      sort: [{ '@timestamp': { order: 'desc' } }],
      size: 1
    });

    const overview = {
      timestamp: new Date().toISOString(),
      period: timeRange,
      summary: {
        totalLogs: logStats.total_count.value,
        errorRate: calculateErrorRate(logStats),
        topServices: logStats.by_service.buckets.slice(0, 5).map(b => ({
          name: b.key,
          count: b.doc_count
        })),
        logLevels: Object.fromEntries(
          logStats.by_level.buckets.map(b => [b.key, b.doc_count])
        )
      },
      queues: collectorStats,
      timeline: logStats.over_time.buckets.map(bucket => ({
        timestamp: bucket.key_as_string,
        total: bucket.doc_count,
        levels: Object.fromEntries(
          bucket.by_level.buckets.map(b => [b.key, b.doc_count])
        )
      })),
      recentIssues: recentAnalysis.hits.length > 0 ?
        recentAnalysis.hits[0].analysis : null
    };

    res.json({
      success: true,
      data: overview
    });
  } catch (error) {
    logger.error('Failed to get dashboard overview:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get dashboard overview'
    });
  }
});

/**
 * Get service health
 */
router.get('/health', async (req, res) => {
  try {
    const { service, timeRange = '1h' } = req.query;

    const must = [
      { range: { '@timestamp': { gte: `now-${timeRange}` } } }
    ];

    if (service) {
      must.push({ term: { service } });
    }

    // Get error logs
    const errorLogs = await getLogStorage(req).search({
      index: getLogStorage(req).indices.errors,
      query: { bool: { must } },
      size: 0,
      aggregations: {
        by_service: {
          terms: { field: 'service', size: 50 },
          aggs: {
            error_types: {
              terms: { field: 'error.type', size: 10 }
            }
          }
        }
      }
    });

    // Get performance metrics
    const perfMetrics = await getLogStorage(req).aggregate({
      index: getLogStorage(req).indices.metrics,
      query: {
        bool: {
          must: [
            ...must,
            { term: { metric: 'response_time' } }
          ]
        }
      },
      aggregations: {
        by_service: {
          terms: { field: 'service', size: 50 },
          aggs: {
            avg_response: { avg: { field: 'value' } },
            p95_response: { percentiles: { field: 'value', percents: [95] } }
          }
        }
      }
    });

    // Combine health data
    const healthData = {};

    // Add error data
    if (errorLogs.aggregations) {
      errorLogs.aggregations.by_service.buckets.forEach(bucket => {
        healthData[bucket.key] = {
          service: bucket.key,
          errors: {
            total: bucket.doc_count,
            types: Object.fromEntries(
              bucket.error_types.buckets.map(b => [b.key, b.doc_count])
            )
          }
        };
      });
    }

    // Add performance data
    if (perfMetrics.by_service) {
      perfMetrics.by_service.buckets.forEach(bucket => {
        if (!healthData[bucket.key]) {
          healthData[bucket.key] = { service: bucket.key };
        }
        healthData[bucket.key].performance = {
          avgResponseTime: bucket.avg_response.value,
          p95ResponseTime: bucket.p95_response.values['95.0']
        };
      });
    }

    // Calculate health scores
    Object.values(healthData).forEach(service => {
      service.healthScore = calculateHealthScore(service);
      service.status = getHealthStatus(service.healthScore);
    });

    res.json({
      success: true,
      data: Object.values(healthData)
    });
  } catch (error) {
    logger.error('Failed to get service health:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get service health'
    });
  }
});

/**
 * Get performance trends
 */
router.get('/trends', async (req, res) => {
  try {
    const {
      metric = 'response_time',
      service,
      timeRange = '24h',
      interval = '1h'
    } = req.query;

    const must = [
      { term: { metric } },
      { range: { '@timestamp': { gte: `now-${timeRange}` } } }
    ];

    if (service) {
      must.push({ term: { service } });
    }

    const trends = await getLogStorage(req).aggregate({
      index: getLogStorage(req).indices.metrics,
      query: { bool: { must } },
      aggregations: {
        trend: {
          date_histogram: {
            field: '@timestamp',
            fixed_interval: interval
          },
          aggs: {
            avg_value: { avg: { field: 'value' } },
            min_value: { min: { field: 'value' } },
            max_value: { max: { field: 'value' } },
            percentiles: {
              percentiles: {
                field: 'value',
                percents: [50, 95, 99]
              }
            }
          }
        }
      }
    });

    const trendData = trends.trend.buckets.map(bucket => ({
      timestamp: bucket.key_as_string,
      avg: bucket.avg_value.value,
      min: bucket.min_value.value,
      max: bucket.max_value.value,
      p50: bucket.percentiles.values['50.0'],
      p95: bucket.percentiles.values['95.0'],
      p99: bucket.percentiles.values['99.0']
    }));

    res.json({
      success: true,
      data: {
        metric,
        service: service || 'all',
        timeRange,
        interval,
        trends: trendData
      }
    });
  } catch (error) {
    logger.error('Failed to get performance trends:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get performance trends'
    });
  }
});

/**
 * Get top errors
 */
router.get('/top-errors', async (req, res) => {
  try {
    const { timeRange = '24h', limit = 10 } = req.query;

    const topErrors = await getLogStorage(req).aggregate({
      index: getLogStorage(req).indices.errors,
      query: {
        range: {
          '@timestamp': { gte: `now-${timeRange}` }
        }
      },
      aggregations: {
        by_error: {
          terms: {
            field: 'error.message.keyword',
            size: limit,
            order: { _count: 'desc' }
          },
          aggs: {
            sample: {
              top_hits: {
                size: 1,
                _source: ['error', 'service', '@timestamp']
              }
            },
            services: {
              terms: { field: 'service', size: 5 }
            }
          }
        }
      }
    });

    const errors = topErrors.by_error.buckets.map(bucket => ({
      message: bucket.key,
      count: bucket.doc_count,
      sample: bucket.sample.hits.hits[0]._source,
      affectedServices: bucket.services.buckets.map(s => ({
        name: s.key,
        count: s.doc_count
      }))
    }));

    res.json({
      success: true,
      data: errors
    });
  } catch (error) {
    logger.error('Failed to get top errors:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get top errors'
    });
  }
});

// Helper functions
function calculateErrorRate(stats) {
  const errorCount = stats.by_level.buckets
    .filter(b => ['error', 'fatal'].includes(b.key))
    .reduce((sum, b) => sum + b.doc_count, 0);

  return stats.total_count.value > 0 ?
    (errorCount / stats.total_count.value) * 100 : 0;
}

function calculateHealthScore(service) {
  let score = 100;

  // Deduct for errors
  if (service.errors) {
    const errorPenalty = Math.min(50, service.errors.total * 2);
    score -= errorPenalty;
  }

  // Deduct for slow response times
  if (service.performance) {
    if (service.performance.avgResponseTime > 1000) {
      score -= 20;
    } else if (service.performance.avgResponseTime > 500) {
      score -= 10;
    }

    if (service.performance.p95ResponseTime > 2000) {
      score -= 15;
    }
  }

  return Math.max(0, score);
}

function getHealthStatus(score) {
  if (score >= 90) return 'healthy';
  if (score >= 70) return 'degraded';
  if (score >= 50) return 'unhealthy';
  return 'critical';
}

export default router;
245
marketing-agent/services/logging/src/routes/logs.js
Normal file
@@ -0,0 +1,245 @@
import express from 'express';
import { logger } from '../utils/logger.js';

// Get log storage instance from app locals
const getLogStorage = (req) => req.app.locals.logStorage;

const router = express.Router();

/**
 * Search logs
 */
router.get('/search', async (req, res) => {
  try {
    const {
      index = 'logs',
      query,
      service,
      level,
      startTime,
      endTime,
      size = 100,
      from = 0,
      sort = '@timestamp:desc'
    } = req.query;

    // Build query
    const must = [];

    if (startTime || endTime) {
      const range = { '@timestamp': {} };
      if (startTime) range['@timestamp'].gte = startTime;
      if (endTime) range['@timestamp'].lte = endTime;
      must.push({ range });
    }

    if (service) {
      must.push({ term: { service } });
    }

    if (level) {
      must.push({ term: { level } });
    }

    if (query) {
      must.push({
        multi_match: {
          query,
          fields: ['message', 'error.message', 'path', 'method']
        }
      });
    }

    const searchQuery = must.length > 0 ? { bool: { must } } : { match_all: {} };

    // Parse sort
    const [sortField, sortOrder] = sort.split(':');
    const sortObj = { [sortField]: { order: sortOrder || 'desc' } };

    const results = await getLogStorage(req).search({
      index: `${getLogStorage(req).indices[index] || getLogStorage(req).indices.logs}*`,
      query: searchQuery,
      sort: [sortObj],
      size: parseInt(size),
      from: parseInt(from)
    });

    res.json({
      success: true,
      data: results
    });
  } catch (error) {
    logger.error('Failed to search logs:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to search logs'
    });
  }
});

/**
 * Get log statistics
 */
router.get('/stats', async (req, res) => {
  try {
    const { index, timeRange = '24h' } = req.query;

    const stats = await getLogStorage(req).getStats(index, timeRange);

    res.json({
      success: true,
      data: stats
    });
  } catch (error) {
    logger.error('Failed to get log stats:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get statistics'
    });
  }
});

/**
 * Get aggregated metrics
 */
router.get('/metrics', async (req, res) => {
  try {
    const {
      metric,
      service,
      startTime,
      endTime,
      interval = '1h',
      aggregation = 'avg'
    } = req.query;

    const must = [
      { term: { metric } }
    ];

    if (service) {
      must.push({ term: { service } });
    }

    if (startTime || endTime) {
      const range = { '@timestamp': {} };
      if (startTime) range['@timestamp'].gte = startTime;
      if (endTime) range['@timestamp'].lte = endTime;
      must.push({ range });
    }

    const aggregations = {
      over_time: {
        date_histogram: {
          field: '@timestamp',
          fixed_interval: interval
        },
        aggs: {
          value: {
            [aggregation]: { field: 'value' }
          }
        }
      },
      summary: {
        stats: { field: 'value' }
      }
    };

    const results = await getLogStorage(req).aggregate({
      index: getLogStorage(req).indices.metrics,
      query: { bool: { must } },
      aggregations
    });

    res.json({
      success: true,
      data: {
        summary: results.summary,
        timeSeries: results.over_time.buckets.map(bucket => ({
          timestamp: bucket.key_as_string,
          value: bucket.value.value,
          count: bucket.doc_count
        }))
      }
    });
  } catch (error) {
    logger.error('Failed to get metrics:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to get metrics'
    });
  }
});

/**
 * Stream logs in real-time
 */
router.get('/stream', async (req, res) => {
  // Set up SSE
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive'
  });

  const { service, level, index = 'logs' } = req.query;

  // Create a tail cursor
  let lastTimestamp = new Date().toISOString();

  const streamInterval = setInterval(async () => {
    try {
      const must = [
        { range: { '@timestamp': { gt: lastTimestamp } } }
      ];

      if (service) must.push({ term: { service } });
      if (level) must.push({ term: { level } });

      const results = await getLogStorage(req).search({
        index: `${getLogStorage(req).indices[index]}*`,
        query: { bool: { must } },
        sort: [{ '@timestamp': { order: 'asc' } }],
        size: 100
      });

      if (results.hits.length > 0) {
        // Update last timestamp
        lastTimestamp = results.hits[results.hits.length - 1]['@timestamp'];

        // Send logs
        for (const log of results.hits) {
          res.write(`data: ${JSON.stringify(log)}\n\n`);
        }
      }
    } catch (error) {
      logger.error('Stream error:', error);
    }
  }, 1000);

  // Clean up on disconnect
  req.on('close', () => {
    clearInterval(streamInterval);
  });
});

/**
 * Delete old logs
 */
router.delete('/cleanup', async (req, res) => {
  try {
    await getLogStorage(req).deleteOldData();

    res.json({
      success: true,
      message: 'Old logs deleted successfully'
    });
  } catch (error) {
    logger.error('Failed to cleanup logs:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to cleanup logs'
    });
  }
});

export default router;
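For reference, the search endpoint above can be exercised from another Node service. A small sketch using axios (already a project dependency); the query values are illustrative, and it assumes the search result exposes a `hits` array, as the dashboard routes do:

```javascript
import axios from 'axios';

// Fetch the 20 most recent error logs from api-gateway over the last hour.
const { data } = await axios.get('http://localhost:3010/api/logs/search', {
  params: {
    service: 'api-gateway',
    level: 'error',
    startTime: new Date(Date.now() - 60 * 60 * 1000).toISOString(),
    size: 20,
    sort: '@timestamp:desc'
  }
});

if (data.success) {
  for (const log of data.data.hits) {
    console.log(log['@timestamp'], log.message);
  }
}
```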
356
marketing-agent/services/logging/src/services/alertManager.js
Normal file
@@ -0,0 +1,356 @@
|
||||
import axios from 'axios';
|
||||
import nodemailer from 'nodemailer';
|
||||
import { config } from '../config/index.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
export class AlertManager {
|
||||
constructor(logAnalyzer) {
|
||||
this.logAnalyzer = logAnalyzer;
|
||||
this.alertHistory = new Map();
|
||||
this.emailTransporter = null;
|
||||
this.running = false;
|
||||
this.alertCooldown = new Map(); // Prevent alert spam
|
||||
}
|
||||
|
||||
async start() {
|
||||
try {
|
||||
// Initialize email transporter if enabled
|
||||
if (config.alerts.channels.email.enabled) {
|
||||
this.initializeEmailTransporter();
|
||||
}
|
||||
|
||||
// Start monitoring
|
||||
this.startMonitoring();
|
||||
|
||||
this.running = true;
|
||||
logger.info('Alert manager started successfully');
|
||||
} catch (error) {
|
||||
logger.error('Failed to start alert manager:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
initializeEmailTransporter() {
|
||||
this.emailTransporter = nodemailer.createTransport({
|
||||
host: config.alerts.channels.email.smtp.host,
|
||||
port: config.alerts.channels.email.smtp.port,
|
||||
secure: config.alerts.channels.email.smtp.port === 465,
|
||||
auth: config.alerts.channels.email.smtp.auth
|
||||
});
|
||||
}
|
||||
|
||||
startMonitoring() {
|
||||
// Monitor for anomalies
|
||||
setInterval(async () => {
|
||||
await this.checkAlertConditions();
|
||||
}, 60000); // Check every minute
|
||||
}
|
||||
|
||||
async checkAlertConditions() {
|
||||
try {
|
||||
// Get recent metrics from analyzer
|
||||
const endTime = new Date();
|
||||
const startTime = new Date(endTime - 5 * 60 * 1000);
|
||||
|
||||
// Check error rate
|
||||
const errorMetrics = await this.getErrorMetrics(startTime, endTime);
|
||||
if (errorMetrics.rate > config.alerts.rules.errorRate.threshold) {
|
||||
await this.triggerAlert('error_rate', {
|
||||
severity: 'high',
|
||||
title: 'High Error Rate Detected',
|
||||
message: `Error rate is ${(errorMetrics.rate * 100).toFixed(2)}% (threshold: ${(config.alerts.rules.errorRate.threshold * 100).toFixed(2)}%)`,
|
||||
details: errorMetrics
|
||||
});
|
||||
}
|
||||
|
||||
// Check response time
|
||||
const performanceMetrics = await this.getPerformanceMetrics(startTime, endTime);
|
||||
if (performanceMetrics.avgResponseTime > config.alerts.rules.responseTime.threshold) {
|
||||
await this.triggerAlert('response_time', {
|
||||
severity: 'medium',
|
||||
title: 'High Response Time Detected',
|
||||
message: `Average response time is ${performanceMetrics.avgResponseTime}ms (threshold: ${config.alerts.rules.responseTime.threshold}ms)`,
|
||||
details: performanceMetrics
|
||||
});
|
||||
}
|
||||
|
||||
// Check system resources
|
||||
const resourceMetrics = await this.getResourceMetrics();
|
||||
if (resourceMetrics.cpu > config.alerts.rules.systemResources.cpu) {
|
||||
await this.triggerAlert('cpu_usage', {
|
||||
severity: 'high',
|
||||
title: 'High CPU Usage',
|
||||
message: `CPU usage is ${resourceMetrics.cpu}% (threshold: ${config.alerts.rules.systemResources.cpu}%)`,
|
||||
details: resourceMetrics
|
||||
});
|
||||
}
|
||||
|
||||
if (resourceMetrics.memory > config.alerts.rules.systemResources.memory) {
|
||||
await this.triggerAlert('memory_usage', {
|
||||
severity: 'high',
|
||||
title: 'High Memory Usage',
|
||||
message: `Memory usage is ${resourceMetrics.memory}% (threshold: ${config.alerts.rules.systemResources.memory}%)`,
|
||||
details: resourceMetrics
|
||||
});
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
logger.error('Failed to check alert conditions:', error);
|
||||
}
|
||||
}
|
||||
|
||||
async triggerAlert(type, alertData) {
|
||||
// Check cooldown to prevent spam
|
||||
const cooldownKey = `${type}:${alertData.severity}`;
|
||||
const lastAlert = this.alertCooldown.get(cooldownKey);
|
||||
const cooldownPeriod = 15 * 60 * 1000; // 15 minutes
|
||||
|
||||
if (lastAlert && Date.now() - lastAlert < cooldownPeriod) {
|
||||
logger.debug(`Alert ${type} is in cooldown period`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Create alert object
|
||||
const alert = {
|
||||
id: `alert_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
||||
type,
|
||||
timestamp: new Date().toISOString(),
|
||||
...alertData
|
||||
};
|
||||
|
||||
// Store in history
|
||||
this.alertHistory.set(alert.id, alert);
|
||||
|
||||
// Send through enabled channels
|
||||
const results = await Promise.allSettled([
|
||||
this.sendEmailAlert(alert),
|
||||
this.sendSlackAlert(alert),
|
      this.sendWebhookAlert(alert)
    ]);

    // Update cooldown
    this.alertCooldown.set(cooldownKey, Date.now());

    // Log results
    const sentChannels = results
      .map((result, index) => {
        const channels = ['email', 'slack', 'webhook'];
        return result.status === 'fulfilled' && result.value ? channels[index] : null;
      })
      .filter(Boolean);

    logger.info(`Alert triggered: ${type}`, { channels: sentChannels, alertId: alert.id });
  }

  async sendEmailAlert(alert) {
    if (!config.alerts.channels.email.enabled || !this.emailTransporter) {
      return false;
    }

    try {
      const mailOptions = {
        from: config.alerts.channels.email.from,
        to: config.alerts.channels.email.to.join(', '),
        subject: `[${alert.severity.toUpperCase()}] ${alert.title}`,
        html: this.formatEmailAlert(alert)
      };

      await this.emailTransporter.sendMail(mailOptions);
      return true;
    } catch (error) {
      logger.error('Failed to send email alert:', error);
      return false;
    }
  }

  formatEmailAlert(alert) {
    return `
      <html>
        <body style="font-family: Arial, sans-serif; color: #333;">
          <div style="max-width: 600px; margin: 0 auto; padding: 20px;">
            <div style="background-color: ${this.getSeverityColor(alert.severity)}; color: white; padding: 10px; border-radius: 5px 5px 0 0;">
              <h2 style="margin: 0;">${alert.title}</h2>
            </div>
            <div style="border: 1px solid #ddd; padding: 20px; border-radius: 0 0 5px 5px;">
              <p><strong>Time:</strong> ${new Date(alert.timestamp).toLocaleString()}</p>
              <p><strong>Type:</strong> ${alert.type}</p>
              <p><strong>Severity:</strong> ${alert.severity}</p>
              <p><strong>Message:</strong> ${alert.message}</p>

              ${alert.details ? `
                <h3>Details:</h3>
                <pre style="background: #f5f5f5; padding: 10px; border-radius: 3px; overflow-x: auto;">
${JSON.stringify(alert.details, null, 2)}
                </pre>
              ` : ''}

              <hr style="margin: 20px 0; border: none; border-top: 1px solid #ddd;">
              <p style="font-size: 12px; color: #666;">
                This alert was generated by the Telegram Marketing Agent System.
                Alert ID: ${alert.id}
              </p>
            </div>
          </div>
        </body>
      </html>
    `;
  }

  getSeverityColor(severity) {
    switch (severity) {
      case 'critical': return '#d32f2f';
      case 'high': return '#f44336';
      case 'medium': return '#ff9800';
      case 'low': return '#4caf50';
      default: return '#2196f3';
    }
  }

  async sendSlackAlert(alert) {
    if (!config.alerts.channels.slack.enabled || !config.alerts.channels.slack.webhookUrl) {
      return false;
    }

    try {
      const payload = {
        text: alert.title,
        attachments: [{
          color: this.getSeverityColor(alert.severity),
          fields: [
            { title: 'Type', value: alert.type, short: true },
            { title: 'Severity', value: alert.severity, short: true },
            { title: 'Time', value: new Date(alert.timestamp).toLocaleString(), short: true },
            { title: 'Message', value: alert.message, short: false }
          ],
          footer: `Alert ID: ${alert.id}`,
          ts: Math.floor(Date.now() / 1000)
        }]
      };

      await axios.post(config.alerts.channels.slack.webhookUrl, payload);
      return true;
    } catch (error) {
      logger.error('Failed to send Slack alert:', error);
      return false;
    }
  }

  async sendWebhookAlert(alert) {
    if (!config.alerts.channels.webhook.enabled || !config.alerts.channels.webhook.url) {
      return false;
    }

    try {
      await axios.post(config.alerts.channels.webhook.url, {
        alert,
        metadata: {
          service: 'telegram-marketing-agent',
          environment: process.env.NODE_ENV || 'development'
        }
      });
      return true;
    } catch (error) {
      logger.error('Failed to send webhook alert:', error);
      return false;
    }
  }

  async getErrorMetrics(startTime, endTime) {
    // In a real implementation, this would query the log storage
    // For now, return mock data
    return {
      rate: 0.02,
      total: 120,
      byType: {
        'timeout': 50,
        'connection_refused': 30,
        'internal_error': 40
      }
    };
  }

  async getPerformanceMetrics(startTime, endTime) {
    // In a real implementation, this would query the log storage
    // For now, return mock data
    return {
      avgResponseTime: 450,
      p95ResponseTime: 850,
      p99ResponseTime: 1200,
      slowestEndpoints: [
        { path: '/api/campaigns/execute', avg: 1500 },
        { path: '/api/analytics/report', avg: 1200 }
      ]
    };
  }

  async getResourceMetrics() {
    // Get system resource usage
    const usage = process.cpuUsage();
    const memUsage = process.memoryUsage();

    return {
      cpu: Math.min(100, Math.random() * 100), // Mock CPU usage
      memory: (memUsage.heapUsed / memUsage.heapTotal) * 100,
      disk: 45 // Mock disk usage
    };
  }

  async getAlertHistory(filters = {}) {
    const alerts = Array.from(this.alertHistory.values());

    // Apply filters
    let filtered = alerts;

    if (filters.type) {
      filtered = filtered.filter(a => a.type === filters.type);
    }

    if (filters.severity) {
      filtered = filtered.filter(a => a.severity === filters.severity);
    }

    if (filters.startTime) {
      filtered = filtered.filter(a => new Date(a.timestamp) >= new Date(filters.startTime));
    }

    if (filters.endTime) {
      filtered = filtered.filter(a => new Date(a.timestamp) <= new Date(filters.endTime));
    }

    // Sort by timestamp descending
    filtered.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));

    return filtered;
  }

  async acknowledgeAlert(alertId) {
    const alert = this.alertHistory.get(alertId);
    if (!alert) {
      throw new Error('Alert not found');
    }

    alert.acknowledged = true;
    alert.acknowledgedAt = new Date().toISOString();

    return alert;
  }

  async clearAlert(alertId) {
    return this.alertHistory.delete(alertId);
  }

  isRunning() {
    return this.running;
  }

  async stop() {
    this.running = false;

    // Close email transporter
    if (this.emailTransporter) {
      this.emailTransporter.close();
    }

    logger.info('Alert manager stopped');
  }
}
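
// Illustrative sketch, not part of this commit: a minimal receiver for the
// webhook channel above. sendWebhookAlert() POSTs a JSON body of the shape
// { alert, metadata }, so a consumer only needs those two fields. The Express
// app and the /alerts route are assumptions for the example.
//
//   import express from 'express';
//   const app = express();
//   app.use(express.json());
//   app.post('/alerts', (req, res) => {
//     const { alert, metadata } = req.body;
//     console.log(`[${alert.severity}] ${alert.title} (${metadata.environment})`);
//     res.sendStatus(204);
//   });
//   app.listen(4000);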

548
marketing-agent/services/logging/src/services/logAnalyzer.js
Normal file
@@ -0,0 +1,548 @@
import cron from 'node-cron';
import { config } from '../config/index.js';
import { logger } from '../utils/logger.js';

export class LogAnalyzer {
  constructor(logStorage) {
    this.logStorage = logStorage;
    this.running = false;
    this.patterns = config.analysis.patterns;
    this.anomalyDetector = null;
    this.cronJobs = [];
  }

  async start() {
    try {
      // Initialize anomaly detection
      if (config.analysis.anomaly.enabled) {
        await this.initializeAnomalyDetection();
      }

      // Schedule analysis jobs
      this.scheduleAnalysisJobs();

      this.running = true;
      logger.info('Log analyzer started successfully');
    } catch (error) {
      logger.error('Failed to start log analyzer:', error);
      throw error;
    }
  }

  scheduleAnalysisJobs() {
    // Analyze error patterns every 5 minutes
    const errorAnalysisJob = cron.schedule('*/5 * * * *', async () => {
      await this.analyzeErrorPatterns();
    });
    this.cronJobs.push(errorAnalysisJob);

    // Analyze performance metrics every 10 minutes
    const performanceJob = cron.schedule('*/10 * * * *', async () => {
      await this.analyzePerformanceMetrics();
    });
    this.cronJobs.push(performanceJob);

    // Security analysis every hour
    const securityJob = cron.schedule('0 * * * *', async () => {
      await this.analyzeSecurityEvents();
    });
    this.cronJobs.push(securityJob);

    // Anomaly detection every 15 minutes
    if (config.analysis.anomaly.enabled) {
      const anomalyJob = cron.schedule('*/15 * * * *', async () => {
        await this.detectAnomalies();
      });
      this.cronJobs.push(anomalyJob);
    }

    // Daily report generation
    const reportJob = cron.schedule('0 9 * * *', async () => {
      await this.generateDailyReport();
    });
    this.cronJobs.push(reportJob);
  }

  async analyzeErrorPatterns() {
    try {
      const endTime = new Date();
      const startTime = new Date(endTime - 5 * 60 * 1000); // Last 5 minutes

      // Get error logs
      const errors = await this.logStorage.search({
        index: this.logStorage.indices.errors,
        query: {
          range: {
            '@timestamp': {
              gte: startTime.toISOString(),
              lte: endTime.toISOString()
            }
          }
        },
        size: 1000
      });

      // Analyze patterns
      const analysis = {
        timestamp: new Date().toISOString(),
        period: { start: startTime, end: endTime },
        totalErrors: errors.total,
        patterns: {},
        topErrors: [],
        services: {}
      };

      // Count error types
      const errorCounts = new Map();
      const serviceCounts = new Map();

      for (const error of errors.hits) {
        // Count by error type
        const errorType = error.error?.type || 'Unknown';
        errorCounts.set(errorType, (errorCounts.get(errorType) || 0) + 1);

        // Count by service
        const service = error.service || 'Unknown';
        serviceCounts.set(service, (serviceCounts.get(service) || 0) + 1);

        // Pattern matching
        const message = error.error?.message || error.message || '';
        for (const [patternName, patterns] of Object.entries(this.patterns.error)) {
          if (patterns.some(pattern => pattern.test(message))) {
            analysis.patterns[patternName] = (analysis.patterns[patternName] || 0) + 1;
          }
        }
      }

      // Top errors
      analysis.topErrors = Array.from(errorCounts.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 10)
        .map(([type, count]) => ({ type, count }));

      // Service breakdown
      analysis.services = Object.fromEntries(serviceCounts);

      // Store analysis result
      await this.logStorage.store('metrics', {
        metric: 'error_analysis',
        value: errors.total,
        dimensions: {
          type: 'pattern_analysis'
        },
        analysis
      });

      // Check thresholds
      await this.checkErrorThresholds(analysis);

      logger.debug('Error pattern analysis completed', { totalErrors: errors.total });
    } catch (error) {
      logger.error('Failed to analyze error patterns:', error);
    }
  }

  async analyzePerformanceMetrics() {
    try {
      const endTime = new Date();
      const startTime = new Date(endTime - 10 * 60 * 1000); // Last 10 minutes

      // Aggregate performance metrics
      const metrics = await this.logStorage.aggregate({
        index: this.logStorage.indices.metrics,
        query: {
          bool: {
            must: [
              { range: { '@timestamp': { gte: startTime.toISOString(), lte: endTime.toISOString() } } },
              { term: { metric: 'response_time' } }
            ]
          }
        },
        aggregations: {
          by_service: {
            terms: { field: 'service', size: 50 },
            aggs: {
              avg_response: { avg: { field: 'value' } },
              p95_response: { percentiles: { field: 'value', percents: [95] } },
              p99_response: { percentiles: { field: 'value', percents: [99] } }
            }
          },
          // A single stats aggregation yields avg/max/min together; sibling
          // metric aggregations under one name are not valid Elasticsearch DSL.
          overall: {
            stats: { field: 'value' }
          }
        }
      });

      // Store aggregated metrics
      const analysis = {
        timestamp: new Date().toISOString(),
        period: { start: startTime, end: endTime },
        overall: {
          avg: metrics.overall.avg,
          max: metrics.overall.max,
          min: metrics.overall.min
        },
        services: {}
      };

      // Process service metrics
      for (const bucket of metrics.by_service.buckets) {
        analysis.services[bucket.key] = {
          count: bucket.doc_count,
          avg: bucket.avg_response.value,
          p95: bucket.p95_response.values['95.0'],
          p99: bucket.p99_response.values['99.0']
        };
      }

      // Store analysis
      await this.logStorage.store('metrics', {
        metric: 'performance_analysis',
        value: analysis.overall.avg,
        dimensions: {
          type: 'service_performance'
        },
        analysis
      });

      // Check performance thresholds
      await this.checkPerformanceThresholds(analysis);

      logger.debug('Performance analysis completed');
    } catch (error) {
      logger.error('Failed to analyze performance metrics:', error);
    }
  }

  async analyzeSecurityEvents() {
    try {
      const endTime = new Date();
      const startTime = new Date(endTime - 60 * 60 * 1000); // Last hour

      // Search for security-related logs
      const securityLogs = await this.logStorage.search({
        index: `${this.logStorage.indices.logs}*`,
        query: {
          bool: {
            must: [
              { range: { '@timestamp': { gte: startTime.toISOString(), lte: endTime.toISOString() } } }
            ],
            should: this.patterns.security.map(pattern => ({
              regexp: { message: pattern.source }
            })),
            minimum_should_match: 1
          }
        },
        size: 1000
      });

      // Analyze security events
      const analysis = {
        timestamp: new Date().toISOString(),
        period: { start: startTime, end: endTime },
        totalEvents: securityLogs.total,
        eventTypes: {},
        sources: {},
        targets: {}
      };

      // Categorize events
      for (const log of securityLogs.hits) {
        const message = log.message || '';

        // Categorize by type
        if (/unauthorized/i.test(message)) {
          analysis.eventTypes.unauthorized = (analysis.eventTypes.unauthorized || 0) + 1;
        } else if (/forbidden/i.test(message)) {
          analysis.eventTypes.forbidden = (analysis.eventTypes.forbidden || 0) + 1;
        } else if (/invalid token/i.test(message)) {
          analysis.eventTypes.invalidToken = (analysis.eventTypes.invalidToken || 0) + 1;
        } else if (/authentication failed/i.test(message)) {
          analysis.eventTypes.authFailed = (analysis.eventTypes.authFailed || 0) + 1;
        }

        // Track sources
        if (log.ip) {
          analysis.sources[log.ip] = (analysis.sources[log.ip] || 0) + 1;
        }

        // Track targets
        if (log.path) {
          analysis.targets[log.path] = (analysis.targets[log.path] || 0) + 1;
        }
      }

      // Store analysis
      await this.logStorage.store('metrics', {
        metric: 'security_analysis',
        value: securityLogs.total,
        dimensions: {
          type: 'security_events'
        },
        analysis
      });

      // Check for security threats
      await this.checkSecurityThreats(analysis);

      logger.debug('Security analysis completed', { totalEvents: securityLogs.total });
    } catch (error) {
      logger.error('Failed to analyze security events:', error);
    }
  }

  async detectAnomalies() {
    try {
      // Simple anomaly detection based on statistical deviation
      const endTime = new Date();
      const currentWindow = new Date(endTime - 15 * 60 * 1000); // Last 15 minutes
      const historicalStart = new Date(endTime - 7 * 24 * 60 * 60 * 1000); // Last 7 days

      // Get current metrics
      const currentMetrics = await this.getMetricStats(currentWindow, endTime);

      // Get historical baseline
      const historicalMetrics = await this.getHistoricalBaseline(historicalStart, currentWindow);

      // Detect anomalies
      const anomalies = [];

      // Check error rate anomaly
      if (currentMetrics.errorRate > historicalMetrics.errorRate.mean + 2 * historicalMetrics.errorRate.stdDev) {
        anomalies.push({
          type: 'error_rate',
          severity: 'high',
          current: currentMetrics.errorRate,
          expected: historicalMetrics.errorRate.mean,
          deviation: (currentMetrics.errorRate - historicalMetrics.errorRate.mean) / historicalMetrics.errorRate.stdDev
        });
      }

      // Check response time anomaly
      if (currentMetrics.avgResponseTime > historicalMetrics.responseTime.mean + 2 * historicalMetrics.responseTime.stdDev) {
        anomalies.push({
          type: 'response_time',
          severity: 'medium',
          current: currentMetrics.avgResponseTime,
          expected: historicalMetrics.responseTime.mean,
          deviation: (currentMetrics.avgResponseTime - historicalMetrics.responseTime.mean) / historicalMetrics.responseTime.stdDev
        });
      }

      // Store anomalies
      if (anomalies.length > 0) {
        await this.logStorage.store('metrics', {
          metric: 'anomaly_detection',
          value: anomalies.length,
          dimensions: {
            type: 'statistical_anomaly'
          },
          anomalies
        });

        logger.warn('Anomalies detected:', anomalies);
      }

      return anomalies;
    } catch (error) {
      logger.error('Failed to detect anomalies:', error);
      return [];
    }
  }

  async getMetricStats(startTime, endTime) {
    const logs = await this.logStorage.aggregate({
      index: `${this.logStorage.indices.logs}*`,
      query: {
        range: {
          '@timestamp': {
            gte: startTime.toISOString(),
            lte: endTime.toISOString()
          }
        }
      },
      aggregations: {
        total_requests: { value_count: { field: '_id' } },
        error_count: {
          filter: { range: { statusCode: { gte: 400 } } }
        },
        avg_response_time: {
          avg: { field: 'responseTime' }
        }
      }
    });

    const totalRequests = logs.total_requests.value || 1;
    const errorCount = logs.error_count.doc_count || 0;

    return {
      errorRate: errorCount / totalRequests,
      avgResponseTime: logs.avg_response_time.value || 0,
      totalRequests
    };
  }

  async getHistoricalBaseline(startTime, endTime) {
    // Get hourly stats for the historical period
    const hourlyStats = await this.logStorage.aggregate({
      index: `${this.logStorage.indices.logs}*`,
      query: {
        range: {
          '@timestamp': {
            gte: startTime.toISOString(),
            lte: endTime.toISOString()
          }
        }
      },
      aggregations: {
        hourly: {
          date_histogram: {
            field: '@timestamp',
            fixed_interval: '1h'
          },
          aggs: {
            total: { value_count: { field: '_id' } },
            errors: {
              filter: { range: { statusCode: { gte: 400 } } }
            },
            avg_response: { avg: { field: 'responseTime' } }
          }
        }
      }
    });

    // Calculate statistics
    const errorRates = [];
    const responseTimes = [];

    for (const bucket of hourlyStats.hourly.buckets) {
      const total = bucket.total.value || 1;
      const errors = bucket.errors.doc_count || 0;

      errorRates.push(errors / total);
      if (bucket.avg_response.value) {
        responseTimes.push(bucket.avg_response.value);
      }
    }

    return {
      errorRate: this.calculateStats(errorRates),
      responseTime: this.calculateStats(responseTimes)
    };
  }

  calculateStats(values) {
    const n = values.length;
    if (n === 0) return { mean: 0, stdDev: 0 };

    const mean = values.reduce((a, b) => a + b, 0) / n;
    const variance = values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / n;
    const stdDev = Math.sqrt(variance);

    return { mean, stdDev };
  }
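
  // Worked example of the 2-sigma rule used by detectAnomalies(): for hourly
  // error rates [0.01, 0.02, 0.015, 0.015], calculateStats() returns
  // mean = 0.015 and stdDev ≈ 0.0035, so the anomaly threshold is
  // mean + 2 * stdDev ≈ 0.022. A current error rate of 0.05 would be flagged
  // with deviation ≈ (0.05 - 0.015) / 0.0035 ≈ 9.9.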

  async generateDailyReport() {
    try {
      const endTime = new Date();
      endTime.setHours(0, 0, 0, 0);
      const startTime = new Date(endTime - 24 * 60 * 60 * 1000);

      // Get comprehensive stats
      const stats = await this.logStorage.getStats(null, '24h');

      const report = {
        timestamp: new Date().toISOString(),
        period: { start: startTime, end: endTime },
        summary: {
          totalLogs: stats.total_count.value,
          byLevel: Object.fromEntries(
            stats.by_level.buckets.map(b => [b.key, b.doc_count])
          ),
          byService: Object.fromEntries(
            stats.by_service.buckets.map(b => [b.key, b.doc_count])
          )
        },
        timeline: stats.over_time.buckets.map(bucket => ({
          time: bucket.key_as_string,
          total: bucket.doc_count,
          byLevel: Object.fromEntries(
            bucket.by_level.buckets.map(b => [b.key, b.doc_count])
          )
        }))
      };

      // Store report
      await this.logStorage.store('metrics', {
        metric: 'daily_report',
        value: 1,
        dimensions: {
          type: 'report',
          date: endTime.toISOString().split('T')[0]
        },
        report
      });

      logger.info('Daily report generated', { date: endTime.toISOString().split('T')[0] });

      return report;
    } catch (error) {
      logger.error('Failed to generate daily report:', error);
      throw error;
    }
  }

  async checkErrorThresholds(analysis) {
    const errorRate = analysis.totalErrors / (5 * 60); // Errors per second over the 5-minute window
    if (errorRate > config.alerts.rules.errorRate.threshold) {
      logger.warn('Error rate threshold exceeded', {
        rate: errorRate,
        threshold: config.alerts.rules.errorRate.threshold
      });
      // Trigger alert (handled by AlertManager)
    }
  }

  async checkPerformanceThresholds(analysis) {
    if (analysis.overall.avg > config.alerts.rules.responseTime.threshold) {
      logger.warn('Response time threshold exceeded', {
        avg: analysis.overall.avg,
        threshold: config.alerts.rules.responseTime.threshold
      });
      // Trigger alert (handled by AlertManager)
    }
  }

  async checkSecurityThreats(analysis) {
    // Check for brute force attempts
    for (const [ip, count] of Object.entries(analysis.sources)) {
      if (count > 100) { // More than 100 security events from a single IP in an hour
        logger.warn('Potential brute force attack detected', { ip, count });
        // Trigger security alert
      }
    }
  }

  async initializeAnomalyDetection() {
    // Initialize anomaly detection model
    // In a real implementation, this would load ML models
    logger.info('Anomaly detection initialized');
  }

  isRunning() {
    return this.running;
  }

  async stop() {
    this.running = false;

    // Stop all cron jobs
    for (const job of this.cronJobs) {
      job.stop();
    }

    logger.info('Log analyzer stopped');
  }
}

246
marketing-agent/services/logging/src/services/logCollector.js
Normal file
@@ -0,0 +1,246 @@
import Bull from 'bull';
import Redis from 'ioredis';
import { config } from '../config/index.js';
import { logger } from '../utils/logger.js';

export class LogCollector {
  constructor(logStorage) {
    this.logStorage = logStorage;
    this.redis = new Redis(config.redis);
    this.queues = {};
    this.batchBuffer = new Map();
    this.flushInterval = null;
    this.running = false;
  }

  async start() {
    try {
      // Initialize queues for different log types
      this.queues.logs = new Bull('log-queue', { redis: config.redis });
      this.queues.metrics = new Bull('metric-queue', { redis: config.redis });
      this.queues.errors = new Bull('error-queue', { redis: config.redis });

      // Process queues
      this.setupQueueProcessors();

      // Start batch processing
      this.startBatchProcessing();

      // Subscribe to Redis pub/sub for real-time logs
      await this.subscribeToLogChannels();

      this.running = true;
      logger.info('Log collector started successfully');
    } catch (error) {
      logger.error('Failed to start log collector:', error);
      throw error;
    }
  }

  setupQueueProcessors() {
    // Process log queue
    this.queues.logs.process(async (job) => {
      await this.processLog('logs', job.data);
    });

    // Process metric queue
    this.queues.metrics.process(async (job) => {
      await this.processLog('metrics', job.data);
    });

    // Process error queue
    this.queues.errors.process(async (job) => {
      await this.processLog('errors', job.data);
    });

    // Handle queue errors
    Object.values(this.queues).forEach(queue => {
      queue.on('error', (error) => {
        logger.error('Queue error:', error);
      });

      queue.on('failed', (job, error) => {
        logger.error(`Job ${job.id} failed:`, error);
      });
    });
  }

  async processLog(type, data) {
    try {
      // Add to batch buffer
      if (!this.batchBuffer.has(type)) {
        this.batchBuffer.set(type, []);
      }

      const buffer = this.batchBuffer.get(type);
      buffer.push(data);

      // Flush if buffer is full
      if (buffer.length >= config.collection.batchSize) {
        await this.flushBuffer(type);
      }
    } catch (error) {
      logger.error(`Failed to process ${type} log:`, error);
    }
  }

  startBatchProcessing() {
    // Periodic flush
    this.flushInterval = setInterval(async () => {
      for (const [type, buffer] of this.batchBuffer.entries()) {
        if (buffer.length > 0) {
          await this.flushBuffer(type);
        }
      }
    }, config.collection.flushInterval);
  }

  async flushBuffer(type) {
    const buffer = this.batchBuffer.get(type);
    if (!buffer || buffer.length === 0) return;

    try {
      // Store in Elasticsearch
      await this.logStorage.bulkStore(type, buffer);

      // Clear buffer
      this.batchBuffer.set(type, []);

      logger.debug(`Flushed ${buffer.length} ${type} logs to storage`);
    } catch (error) {
      logger.error(`Failed to flush ${type} buffer:`, error);
      // Re-queue failed logs
      for (const log of buffer) {
        await this.queues[type].add(log, { delay: 5000 });
      }
      this.batchBuffer.set(type, []);
    }
  }
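
  // Note: a failed flush re-queues each buffered entry with a 5s delay instead
  // of dropping it, so a transient Elasticsearch outage only defers delivery;
  // entries keep cycling through processLog()/flushBuffer() until storage recovers.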

  async subscribeToLogChannels() {
    const subscriber = new Redis(config.redis);

    // Subscribe to log channels
    await subscriber.subscribe('logs:app', 'logs:metrics', 'logs:errors');

    subscriber.on('message', async (channel, message) => {
      try {
        const data = JSON.parse(message);
        const type = channel.split(':')[1];

        // Enrich log data
        const enrichedData = this.enrichLogData(data, type);

        // Add to appropriate queue
        const queue = this.queues[type] || this.queues.logs;
        await queue.add(enrichedData);
      } catch (error) {
        logger.error('Failed to process message from channel:', error);
      }
    });

    this.subscriber = subscriber;
  }

  enrichLogData(data, type) {
    const enriched = {
      ...data,
      collectedAt: new Date().toISOString(),
      environment: process.env.NODE_ENV || 'development',
      host: process.env.HOSTNAME || 'unknown'
    };

    // Add type-specific enrichment
    switch (type) {
      case 'metrics':
        enriched.metric = enriched.metric || 'unknown';
        enriched.value = parseFloat(enriched.value) || 0;
        break;

      case 'errors':
        if (enriched.error && typeof enriched.error === 'string') {
          enriched.error = {
            message: enriched.error,
            type: 'Error'
          };
        }
        break;
    }

    return enriched;
  }

  async collectFromService(serviceName, endpoint) {
    try {
      // HTTP endpoint for pulling logs from services
      const response = await fetch(`${endpoint}/logs/recent`);
      const logs = await response.json();

      for (const log of logs) {
        const enrichedLog = {
          ...log,
          service: serviceName,
          collectedAt: new Date().toISOString()
        };

        await this.queues.logs.add(enrichedLog);
      }
    } catch (error) {
      logger.error(`Failed to collect logs from ${serviceName}:`, error);
    }
  }

  async getQueueStats() {
    const stats = {};

    for (const [name, queue] of Object.entries(this.queues)) {
      const counts = await queue.getJobCounts();
      stats[name] = {
        waiting: counts.waiting,
        active: counts.active,
        completed: counts.completed,
        failed: counts.failed,
        delayed: counts.delayed
      };
    }

    // Add buffer stats
    stats.buffers = {};
    for (const [type, buffer] of this.batchBuffer.entries()) {
      stats.buffers[type] = buffer.length;
    }

    return stats;
  }

  isRunning() {
    return this.running;
  }

  async stop() {
    this.running = false;

    // Flush all buffers
    for (const type of this.batchBuffer.keys()) {
      await this.flushBuffer(type);
    }

    // Clear interval
    if (this.flushInterval) {
      clearInterval(this.flushInterval);
    }

    // Close queues
    for (const queue of Object.values(this.queues)) {
      await queue.close();
    }

    // Close Redis connections
    if (this.subscriber) {
      await this.subscriber.disconnect();
    }
    await this.redis.disconnect();

    logger.info('Log collector stopped');
  }
}
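
// Illustrative sketch, not part of this commit: how a service feeds this
// collector. subscribeToLogChannels() listens on the Redis channels 'logs:app',
// 'logs:metrics' and 'logs:errors' and JSON-parses each message, so a producer
// just publishes a JSON payload on one of them. The connection options and
// field values below are assumptions following the mappings in this commit.
//
//   import Redis from 'ioredis';
//   const redis = new Redis({ host: 'localhost', port: 6379 });
//   await redis.publish('logs:app', JSON.stringify({
//     service: 'campaign-service',
//     level: 'info',
//     message: 'Campaign dispatched',
//     '@timestamp': new Date().toISOString()
//   }));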

370
marketing-agent/services/logging/src/services/logStorage.js
Normal file
@@ -0,0 +1,370 @@
import { Client } from '@elastic/elasticsearch';
import { config } from '../config/index.js';
import { logger } from '../utils/logger.js';

export class LogStorage {
  constructor() {
    this.client = new Client({
      node: config.elasticsearch.node,
      auth: config.elasticsearch.auth
    });
    this.indices = config.elasticsearch.indices;
    this.connected = false;
  }

  async initialize() {
    try {
      // Check connection
      await this.client.info();
      this.connected = true;
      logger.info('Connected to Elasticsearch');

      // Create indices if they don't exist
      await this.createIndices();

      // Set up index templates
      await this.createIndexTemplates();

      return true;
    } catch (error) {
      logger.error('Failed to initialize Elasticsearch:', error);
      throw error;
    }
  }

  async createIndices() {
    for (const [name, index] of Object.entries(this.indices)) {
      try {
        const exists = await this.client.indices.exists({ index });

        if (!exists) {
          await this.client.indices.create({
            index,
            body: {
              settings: {
                number_of_shards: 3,
                number_of_replicas: 1,
                'index.lifecycle.name': `${name}-policy`,
                'index.lifecycle.rollover_alias': index
              },
              mappings: this.getMappings(name)
            }
          });

          logger.info(`Created index: ${index}`);
        }
      } catch (error) {
        logger.error(`Failed to create index ${index}:`, error);
      }
    }
  }

  getMappings(indexType) {
    const baseMapping = {
      properties: {
        '@timestamp': { type: 'date' },
        service: { type: 'keyword' },
        level: { type: 'keyword' },
        message: { type: 'text' },
        host: { type: 'keyword' },
        environment: { type: 'keyword' }
      }
    };

    switch (indexType) {
      case 'logs':
        return {
          ...baseMapping,
          properties: {
            ...baseMapping.properties,
            userId: { type: 'keyword' },
            requestId: { type: 'keyword' },
            method: { type: 'keyword' },
            path: { type: 'keyword' },
            statusCode: { type: 'integer' },
            responseTime: { type: 'float' },
            userAgent: { type: 'text' },
            ip: { type: 'ip' },
            metadata: { type: 'object', enabled: false }
          }
        };

      case 'metrics':
        return {
          ...baseMapping,
          properties: {
            ...baseMapping.properties,
            metric: { type: 'keyword' },
            value: { type: 'float' },
            unit: { type: 'keyword' },
            tags: { type: 'keyword' },
            dimensions: {
              type: 'object',
              properties: {
                campaign: { type: 'keyword' },
                user: { type: 'keyword' },
                action: { type: 'keyword' }
              }
            }
          }
        };

      case 'errors':
        return {
          ...baseMapping,
          properties: {
            ...baseMapping.properties,
            error: {
              type: 'object',
              properties: {
                type: { type: 'keyword' },
                message: { type: 'text' },
                stack: { type: 'text' },
                code: { type: 'keyword' }
              }
            },
            context: {
              type: 'object',
              properties: {
                userId: { type: 'keyword' },
                campaignId: { type: 'keyword' },
                action: { type: 'keyword' }
              }
            }
          }
        };

      default:
        return baseMapping;
    }
  }

  async createIndexTemplates() {
    // Create lifecycle policies
    for (const [name, days] of Object.entries(config.retention)) {
      try {
        await this.client.ilm.putLifecycle({
          name: `${name}-policy`,
          body: {
            policy: {
              phases: {
                hot: {
                  actions: {
                    rollover: {
                      max_size: '50GB',
                      max_age: '7d'
                    }
                  }
                },
                delete: {
                  min_age: `${days}d`,
                  actions: {
                    delete: {}
                  }
                }
              }
            }
          }
        });
      } catch (error) {
        logger.error(`Failed to create lifecycle policy for ${name}:`, error);
      }
    }
  }

  async store(type, document) {
    try {
      const index = this.indices[type] || this.indices.logs;

      await this.client.index({
        index,
        body: {
          '@timestamp': new Date().toISOString(),
          ...document
        }
      });

      return true;
    } catch (error) {
      logger.error('Failed to store document:', error);
      return false;
    }
  }

  async bulkStore(type, documents) {
    try {
      const index = this.indices[type] || this.indices.logs;
      const body = documents.flatMap(doc => [
        { index: { _index: index } },
        { '@timestamp': new Date().toISOString(), ...doc }
      ]);

      const result = await this.client.bulk({ body });

      if (result.errors) {
        logger.error('Bulk store had errors:', result.errors);
      }

      return !result.errors;
    } catch (error) {
      logger.error('Failed to bulk store documents:', error);
      return false;
    }
  }

  async search(params) {
    try {
      const result = await this.client.search({
        index: params.index || `${this.indices.logs}*`,
        body: {
          query: params.query || { match_all: {} },
          sort: params.sort || [{ '@timestamp': { order: 'desc' } }],
          size: params.size || 100,
          from: params.from || 0,
          aggs: params.aggregations
        }
      });

      return {
        total: result.hits.total.value,
        hits: result.hits.hits.map(hit => ({
          _id: hit._id,
          _index: hit._index,
          ...hit._source
        })),
        aggregations: result.aggregations
      };
    } catch (error) {
      logger.error('Search failed:', error);
      throw error;
    }
  }

  async aggregate(params) {
    try {
      const result = await this.client.search({
        index: params.index || `${this.indices.logs}*`,
        body: {
          size: 0,
          query: params.query || { match_all: {} },
          aggs: params.aggregations
        }
      });

      return result.aggregations;
    } catch (error) {
      logger.error('Aggregation failed:', error);
      throw error;
    }
  }

  async getStats(index, timeRange = '24h') {
    const query = {
      range: {
        '@timestamp': {
          gte: `now-${timeRange}`
        }
      }
    };

    const aggregations = {
      total_count: {
        value_count: {
          field: '_id'
        }
      },
      by_level: {
        terms: {
          field: 'level',
          size: 10
        }
      },
      by_service: {
        terms: {
          field: 'service',
          size: 20
        }
      },
      over_time: {
        date_histogram: {
          field: '@timestamp',
          fixed_interval: this.getInterval(timeRange),
          extended_bounds: {
            min: `now-${timeRange}`,
            max: 'now'
          }
        },
        aggs: {
          by_level: {
            terms: {
              field: 'level'
            }
          }
        }
      }
    };

    return await this.aggregate({
      index: index || `${this.indices.logs}*`,
      query,
      aggregations
    });
  }

  getInterval(timeRange) {
    const match = timeRange.match(/(\d+)([hdwMy])/);
    if (!match) return '1h';

    const [, value, unit] = match;
    const num = parseInt(value, 10);

    switch (unit) {
      case 'h':
        return num <= 24 ? '5m' : '1h';
      case 'd':
        return num <= 7 ? '1h' : '1d';
      case 'w':
        return num <= 4 ? '1d' : '1w';
      case 'M':
        return num <= 3 ? '1d' : '1w';
      case 'y':
        return '1M';
      default:
        return '1h';
    }
  }
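
  // For example: getInterval('24h') -> '5m' (288 buckets), getInterval('3d') -> '1h'
  // (72 buckets) and getInterval('30d') -> '1d' (30 buckets), keeping histogram
  // sizes manageable regardless of the requested window.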

  async deleteOldData() {
    for (const [type, days] of Object.entries(config.retention)) {
      try {
        const index = this.indices[type];

        await this.client.deleteByQuery({
          index: `${index}*`,
          body: {
            query: {
              range: {
                '@timestamp': {
                  lt: `now-${days}d`
                }
              }
            }
          }
        });

        logger.info(`Deleted old data from ${index} older than ${days} days`);
      } catch (error) {
        logger.error(`Failed to delete old data from ${type}:`, error);
      }
    }
  }

  isConnected() {
    return this.connected;
  }

  async close() {
    await this.client.close();
    this.connected = false;
  }
}
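
// Illustrative usage sketch, not part of this commit: pulling the last hour of
// error documents through search(). The parameter shapes mirror the method
// above; 'now-1h' is standard Elasticsearch date math.
//
//   const storage = new LogStorage();
//   await storage.initialize();
//   const { total, hits } = await storage.search({
//     index: storage.indices.errors,
//     query: { range: { '@timestamp': { gte: 'now-1h' } } },
//     size: 50
//   });
//   console.log(`${total} errors in the last hour`, hits[0]);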

56
marketing-agent/services/logging/src/utils/logger.js
Normal file
@@ -0,0 +1,56 @@
import winston from 'winston';
import DailyRotateFile from 'winston-daily-rotate-file';

const { combine, timestamp, errors, json, printf } = winston.format;

// Custom format for console output
const consoleFormat = printf(({ level, message, timestamp, ...metadata }) => {
  let msg = `${timestamp} [${level}]: ${message}`;
  if (Object.keys(metadata).length > 0) {
    msg += ` ${JSON.stringify(metadata)}`;
  }
  return msg;
});

// Create logger instance
export const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: combine(
    errors({ stack: true }),
    timestamp(),
    json()
  ),
  defaultMeta: { service: 'logging-service' },
  transports: [
    // Console transport
    new winston.transports.Console({
      format: combine(
        winston.format.colorize(),
        timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
        consoleFormat
      )
    }),
    // File transport for errors
    new winston.transports.File({
      filename: 'logs/error.log',
      level: 'error',
      maxsize: 10485760, // 10MB
      maxFiles: 5
    }),
    // Daily rotate file for all logs
    new DailyRotateFile({
      filename: 'logs/application-%DATE%.log',
      datePattern: 'YYYY-MM-DD',
      zippedArchive: true,
      maxSize: '20m',
      maxFiles: '14d'
    })
  ]
});

// Create stream for Morgan HTTP logging
logger.stream = {
  write: (message) => {
    logger.info(message.trim());
  }
};
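
// Illustrative usage sketch, not part of this commit: the stream object above is
// shaped for Morgan, so an Express service could route HTTP access logs through
// this Winston logger like so (morgan and app are assumptions here):
//
//   import morgan from 'morgan';
//   app.use(morgan('combined', { stream: logger.stream }));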