// monitoring.ts (10.7 kB)
import { log } from './logger.js';
import { getConfig } from './config.js';
/**
 * Aggregate server metrics snapshot maintained by the monitoring service.
 * All times are in milliseconds (they are derived from `Date.now()`).
 */
export interface ServerMetrics {
// Milliseconds elapsed since `startTime`, recomputed whenever metrics are refreshed.
uptime: number;
// Latest `process.memoryUsage()` reading.
memory: NodeJS.MemoryUsage;
// Number of requests started (incremented when a request begins, not when it completes).
requestCount: number;
// Number of requests that completed unsuccessfully.
errorCount: number;
// Duration statistics computed over the retained (most recent) completed requests.
responseTime: {
average: number;
min: number;
max: number;
percentile95: number;
};
// Started-request count keyed by tool name.
toolUsage: Record<string, number>;
// `Date.now()` of the most recently started request; 0 until the first request.
lastRequestTime: number;
// `Date.now()` captured when the metrics were created or last reset.
startTime: number;
}
/**
 * Record of a single completed request, kept in the service's bounded history.
 */
interface RequestMetric {
// `Date.now()` at the moment the request was recorded as complete.
timestamp: number;
// Milliseconds between the caller-supplied start time and completion.
duration: number;
// Name of the tool that handled the request.
tool: string;
// Whether the request completed without error.
success: boolean;
// Error message for failed requests, when one was provided.
error?: string;
}
/**
 * In-process collector for request, error, latency and memory metrics.
 *
 * Lifetime counters (`requestCount`, `errorCount`, `toolUsage`) grow until
 * `resetMetrics()` is called, while per-request records are kept only for the
 * most recent `maxRequestMetrics` completions. Latency statistics and
 * per-tool averages/error rates are therefore computed over that window.
 */
class MonitoringService {
  private metrics: ServerMetrics;
  private requestMetrics: RequestMetric[] = [];
  private maxRequestMetrics = 1000;
  private requestIdCounter = 0;
  private config = getConfig();

  constructor() {
    this.metrics = MonitoringService.createInitialMetrics();
    // Refresh uptime/memory once a minute. The timer is unref'd so that the
    // monitoring service alone never keeps the Node.js process alive.
    setInterval(() => this.updateMetrics(), 60000).unref();
  }

  /** Build a zeroed metrics object whose `startTime` is "now". */
  private static createInitialMetrics(): ServerMetrics {
    return {
      uptime: 0,
      memory: process.memoryUsage(),
      requestCount: 0,
      errorCount: 0,
      responseTime: {
        average: 0,
        min: 0,
        max: 0,
        percentile95: 0,
      },
      toolUsage: {},
      lastRequestTime: 0,
      startTime: Date.now(),
    };
  }

  /** Generate a request ID of the form `req-<epoch ms>-<counter>`. */
  generateRequestId(): string {
    return `req-${Date.now()}-${++this.requestIdCounter}`;
  }

  /**
   * Record the start of a request for `tool`.
   *
   * @param tool Name of the tool being invoked.
   * @param requestId Optional caller-supplied ID; one is generated when omitted or empty.
   * @returns The request ID associated with this request.
   */
  startRequest(tool: string, requestId?: string): string {
    const id = requestId || this.generateRequestId();
    this.metrics.requestCount++;
    this.metrics.lastRequestTime = Date.now();

    // Only trust own properties: a tool named e.g. "constructor" would
    // otherwise read an inherited member and turn the counter into NaN.
    const usage = this.metrics.toolUsage;
    const previous = Object.prototype.hasOwnProperty.call(usage, tool) ? usage[tool] : 0;
    usage[tool] = previous + 1;

    log.info(`Request started: ${tool}`, { tool, requestId: id });
    return id;
  }

  /**
   * Record the completion of a request.
   *
   * @param tool Name of the tool that handled the request.
   * @param startTime `Date.now()` value captured when the request began.
   * @param success Whether the request completed without error.
   * @param error Optional error message for failed requests.
   * @param requestId Optional request ID, used only for logging.
   */
  completeRequest(tool: string, startTime: number, success: boolean, error?: string, requestId?: string): void {
    const duration = Date.now() - startTime;

    this.requestMetrics.push({
      timestamp: Date.now(),
      duration,
      tool,
      success,
      error,
    });

    // Keep only the most recent records so memory use stays bounded.
    if (this.requestMetrics.length > this.maxRequestMetrics) {
      this.requestMetrics.shift();
    }

    if (!success) {
      this.metrics.errorCount++;
    }

    this.updateResponseTimeMetrics();

    log.info(`Request completed: ${tool}`, {
      tool,
      duration,
      success,
      error,
      requestId,
    });
  }

  /** Recompute average/min/max/p95 over the retained request records. */
  private updateResponseTimeMetrics(): void {
    if (this.requestMetrics.length === 0) return;

    const durations = this.requestMetrics.map(m => m.duration).sort((a, b) => a - b);
    const count = durations.length;
    const sum = durations.reduce((a, b) => a + b, 0);
    // Nearest-rank percentile: the smallest value with at least 95% of the
    // samples at or below it. (floor(n * 0.95) would pick the maximum for
    // every n <= 20.)
    const p95Index = Math.max(0, Math.ceil(count * 0.95) - 1);

    this.metrics.responseTime = {
      average: Math.round(sum / count),
      min: durations[0],
      max: durations[count - 1],
      percentile95: durations[p95Index],
    };
  }

  /** Refresh uptime and memory readings and emit a debug log line. */
  private updateMetrics(): void {
    this.metrics.uptime = Date.now() - this.metrics.startTime;
    this.metrics.memory = process.memoryUsage();
    log.debug('Metrics updated', {
      uptime: this.metrics.uptime,
      memory: this.metrics.memory,
      requestCount: this.metrics.requestCount,
      errorCount: this.metrics.errorCount,
    });
  }

  /**
   * Return a snapshot of the current metrics.
   *
   * Nested objects are copied so that callers cannot mutate the service's
   * internal state through the returned value.
   */
  getMetrics(): ServerMetrics {
    this.updateMetrics();
    return {
      ...this.metrics,
      memory: { ...this.metrics.memory },
      responseTime: { ...this.metrics.responseTime },
      toolUsage: { ...this.metrics.toolUsage },
    };
  }

  /**
   * Evaluate memory, error-rate, recent-error and response-time checks.
   *
   * @returns The worst status among the individual checks, the checks
   * themselves, and the metrics snapshot they were computed from.
   */
  getHealthStatus(): {
    status: 'healthy' | 'degraded' | 'unhealthy';
    checks: Record<string, any>;
    metrics: ServerMetrics;
  } {
    const metrics = this.getMetrics();
    const memoryUsage = metrics.memory.heapTotal > 0
      ? metrics.memory.heapUsed / metrics.memory.heapTotal
      : 0;
    const errorRate = metrics.requestCount > 0 ? metrics.errorCount / metrics.requestCount : 0;
    const recentCutoff = Date.now() - 300000; // Last 5 minutes
    const recentErrors = this.requestMetrics
      .filter(m => m.timestamp > recentCutoff && !m.success)
      .length;

    const checks = {
      memory: {
        status: memoryUsage < 0.8 ? 'healthy' : memoryUsage < 0.9 ? 'degraded' : 'unhealthy',
        usage: Math.round(memoryUsage * 100),
        limit: 90,
      },
      errorRate: {
        status: errorRate < 0.05 ? 'healthy' : errorRate < 0.1 ? 'degraded' : 'unhealthy',
        rate: Math.round(errorRate * 100),
        limit: 10,
      },
      recentErrors: {
        status: recentErrors < 5 ? 'healthy' : recentErrors < 10 ? 'degraded' : 'unhealthy',
        count: recentErrors,
        limit: 10,
      },
      responseTime: {
        status: metrics.responseTime.average < 5000 ? 'healthy' : metrics.responseTime.average < 10000 ? 'degraded' : 'unhealthy',
        average: metrics.responseTime.average,
        limit: 10000,
      },
    };

    // The overall status is the worst individual status.
    const statuses = Object.values(checks).map(check => check.status);
    const overallStatus = statuses.includes('unhealthy') ? 'unhealthy' :
      statuses.includes('degraded') ? 'degraded' : 'healthy';

    return {
      status: overallStatus,
      checks,
      metrics,
    };
  }

  /**
   * Return up to `limit` of the most recently completed requests, newest first.
   *
   * @param limit Maximum number of records; values below 1 (or NaN) yield an
   * empty array. Fractional values are rounded down.
   * @returns Copies of the stored records, so callers cannot alter history.
   */
  getRecentRequests(limit: number = 50): RequestMetric[] {
    const count = Math.floor(limit);
    // slice(-0) is slice(0) and would return *everything*, so guard explicitly.
    if (!(count > 0)) return [];

    return this.requestMetrics
      .slice(-count)
      .map(m => ({ ...m }))
      .sort((a, b) => b.timestamp - a.timestamp);
  }

  /**
   * Summarise usage per tool, ordered by request count (descending).
   *
   * `count` and `percentage` use lifetime started-request counters, while
   * `averageResponseTime` and `errorRate` are computed over the retained
   * completed-request records for that tool.
   */
  getToolUsageStats(): Array<{
    tool: string;
    count: number;
    percentage: number;
    averageResponseTime: number;
    errorRate: number;
  }> {
    const totalRequests = this.metrics.requestCount;

    return Object.entries(this.metrics.toolUsage).map(([tool, count]) => {
      const toolMetrics = this.requestMetrics.filter(m => m.tool === tool);
      const completed = toolMetrics.length;
      const toolErrors = toolMetrics.filter(m => !m.success).length;
      const avgResponseTime = completed > 0
        ? toolMetrics.reduce((sum, m) => sum + m.duration, 0) / completed
        : 0;

      return {
        tool,
        count,
        percentage: totalRequests > 0 ? Math.round((count / totalRequests) * 100) : 0,
        averageResponseTime: Math.round(avgResponseTime),
        // Errors come from the retained window, so the denominator must too;
        // dividing by the lifetime count understates the rate once old
        // records have been evicted.
        errorRate: completed > 0 ? Math.round((toolErrors / completed) * 100) : 0,
      };
    }).sort((a, b) => b.count - a.count);
  }

  /**
   * Derive the slowest tools, the most error-prone tools, the busiest hours
   * (local time, over the retained records) and textual recommendations.
   */
  getPerformanceInsights(): {
    slowestTools: Array<{ tool: string; avgTime: number }>;
    mostErrorProneTools: Array<{ tool: string; errorRate: number }>;
    peakUsageHours: Array<{ hour: number; requestCount: number }>;
    recommendations: string[];
  } {
    const toolStats = this.getToolUsageStats();

    const slowestTools = [...toolStats]
      .sort((a, b) => b.averageResponseTime - a.averageResponseTime)
      .slice(0, 5)
      .map(t => ({ tool: t.tool, avgTime: t.averageResponseTime }));

    const mostErrorProneTools = toolStats
      .filter(t => t.errorRate > 0)
      .sort((a, b) => b.errorRate - a.errorRate)
      .slice(0, 5)
      .map(t => ({ tool: t.tool, errorRate: t.errorRate }));

    // Bucket retained requests by local hour of day.
    const hourlyUsage: number[] = new Array(24).fill(0);
    for (const metric of this.requestMetrics) {
      hourlyUsage[new Date(metric.timestamp).getHours()]++;
    }
    const peakUsageHours = hourlyUsage
      .map((count, hour) => ({ hour, requestCount: count }))
      .filter(h => h.requestCount > 0)
      .sort((a, b) => b.requestCount - a.requestCount)
      .slice(0, 5);

    const recommendations: string[] = [];
    const metrics = this.getMetrics();

    if (metrics.responseTime.average > 5000) {
      recommendations.push('Consider implementing response caching for frequently accessed data');
    }
    if (metrics.requestCount > 0 && metrics.errorCount / metrics.requestCount > 0.05) {
      recommendations.push('High error rate detected - review error handling and API limits');
    }
    if (metrics.memory.heapUsed / metrics.memory.heapTotal > 0.8) {
      recommendations.push('Memory usage is high - consider implementing garbage collection optimization');
    }
    if (slowestTools.length > 0 && slowestTools[0].avgTime > 10000) {
      recommendations.push(`${slowestTools[0].tool} is slow - consider implementing chunking or pagination`);
    }
    if (mostErrorProneTools.length > 0 && mostErrorProneTools[0].errorRate > 20) {
      recommendations.push(`${mostErrorProneTools[0].tool} has high error rate - review implementation`);
    }

    return {
      slowestTools,
      mostErrorProneTools,
      peakUsageHours,
      recommendations,
    };
  }

  /** Serialise metrics, health, tool usage, recent requests and insights as pretty-printed JSON. */
  exportMetrics(): string {
    return JSON.stringify({
      timestamp: new Date().toISOString(),
      metrics: this.getMetrics(),
      health: this.getHealthStatus(),
      toolUsage: this.getToolUsageStats(),
      recentRequests: this.getRecentRequests(100),
      insights: this.getPerformanceInsights(),
    }, null, 2);
  }

  /** Reset all counters, history and the request ID counter (for testing or maintenance). */
  resetMetrics(): void {
    this.metrics = MonitoringService.createInitialMetrics();
    this.requestMetrics = [];
    this.requestIdCounter = 0;
    log.info('Metrics reset');
  }
}
// Global monitoring instance.
// Constructing it here means the service's periodic metrics refresh is
// started as a side effect of loading this module.
export const monitoring = new MonitoringService();
/**
 * Wrap a tool handler so every invocation is recorded by `monitoring`.
 *
 * The wrapper records the request start, calls `handler` with the same
 * arguments and `this` value, and records completion exactly once:
 * - synchronous return: success;
 * - synchronous throw: failure, then the original value is rethrown;
 * - returned thenable: success or failure once it settles; the settled value
 *   or rejection reason is passed through unchanged.
 *
 * @param toolName Name under which the handler's requests are recorded.
 * @param handler Synchronous or promise-returning function to wrap.
 * @returns A function with the same call signature as `handler`.
 */
export function monitorTool<T extends (...args: any[]) => any>(
  toolName: string,
  handler: T
): T {
  // Thrown values are not guaranteed to be Error instances (or even objects),
  // so derive the message defensively instead of reading `.message` blindly.
  const describeError = (error: unknown): string => {
    if (error instanceof Error) return error.message;
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  };

  return function (this: unknown, ...args: any[]) {
    const requestId = monitoring.generateRequestId();
    const startTime = Date.now();
    monitoring.startRequest(toolName, requestId);

    let result: any;
    try {
      result = handler.apply(this, args);
    } catch (error: unknown) {
      monitoring.completeRequest(toolName, startTime, false, describeError(error), requestId);
      throw error;
    }

    // Handle both sync and async results.
    if (result && typeof result.then === 'function') {
      // Two-argument `then` keeps the outcomes mutually exclusive: a failure
      // inside the success bookkeeping is not also recorded as a request error.
      return result.then(
        (data: any) => {
          monitoring.completeRequest(toolName, startTime, true, undefined, requestId);
          return data;
        },
        (error: unknown) => {
          monitoring.completeRequest(toolName, startTime, false, describeError(error), requestId);
          throw error;
        }
      );
    }

    monitoring.completeRequest(toolName, startTime, true, undefined, requestId);
    return result;
  } as T;
}
// Default export: the same singleton instance as the named `monitoring` export.
export default monitoring;