#!/usr/bin/env node
import path from 'path';
import fs from 'fs/promises';
import { performance } from 'perf_hooks';
import { createIndexer } from '../src/services/indexer.mjs';
import { createSearchService } from '../src/services/searchService.mjs';
import { createFsAdapter } from '../src/adapters/fsio.mjs';
/**
* Benchmark script for the OnCall Runbook MCP Server.
* Generates a synthetic runbook corpus, builds the index, and measures indexing
* time and search latency percentiles for each corpus size.
*/
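// Run directly with `node`; the synthetic corpus is created under ./temp-benchmark
// (relative to the current working directory) and cleaned up afterwards, and a
// benchmark-report.json summary is written to the working directory.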
const logger = {
log: (message, meta = {}) => {
console.log(`[BENCHMARK] ${message}`);
if (Object.keys(meta).length > 0) {
console.log(JSON.stringify(meta, null, 2));
}
}
};
// Benchmark configuration
const BENCHMARK_SIZES = [10, 50, 100, 300];
const QUERIES_PER_SIZE = 20;
const TOPK = 5;
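// Each corpus size is generated and indexed from scratch, then QUERIES_PER_SIZE
// searches (topK = TOPK) are timed and summarized as latency percentiles.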
// Sample runbook templates for corpus generation
const RUNBOOK_TEMPLATES = [
{
category: 'database',
templates: [
{
title: 'Database Connection Pool Exhaustion',
keywords: ['database', 'connection', 'pool', 'timeout', 'postgresql', 'mysql'],
severity: 'P2',
content: `
# Database Connection Pool Exhaustion
## Symptoms
- Connection timeout errors
- Application slow response
- Pool utilization at 100%
## Initial Diagnosis
1. Check connection pool metrics
2. Review active connections
3. Analyze slow query logs
4. Monitor CPU and memory usage
## Safe Operations
- \`SHOW PROCESSLIST\` - Check active queries
- Monitor connection counts
- Review application logs
## Risk Operations
- Restart database service
- Kill long-running queries
- Increase pool size
`
},
{
title: 'Database Replica Lag',
keywords: ['database', 'replica', 'lag', 'replication', 'sync'],
severity: 'P3',
content: `
# Database Replica Lag
## Symptoms
- Read inconsistency
- Replication lag alerts
- Stale data in reports
## Initial Diagnosis
1. Check replica lag metrics
2. Review master-slave connection
3. Analyze network latency
4. Check disk I/O on replica
## Safe Operations
- Monitor lag metrics
- Check network connectivity
- Review replica logs
## Risk Operations
- Restart replica
- Re-sync from master
- Switch to different replica
`
}
]
},
{
category: 'web-service',
templates: [
{
title: 'HTTP 5xx Error Rate Spike',
keywords: ['http', '500', 'error', 'web', 'service', 'nginx', 'apache'],
severity: 'P2',
content: `
# HTTP 5xx Error Rate Spike
## Symptoms
- High 5xx error rate
- User complaints
- Service degradation
## Initial Diagnosis
1. Check error logs
2. Review application metrics
3. Analyze traffic patterns
4. Monitor resource usage
## Safe Operations
- Review logs for patterns
- Check upstream dependencies
- Monitor traffic distribution
## Risk Operations
- Restart web servers
- Deploy rollback
- Failover to backup region
`
},
{
title: 'Load Balancer Issues',
keywords: ['load', 'balancer', 'nginx', 'haproxy', 'traffic', 'upstream'],
severity: 'P1',
content: `
# Load Balancer Configuration Issues
## Symptoms
- Uneven traffic distribution
- Backend server overload
- Connection failures
## Initial Diagnosis
1. Check backend health status
2. Review load balancer logs
3. Analyze traffic routing rules
4. Monitor connection pools
## Safe Operations
- Check backend health checks
- Review routing configuration
- Monitor connection metrics
## Risk Operations
- Restart load balancer
- Update routing rules
- Remove unhealthy backends
`
}
]
},
{
category: 'network',
templates: [
{
title: 'Network Latency Issues',
keywords: ['network', 'latency', 'ping', 'connection', 'timeout'],
severity: 'P2',
content: `
# Network Latency Issues
## Symptoms
- High response times
- Intermittent timeouts
- Poor user experience
## Initial Diagnosis
1. Check network latency metrics
2. Perform ping/traceroute tests
3. Review network topology
4. Analyze traffic patterns
## Safe Operations
- Network diagnostic tools
- Monitor bandwidth usage
- Check routing tables
## Risk Operations
- Restart network equipment
- Update routing configuration
- Switch network providers
`
}
]
}
];
// Sample queries for benchmarking
const BENCHMARK_QUERIES = [
'database connection timeout',
'web service 500 error',
'load balancer configuration',
'network latency problems',
'replica lag issues',
'http error rate spike',
'connection pool exhaustion',
'service restart procedure',
'nginx configuration issue',
'postgresql performance',
'mysql slow queries',
'haproxy backend failure',
'apache server errors',
'network routing problems',
'timeout troubleshooting',
'performance degradation',
'service unavailable',
'database failover',
'web server restart',
'load balancing issues'
];
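// Queries are consumed round-robin; with QUERIES_PER_SIZE equal to the list length,
// each query runs exactly once per corpus size.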
/**
* Generate synthetic runbook corpus
*/
async function generateCorpus(size, outputDir) {
await fs.mkdir(outputDir, { recursive: true });
const generated = [];
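// Cycle round-robin through categories and their templates; the variation counter
// distinguishes repeated uses of the same template in the generated front matter.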
for (let i = 0; i < size; i++) {
const categoryIndex = i % RUNBOOK_TEMPLATES.length;
const category = RUNBOOK_TEMPLATES[categoryIndex];
const templateIndex = i % category.templates.length;
const template = category.templates[templateIndex];
const docId = `${category.category}-${String(i).padStart(3, '0')}.md`;
const variation = Math.floor(i / category.templates.length) + 1;
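// last_updated is randomized up to ~180 days before 2025-10-13; the Date constructor
// rolls zero or negative day-of-month values back into earlier months.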
const content = `---
title: ${template.title} - Variant ${variation}
category: ${category.category}
severity: ${template.severity}
keywords: ${template.keywords.join(', ')}
last_updated: ${new Date(2025, 9, 13 - Math.floor(Math.random() * 180)).toISOString().split('T')[0]}
version: "1.${variation}"
---
${template.content}
## Additional Context (Variant ${variation})
- Generated for benchmark corpus
- Document ID: ${docId}
- Category: ${category.category}
- Timestamp: ${new Date().toISOString()}
## Troubleshooting Steps (Variant ${variation})
${generateTroubleshootingSteps(template.keywords, variation)}
## Prevention Measures
- Monitor key metrics continuously
- Set up appropriate alerting thresholds
- Maintain runbook documentation
- Regular capacity planning reviews
`;
const filePath = path.join(outputDir, docId);
await fs.writeFile(filePath, content, 'utf8');
generated.push({ docId, filePath, category: category.category });
}
return generated;
}
/**
* Generate variant troubleshooting steps
*/
function generateTroubleshootingSteps(keywords, variant) {
const baseSteps = [
'Check system metrics and resource utilization',
'Review recent changes and deployments',
'Analyze error logs for patterns',
'Verify configuration parameters',
'Test connectivity and dependencies'
];
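// Produces numbered lines such as:
//   "1. Check system metrics and resource utilization (database focus)"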
const variantSteps = baseSteps.map((step, index) =>
`${index + 1}. ${step} (${keywords[index % keywords.length]} focus)`
);
return variantSteps.join('\n');
}
/**
* Run benchmark for specific corpus size
*/
async function runBenchmark(corpusSize, tempDir) {
logger.log(`\n=== Benchmarking corpus size: ${corpusSize} ===`);
// Generate corpus
const corpusDir = path.join(tempDir, `corpus-${corpusSize}`);
const startGeneration = performance.now();
const documents = await generateCorpus(corpusSize, corpusDir);
const generationTime = performance.now() - startGeneration;
logger.log(`Generated ${documents.length} documents in ${generationTime.toFixed(2)}ms`);
// Build index
const fsAdapter = createFsAdapter(corpusDir);
// Create a minimal config for benchmark
const config = { freshnessDays: 90, topKDefault: 5 };
const indexer = createIndexer({ fsAdapter, config, logger });
const startIndexing = performance.now();
const index = await indexer.buildIndex();
const indexingTime = performance.now() - startIndexing;
logger.log(`Built index with ${index.documents.length} docs, ${index.chunks.length} chunks in ${indexingTime.toFixed(2)}ms`);
// Run search benchmarks
const searchService = createSearchService({ index, config, logger });
const searchTimes = [];
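// Time each query individually; only the first query's result count is logged
// to keep the output concise.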
for (let i = 0; i < QUERIES_PER_SIZE; i++) {
const query = BENCHMARK_QUERIES[i % BENCHMARK_QUERIES.length];
const startSearch = performance.now();
const results = await searchService.search(query, { topK: TOPK });
const searchTime = performance.now() - startSearch;
searchTimes.push(searchTime);
if (i === 0) {
logger.log(`Sample query "${query}" returned ${results.length} results`);
}
}
// Calculate statistics
searchTimes.sort((a, b) => a - b);
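// Nearest-rank percentiles over the sorted samples; with 20 queries per size,
// floor(n * 0.95) and floor(n * 0.99) both index the slowest sample, so P95,
// P99, and max coincide.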
const stats = {
corpusSize,
documents: documents.length,
chunks: index.chunks.length,
generationTimeMs: Math.round(generationTime),
indexingTimeMs: Math.round(indexingTime),
totalQueries: searchTimes.length,
avgSearchTimeMs: Math.round(searchTimes.reduce((a, b) => a + b, 0) / searchTimes.length * 100) / 100,
p50SearchTimeMs: Math.round(searchTimes[Math.floor(searchTimes.length * 0.5)] * 100) / 100,
p95SearchTimeMs: Math.round(searchTimes[Math.floor(searchTimes.length * 0.95)] * 100) / 100,
p99SearchTimeMs: Math.round(searchTimes[Math.floor(searchTimes.length * 0.99)] * 100) / 100,
maxSearchTimeMs: Math.round(Math.max(...searchTimes) * 100) / 100,
minSearchTimeMs: Math.round(Math.min(...searchTimes) * 100) / 100
};
return stats;
}
/**
* Clean up temporary directories
*/
async function cleanup(tempDir) {
try {
await fs.rm(tempDir, { recursive: true, force: true });
logger.log(`Cleaned up temporary directory: ${tempDir}`);
} catch (error) {
logger.log(`Warning: Could not clean up ${tempDir}: ${error.message}`);
}
}
/**
* Main benchmark execution
*/
async function main() {
logger.log('OnCall Runbook MCP Server - Performance Benchmark');
logger.log(`Testing corpus sizes: ${BENCHMARK_SIZES.join(', ')} documents`);
logger.log(`Queries per size: ${QUERIES_PER_SIZE}, TopK: ${TOPK}`);
const tempDir = path.join(process.cwd(), 'temp-benchmark');
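// All generated corpora live under a single temp directory that is removed in the
// finally block, even if a benchmark run fails.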
const results = [];
try {
for (const size of BENCHMARK_SIZES) {
const stats = await runBenchmark(size, tempDir);
results.push(stats);
}
// Display results summary
logger.log('\nBENCHMARK RESULTS SUMMARY');
logger.log('='.repeat(80));
logger.log('Corpus | Docs | Chunks | Index(ms) | Avg(ms) | P95(ms) | P99(ms) | Max(ms)');
logger.log('-'.repeat(80));
for (const stats of results) {
const row = [
String(stats.corpusSize).padStart(6),
String(stats.documents).padStart(4),
String(stats.chunks).padStart(6),
String(stats.indexingTimeMs).padStart(9),
String(stats.avgSearchTimeMs).padStart(7),
String(stats.p95SearchTimeMs).padStart(7),
String(stats.p99SearchTimeMs).padStart(7),
String(stats.maxSearchTimeMs).padStart(7)
].join(' | ');
logger.log(row);
}
logger.log('-'.repeat(80));
// Performance assessment
const largest = results[results.length - 1];
logger.log(`\nPerformance Assessment:`);
logger.log(` - Largest corpus (${largest.corpusSize} docs): P95 = ${largest.p95SearchTimeMs}ms`);
logger.log(` - Target: P95 < 100ms for production workloads`);
if (largest.p95SearchTimeMs < 100) {
logger.log(` - PASS: System meets performance targets!`);
} else if (largest.p95SearchTimeMs < 500) {
logger.log(` - ACCEPTABLE: Performance adequate for most use cases`);
} else {
logger.log(` - NEEDS OPTIMIZATION: Consider caching or indexing improvements`);
}
// Save detailed results
const reportPath = path.join(process.cwd(), 'benchmark-report.json');
const report = {
timestamp: new Date().toISOString(),
environment: {
nodeVersion: process.version,
platform: process.platform,
arch: process.arch
},
configuration: {
corpusSizes: BENCHMARK_SIZES,
queriesPerSize: QUERIES_PER_SIZE,
topK: TOPK
},
results
};
await fs.writeFile(reportPath, JSON.stringify(report, null, 2), 'utf8');
logger.log(`\nDetailed report saved to: ${reportPath}`);
} catch (error) {
logger.log(`Benchmark failed: ${error.message}`);
console.error(error);
process.exit(1);
} finally {
await cleanup(tempDir);
}
logger.log('\nBenchmark completed successfully!');
}
// Execute if run directly
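// The guard compares this module's URL with the invoked script path so that importing
// the module (e.g. from tests) does not trigger a benchmark run.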
if (process.argv[1] && import.meta.url.endsWith(process.argv[1].replace(/\\/g, '/'))) {
await main();
}
export { main as runBenchmark };
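// Programmatic usage sketch (the import path is assumed to match this file's location):
//   import { runBenchmark } from './scripts/benchmark.mjs';
//   await runBenchmark();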