Skip to main content
Glama

WebSee MCP Server

by 1AQuantum
evaluation.js • 39.7 kB
/**
 * WebSee MCP Server Evaluation Framework
 *
 * This module provides a comprehensive evaluation system for the WebSee MCP
 * server following Anthropic's MCP builder standards for quality assurance.
 *
 * Each test case describes one MCP tool invocation, the structural conditions
 * its output must satisfy, and a set of weighted scoring criteria expressed as
 * JavaScript validator expressions evaluated against the tool output.
 *
 * @module evaluation
 */
import { chromium } from 'playwright';
import * as fs from 'fs/promises';
import * as path from 'path';
import { fileURLToPath } from 'node:url';

// ============================================================================
// Test Case Definitions
// ============================================================================

export const TEST_CASES = [
  // 1. Debugging React Component State Issues
  {
    id: 'eval-001',
    category: 'Component Debugging',
    description: 'Debug React component state issues by inspecting component props and state',
    tool: 'inspect_component_state',
    input: {
      url: 'http://localhost:3000/app',
      selector: '#user-profile',
      waitForSelector: true,
      includeChildren: true,
    },
    expectedOutput: {
      fields: ['selector', 'component', 'children'],
      conditions: [
        { field: 'component.name', type: 'exists' },
        { field: 'component.framework', type: 'exists' },
        { field: 'component.props', type: 'exists' },
        { field: 'component.state', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Component name is correctly identified',
          points: 20,
          validator: 'output.component && output.component.name && output.component.name.length > 0',
        },
        {
          description: 'Framework is detected (React/Vue/Angular)',
          points: 20,
          validator: "output.component && ['react', 'vue', 'angular'].includes(output.component.framework?.toLowerCase())",
        },
        {
          description: 'Props are extracted and non-empty',
          points: 20,
          validator: 'output.component && output.component.props && Object.keys(output.component.props).length > 0',
        },
        {
          description: 'State is captured',
          points: 20,
          validator: 'output.component && output.component.state !== undefined',
        },
        {
          description: 'Child components are included when requested',
          points: 20,
          validator: 'output.children && Array.isArray(output.children)',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 5000, expectedAccuracy: 90 },
  },

  // 2. Analyzing Slow Network Requests
  {
    id: 'eval-002',
    category: 'Network Analysis',
    description: 'Identify and analyze slow network requests with timing information',
    tool: 'analyze_performance',
    input: {
      url: 'http://localhost:3000/app',
      metrics: ['network'],
    },
    expectedOutput: {
      fields: ['url', 'timestamp', 'metrics'],
      conditions: [
        { field: 'metrics.network', type: 'exists' },
        { field: 'metrics.network.totalRequests', type: 'greaterThan', value: 0 },
        { field: 'metrics.network.averageDuration', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Total request count is captured',
          points: 15,
          validator: 'output.metrics?.network?.totalRequests >= 0',
        },
        {
          description: 'Slow requests are identified (>1000ms)',
          points: 25,
          validator: 'output.metrics?.network?.slowRequests !== undefined',
        },
        {
          description: 'Average duration is calculated',
          points: 20,
          validator: 'output.metrics?.network?.averageDuration > 0',
        },
        {
          description: 'Slowest requests are listed with details',
          points: 25,
          validator: 'Array.isArray(output.metrics?.network?.slowestRequests) && output.metrics.network.slowestRequests.every(r => r.url && r.duration)',
        },
        {
          description: 'Stack trace shows what triggered the request',
          points: 15,
          validator: 'output.metrics?.network?.slowestRequests?.some(r => r.triggeredBy)',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 8000, expectedAccuracy: 95 },
  },

  // 3. Resolving Minified Error Stack Traces
  {
    id: 'eval-003',
    category: 'Error Resolution',
    description: 'Resolve minified error stack traces to original source code locations',
    tool: 'resolve_minified_error',
    input: {
      url: 'http://localhost:3000/app',
      errorStack: "Error: Cannot read property 'name' of undefined\n at t.render (app.min.js:1:28473)",
      triggerError: false,
    },
    expectedOutput: {
      fields: ['resolved', 'original', 'sourceMap'],
      conditions: [
        { field: 'resolved', type: 'equals', value: true },
        { field: 'sourceMap', type: 'exists' },
        { field: 'sourceMap', type: 'arrayLength', value: 1 },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Stack trace is successfully resolved',
          points: 30,
          validator: 'output.resolved === true',
        },
        {
          description: 'Original source file is identified',
          points: 25,
          validator: "output.sourceMap && output.sourceMap.some(line => line.includes('.tsx') || line.includes('.ts') || line.includes('.jsx') || line.includes('.js'))",
        },
        {
          description: 'Line and column numbers are provided',
          points: 20,
          validator: 'output.sourceMap && output.sourceMap.some(line => /:\\d+:\\d+/.test(line))',
        },
        {
          description: 'Original error is preserved',
          points: 15,
          validator: 'output.original && output.original.length > 0',
        },
        {
          description: 'Source map resolution message is clear',
          points: 10,
          validator: "output.message && output.message.includes('source map')",
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 6000, expectedAccuracy: 85 },
  },

  // 4. Finding Large Bundle Modules
  {
    id: 'eval-004',
    category: 'Bundle Analysis',
    description: 'Identify large modules in the JavaScript bundle that exceed size thresholds',
    tool: 'analyze_bundle_size',
    input: {
      url: 'http://localhost:3000/app',
      moduleName: 'lodash',
      threshold: 50,
    },
    expectedOutput: {
      fields: ['url', 'scripts', 'stylesheets', 'modules', 'recommendations'],
      conditions: [
        { field: 'scripts.total', type: 'greaterThan', value: 0 },
        { field: 'scripts.files', type: 'exists' },
        { field: 'recommendations', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Total script count is captured',
          points: 15,
          validator: 'output.scripts && output.scripts.total >= 0',
        },
        {
          description: 'Script files are listed with sources',
          points: 20,
          validator: 'Array.isArray(output.scripts?.files) && output.scripts.files.length > 0',
        },
        {
          description: 'Module search works when specified',
          points: 25,
          validator: 'Array.isArray(output.modules) && (output.modules.length === 0 || output.modules[0].name)',
        },
        {
          description: 'Size threshold recommendations are generated',
          points: 25,
          validator: 'Array.isArray(output.recommendations)',
        },
        {
          description: 'Stylesheets are analyzed',
          points: 15,
          validator: 'output.stylesheets && output.stylesheets.total >= 0',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 7000, expectedAccuracy: 88 },
  },

  // 5. Tracing User Interaction Flows
  {
    id: 'eval-005',
    category: 'Interaction Tracing',
    description: 'Trace network activity triggered by user interactions',
    tool: 'trace_network_requests',
    input: {
      url: 'http://localhost:3000/app',
      pattern: '/api/*',
      method: 'GET',
      waitTime: 3000,
    },
    expectedOutput: {
      fields: ['url', 'pattern', 'method', 'totalRequests', 'requests'],
      conditions: [
        { field: 'totalRequests', type: 'exists' },
        { field: 'requests', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'URL pattern filtering works correctly',
          points: 25,
          validator: "output.pattern && output.requests.every(r => !output.pattern || r.url.includes(output.pattern.replace('*', '')))",
        },
        {
          description: 'HTTP method filtering is applied',
          points: 20,
          validator: "output.method && (output.method === 'ALL' || output.requests.every(r => r.method === output.method))",
        },
        {
          description: 'Request details include URL, method, status',
          points: 20,
          validator: 'output.requests && output.requests.every(r => r.url && r.method && r.status !== undefined)',
        },
        {
          description: 'Request timing information is captured',
          points: 20,
          validator: 'output.requests && output.requests.every(r => r.duration !== undefined && r.timestamp)',
        },
        {
          description: 'Stack traces show request origin',
          points: 15,
          validator: 'output.requests && output.requests.some(r => r.triggeredBy)',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 6000, expectedAccuracy: 92 },
  },

  // 6. Memory Leak Detection Scenarios
  {
    id: 'eval-006',
    category: 'Memory Analysis',
    description: 'Analyze memory usage and detect potential memory leaks',
    tool: 'analyze_performance',
    input: {
      url: 'http://localhost:3000/app',
      metrics: ['memory', 'components'],
    },
    expectedOutput: {
      fields: ['metrics'],
      conditions: [
        { field: 'metrics.memory', type: 'exists' },
        { field: 'metrics.components', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Memory metrics are captured',
          points: 30,
          validator: 'output.metrics?.memory !== undefined && output.metrics?.memory !== null',
        },
        {
          description: 'Heap size information is provided',
          points: 25,
          validator: 'output.metrics?.memory && (output.metrics.memory.usedJSHeapSize || output.metrics.memory === null)',
        },
        {
          description: 'Component count is tracked',
          points: 20,
          validator: 'output.metrics?.components?.totalComponents >= 0',
        },
        {
          description: 'Component nesting depth is analyzed',
          points: 15,
          validator: 'output.metrics?.components?.deepestNesting >= 0',
        },
        {
          description: 'Components grouped by framework',
          points: 10,
          validator: "output.metrics?.components?.byFramework && typeof output.metrics.components.byFramework === 'object'",
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 7000, expectedAccuracy: 80 },
  },

  // 7. Cross-Browser Compatibility Checks
  {
    id: 'eval-007',
    category: 'Cross-Browser Testing',
    description: 'Test frontend functionality across different browsers',
    tool: 'debug_frontend_issue',
    input: {
      url: 'http://localhost:3000/app',
      selector: '#main-content',
      screenshot: false,
    },
    expectedOutput: {
      fields: ['url', 'timestamp', 'issues', 'components', 'network', 'console'],
      conditions: [
        { field: 'console', type: 'exists' },
        { field: 'network', type: 'exists' },
        { field: 'issues', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Console errors and warnings are captured',
          points: 25,
          validator: 'Array.isArray(output.console)',
        },
        {
          description: 'Network requests are tracked',
          points: 20,
          validator: 'Array.isArray(output.network) && output.network.length >= 0',
        },
        {
          description: 'Component information is extracted',
          points: 20,
          validator: 'Array.isArray(output.components)',
        },
        {
          description: 'Issues are identified and categorized',
          points: 20,
          validator: 'Array.isArray(output.issues)',
        },
        {
          description: 'Timestamp for debugging timeline',
          points: 15,
          validator: 'output.timestamp && new Date(output.timestamp).getTime() > 0',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 8000, expectedAccuracy: 90 },
  },

  // 8. Performance Bottleneck Identification
  {
    id: 'eval-008',
    category: 'Performance Optimization',
    description: 'Identify performance bottlenecks in frontend applications',
    tool: 'analyze_performance',
    input: {
      url: 'http://localhost:3000/app',
      metrics: ['network', 'bundle', 'components'],
      interactions: [{ action: 'scroll', selector: undefined, value: undefined }],
    },
    expectedOutput: {
      fields: ['metrics'],
      conditions: [
        { field: 'metrics.network', type: 'exists' },
        { field: 'metrics.bundle', type: 'exists' },
        { field: 'metrics.components', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Network performance metrics are comprehensive',
          points: 30,
          validator: 'output.metrics?.network && output.metrics.network.totalRequests >= 0 && output.metrics.network.averageDuration >= 0',
        },
        {
          description: 'Bundle size analysis is performed',
          points: 25,
          validator: 'output.metrics?.bundle && output.metrics.bundle.totalScripts >= 0',
        },
        {
          description: 'Largest scripts are identified',
          points: 20,
          validator: 'Array.isArray(output.metrics?.bundle?.largestScripts)',
        },
        {
          description: 'Component metrics show optimization opportunities',
          points: 15,
          validator: 'output.metrics?.components?.totalComponents >= 0 && output.metrics.components.deepestNesting >= 0',
        },
        {
          description: 'User interactions are properly executed',
          points: 10,
          validator: 'output.url && output.timestamp',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 10000, expectedAccuracy: 85 },
  },

  // 9. Component Tree Analysis
  {
    id: 'eval-009',
    category: 'Component Architecture',
    description: 'Analyze component hierarchy and relationships',
    tool: 'analyze_performance',
    input: {
      url: 'http://localhost:3000/app',
      metrics: ['components'],
    },
    expectedOutput: {
      fields: ['metrics.components'],
      conditions: [
        { field: 'metrics.components.totalComponents', type: 'exists' },
        { field: 'metrics.components.byFramework', type: 'exists' },
        { field: 'metrics.components.deepestNesting', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Total component count is accurate',
          points: 30,
          validator: 'output.metrics?.components?.totalComponents >= 0',
        },
        {
          description: 'Components are grouped by framework',
          points: 25,
          validator: "output.metrics?.components?.byFramework && typeof output.metrics.components.byFramework === 'object'",
        },
        {
          description: 'Nesting depth is calculated',
          points: 25,
          validator: 'output.metrics?.components?.deepestNesting >= 0',
        },
        {
          description: 'Framework detection is working',
          points: 20,
          validator: 'output.metrics?.components?.byFramework && Object.keys(output.metrics.components.byFramework).length >= 0',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 6000, expectedAccuracy: 88 },
  },

  // 10. Build Optimization Recommendations
  {
    id: 'eval-010',
    category: 'Build Optimization',
    description: 'Generate actionable recommendations for build optimization',
    tool: 'analyze_bundle_size',
    input: {
      url: 'http://localhost:3000/app',
      threshold: 100,
    },
    expectedOutput: {
      fields: ['recommendations'],
      conditions: [
        { field: 'recommendations', type: 'exists' },
        { field: 'scripts', type: 'exists' },
      ],
    },
    scoring: {
      maxPoints: 100,
      criteria: [
        {
          description: 'Recommendations are generated',
          points: 30,
          validator: 'Array.isArray(output.recommendations)',
        },
        {
          description: 'Recommendations are actionable and specific',
          points: 25,
          validator: "output.recommendations && output.recommendations.every(r => typeof r === 'string' && r.length > 20)",
        },
        {
          description: 'Threshold-based warnings are included',
          points: 20,
          validator: "output.recommendations && output.recommendations.some(r => r.includes('KB') || r.includes('threshold'))",
        },
        {
          description: 'Code splitting suggestions when appropriate',
          points: 15,
          validator: "output.recommendations && (output.recommendations.length === 0 || output.recommendations.some(r => r.toLowerCase().includes('split')))",
        },
        {
          description: 'Total bundle size is calculated',
          points: 10,
          validator: 'output.scripts && output.scripts.totalSize >= 0',
        },
      ],
    },
    performanceBenchmark: { maxResponseTime: 7000, expectedAccuracy: 85 },
  },
];

// ============================================================================
// Evaluation Engine
// ============================================================================

export class EvaluationEngine {
  browser = null;   // Playwright Browser instance, created in initialize()
  testCases;        // Array of test-case definitions (defaults to TEST_CASES)
  results = [];     // Per-test results accumulated by runAllTests()

  constructor(testCases = TEST_CASES) {
    this.testCases = testCases;
  }

  /**
   * Initialize the evaluation engine (launches a headless Chromium browser).
   */
  async initialize() {
    this.browser = await chromium.launch({ headless: true });
    console.log('✅ Evaluation engine initialized');
  }

  /**
   * Run a single test case: invoke the tool, validate structure, score
   * criteria, and compare against the performance benchmark.
   *
   * @param {object} testCase - One entry from TEST_CASES.
   * @returns {Promise<object>} Result with pass/fail, score, and diagnostics.
   */
  async runTestCase(testCase) {
    const startTime = Date.now();
    const result = {
      testId: testCase.id,
      passed: false,
      score: 0,
      maxScore: testCase.scoring.maxPoints,
      responseTime: 0,
      errors: [],
      warnings: [],
      output: null,
      validationResults: [],
    };
    try {
      // Simulate MCP tool call.
      // In a real implementation, this would call the actual MCP server.
      result.output = await this.simulateToolCall(testCase.tool, testCase.input);

      // Validate output structure
      this.validateOutputStructure(testCase, result);

      // Evaluate scoring criteria
      this.evaluateScoringCriteria(testCase, result);

      // Check performance benchmarks
      result.responseTime = Date.now() - startTime;
      if (result.responseTime > testCase.performanceBenchmark.maxResponseTime) {
        result.warnings.push(
          `Response time ${result.responseTime}ms exceeds benchmark of ${testCase.performanceBenchmark.maxResponseTime}ms`
        );
      }

      // Determine pass/fail: score percentage must meet the expected accuracy.
      const scorePercentage = (result.score / result.maxScore) * 100;
      result.passed = scorePercentage >= testCase.performanceBenchmark.expectedAccuracy;
    } catch (error) {
      result.errors.push(
        `Test execution failed: ${error instanceof Error ? error.message : String(error)}`
      );
      result.passed = false;
    }
    return result;
  }

  /**
   * Validate that the tool output contains expected fields and meets the
   * declared conditions. Failures are recorded as warnings (not errors).
   */
  validateOutputStructure(testCase, result) {
    const { expectedOutput } = testCase;

    // Check required fields
    for (const field of expectedOutput.fields) {
      const value = this.getNestedValue(result.output, field);
      if (value === undefined) {
        result.warnings.push(`Expected field '${field}' not found in output`);
      }
    }

    // Check conditions
    for (const condition of expectedOutput.conditions) {
      const value = this.getNestedValue(result.output, condition.field);
      let conditionMet = false;
      switch (condition.type) {
        case 'exists':
          conditionMet = value !== undefined && value !== null;
          break;
        case 'equals':
          conditionMet = value === condition.value;
          break;
        case 'contains':
          conditionMet = typeof value === 'string' && value.includes(String(condition.value));
          break;
        case 'greaterThan':
          conditionMet = typeof value === 'number' && value > (condition.value || 0);
          break;
        case 'lessThan':
          conditionMet = typeof value === 'number' && value < (condition.value || 0);
          break;
        case 'arrayLength':
          // NOTE: interpreted as "at least N elements", not exact length.
          conditionMet = Array.isArray(value) && value.length >= (condition.value || 0);
          break;
      }
      if (!conditionMet) {
        result.warnings.push(
          `Condition not met: ${condition.field} ${condition.type} ${condition.value !== undefined ? condition.value : ''}`
        );
      }
    }
  }

  /**
   * Evaluate scoring criteria using validator expressions.
   *
   * SECURITY NOTE: validators are evaluated with `new Function`. This is safe
   * only because the expressions come from the trusted, hard-coded TEST_CASES
   * definitions above — never feed user-supplied strings through this path.
   */
  evaluateScoringCriteria(testCase, result) {
    for (const criterion of testCase.scoring.criteria) {
      let passed = false;
      try {
        const evalFunc = new Function('output', `return ${criterion.validator}`);
        passed = Boolean(evalFunc(result.output));
      } catch (error) {
        result.warnings.push(
          `Validator error for "${criterion.description}": ${error instanceof Error ? error.message : String(error)}`
        );
      }
      result.validationResults.push({
        criterion: criterion.description,
        passed,
        points: passed ? criterion.points : 0,
      });
      if (passed) {
        result.score += criterion.points;
      }
    }
  }

  /**
   * Get a nested value from an object using dot notation (e.g. "a.b.c").
   */
  getNestedValue(obj, path) {
    return path.split('.').reduce((current, key) => current?.[key], obj);
  }

  /**
   * Simulate an MCP tool call (placeholder for the actual implementation).
   * Returns deterministic mock payloads shaped like real tool responses.
   */
  async simulateToolCall(tool, input) {
    // This is a placeholder that returns mock data.
    // In a real implementation, this would call the actual MCP server.
    console.log(`[SIMULATION] Calling tool: ${tool} with input:`, JSON.stringify(input, null, 2));

    switch (tool) {
      case 'inspect_component_state':
        return {
          selector: input.selector,
          component: {
            name: 'UserProfile',
            framework: 'react',
            props: { userId: '123' },
            state: { loading: false },
            source: { file: 'src/components/UserProfile.tsx', line: 42 },
            parents: ['App', 'Dashboard'],
          },
          children: input.includeChildren ? [{ name: 'Avatar', props: {} }] : undefined,
        };
      case 'analyze_performance':
        return {
          url: input.url,
          timestamp: new Date().toISOString(),
          metrics: this.getMockMetrics(input.metrics),
        };
      case 'resolve_minified_error':
        return {
          resolved: true,
          original: input.errorStack,
          sourceMap: [
            "Error: Cannot read property 'name' of undefined",
            ' at UserProfile.render (src/components/UserProfile.tsx:87:15)',
          ],
          message: 'Stack trace resolved using source maps',
        };
      case 'analyze_bundle_size':
        return {
          url: input.url,
          scripts: {
            total: 5,
            totalSize: 524288,
            files: [
              { src: 'main.js', size: 262144, async: false, defer: false },
              { src: 'vendor.js', size: 262144, async: false, defer: false },
            ],
          },
          stylesheets: {
            total: 2,
            files: [{ href: 'styles.css', media: 'all' }],
          },
          modules: input.moduleName ? [{ name: input.moduleName, found: false }] : [],
          recommendations: [
            'Found 2 script(s) larger than 100 KB. Consider code splitting for better performance.',
          ],
        };
      case 'trace_network_requests':
        return {
          url: input.url,
          pattern: input.pattern,
          method: input.method,
          totalRequests: 3,
          requests: [
            {
              url: '/api/users/123',
              method: 'GET',
              status: 200,
              duration: 245,
              size: 1024,
              triggeredBy: { file: 'UserProfile.tsx', line: 42 },
              timestamp: Date.now(),
            },
          ],
        };
      case 'debug_frontend_issue':
        return {
          url: input.url,
          timestamp: new Date().toISOString(),
          issues: [],
          components: input.selector
            ? [
                {
                  selector: input.selector,
                  name: 'MainContent',
                  framework: 'react',
                  props: {},
                  state: {},
                },
              ]
            : [],
          network: [{ url: '/api/data', method: 'GET', status: 200, duration: 150 }],
          console: [],
        };
      default:
        return {};
    }
  }

  /**
   * Get mock metrics for the requested metric types
   * ('network' | 'components' | 'bundle' | 'memory').
   */
  getMockMetrics(metricTypes) {
    const metrics = {};
    if (metricTypes.includes('network')) {
      metrics.network = {
        totalRequests: 15,
        slowRequests: 2,
        averageDuration: 342,
        slowestRequests: [
          { url: '/api/data', duration: 1250, triggeredBy: { file: 'App.tsx', line: 23 } },
          { url: '/api/users', duration: 1100, triggeredBy: { file: 'UserList.tsx', line: 56 } },
        ],
      };
    }
    if (metricTypes.includes('components')) {
      metrics.components = {
        totalComponents: 12,
        byFramework: { react: 12 },
        deepestNesting: 5,
      };
    }
    if (metricTypes.includes('bundle')) {
      metrics.bundle = {
        totalScripts: 5,
        totalSize: 524288,
        largestScripts: [
          { src: 'vendor.js', size: 262144 },
          { src: 'main.js', size: 131072 },
        ],
      };
    }
    if (metricTypes.includes('memory')) {
      metrics.memory = {
        usedJSHeapSize: '45 MB',
        totalJSHeapSize: '60 MB',
        limit: '2048 MB',
      };
    }
    return metrics;
  }

  /**
   * Run all test cases sequentially and return the aggregate report.
   */
  async runAllTests() {
    console.log(`\n🧪 Running ${this.testCases.length} evaluation tests...\n`);
    this.results = [];
    for (const testCase of this.testCases) {
      console.log(`Running: ${testCase.id} - ${testCase.description}`);
      const result = await this.runTestCase(testCase);
      this.results.push(result);
      const status = result.passed ? '✅ PASS' : '❌ FAIL';
      console.log(` ${status} - Score: ${result.score}/${result.maxScore} (${result.responseTime}ms)\n`);
    }
    return this.generateReport();
  }

  /**
   * Generate a comprehensive evaluation report: overall totals, per-category
   * breakdown, and per-tool performance metrics.
   */
  generateReport() {
    const passedTests = this.results.filter(r => r.passed).length;
    const totalScore = this.results.reduce((sum, r) => sum + r.score, 0);
    const maxPossibleScore = this.results.reduce((sum, r) => sum + r.maxScore, 0);
    // Guard against division by zero when no tests were run.
    const averageResponseTime = this.results.length > 0
      ? this.results.reduce((sum, r) => sum + r.responseTime, 0) / this.results.length
      : 0;

    // Category breakdown
    const categoryBreakdown = {};
    for (const result of this.results) {
      const testCase = this.testCases.find(tc => tc.id === result.testId);
      if (!categoryBreakdown[testCase.category]) {
        categoryBreakdown[testCase.category] = { passed: 0, failed: 0, score: 0, maxScore: 0 };
      }
      if (result.passed) {
        categoryBreakdown[testCase.category].passed++;
      } else {
        categoryBreakdown[testCase.category].failed++;
      }
      categoryBreakdown[testCase.category].score += result.score;
      categoryBreakdown[testCase.category].maxScore += result.maxScore;
    }

    // Performance metrics by tool
    const toolMetrics = {};
    for (const result of this.results) {
      const testCase = this.testCases.find(tc => tc.id === result.testId);
      if (!toolMetrics[testCase.tool]) {
        toolMetrics[testCase.tool] = { times: [], successes: 0, total: 0 };
      }
      toolMetrics[testCase.tool].times.push(result.responseTime);
      toolMetrics[testCase.tool].total++;
      if (result.passed) {
        toolMetrics[testCase.tool].successes++;
      }
    }
    const performanceMetrics = Object.entries(toolMetrics).map(([tool, data]) => ({
      toolName: tool,
      averageTime: data.times.reduce((a, b) => a + b, 0) / data.times.length,
      minTime: Math.min(...data.times),
      maxTime: Math.max(...data.times),
      successRate: (data.successes / data.total) * 100,
    }));

    return {
      timestamp: new Date().toISOString(),
      totalTests: this.testCases.length,
      passedTests,
      failedTests: this.testCases.length - passedTests,
      totalScore,
      maxPossibleScore,
      // Guard against NaN when there are no scorable tests.
      scorePercentage: maxPossibleScore > 0 ? (totalScore / maxPossibleScore) * 100 : 0,
      averageResponseTime,
      testResults: this.results,
      categoryBreakdown,
      performanceMetrics,
    };
  }

  /**
   * Print a human-readable report to the console.
   */
  printReport(report) {
    console.log('\n' + '='.repeat(80));
    console.log('WEBSEE MCP SERVER - EVALUATION REPORT');
    console.log('='.repeat(80));
    console.log(`\nTimestamp: ${report.timestamp}`);
    console.log(`\nOverall Results:`);
    console.log(` Total Tests: ${report.totalTests}`);
    console.log(` Passed: ${report.passedTests} ✅`);
    console.log(` Failed: ${report.failedTests} ❌`);
    console.log(` Score: ${report.totalScore}/${report.maxPossibleScore} (${report.scorePercentage.toFixed(2)}%)`);
    console.log(` Average Response Time: ${report.averageResponseTime.toFixed(2)}ms`);
    console.log(`\nCategory Breakdown:`);
    for (const [category, stats] of Object.entries(report.categoryBreakdown)) {
      const percentage = (stats.score / stats.maxScore) * 100;
      console.log(` ${category}:`);
      console.log(` Passed: ${stats.passed}, Failed: ${stats.failed}`);
      console.log(` Score: ${stats.score}/${stats.maxScore} (${percentage.toFixed(2)}%)`);
    }
    console.log(`\nPerformance Metrics by Tool:`);
    for (const metric of report.performanceMetrics) {
      console.log(` ${metric.toolName}:`);
      console.log(` Average Time: ${metric.averageTime.toFixed(2)}ms`);
      console.log(` Min/Max: ${metric.minTime}ms / ${metric.maxTime}ms`);
      console.log(` Success Rate: ${metric.successRate.toFixed(2)}%`);
    }
    console.log(`\nDetailed Results:`);
    for (const result of report.testResults) {
      const testCase = this.testCases.find(tc => tc.id === result.testId);
      const status = result.passed ? '✅ PASS' : '❌ FAIL';
      console.log(`\n ${status} ${result.testId}: ${testCase.description}`);
      console.log(` Score: ${result.score}/${result.maxScore} | Time: ${result.responseTime}ms`);
      if (result.errors.length > 0) {
        console.log(` Errors:`);
        result.errors.forEach(e => console.log(` - ${e}`));
      }
      if (result.warnings.length > 0) {
        console.log(` Warnings:`);
        result.warnings.forEach(w => console.log(` - ${w}`));
      }
    }
    console.log('\n' + '='.repeat(80) + '\n');
  }

  /**
   * Save the report to a JSON file, creating the target directory if needed.
   * (Previously failed with ENOENT when the default 'eval/' dir was missing.)
   */
  async saveReport(report, filepath) {
    await fs.mkdir(path.dirname(filepath), { recursive: true });
    await fs.writeFile(filepath, JSON.stringify(report, null, 2));
    console.log(`📄 Report saved to: ${filepath}`);
  }

  /**
   * Cleanup resources (closes the browser if it was launched).
   */
  async destroy() {
    if (this.browser) {
      await this.browser.close();
      this.browser = null;
    }
  }
}

// ============================================================================
// CLI Runner
// ============================================================================

/**
 * Run the full evaluation suite and persist the report.
 *
 * @param {string} [outputPath] - Destination JSON path; defaults to
 *   `<cwd>/eval/evaluation-report-<timestamp>.json`.
 */
export async function runEvaluation(outputPath) {
  const engine = new EvaluationEngine();
  try {
    await engine.initialize();
    const report = await engine.runAllTests();
    engine.printReport(report);
    if (outputPath) {
      await engine.saveReport(report, outputPath);
    } else {
      const defaultPath = path.join(process.cwd(), 'eval', `evaluation-report-${Date.now()}.json`);
      await engine.saveReport(report, defaultPath);
    }
  } finally {
    await engine.destroy();
  }
}

// Run if executed directly.
// BUGFIX: this file is an ES module, so `require.main === module` threw a
// ReferenceError at load time when executed with `node`. Use the ESM-safe
// comparison of the invoked script path against this module's URL instead.
const isMainModule =
  process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url);
if (isMainModule) {
  runEvaluation().catch(console.error);
}
//# sourceMappingURL=evaluation.js.map

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/1AQuantum/websee-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.