doc-bot

doc-bot
tests

run-tests.js•8.73 KiB

#!/usr/bin/env node

/**
 * Main test runner for system prompt validation
 * Can run with or without AI API access
 */

import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { AISystemPromptAnalyzer } from './system_prompt/ai-validation.test.js';
import { SystemPromptAnalyzer } from './system_prompt/logic-validation.js';
import { EXPECTED_BEHAVIORS, PERFORMANCE_TARGETS } from './fixtures/expected-behaviors.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Load test queries
const queriesPath = path.join(__dirname, 'fixtures/sample-queries.json');
const sampleQueries = JSON.parse(fs.readFileSync(queriesPath, 'utf8'));

// Maximum number of query characters echoed when printing a result line.
const MAX_QUERY_PREVIEW_LENGTH = 50;

/**
 * Divide two numbers, yielding 0 instead of NaN when the denominator is 0.
 *
 * @param {number} numerator
 * @param {number} denominator
 * @returns {number} numerator / denominator, or 0 when denominator is 0
 */
function safeRatio(numerator, denominator) {
  return denominator > 0 ? numerator / denominator : 0;
}

/**
 * Express `part / whole` as a whole-number percentage.
 *
 * @param {number} part
 * @param {number} whole
 * @returns {number} rounded percentage; 0 when `whole` is 0
 */
function toPercent(part, whole) {
  return Math.round(safeRatio(part, whole) * 100);
}

/**
 * Read the expected time from an analysis, which may expose it under either
 * `expectedTimeSeconds` or `expectedTime`.
 *
 * @param {object} analysis - Analyzer output.
 * @returns {number|undefined} the first truthy of the two fields
 */
function readExpectedTime(analysis) {
  return analysis.expectedTimeSeconds || analysis.expectedTime;
}

/**
 * Test runner class
 *
 * Runs every sample query of each category through an analyzer and checks
 * the resulting analysis against the expected behavior for that category.
 */
class SystemPromptTestRunner {
  /**
   * @param {object} [options]
   * @param {boolean} [options.useAI] - Request AI-powered analysis. Only
   *   honored when the ANTHROPIC_API_KEY environment variable is set.
   * @param {boolean} [options.verbose] - Print every result, not just failures.
   */
  constructor(options = {}) {
    // Coerce to a boolean so the API key itself is never kept in the flag.
    this.useAI = Boolean(options.useAI && process.env.ANTHROPIC_API_KEY);
    this.verbose = options.verbose || false;

    if (this.useAI) {
      this.analyzer = new AISystemPromptAnalyzer(process.env.ANTHROPIC_API_KEY);
      console.log('✅ Using AI-powered analysis with Claude Sonnet\n');
    } else {
      this.analyzer = new SystemPromptAnalyzer();
      console.log('ℹ️ Using logic-based analysis (set ANTHROPIC_API_KEY for AI testing)\n');
    }
  }

  /**
   * Run every category in sequence, then print the summary and the
   * performance metrics.
   *
   * @returns {Promise<object>} category results keyed by category name
   */
  async runAllTests() {
    console.log('='.repeat(80));
    console.log('SYSTEM PROMPT VALIDATION TEST SUITE');
    console.log('='.repeat(80));
    console.log();

    // Categories are awaited one after another so their output stays grouped.
    const results = {
      quickFixes: await this.testCategory('quickFixes', EXPECTED_BEHAVIORS.quickFix),
      projectDiscovery: await this.testCategory('projectDiscovery', EXPECTED_BEHAVIORS.projectDiscovery),
      featureImplementation: await this.testCategory('featureImplementation', EXPECTED_BEHAVIORS.featureImplementation),
      generalQuestions: await this.testCategory('generalQuestions', EXPECTED_BEHAVIORS.generalQuestion),
      edgeCases: await this.testCategory('edgeCases', EXPECTED_BEHAVIORS.edgeCase)
    };

    this.printSummary(results);
    this.checkPerformanceTargets(results);

    return results;
  }

  /**
   * Test all sample queries of one category.
   *
   * @param {string} category - Key into the sample-queries fixture.
   * @param {object} expectedBehavior - Expected behavior for the category.
   * @returns {Promise<{category: string, results: object[], passed: number, total: number, expectedBehavior: object}>}
   * @throws {TypeError} when the fixture has no query list for `category`
   */
  async testCategory(category, expectedBehavior) {
    const queries = sampleQueries[category];
    if (!Array.isArray(queries)) {
      // Fail with the category name rather than an opaque "not iterable".
      throw new TypeError(`No sample queries found for category "${category}" in ${queriesPath}`);
    }

    console.log(`\n${this.formatCategoryName(category).toUpperCase()}`);
    console.log('-'.repeat(40));
    console.log(`Expected: ${expectedBehavior.description}`);
    console.log();

    const results = [];
    for (const query of queries) {
      const result = await this.testQuery(query, expectedBehavior);
      results.push(result);

      if (this.verbose || !result.passed) {
        this.printResult(query, result);
      }
    }

    const passed = results.filter((r) => r.passed).length;
    const total = results.length;
    console.log(`\nCategory Score: ${passed}/${total} (${toPercent(passed, total)}%)`);

    return { category, results, passed, total, expectedBehavior };
  }

  /**
   * Analyze a single query and validate the analysis.
   *
   * @param {string} query
   * @param {object} expectedBehavior
   * @returns {Promise<{query: string, analysis: object, expectedBehavior: object, violations: string[], passed: boolean}>}
   */
  async testQuery(query, expectedBehavior) {
    let analysis;

    if (this.useAI) {
      // AI-powered analysis
      analysis = await this.analyzer.analyzeIntent(query);
    } else {
      // Logic-based analysis
      analysis = this.analyzer.analyzeQuery(query);
    }

    const violations = this.validateAnalysis(analysis, expectedBehavior);

    return {
      query,
      analysis,
      expectedBehavior,
      violations,
      passed: violations.length === 0
    };
  }

  /**
   * Compare an analysis with the expected behavior.
   *
   * @param {object} analysis - Analyzer output.
   * @param {object} expected - Expected behavior (maxTimeSeconds, useDocBot,
   *   maxTools and optionally strategy are read).
   * @returns {string[]} human-readable violations; empty when the analysis passes
   */
  validateAnalysis(analysis, expected) {
    const violations = [];

    // Check time efficiency
    const timeSeconds = readExpectedTime(analysis);
    if (timeSeconds > expected.maxTimeSeconds) {
      violations.push(`Too slow: ${timeSeconds}s > ${expected.maxTimeSeconds}s`);
    }

    // Check doc-bot usage. `false || undefined` evaluates to undefined, so
    // both sides are coerced to booleans; otherwise an analysis that sets
    // only one of the two fields to false would mismatch an expected false.
    const useDocBot = Boolean(analysis.shouldUseDocBot || analysis.useDocBot);
    if (Boolean(expected.useDocBot) !== useDocBot) {
      violations.push(`Doc-bot mismatch: expected ${expected.useDocBot}, got ${useDocBot}`);
    }

    // Check tool count
    const toolCount = (analysis.expectedTools || []).length;
    if (toolCount > expected.maxTools) {
      violations.push(`Too many tools: ${toolCount} > ${expected.maxTools}`);
    }

    // Check strategy (if available)
    if (analysis.strategy && expected.strategy) {
      if (analysis.strategy !== expected.strategy) {
        violations.push(`Strategy mismatch: expected ${expected.strategy}, got ${analysis.strategy}`);
      }
    }

    return violations;
  }

  /**
   * Print one query result: a status line, any violations and, in verbose
   * mode, the analysis details.
   *
   * @param {string} query
   * @param {object} result - Value returned by testQuery.
   */
  printResult(query, result) {
    const status = result.passed ? '✅' : '❌';
    // Only show an ellipsis when the query was actually truncated.
    const preview = query.length > MAX_QUERY_PREVIEW_LENGTH
      ? `${query.substring(0, MAX_QUERY_PREVIEW_LENGTH)}...`
      : query;
    console.log(`${status} "${preview}"`);

    if (!result.passed) {
      result.violations.forEach((v) => console.log(` ⚠️ ${v}`));
    }

    if (this.verbose) {
      const analysis = result.analysis;
      console.log(` Intent: ${analysis.intent || 'N/A'}`);
      console.log(` Time: ${readExpectedTime(analysis)}s`);
      console.log(` Tools: ${(analysis.expectedTools || []).join(', ') || 'none'}`);
    }
  }

  /**
   * Print overall totals followed by a per-category breakdown.
   *
   * @param {object} results - Category results keyed by category name.
   */
  printSummary(results) {
    console.log('\n' + '='.repeat(80));
    console.log('TEST SUMMARY');
    console.log('='.repeat(80));

    let totalPassed = 0;
    let totalTests = 0;

    Object.values(results).forEach((categoryResult) => {
      totalPassed += categoryResult.passed;
      totalTests += categoryResult.total;
    });

    console.log(`\nTotal Tests: ${totalTests}`);
    console.log(`Passed: ${totalPassed} (${toPercent(totalPassed, totalTests)}%)`);
    console.log(`Failed: ${totalTests - totalPassed}`);

    // Category breakdown
    console.log('\nBy Category:');
    Object.entries(results).forEach(([category, result]) => {
      const percentage = toPercent(result.passed, result.total);
      console.log(` ${this.formatCategoryName(category)}: ${result.passed}/${result.total} (${percentage}%)`);
    });
  }

  /**
   * Aggregate all analyses and report them against PERFORMANCE_TARGETS.
   *
   * @param {object} results - Category results keyed by category name.
   */
  checkPerformanceTargets(results) {
    console.log('\n' + '='.repeat(80));
    console.log('PERFORMANCE METRICS');
    console.log('='.repeat(80));

    // Collect all analyses
    const allAnalyses = [];
    Object.values(results).forEach((categoryResult) => {
      categoryResult.results.forEach((r) => allAnalyses.push(r.analysis));
    });
    const analysisCount = allAnalyses.length;

    // Fast path usage: an explicit FAST_PATH strategy or an empty tool list
    const fastPathCount = allAnalyses.filter((a) =>
      (a.strategy === 'FAST_PATH') ||
      (a.expectedTools && a.expectedTools.length === 0)
    ).length;
    const fastPathRatio = safeRatio(fastPathCount, analysisCount);

    // Doc-bot avoidance
    const noDocBotCount = allAnalyses.filter((a) =>
      !a.shouldUseDocBot && !a.useDocBot
    ).length;
    const docBotAvoidance = safeRatio(noDocBotCount, analysisCount);

    // Average response time
    const totalTime = allAnalyses.reduce((sum, a) =>
      sum + (readExpectedTime(a) || 0), 0
    );
    const avgTime = Math.round(safeRatio(totalTime, analysisCount));

    // Targets are rounded for display because e.g. 0.7 * 100 is
    // 70.00000000000001 in floating point.
    console.log('\nActual vs Target:');
    console.log(` Fast Path Usage: ${Math.round(fastPathRatio * 100)}% (target: ${Math.round(PERFORMANCE_TARGETS.fastPathUsage * 100)}%)`);
    console.log(` Doc-bot Avoidance: ${Math.round(docBotAvoidance * 100)}% (target: ${Math.round(PERFORMANCE_TARGETS.docBotAvoidance * 100)}%)`);
    console.log(` Avg Response Time: ${avgTime}s (target: ${PERFORMANCE_TARGETS.averageResponseTime}s)`);

    // Check if targets are met
    const targetsMet =
      fastPathRatio >= PERFORMANCE_TARGETS.fastPathUsage &&
      docBotAvoidance >= PERFORMANCE_TARGETS.docBotAvoidance &&
      avgTime <= PERFORMANCE_TARGETS.averageResponseTime;

    if (targetsMet) {
      console.log('\n✅ All performance targets met!');
    } else {
      console.log('\n⚠️ Some performance targets not met - consider optimizing the prompt');
    }
  }

  /**
   * Insert a space before each capital letter of a camelCase key,
   * e.g. "quickFixes" becomes "quick Fixes".
   *
   * @param {string} category
   * @returns {string}
   */
  formatCategoryName(category) {
    return category.replace(/([A-Z])/g, ' $1').trim();
  }
}

/**
 * CLI interface
 *
 * Parses the command-line flags, runs the suite and exits with status 0 when
 * every category passed completely, 1 otherwise.
 */
async function main() {
  const args = process.argv.slice(2);
  const options = {
    useAI: !args.includes('--no-ai'),
    verbose: args.includes('--verbose') || args.includes('-v')
  };

  if (args.includes('--help') || args.includes('-h')) {
    console.log('Usage: node run-tests.js [options]');
    console.log('\nOptions:');
    console.log(' --no-ai Run without AI (logic-based testing only)');
    console.log(' --verbose Show detailed output for all tests');
    console.log(' --help Show this help message');
    process.exit(0);
  }

  const runner = new SystemPromptTestRunner(options);

  try {
    const results = await runner.runAllTests();

    // Exit with error code if tests failed
    const allPassed = Object.values(results).every((r) => r.passed === r.total);
    process.exit(allPassed ? 0 : 1);
  } catch (error) {
    console.error('Test execution failed:', error.message);
    process.exit(1);
  }
}

// Export for use in other scripts
export { SystemPromptTestRunner };

// Run if executed directly
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  main().catch((error) => {
    console.error(error);
    // A rejection that escapes main() must not leave a success exit status.
    process.exitCode = 1;
  });
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/afterxleep/doc-bot'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

run-tests.js•8.73 KiB