Thoughtbox

cli.ts•5.17 KiB

#!/usr/bin/env npx tsx /** * CLI for SIL Benchmark Harness * SPEC-SIL-100: Benchmark Harness * * Usage: * npx tsx dgm-specs/harness/cli.ts run # Run benchmarks, compare to baseline * npx tsx dgm-specs/harness/cli.ts baseline # Establish new baseline * npx tsx dgm-specs/harness/cli.ts report # Generate comparison report * npx tsx dgm-specs/harness/cli.ts --help # Show help */ import { config } from 'dotenv'; config(); import { runAllBenchmarks, formatBenchmarkRun, TEST_CONFIGS, } from './benchmark-runner.js'; import { loadBaseline, saveBaseline, saveToHistory, compareToBaseline, formatComparison, } from './baseline.js'; const HELP_TEXT = ` SIL Benchmark Harness CLI SPEC-SIL-100: Benchmark Harness Usage: npx tsx dgm-specs/harness/cli.ts <command> [options] Commands: run Run all benchmarks, compare to baseline if exists baseline Run benchmarks and save as new baseline report Show comparison between current run and baseline list List available test configurations help Show this help message Options: --json Output results as JSON --url <url> MCP server URL (default: http://localhost:1731/mcp) Examples: # Run benchmarks and compare to baseline npx tsx dgm-specs/harness/cli.ts run # Establish new baseline npx tsx dgm-specs/harness/cli.ts baseline # Run with custom MCP URL npx tsx dgm-specs/harness/cli.ts run --url http://localhost:4000/mcp # Output as JSON for CI integration npx tsx dgm-specs/harness/cli.ts run --json `; async function main() { const args = process.argv.slice(2); const command = args[0]; const jsonOutput = args.includes('--json'); const urlIndex = args.indexOf('--url'); const mcpUrl = urlIndex !== -1 ? args[urlIndex + 1] : 'http://localhost:1731/mcp'; // Validate API key early if (!process.env.ANTHROPIC_API_KEY && command !== 'list' && command !== 'help' && command !== '--help' && command !== '-h') { console.error(` Error: ANTHROPIC_API_KEY environment variable is not set. The benchmark harness uses the Claude Agent SDK which requires an Anthropic API key. To fix this: 1. Create a .env file with: ANTHROPIC_API_KEY=your-key 2. Or set the environment variable directly `); process.exit(1); } switch (command) { case 'run': { const currentRun = await runAllBenchmarks(TEST_CONFIGS, mcpUrl); const baseline = loadBaseline(); if (jsonOutput) { if (baseline) { const comparison = compareToBaseline(currentRun, baseline); console.log(JSON.stringify({ run: currentRun, comparison }, null, 2)); process.exit(comparison.verdict === 'PASS' ? 0 : 1); } else { console.log(JSON.stringify({ run: currentRun, comparison: null }, null, 2)); } } else { console.log(formatBenchmarkRun(currentRun)); if (baseline) { const comparison = compareToBaseline(currentRun, baseline); console.log(formatComparison(comparison)); process.exit(comparison.verdict === 'PASS' ? 0 : 1); } else { console.log('\nNo baseline found. Run `npx tsx dgm-specs/harness/cli.ts baseline` to establish one.\n'); } } // Save to history saveToHistory(currentRun); break; } case 'baseline': { console.log('Establishing new baseline...\n'); const run = await runAllBenchmarks(TEST_CONFIGS, mcpUrl); if (jsonOutput) { console.log(JSON.stringify(run, null, 2)); } else { console.log(formatBenchmarkRun(run)); } saveBaseline(run); saveToHistory(run); if (!jsonOutput) { console.log(`\n✓ Baseline established: ${run.runId}`); console.log(' Future runs will compare against this baseline.\n'); } break; } case 'report': { const baseline = loadBaseline(); if (!baseline) { console.error('No baseline found. Run `baseline` command first.'); process.exit(1); } const currentRun = await runAllBenchmarks(TEST_CONFIGS, mcpUrl); const comparison = compareToBaseline(currentRun, baseline); if (jsonOutput) { console.log(JSON.stringify({ baseline, current: currentRun, comparison }, null, 2)); } else { console.log(formatBenchmarkRun(currentRun)); console.log(formatComparison(comparison)); } saveToHistory(currentRun); process.exit(comparison.verdict === 'PASS' ? 0 : 1); } case 'list': { console.log('\nAvailable Test Configurations:\n'); for (const config of TEST_CONFIGS) { console.log(` ${config.id}`); console.log(` Name: ${config.name}`); console.log(` Toolhost: ${config.toolhost}`); console.log(` Steps: ${config.steps.length}`); console.log(''); } break; } case 'help': case '--help': case '-h': case undefined: { console.log(HELP_TEXT); break; } default: { console.error(`Unknown command: ${command}`); console.log(HELP_TEXT); process.exit(1); } } } main().catch(error => { console.error('Benchmark error:', error); process.exit(1); });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/glassBead-tc/Thoughtbox'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

cli.ts•5.17 KiB