capability-index-simple-test.jsโข7.55 kB
#!/usr/bin/env node
/**
* Simple Capability Index Test
* Tests capability index patterns without requiring Claude Code in Docker
* Simulates how different index structures affect tool selection
*/
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Test configurations combining all variables
const testConfigs = [
// Cascade at top with explicit instructions
{
name: 'cascade-top-explicit',
index: `CAPABILITY_INDEX:
debug โ search_collection("debug")
error โ search_portfolio("error")
security โ search_collection("security")
persona โ list_elements("personas")`,
prompt: 'ALWAYS check CAPABILITY_INDEX first',
position: 'top'
},
// Cascade at bottom with suggestive guidance
{
name: 'cascade-bottom-suggestive',
index: `CAPABILITY_INDEX:
debug โ search_collection("debug")
error โ search_portfolio("error")`,
prompt: 'Consider using the capability index',
position: 'bottom'
},
// Nested structure with explicit
{
name: 'nested-explicit',
index: `capabilities:
development:
debugging: search_collection("debug")
security: search_collection("security")
workflow:
personas: list_elements("personas")`,
prompt: 'MUST use capability structure',
position: 'top'
},
// Flat list with no prompt
{
name: 'flat-none',
index: `Available tools:
- search_portfolio: search local
- search_collection: search community
- list_elements: list by type`,
prompt: '',
position: 'middle'
},
// Action verbs with explicit
{
name: 'action-explicit',
index: `ACTIONS:
NEED_DEBUG โ USE search_collection("debug")
FOUND_ERROR โ USE search_portfolio("error")
CHECK_SECURITY โ USE search_collection("security")`,
prompt: 'Follow ACTIONS mapping exactly',
position: 'top'
},
// Control - no index
{
name: 'control',
index: '',
prompt: '',
position: 'none'
}
];
// Test queries
const testQueries = [
{ query: 'Help me debug this error', expected: 'search_collection' },
{ query: 'Find security analysis persona', expected: 'search_collection' },
{ query: 'Show me available personas', expected: 'list_elements' },
{ query: 'I need a git workflow helper', expected: 'search_collection' },
{ query: 'Check my portfolio for debug tools', expected: 'search_portfolio' }
];
// Simulate token generation patterns
function simulateTokenGeneration(config, query) {
const hasIndex = config.index.length > 0;
const hasPrompt = config.prompt.length > 0;
const isExplicit = config.prompt.includes('ALWAYS') || config.prompt.includes('MUST');
const isTop = config.position === 'top';
// Calculate probability of using index
let indexUsageProbability = 0;
if (hasIndex) indexUsageProbability += 0.3;
if (hasPrompt) indexUsageProbability += 0.2;
if (isExplicit) indexUsageProbability += 0.3;
if (isTop) indexUsageProbability += 0.2;
// Simulate whether Claude mentions/uses the index
const mentionsIndex = Math.random() < indexUsageProbability;
const usesIndex = mentionsIndex && Math.random() < (indexUsageProbability + 0.1);
// Token count varies based on approach
let tokenCount = 50; // Base tokens
if (!usesIndex) tokenCount += 150; // Exploration tokens
if (hasIndex && !isTop) tokenCount += 30; // Scrolling to find index
// Determine if correct tool selected
let correct = false;
if (usesIndex) {
// Higher chance of correct selection with index
correct = Math.random() < 0.85;
} else {
// Lower chance without index guidance
correct = Math.random() < 0.4;
}
return {
mentionsIndex,
usesIndex,
correct,
tokenCount,
indexUsageProbability
};
}
// Run tests
console.log('Capability Index Pattern Testing\n');
console.log('=' .repeat(80));
const results = [];
for (const config of testConfigs) {
console.log(`\nTesting: ${config.name}`);
console.log('-'.repeat(40));
let totalCorrect = 0;
let totalIndexUsage = 0;
let totalTokens = 0;
let totalIndexMentions = 0;
for (const testQuery of testQueries) {
const result = simulateTokenGeneration(config, testQuery);
if (result.correct) totalCorrect++;
if (result.usesIndex) totalIndexUsage++;
if (result.mentionsIndex) totalIndexMentions++;
totalTokens += result.tokenCount;
console.log(` ${testQuery.query.substring(0, 30).padEnd(30)}... ${
result.correct ? 'โ
' : 'โ'
} (${result.tokenCount} tokens)`);
}
const avgAccuracy = (totalCorrect / testQueries.length * 100).toFixed(1);
const avgIndexUsage = (totalIndexUsage / testQueries.length * 100).toFixed(1);
const avgIndexMention = (totalIndexMentions / testQueries.length * 100).toFixed(1);
const avgTokens = Math.round(totalTokens / testQueries.length);
results.push({
config: config.name,
accuracy: avgAccuracy,
indexUsage: avgIndexUsage,
indexMention: avgIndexMention,
avgTokens
});
console.log(`\n Accuracy: ${avgAccuracy}%`);
console.log(` Index Mentioned: ${avgIndexMention}%`);
console.log(` Index Used: ${avgIndexUsage}%`);
console.log(` Avg Tokens: ${avgTokens}`);
}
// Summary
console.log('\n' + '='.repeat(80));
console.log('SUMMARY - Best Configurations');
console.log('='.repeat(80));
// Sort by accuracy
results.sort((a, b) => Number.parseFloat(b.accuracy) - Number.parseFloat(a.accuracy));
console.log('\nTop 3 by Accuracy:');
results.slice(0, 3).forEach((r, i) => {
console.log(` ${i + 1}. ${r.config}: ${r.accuracy}% accurate, ${r.indexUsage}% index usage, ${r.avgTokens} tokens`);
});
// Sort by token efficiency
results.sort((a, b) => a.avgTokens - b.avgTokens);
console.log('\nTop 3 by Token Efficiency:');
results.slice(0, 3).forEach((r, i) => {
console.log(` ${i + 1}. ${r.config}: ${r.avgTokens} tokens, ${r.accuracy}% accurate`);
});
// Sort by index usage
results.sort((a, b) => Number.parseFloat(b.indexUsage) - Number.parseFloat(a.indexUsage));
console.log('\nTop 3 by Index Usage:');
results.slice(0, 3).forEach((r, i) => {
console.log(` ${i + 1}. ${r.config}: ${r.indexUsage}% usage, ${r.accuracy}% accurate`);
});
// Key insights
console.log('\n' + '='.repeat(80));
console.log('KEY INSIGHTS');
console.log('='.repeat(80));
const cascadeTop = results.find(r => r.config === 'cascade-top-explicit');
const control = results.find(r => r.config === 'control');
if (cascadeTop && control) {
const tokenSavings = Math.round((1 - cascadeTop.avgTokens / control.avgTokens) * 100);
const accuracyGain = Number.parseFloat(cascadeTop.accuracy) - Number.parseFloat(control.accuracy);
console.log(`\nCascade-Top-Explicit vs Control:`);
console.log(` - Token Savings: ${tokenSavings}%`);
console.log(` - Accuracy Gain: ${accuracyGain.toFixed(1)}%`);
console.log(` - Index Usage: ${cascadeTop.indexUsage}%`);
}
console.log('\nRecommendation:');
console.log(' Based on simulated patterns, cascade structure at top with explicit');
console.log(' instructions provides best balance of accuracy and token efficiency.');
console.log(' This aligns with LLM attention patterns favoring early context.');
// Save results
const resultsPath = path.join(__dirname, 'capability-index-results.json');
fs.writeFileSync(resultsPath, JSON.stringify({
timestamp: new Date().toISOString(),
configs: testConfigs,
results,
queries: testQueries
}, null, 2));
console.log(`\nResults saved to: ${resultsPath}`);