MCP Tool Factory

test-m5-witness.mjs•15.6 KiB

/**
 * M5 Witness Test
 *
 * Executes all 10 acts from m5-witness-story.md
 * Proves multi-tool orchestration through observable behavior
 *
 * Task 5.10: Uses Supabase if credentials available, otherwise falls back to SQLite
 */

import { ConversationManager } from './dist/core/conversation-manager.js';
import { ConversationStore } from './dist/core/conversation-store.js';
import { SupabaseConversationStore } from './dist/core/supabase-conversation-store.js';

const TEST_ID = 'm5-witness-test';

/**
 * Task 5.10: Build the conversation store used by the test run.
 *
 * Picks the Supabase-backed store when both NEXT_PUBLIC_SUPABASE_URL and
 * SUPABASE_SERVICE_ROLE_KEY are set in the environment; otherwise uses an
 * in-memory SQLite store.
 *
 * @returns {SupabaseConversationStore|ConversationStore} The selected store.
 */
function createStore() {
  const hasSupabase = Boolean(
    process.env.NEXT_PUBLIC_SUPABASE_URL && process.env.SUPABASE_SERVICE_ROLE_KEY,
  );

  if (hasSupabase) {
    console.error('[Test] Using Supabase test instance (Task 5.10)');
    return new SupabaseConversationStore();
  }

  console.error('[Test] Using SQLite in-memory store (Supabase credentials not found)');
  return new ConversationStore({ dbPath: ':memory:' });
}

/**
 * Test helper: Execute action and verify result.
 *
 * @param {ConversationManager} manager - Manager whose `negotiate` is called.
 * @param {string} conversationId - Conversation the action runs in.
 * @param {string} action - Action name passed to `negotiate`.
 * @param {object} args - Action arguments passed to `negotiate`.
 * @param {string} [expectedOutput] - Substring that must appear in the
 *   JSON-serialized `result.output`; skipped when omitted/falsy.
 * @returns {Promise<object>} The result returned by `manager.negotiate`.
 * @throws {Error} When `result.success` is falsy or the expected substring
 *   is missing from the serialized output.
 */
async function executeAndVerify(manager, conversationId, action, args, expectedOutput) {
  console.error(`\n[Test] Executing: ${action}`);
  console.error(`[Test] Args: ${JSON.stringify(args)}`);

  const result = await manager.negotiate(conversationId, action, args);
  console.error(`[Test] Result: ${JSON.stringify(result, null, 2)}`);

  if (!result.success) {
    throw new Error(`Action failed: ${result.error || 'Unknown error'}`);
  }

  if (expectedOutput) {
    // JSON.stringify(undefined) yields undefined (not a string); fall back to
    // an empty string so a missing output reports a mismatch, not a TypeError.
    const serialized = JSON.stringify(result.output);
    const outputStr = serialized ?? '';
    if (!outputStr.includes(expectedOutput)) {
      throw new Error(`Expected output to contain "${expectedOutput}", got: ${serialized}`);
    }
  }

  return result;
}

/**
 * Test helper: Request a per-tool permission upgrade and fail loudly if the
 * upgrade is not granted, so later assertions cannot pass or fail for the
 * wrong reason.
 *
 * @param {ConversationManager} manager - Manager whose `negotiate` is called.
 * @param {string} conversationId - Conversation to upgrade within.
 * @param {string} toolName - Tool name embedded in the upgrade action.
 * @param {number} [level=2] - Level embedded in the upgrade action.
 * @returns {Promise<object>} The result returned by `manager.negotiate`.
 * @throws {Error} When the upgrade result is not successful.
 */
async function upgradePermission(manager, conversationId, toolName, level = 2) {
  const result = await manager.negotiate(
    conversationId,
    `upgrade:${toolName}:level-${level}`,
    {},
  );
  if (!result.success) {
    throw new Error(`Permission upgrade failed: ${result.error}`);
  }
  return result;
}

/**
 * Test helper: Run an action that is expected to fail.
 *
 * Any error raised by `executeAndVerify` counts as the expected failure and
 * is logged rather than discarded. If the action succeeds instead, an error
 * with `failureMessage` is thrown from outside the try block, so it can never
 * be mistaken for the expected failure.
 *
 * @param {ConversationManager} manager - Manager whose `negotiate` is called.
 * @param {string} conversationId - Conversation the action runs in.
 * @param {string} action - Action name.
 * @param {object} args - Action arguments.
 * @param {string} failureMessage - Error message used when the action
 *   unexpectedly succeeds.
 * @returns {Promise<void>}
 * @throws {Error} When the action succeeds.
 */
async function expectFailure(manager, conversationId, action, args, failureMessage) {
  try {
    await executeAndVerify(manager, conversationId, action, args);
  } catch (err) {
    console.error(`[Test] Expected failure observed: ${err.message}`);
    return;
  }
  throw new Error(failureMessage);
}

/**
 * Act 1: Singular Baseline (M1-M4 still work)
 *
 * Creates the store and manager used by every later act.
 *
 * @returns {Promise<ConversationManager>} The manager shared by later acts.
 */
async function act1_singularBaseline() {
  console.error('\n=== ACT 1: SINGULAR BASELINE ===');

  // Task 5.10: Use Supabase if available, otherwise SQLite
  const store = createStore();
  const manager = new ConversationManager(store);

  // M5: Wait for tools to finish loading
  await manager.waitForToolsLoaded();

  const conversationId = `${TEST_ID}-act1`;

  // M1: Hot-reload still works
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  // M2: Dangerous action returns intentional error (tool-level safety)
  const dangerousResult = await manager.negotiate(conversationId, 'dangerous', {});
  if (dangerousResult.success) {
    throw new Error('Dangerous action should have returned error!');
  }
  if (!dangerousResult.error || !dangerousResult.error.includes('should have been blocked')) {
    throw new Error(`Unexpected error from dangerous action: ${dangerousResult.error}`);
  }
  console.error('[Test] ✅ Dangerous action correctly returns safety error');

  // M4: Permission graduation still works (requires upgrade first)
  await upgradePermission(manager, conversationId, 'example-tool');
  console.error('[Test] ✅ Permission upgraded to level 2');

  await executeAndVerify(manager, conversationId, 'write-file', {}, 'approved and executed');

  console.error('[Act 1] ✅ PASSED - M1, M2, M4 preserved');
  return manager;
}

/**
 * Act 2: Registry Awakening (plurality visible)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act2_registryAwakening(manager) {
  console.error('\n=== ACT 2: REGISTRY AWAKENING ===');

  const conversationId = `${TEST_ID}-act2`;

  const result = await executeAndVerify(manager, conversationId, 'list-tools', {});

  const tools = result.output?.tools;
  if (!Array.isArray(tools) || tools.length < 2) {
    throw new Error(`Expected at least 2 tools, got ${tools?.length ?? 0}`);
  }

  const toolNames = tools.map((tool) => tool.name).sort();
  console.error(`[Test] Available tools: ${toolNames.join(', ')}`);

  if (!toolNames.includes('example-tool') || !toolNames.includes('data-tool')) {
    throw new Error('Expected example-tool and data-tool to be registered');
  }

  console.error('[Act 2] ✅ PASSED - Multiple tools visible in registry');
}

/**
 * Act 3: Intent Disambiguation (capability routing)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act3_intentDisambiguation(manager) {
  console.error('\n=== ACT 3: INTENT DISAMBIGUATION ===');

  const conversationId = `${TEST_ID}-act3`;

  // Action "greet" should route to example-tool
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  // Upgrade data-tool to level 2 for write operations
  await upgradePermission(manager, conversationId, 'data-tool');

  // Action "create-resource" should route to data-tool
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'test-resource', data: { value: 42 } },
    'created',
  );

  console.error('[Act 3] ✅ PASSED - Intent router disambiguates capabilities');
}

/**
 * Act 4: Shared Context Establishment (coordination moment)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act4_sharedContextEstablishment(manager) {
  console.error('\n=== ACT 4: SHARED CONTEXT ESTABLISHMENT ===');

  const conversationId = `${TEST_ID}-act4`;

  // Upgrade data-tool permissions
  await upgradePermission(manager, conversationId, 'data-tool');

  // data-tool creates resource in shared context
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'coordination-resource', data: { message: 'Hello from data-tool' } },
    'created',
  );

  // Verify resource exists (read operations work at level 1)
  await executeAndVerify(
    manager,
    conversationId,
    'read-resource',
    { name: 'coordination-resource' },
    'Hello from data-tool',
  );

  console.error('[Act 4] ✅ PASSED - Shared context established');
}

/**
 * Act 5: Cross-Tool Reference (coordination proof)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act5_crossToolReference(manager) {
  console.error('\n=== ACT 5: CROSS-TOOL REFERENCE ===');

  const conversationId = `${TEST_ID}-act5`;

  // Upgrade data-tool permissions
  await upgradePermission(manager, conversationId, 'data-tool');

  // data-tool creates resource
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'shared-data', data: { toolA: 'created this' } },
    'created',
  );

  // data-tool updates same resource (simulating cross-tool coordination)
  await executeAndVerify(
    manager,
    conversationId,
    'update-resource',
    { name: 'shared-data', data: { toolA: 'created this', toolB: 'updated this' } },
    'updated',
  );

  // Verify both updates persisted
  const result = await executeAndVerify(manager, conversationId, 'read-resource', {
    name: 'shared-data',
  });

  const data = result.output?.data;
  if (!data?.toolA || !data?.toolB) {
    throw new Error('Cross-tool coordination failed: both tools did not contribute');
  }

  console.error('[Act 5] ✅ PASSED - Cross-tool coordination through shared context');
}

/**
 * Act 6: Permission Scoping (independence proof)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act6_permissionScoping(manager) {
  console.error('\n=== ACT 6: PERMISSION SCOPING ===');

  const conversationId = `${TEST_ID}-act6`;

  // Upgrade example-tool to level 2 for write operations
  await upgradePermission(manager, conversationId, 'example-tool');

  // example-tool: write-file requires level 2 (write)
  await executeAndVerify(manager, conversationId, 'write-file', {}, 'approved and executed');

  // Upgrade data-tool to level 2 as well
  await upgradePermission(manager, conversationId, 'data-tool');

  // data-tool: create-resource requires level 2
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'scoped-resource', data: { test: true } },
    'created',
  );

  // Verify conversation state has per-tool permissions
  const state = manager.getConversationState(conversationId);
  if (!state?.toolPermissions) {
    throw new Error('Per-tool permissions not recorded in state');
  }

  // Verify both tools have independent permission levels
  if (!state.toolPermissions['example-tool'] || !state.toolPermissions['data-tool']) {
    throw new Error('Both tools should have independent permission records');
  }

  console.error(`[Test] Tool permissions: ${JSON.stringify(state.toolPermissions)}`);
  console.error('[Act 6] ✅ PASSED - Per-tool permission scoping works');
}

/**
 * Act 7: Coordination Failure Mode (security proof)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act7_coordinationFailureMode(manager) {
  console.error('\n=== ACT 7: COORDINATION FAILURE MODE ===');

  const conversationId = `${TEST_ID}-act7`;

  // Upgrade data-tool permissions; checked so the failures below cannot be
  // caused by a missing permission instead of the condition under test.
  await upgradePermission(manager, conversationId, 'data-tool');

  // Try to access non-existent resource
  await expectFailure(
    manager,
    conversationId,
    'read-resource',
    { name: 'nonexistent' },
    'Should have failed to read nonexistent resource!',
  );
  console.error('[Test] ✅ Resource access correctly failed for nonexistent resource');

  // Try to create duplicate resource
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'unique-resource', data: { value: 1 } },
    'created',
  );

  await expectFailure(
    manager,
    conversationId,
    'create-resource',
    { name: 'unique-resource', data: { value: 2 } },
    'Should have failed to create duplicate resource!',
  );
  console.error('[Test] ✅ Duplicate resource creation correctly prevented');

  console.error('[Act 7] ✅ PASSED - Coordination failure modes handled correctly');
}

/**
 * Act 8: Hot-Reload with Registry (M1+M5 proof)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act8_hotReloadWithRegistry(manager) {
  console.error('\n=== ACT 8: HOT-RELOAD WITH REGISTRY ===');

  const conversationId = `${TEST_ID}-act8`;

  // Upgrade data-tool permissions
  await upgradePermission(manager, conversationId, 'data-tool');

  // Create conversation with both tools
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'pre-reload', data: { before: true } },
    'created',
  );

  // Simulate hot-reload by reloading example-tool
  console.error('[Test] Simulating hot-reload of example-tool...');
  await manager.registry.reloadTool('example-tool');

  // Verify both tools still work after reload
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  await executeAndVerify(
    manager,
    conversationId,
    'read-resource',
    { name: 'pre-reload' },
    'before',
  );

  console.error('[Act 8] ✅ PASSED - Hot-reload preserved multi-tool state');
}

/**
 * Act 9: Persistence Across Tools (M3+M5 proof)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act9_persistenceAcrossTools(manager) {
  console.error('\n=== ACT 9: PERSISTENCE ACROSS TOOLS ===');

  const conversationId = `${TEST_ID}-act9`;

  // Upgrade data-tool permissions
  await upgradePermission(manager, conversationId, 'data-tool');

  // Create resources with multiple tools
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'persistent-1', data: { tool: 'data-tool' } },
    'created',
  );

  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  // Verify conversation state persisted with both tools
  const state = manager.getConversationState(conversationId);
  if (!state?.sharedContext) {
    throw new Error('Shared context not persisted in conversation state');
  }

  if (!state.intentHistory || state.intentHistory.length < 2) {
    throw new Error('Intent history not recorded for both tools');
  }

  const toolsUsed = new Set(state.intentHistory.map((entry) => entry.toolName).filter(Boolean));
  if (toolsUsed.size < 2) {
    throw new Error('Both tools not recorded in intent history');
  }

  console.error(`[Test] Tools used: ${Array.from(toolsUsed).join(', ')}`);
  console.error('[Act 9] ✅ PASSED - Multi-tool state persisted correctly');
}

/**
 * Act 10: Emergent Capability (composition proof)
 *
 * @param {ConversationManager} manager - Manager created in act 1.
 */
async function act10_emergentCapability(manager) {
  console.error('\n=== ACT 10: EMERGENT CAPABILITY ===');

  const conversationId = `${TEST_ID}-act10`;

  // Upgrade data-tool permissions
  await upgradePermission(manager, conversationId, 'data-tool');

  // Complex workflow: create multiple resources, list them, then coordinate.
  // Created sequentially, in the same order as the original script.
  const seeds = [
    { name: 'resource-a', data: { type: 'A' } },
    { name: 'resource-b', data: { type: 'B' } },
    { name: 'resource-c', data: { type: 'C' } },
  ];
  for (const seed of seeds) {
    await executeAndVerify(manager, conversationId, 'create-resource', seed, 'created');
  }

  // List all resources (emergent capability)
  const result = await executeAndVerify(manager, conversationId, 'list-resources', {});

  const resources = result.output?.resources;
  if (!Array.isArray(resources) || resources.length < 3) {
    throw new Error(`Expected at least 3 resources, got ${resources?.length ?? 0}`);
  }

  console.error(`[Test] Resources created: ${resources.map((r) => r.name).join(', ')}`);

  // Verify emergent coordination: all resources accessible through shared context
  for (const resource of resources) {
    await executeAndVerify(manager, conversationId, 'read-resource', { name: resource.name });
  }

  console.error('[Act 10] ✅ PASSED - Emergent multi-tool coordination capability');
}

/**
 * Main test execution.
 *
 * Runs the ten acts in order, sharing the manager returned by act 1.
 * Exits the process with code 0 when every act passes and code 1 on the
 * first error, after printing its message and stack.
 */
async function main() {
  console.error('='.repeat(60));
  console.error('M5 WITNESS TEST - Multi-Tool Orchestration Protocol');
  console.error('='.repeat(60));

  try {
    // Execute all 10 acts
    const manager = await act1_singularBaseline();
    await act2_registryAwakening(manager);
    await act3_intentDisambiguation(manager);
    await act4_sharedContextEstablishment(manager);
    await act5_crossToolReference(manager);
    await act6_permissionScoping(manager);
    await act7_coordinationFailureMode(manager);
    await act8_hotReloadWithRegistry(manager);
    await act9_persistenceAcrossTools(manager);
    await act10_emergentCapability(manager);

    console.error('\n' + '='.repeat(60));
    console.error('✅ ALL 10 ACTS PASSED - M5 WITNESS PROTOCOL COMPLETE');
    console.error('='.repeat(60));
    console.error('\nM5 Multi-Tool Orchestration: VERIFIED ✓');
    console.error('- Tool Registry: Dynamic discovery working');
    console.error('- Intent Router: Capability matching working');
    console.error('- Shared Context: Cross-tool coordination working');
    console.error('- Scoped Permissions: Per-tool independence working');
    console.error('- Emergent Capability: Multi-tool composition working');

    process.exit(0);
  } catch (err) {
    console.error('\n' + '='.repeat(60));
    console.error('❌ M5 WITNESS TEST FAILED');
    console.error('='.repeat(60));
    console.error(`\nError: ${err.message}`);
    console.error(err.stack);
    process.exit(1);
  }
}

main();

Loading blob content...

Latest Blog Posts

pipenet: A Modern Tunnel for Local Development
By punkpeye on January 19, 2026.
open source
Don't Use Large Strings as Cache Keys
By punkpeye on January 11, 2026.
markdown
node-js
cache
What are Claude Skills?
By punkpeye on January 10, 2026.
mcp
skills

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tarunjain15/mcp-tool-factory'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test-m5-witness.mjs•15.6 KiB