/**
* M5 Witness Test
*
* Executes all 10 acts from m5-witness-story.md
* Proves multi-tool orchestration through observable behavior
*
* Task 5.10: Uses Supabase if credentials available, otherwise falls back to SQLite
*/
import { ConversationManager } from './dist/core/conversation-manager.js';
import { ConversationStore } from './dist/core/conversation-store.js';
import { SupabaseConversationStore } from './dist/core/supabase-conversation-store.js';
// Prefix shared by every conversation id in this file; each act appends its own "-actN" suffix.
const TEST_ID = 'm5-witness-test';
/**
 * Task 5.10: choose the conversation store for this test run.
 *
 * The Supabase store is used only when both NEXT_PUBLIC_SUPABASE_URL and
 * SUPABASE_SERVICE_ROLE_KEY are set to non-empty values; otherwise a
 * ConversationStore is created with dbPath ':memory:'.
 *
 * @returns {SupabaseConversationStore|ConversationStore} A newly constructed store.
 */
function createStore() {
  const {
    NEXT_PUBLIC_SUPABASE_URL: supabaseUrl,
    SUPABASE_SERVICE_ROLE_KEY: serviceRoleKey,
  } = process.env;

  // Guard clause: without both credentials there is nothing to connect to.
  if (!supabaseUrl || !serviceRoleKey) {
    console.error('[Test] Using SQLite in-memory store (Supabase credentials not found)');
    return new ConversationStore({ dbPath: ':memory:' });
  }

  console.error('[Test] Using Supabase test instance (Task 5.10)');
  return new SupabaseConversationStore();
}
/**
 * Test helper: execute one action through the manager and verify the result.
 *
 * Logs the action, its arguments and the raw result to stderr, then fails if
 * the result is not successful or (when requested) if the JSON-serialized
 * output does not contain the expected substring.
 *
 * @param {object} manager - Object exposing `negotiate(conversationId, action, args)`.
 * @param {string} conversationId - Conversation the action is executed in.
 * @param {string} action - Action name passed to `negotiate`.
 * @param {object} args - Arguments passed to `negotiate`.
 * @param {string} [expectedOutput] - Substring that must occur in
 *   `JSON.stringify(result.output)`; the check is skipped when omitted or empty.
 * @returns {Promise<object>} The result returned by `manager.negotiate`.
 * @throws {Error} When the result is missing or unsuccessful, or when the
 *   expected substring is not found in the serialized output.
 */
async function executeAndVerify(manager, conversationId, action, args, expectedOutput) {
  console.error(`\n[Test] Executing: ${action}`);
  console.error(`[Test] Args: ${JSON.stringify(args)}`);
  const result = await manager.negotiate(conversationId, action, args);
  console.error(`[Test] Result: ${JSON.stringify(result, null, 2)}`);

  // Optional chaining so a null/undefined result is reported as a failed action
  // rather than as a TypeError on `.success`.
  if (!result?.success) {
    throw new Error(`Action failed: ${result?.error || 'Unknown error'}`);
  }

  if (expectedOutput) {
    // JSON.stringify yields undefined (not a string) when the output is
    // undefined or a function; treat that as "substring not found" so the
    // caller gets the descriptive mismatch error below.
    const serializedOutput = JSON.stringify(result.output);
    if (serializedOutput === undefined || !serializedOutput.includes(expectedOutput)) {
      throw new Error(`Expected output to contain "${expectedOutput}", got: ${serializedOutput}`);
    }
  }

  return result;
}
/**
 * Act 1: Singular Baseline (M1-M4 still work)
 *
 * Creates the store and the ConversationManager, waits for tool loading, then
 * checks on the act-1 conversation that `greet` answers, that `dangerous`
 * comes back unsuccessful with an error mentioning "should have been blocked",
 * and that `write-file` succeeds after example-tool is upgraded to level 2.
 *
 * @returns {Promise<ConversationManager>} The manager created here.
 * @throws {Error} On the first expectation that does not hold.
 */
async function act1_singularBaseline() {
  console.error('\n=== ACT 1: SINGULAR BASELINE ===');

  // Task 5.10: Use Supabase if available, otherwise SQLite
  const manager = new ConversationManager(createStore());

  // M5: Wait for tools to finish loading
  await manager.waitForToolsLoaded();

  const conversationId = `${TEST_ID}-act1`;

  // M1: Hot-reload still works
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  // M2: Dangerous action returns intentional error (tool-level safety)
  const dangerousOutcome = await manager.negotiate(conversationId, 'dangerous', {});
  if (dangerousOutcome.success) {
    throw new Error('Dangerous action should have returned error!');
  }
  const isSafetyError =
    dangerousOutcome.error && dangerousOutcome.error.includes('should have been blocked');
  if (!isSafetyError) {
    throw new Error(`Unexpected error from dangerous action: ${dangerousOutcome.error}`);
  }
  console.error('[Test] ✅ Dangerous action correctly returns safety error');

  // M4: Permission graduation still works (requires upgrade first)
  const { success: upgraded, error: upgradeError } = await manager.negotiate(
    conversationId,
    'upgrade:example-tool:level-2',
    {},
  );
  if (!upgraded) {
    throw new Error(`Permission upgrade failed: ${upgradeError}`);
  }
  console.error('[Test] ✅ Permission upgraded to level 2');

  await executeAndVerify(manager, conversationId, 'write-file', {}, 'approved and executed');

  console.error('[Act 1] ✅ PASSED - M1, M2, M4 preserved');
  return manager;
}
/**
 * Act 2: Registry Awakening (plurality visible)
 *
 * Runs `list-tools` on the act-2 conversation and requires the returned
 * `output.tools` array to hold at least two entries, including ones named
 * 'example-tool' and 'data-tool'.
 *
 * @param {object} manager - Manager forwarded to executeAndVerify.
 * @returns {Promise<void>}
 * @throws {Error} When fewer than two tools are listed or a required tool is missing.
 */
async function act2_registryAwakening(manager) {
  console.error('\n=== ACT 2: REGISTRY AWAKENING ===');
  const conversationId = `${TEST_ID}-act2`;

  const listing = await executeAndVerify(manager, conversationId, 'list-tools', {});
  const registered = listing.output.tools;

  const hasEnoughTools = Array.isArray(registered) && registered.length >= 2;
  if (!hasEnoughTools) {
    throw new Error(`Expected at least 2 tools, got ${registered?.length || 0}`);
  }

  // Sorted only so the log line is stable; membership is checked below.
  const sortedNames = registered.map((tool) => tool.name);
  sortedNames.sort();
  console.error(`[Test] Available tools: ${sortedNames.join(', ')}`);

  const requiredTools = ['example-tool', 'data-tool'];
  const allPresent = requiredTools.every((name) => sortedNames.includes(name));
  if (!allPresent) {
    throw new Error('Expected example-tool and data-tool to be registered');
  }

  console.error('[Act 2] ✅ PASSED - Multiple tools visible in registry');
}
/**
 * Act 3: Intent Disambiguation (capability routing)
 *
 * On the act-3 conversation: runs `greet`, upgrades data-tool to level 2, then
 * runs `create-resource` and expects 'created' in its output.
 *
 * @param {object} manager - Manager exposing `negotiate`.
 * @returns {Promise<void>}
 * @throws {Error} When an action fails or the upgrade is not successful.
 */
async function act3_intentDisambiguation(manager) {
  console.error('\n=== ACT 3: INTENT DISAMBIGUATION ===');
  const conversationId = `${TEST_ID}-act3`;

  // Action "greet" should route to example-tool
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  // Upgrade data-tool to level 2 for write operations
  const { success: upgraded, error: upgradeError } = await manager.negotiate(
    conversationId,
    'upgrade:data-tool:level-2',
    {},
  );
  if (!upgraded) {
    throw new Error(`Permission upgrade failed: ${upgradeError}`);
  }

  // Action "create-resource" should route to data-tool
  const resourceArgs = { name: 'test-resource', data: { value: 42 } };
  await executeAndVerify(manager, conversationId, 'create-resource', resourceArgs, 'created');

  console.error('[Act 3] ✅ PASSED - Intent router disambiguates capabilities');
}
/**
 * Act 4: Shared Context Establishment (coordination moment)
 *
 * On the act-4 conversation: upgrades data-tool to level 2, creates
 * 'coordination-resource', then reads it back and expects the stored message
 * in the output.
 *
 * @param {object} manager - Manager exposing `negotiate`.
 * @returns {Promise<void>}
 * @throws {Error} When the upgrade is not successful or an action fails.
 */
async function act4_sharedContextEstablishment(manager) {
  console.error('\n=== ACT 4: SHARED CONTEXT ESTABLISHMENT ===');
  const conversationId = `${TEST_ID}-act4`;

  // Upgrade data-tool permissions. The result is checked (as in acts 1 and 3)
  // so a refused upgrade is reported here instead of as a later write failure.
  const upgradeResult = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!upgradeResult.success) {
    throw new Error(`Permission upgrade failed: ${upgradeResult.error}`);
  }

  // data-tool creates resource in shared context
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'coordination-resource', data: { message: 'Hello from data-tool' } },
    'created',
  );

  // Verify resource exists (read operations work at level 1)
  await executeAndVerify(
    manager,
    conversationId,
    'read-resource',
    { name: 'coordination-resource' },
    'Hello from data-tool',
  );

  console.error('[Act 4] ✅ PASSED - Shared context established');
}
/**
 * Act 5: Cross-Tool Reference (coordination proof)
 *
 * On the act-5 conversation: upgrades data-tool to level 2, creates
 * 'shared-data', updates it with a second field, then reads it back and
 * requires both `data.toolA` and `data.toolB` to be present in the output.
 *
 * @param {object} manager - Manager exposing `negotiate`.
 * @returns {Promise<void>}
 * @throws {Error} When the upgrade is not successful, an action fails, or the
 *   read-back resource lacks either field.
 */
async function act5_crossToolReference(manager) {
  console.error('\n=== ACT 5: CROSS-TOOL REFERENCE ===');
  const conversationId = `${TEST_ID}-act5`;

  // Upgrade data-tool permissions. The result is checked (as in acts 1 and 3)
  // so a refused upgrade is reported here instead of as a later write failure.
  const upgradeResult = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!upgradeResult.success) {
    throw new Error(`Permission upgrade failed: ${upgradeResult.error}`);
  }

  // data-tool creates resource
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'shared-data', data: { toolA: 'created this' } },
    'created',
  );

  // data-tool updates same resource (simulating cross-tool coordination)
  await executeAndVerify(
    manager,
    conversationId,
    'update-resource',
    { name: 'shared-data', data: { toolA: 'created this', toolB: 'updated this' } },
    'updated',
  );

  // Verify both updates persisted
  const result = await executeAndVerify(
    manager,
    conversationId,
    'read-resource',
    { name: 'shared-data' },
  );

  // Optional chaining: a read result without `output.data` must produce the
  // descriptive coordination error below, not a TypeError.
  const contributions = result.output?.data;
  if (!contributions?.toolA || !contributions?.toolB) {
    throw new Error('Cross-tool coordination failed: both tools did not contribute');
  }

  console.error('[Act 5] ✅ PASSED - Cross-tool coordination through shared context');
}
/**
 * Act 6: Permission Scoping (independence proof)
 *
 * On the act-6 conversation: upgrades example-tool to level 2 and runs
 * `write-file`, upgrades data-tool to level 2 and runs `create-resource`, then
 * requires the conversation state to hold a `toolPermissions` entry for each
 * of the two tools.
 *
 * @param {object} manager - Manager exposing `negotiate` and `getConversationState`.
 * @returns {Promise<void>}
 * @throws {Error} When an upgrade is not successful, an action fails, or the
 *   per-tool permission records are missing.
 */
async function act6_permissionScoping(manager) {
  console.error('\n=== ACT 6: PERMISSION SCOPING ===');
  const conversationId = `${TEST_ID}-act6`;

  // Upgrade example-tool to level 2 for write operations. The result is checked
  // (as in acts 1 and 3) so a refused upgrade is reported at its source.
  const exampleUpgrade = await manager.negotiate(conversationId, 'upgrade:example-tool:level-2', {});
  if (!exampleUpgrade.success) {
    throw new Error(`Permission upgrade failed: ${exampleUpgrade.error}`);
  }

  // example-tool: write-file requires level 2 (write)
  await executeAndVerify(manager, conversationId, 'write-file', {}, 'approved and executed');

  // Upgrade data-tool to level 2 as well
  const dataUpgrade = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!dataUpgrade.success) {
    throw new Error(`Permission upgrade failed: ${dataUpgrade.error}`);
  }

  // data-tool: create-resource requires level 2
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'scoped-resource', data: { test: true } },
    'created',
  );

  // Verify conversation state has per-tool permissions. Optional chaining so a
  // missing state object is reported with the same descriptive error.
  const state = manager.getConversationState(conversationId);
  if (!state?.toolPermissions) {
    throw new Error('Per-tool permissions not recorded in state');
  }

  // Verify both tools have independent permission levels
  const { toolPermissions } = state;
  if (!toolPermissions['example-tool'] || !toolPermissions['data-tool']) {
    throw new Error('Both tools should have independent permission records');
  }

  console.error(`[Test] Tool permissions: ${JSON.stringify(toolPermissions)}`);
  console.error('[Act 6] ✅ PASSED - Per-tool permission scoping works');
}
/**
 * Act 7: Coordination Failure Mode (security proof)
 *
 * On the act-7 conversation: upgrades data-tool to level 2, then requires that
 * reading the resource 'nonexistent' fails and that creating 'unique-resource'
 * a second time fails.
 *
 * @param {object} manager - Manager exposing `negotiate`.
 * @returns {Promise<void>}
 * @throws {Error} When the upgrade is not successful, the first create fails,
 *   or either operation that is expected to fail succeeds.
 */
async function act7_coordinationFailureMode(manager) {
  console.error('\n=== ACT 7: COORDINATION FAILURE MODE ===');
  const conversationId = `${TEST_ID}-act7`;

  // Upgrade data-tool permissions. The result is checked (as in acts 1 and 3)
  // so a refused upgrade is not mistaken for the failures expected below.
  const upgradeResult = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!upgradeResult.success) {
    throw new Error(`Permission upgrade failed: ${upgradeResult.error}`);
  }

  // Try to access non-existent resource.
  // A flag records whether the call failed; inspecting the error message for a
  // sentinel phrase would misclassify a genuine failure containing that phrase.
  let readFailed = false;
  try {
    await executeAndVerify(
      manager,
      conversationId,
      'read-resource',
      { name: 'nonexistent' },
    );
  } catch {
    readFailed = true;
  }
  if (!readFailed) {
    throw new Error('Should have failed to read nonexistent resource!');
  }
  console.error('[Test] ✅ Resource access correctly failed for nonexistent resource');

  // Try to create duplicate resource
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'unique-resource', data: { value: 1 } },
    'created',
  );

  let duplicateFailed = false;
  try {
    await executeAndVerify(
      manager,
      conversationId,
      'create-resource',
      { name: 'unique-resource', data: { value: 2 } },
    );
  } catch {
    duplicateFailed = true;
  }
  if (!duplicateFailed) {
    throw new Error('Should have failed to create duplicate resource!');
  }
  console.error('[Test] ✅ Duplicate resource creation correctly prevented');

  console.error('[Act 7] ✅ PASSED - Coordination failure modes handled correctly');
}
/**
 * Act 8: Hot-Reload with Registry (M1+M5 proof)
 *
 * On the act-8 conversation: upgrades data-tool to level 2, runs `greet` and
 * creates 'pre-reload', calls `manager.registry.reloadTool('example-tool')`,
 * then verifies `greet` still answers and 'pre-reload' can still be read.
 *
 * @param {object} manager - Manager exposing `negotiate` and `registry.reloadTool`.
 * @returns {Promise<void>}
 * @throws {Error} When the upgrade is not successful or an action fails.
 */
async function act8_hotReloadWithRegistry(manager) {
  console.error('\n=== ACT 8: HOT-RELOAD WITH REGISTRY ===');
  const conversationId = `${TEST_ID}-act8`;

  // Upgrade data-tool permissions. The result is checked (as in acts 1 and 3)
  // so a refused upgrade is reported here instead of as a later write failure.
  const upgradeResult = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!upgradeResult.success) {
    throw new Error(`Permission upgrade failed: ${upgradeResult.error}`);
  }

  // Create conversation with both tools
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'pre-reload', data: { before: true } },
    'created',
  );

  // Simulate hot-reload by reloading example-tool
  console.error('[Test] Simulating hot-reload of example-tool...');
  await manager.registry.reloadTool('example-tool');

  // Verify both tools still work after reload
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');
  await executeAndVerify(
    manager,
    conversationId,
    'read-resource',
    { name: 'pre-reload' },
    'before',
  );

  console.error('[Act 8] ✅ PASSED - Hot-reload preserved multi-tool state');
}
/**
 * Act 9: Persistence Across Tools (M3+M5 proof)
 *
 * On the act-9 conversation: upgrades data-tool to level 2, runs
 * `create-resource` and `greet`, then requires the conversation state to hold
 * a `sharedContext` and an `intentHistory` with at least two entries naming at
 * least two distinct tools (`toolName`).
 *
 * @param {object} manager - Manager exposing `negotiate` and `getConversationState`.
 * @returns {Promise<void>}
 * @throws {Error} When the upgrade is not successful, an action fails, or the
 *   state does not meet the requirements above.
 */
async function act9_persistenceAcrossTools(manager) {
  console.error('\n=== ACT 9: PERSISTENCE ACROSS TOOLS ===');
  const conversationId = `${TEST_ID}-act9`;

  // Upgrade data-tool permissions. The result is checked (as in acts 1 and 3)
  // so a refused upgrade is reported here instead of as a later write failure.
  const upgradeResult = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!upgradeResult.success) {
    throw new Error(`Permission upgrade failed: ${upgradeResult.error}`);
  }

  // Create resources with multiple tools
  await executeAndVerify(
    manager,
    conversationId,
    'create-resource',
    { name: 'persistent-1', data: { tool: 'data-tool' } },
    'created',
  );
  await executeAndVerify(manager, conversationId, 'greet', {}, 'M2 Negotiation Ready');

  // Verify conversation state persisted with both tools. Optional chaining so
  // a missing state object is reported with the same descriptive error.
  const state = manager.getConversationState(conversationId);
  if (!state?.sharedContext) {
    throw new Error('Shared context not persisted in conversation state');
  }

  const { intentHistory } = state;
  if (!intentHistory || intentHistory.length < 2) {
    throw new Error('Intent history not recorded for both tools');
  }

  // Entries without a toolName are ignored when counting distinct tools.
  const toolsUsed = new Set(intentHistory.map((entry) => entry.toolName).filter(Boolean));
  if (toolsUsed.size < 2) {
    throw new Error('Both tools not recorded in intent history');
  }

  console.error(`[Test] Tools used: ${Array.from(toolsUsed).join(', ')}`);
  console.error('[Act 9] ✅ PASSED - Multi-tool state persisted correctly');
}
/**
 * Act 10: Emergent Capability (composition proof)
 *
 * On the act-10 conversation: upgrades data-tool to level 2, creates
 * 'resource-a', 'resource-b' and 'resource-c', runs `list-resources` and
 * requires at least three entries in `output.resources`, then reads every
 * listed resource by name.
 *
 * @param {object} manager - Manager exposing `negotiate`.
 * @returns {Promise<void>}
 * @throws {Error} When the upgrade is not successful, an action fails, or
 *   fewer than three resources are listed.
 */
async function act10_emergentCapability(manager) {
  console.error('\n=== ACT 10: EMERGENT CAPABILITY ===');
  const conversationId = `${TEST_ID}-act10`;

  // Upgrade data-tool permissions. The result is checked (as in acts 1 and 3)
  // so a refused upgrade is reported here instead of as a later write failure.
  const upgradeResult = await manager.negotiate(conversationId, 'upgrade:data-tool:level-2', {});
  if (!upgradeResult.success) {
    throw new Error(`Permission upgrade failed: ${upgradeResult.error}`);
  }

  // Complex workflow: create multiple resources, list them, then coordinate.
  // Created one at a time, in order.
  const seeds = [
    ['resource-a', 'A'],
    ['resource-b', 'B'],
    ['resource-c', 'C'],
  ];
  for (const [name, type] of seeds) {
    await executeAndVerify(
      manager,
      conversationId,
      'create-resource',
      { name, data: { type } },
      'created',
    );
  }

  // List all resources (emergent capability)
  const result = await executeAndVerify(manager, conversationId, 'list-resources', {});

  // Optional chaining: a list result without output must produce the
  // descriptive count error below, not a TypeError.
  const resources = result.output?.resources;
  if (!Array.isArray(resources) || resources.length < 3) {
    throw new Error(`Expected at least 3 resources, got ${resources?.length || 0}`);
  }
  console.error(`[Test] Resources created: ${resources.map((r) => r.name).join(', ')}`);

  // Verify emergent coordination: all resources accessible through shared context
  for (const resource of resources) {
    await executeAndVerify(
      manager,
      conversationId,
      'read-resource',
      { name: resource.name },
    );
  }

  console.error('[Act 10] ✅ PASSED - Emergent multi-tool coordination capability');
}
/**
 * Main test execution
 *
 * Runs act 1 to obtain the manager, then acts 2-10 in order with that manager.
 * Prints a summary to stderr and ends the process with exit code 0 when every
 * act resolves; on the first thrown error prints the message and stack and
 * ends the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rule = '='.repeat(60);
  console.error(rule);
  console.error('M5 WITNESS TEST - Multi-Tool Orchestration Protocol');
  console.error(rule);

  try {
    // Execute all 10 acts: act 1 builds the manager, the rest reuse it in order.
    const manager = await act1_singularBaseline();
    const laterActs = [
      act2_registryAwakening,
      act3_intentDisambiguation,
      act4_sharedContextEstablishment,
      act5_crossToolReference,
      act6_permissionScoping,
      act7_coordinationFailureMode,
      act8_hotReloadWithRegistry,
      act9_persistenceAcrossTools,
      act10_emergentCapability,
    ];
    for (const runAct of laterActs) {
      await runAct(manager);
    }

    const summaryLines = [
      `\n${rule}`,
      '✅ ALL 10 ACTS PASSED - M5 WITNESS PROTOCOL COMPLETE',
      rule,
      '\nM5 Multi-Tool Orchestration: VERIFIED ✓',
      '- Tool Registry: Dynamic discovery working',
      '- Intent Router: Capability matching working',
      '- Shared Context: Cross-tool coordination working',
      '- Scoped Permissions: Per-tool independence working',
      '- Emergent Capability: Multi-tool composition working',
    ];
    for (const line of summaryLines) {
      console.error(line);
    }
    process.exit(0);
  } catch (err) {
    console.error(`\n${rule}`);
    console.error('❌ M5 WITNESS TEST FAILED');
    console.error(rule);
    console.error(`\nError: ${err.message}`);
    console.error(err.stack);
    process.exit(1);
  }
}
// Script entry point. main() wraps the acts in its own try/catch and ends the
// process via process.exit in both outcomes, so the returned promise is not awaited here.
main();