Agent Communication MCP Server

Overview Schema Related Servers Score Discussions

verification-gate.test.ts•16.7 KiB

/**
 * Unit tests for Agent False Success Reporting Prevention
 * Tests for mandatory verification gate in mark-complete tool
 *
 * Based on Issue #11: Agent False Success Reporting
 * Implements mandatory verification to prevent dangerous false confidence
 */

import { jest, describe, it, expect, beforeEach } from '@jest/globals';
import { markComplete } from '../../../src/tools/mark-complete.js';
import * as verification from '../../../src/core/agent-work-verifier.js';
import * as validation from '../../../src/utils/validation.js';
import * as fs from '../../../src/utils/file-system.js';
import { TaskContextManager, CompletionResult } from '../../../src/core/TaskContextManager.js';
import { ServerConfig } from '../../../src/types.js';
import { testUtils } from '../../utils/testUtils.js';

// Mock modules
jest.mock('../../../src/core/agent-work-verifier.js');
jest.mock('../../../src/utils/validation.js');
jest.mock('../../../src/core/TaskContextManager.js');

// Mock the file-system utility module using factory pattern (required for mark-complete.ts)
jest.mock('../../../src/utils/file-system.js', () => ({
  pathExists: jest.fn(),
  listDirectory: jest.fn(),
  getStats: jest.fn(),
  readFile: jest.fn()
}));

const mockVerification = verification as jest.Mocked<typeof verification>;
const mockValidation = validation as jest.Mocked<typeof validation>;
const mockFs = fs as jest.Mocked<typeof fs>;
const MockTaskContextManager = TaskContextManager as jest.MockedClass<typeof TaskContextManager>;

/** Shape of the value the mocked `verifyAgentWork` resolves with in these tests. */
interface VerificationResult {
  success: boolean;
  confidence: number;
  warnings: string[];
  evidence: {
    filesModified: number;
    testsRun: boolean;
    mcpProgress: boolean;
    timeSpent: number;
  };
  recommendation: string;
}

/**
 * Awaits `operation` and returns the message of the error it rejects with.
 *
 * Used instead of `try { ...; fail(...) } catch { ... }`: `fail` is not exported
 * by `@jest/globals`, and anything thrown inside the `try` would be swallowed by
 * the `catch` and then compared against the message assertions, hiding the real
 * cause of the failure.
 *
 * @param operation - Promise that is expected to reject.
 * @returns The rejection's message (or its string form when it is not an `Error`).
 * @throws Error('Expected verification to fail') when `operation` resolves.
 */
async function captureRejectionMessage(operation: Promise<unknown>): Promise<string> {
  try {
    await operation;
  } catch (error: unknown) {
    return error instanceof Error ? error.message : String(error);
  }
  // Thrown outside the catch so it surfaces as the test failure itself.
  throw new Error('Expected verification to fail');
}

describe('Agent Work Verification Gate', () => {
  let mockConfig: ServerConfig;
  let mockContextManager: jest.Mocked<TaskContextManager>;
  let mockCompletionResult: CompletionResult;

  beforeEach(() => {
    jest.clearAllMocks();

    mockConfig = testUtils.createMockConfig();

    // Create mock CompletionResult
    mockCompletionResult = {
      success: true,
      status: 'DONE',
      summary: 'Task completed successfully',
      completedAt: new Date('2025-01-01T12:00:00Z'),
      isError: false,
      recommendations: ['Great job!', 'Consider optimization next time']
    };

    // Setup validation mocks (critical for tests to run)
    mockValidation.validateRequiredString
      .mockImplementation((value) => value as string);

    // Setup file system mocks (required for mark-complete reconciliation logic)
    mockFs.pathExists.mockResolvedValue(true);
    mockFs.listDirectory.mockResolvedValue(['task-dir']);
    mockFs.getStats.mockResolvedValue({
      isDirectory: () => true,
      mtime: new Date('2025-01-01T12:00:00Z')
    } as fs.Stats);
    mockFs.readFile.mockResolvedValue('# Test Plan\n- [ ] **Step 1**: Pending\n- [x] **Step 2**: Complete');

    // Setup TaskContextManager mock
    mockContextManager = {
      markComplete: jest.fn<() => Promise<CompletionResult>>().mockResolvedValue(mockCompletionResult)
    } as unknown as jest.Mocked<TaskContextManager>;

    MockTaskContextManager.mockImplementation(() => mockContextManager);

    // Default successful verification mock
    mockVerification.verifyAgentWork = jest.fn<() => Promise<VerificationResult>>()
      .mockResolvedValue({
        success: true,
        confidence: 85,
        warnings: [],
        evidence: {
          filesModified: 3,
          testsRun: true,
          mcpProgress: true,
          timeSpent: 1200
        },
        recommendation: 'Work verified successfully'
      });
  });

  describe('CRITICAL: Mandatory verification gate for DONE status', () => {
    it('should BLOCK DONE completion with low verification confidence', async () => {
      // Setup low confidence verification result (simulates false reporting)
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: false,
        confidence: 25, // VERY LOW CONFIDENCE
        warnings: [
          'No PLAN.md found - progress tracking missing',
          'No file modifications detected',
          'No test execution evidence found',
          'No MCP progress updates recorded'
        ],
        evidence: {
          filesModified: 0,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 0
        },
        recommendation: 'Cannot verify work completion - insufficient evidence'
      });

      const args = {
        status: 'DONE',
        summary: '✅ All tests fixed successfully! 37 test failures resolved, 100% pass rate achieved.',
        agent: 'senior-frontend-engineer'
      };

      await expect(markComplete(mockConfig, args))
        .rejects.toThrow(/VERIFICATION FAILED.*25% confidence/);

      expect(mockVerification.verifyAgentWork)
        .toHaveBeenCalledWith(mockConfig, 'senior-frontend-engineer');
    });

    it('should ALLOW DONE completion with high verification confidence', async () => {
      // Setup high confidence verification result (real work detected)
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: true,
        confidence: 92, // HIGH CONFIDENCE
        warnings: [],
        evidence: {
          filesModified: 5,
          testsRun: true,
          mcpProgress: true,
          timeSpent: 2400
        },
        recommendation: 'Work verified with high confidence'
      });

      const args = {
        status: 'DONE',
        summary: 'Tests fixed with comprehensive verification evidence',
        agent: 'verified-agent'
      };

      // Should not throw - verification passes
      const result = await markComplete(mockConfig, args);

      expect(result.success).toBe(true);
      expect(mockVerification.verifyAgentWork)
        .toHaveBeenCalledWith(mockConfig, 'verified-agent');
    });

    it('should use 70% as default confidence threshold', async () => {
      // Test exactly at threshold
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: true,
        confidence: 70, // EXACTLY AT THRESHOLD
        warnings: ['Minor verification gaps'],
        evidence: {
          filesModified: 2,
          testsRun: true,
          mcpProgress: false,
          timeSpent: 600
        },
        recommendation: 'Minimal verification evidence - proceed with caution'
      });

      const args = {
        status: 'DONE',
        summary: 'Work completed with minimal verification',
        agent: 'threshold-agent'
      };

      // Should not throw - exactly at 70% threshold
      const result = await markComplete(mockConfig, args);
      expect(result.success).toBe(true);
    });

    it('should REJECT completion just below 70% threshold', async () => {
      // Test just below threshold
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: false,
        confidence: 69, // JUST BELOW THRESHOLD
        warnings: ['Insufficient verification evidence'],
        evidence: {
          filesModified: 1,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 300
        },
        recommendation: 'Verification failed - provide more evidence'
      });

      const args = {
        status: 'DONE',
        summary: 'Work claimed complete but insufficient evidence',
        agent: 'below-threshold-agent'
      };

      await expect(markComplete(mockConfig, args))
        .rejects.toThrow(/VERIFICATION FAILED.*69% confidence/);
    });

    it('should ALWAYS allow ERROR status regardless of verification confidence', async () => {
      // Even with zero confidence, ERROR status should be allowed
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: false,
        confidence: 0, // ZERO CONFIDENCE
        warnings: ['Complete verification failure'],
        evidence: {
          filesModified: 0,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 0
        },
        recommendation: 'No work evidence detected'
      });

      const args = {
        status: 'ERROR',
        summary: 'Task failed - unable to complete requirements',
        agent: 'error-agent'
      };

      // Should not throw - ERROR status bypasses verification gate
      const result = await markComplete(mockConfig, args);

      expect(result.success).toBe(true);
      // Verification should NOT be called for ERROR status (correct behavior)
      expect(mockVerification.verifyAgentWork)
        .not.toHaveBeenCalled();
    });
  });

  describe('Verification error handling', () => {
    it('should handle verification system failures gracefully', async () => {
      // Verification system itself fails
      mockVerification.verifyAgentWork.mockRejectedValue(
        new Error('Verification service unavailable')
      );

      const args = {
        status: 'DONE',
        summary: 'Work completed but verification unavailable',
        agent: 'verification-error-agent'
      };

      await expect(markComplete(mockConfig, args))
        .rejects.toThrow('Verification service unavailable');
    });

    it('should handle malformed verification responses', async () => {
      // Verification returns invalid data
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: true,
        confidence: NaN, // INVALID CONFIDENCE
        warnings: [],
        evidence: {
          filesModified: 0,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 0
        },
        recommendation: ''
      });

      const args = {
        status: 'DONE',
        summary: 'Work completed with invalid verification',
        agent: 'malformed-verification-agent'
      };

      await expect(markComplete(mockConfig, args))
        .rejects.toThrow(/Invalid verification confidence/);
    });
  });

  describe('Enhanced error messages for user guidance', () => {
    it('should provide detailed error message with verification warnings', async () => {
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: false,
        confidence: 35,
        warnings: [
          'No PLAN.md found - progress tracking missing',
          'No file modifications detected in git status',
          'No test execution logs found',
          'MCP progress shows 0% completion'
        ],
        evidence: {
          filesModified: 0,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 0
        },
        recommendation: 'Use report_progress tool and provide file modification evidence'
      });

      const args = {
        status: 'DONE',
        summary: 'Complex task completed with all requirements met',
        agent: 'detailed-error-agent'
      };

      const errorMessage = await captureRejectionMessage(markComplete(mockConfig, args));

      // Should include confidence score
      expect(errorMessage).toMatch(/35% confidence/);

      // Should include specific warnings
      expect(errorMessage).toMatch(/No PLAN\.md found/);
      expect(errorMessage).toMatch(/No file modifications detected/);
      expect(errorMessage).toMatch(/No test execution logs found/);

      // Should include recommendation
      expect(errorMessage).toMatch(/Use report_progress tool/);
    });

    it('should suggest ERROR status as alternative when verification fails', async () => {
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: false,
        confidence: 15,
        warnings: ['No work evidence found'],
        evidence: {
          filesModified: 0,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 0
        },
        recommendation: 'Cannot complete with DONE status. Use ERROR status or provide evidence.'
      });

      const args = {
        status: 'DONE',
        summary: 'Work allegedly completed',
        agent: 'suggestion-agent'
      };

      const errorMessage = await captureRejectionMessage(markComplete(mockConfig, args));

      // Should suggest ERROR status as alternative
      expect(errorMessage).toMatch(/Use ERROR status.*provide evidence/);
    });
  });

  describe('Performance and reliability', () => {
    it('should handle verification within reasonable time limits', async () => {
      // NaN sentinel: if the mock never runs, the elapsed-time assertion below
      // compares NaN and fails, without needing a non-null assertion.
      let verificationStartTime = Number.NaN;

      mockVerification.verifyAgentWork.mockImplementation(async () => {
        verificationStartTime = Date.now();
        // Simulate reasonable verification time (under 500ms as per spec)
        await new Promise(resolve => setTimeout(resolve, 200));

        return {
          success: true,
          confidence: 80,
          warnings: [],
          evidence: {
            filesModified: 2,
            testsRun: true,
            mcpProgress: true,
            timeSpent: 1000
          },
          recommendation: 'Work verified successfully'
        };
      });

      const args = {
        status: 'DONE',
        summary: 'Performance tested completion',
        agent: 'performance-agent'
      };

      const operationStart = Date.now();
      const result = await markComplete(mockConfig, args);
      const operationEnd = Date.now();

      expect(result.success).toBe(true);

      // Total operation time should be reasonable (under 1 second for tests)
      expect(operationEnd - operationStart).toBeLessThan(1000);

      // Verification should have taken expected time
      expect(Date.now() - verificationStartTime).toBeGreaterThan(150);
    });

    it('should handle concurrent verification operations', async () => {
      mockVerification.verifyAgentWork.mockImplementation(async (_config, agent) => {
        // Simulate different verification results for different agents
        await new Promise(resolve => setTimeout(resolve, 50));

        return {
          success: true,
          confidence: agent === 'concurrent-1' ? 85 : 75,
          warnings: [],
          evidence: {
            filesModified: agent === 'concurrent-1' ? 3 : 2,
            testsRun: true,
            mcpProgress: true,
            timeSpent: 1200
          },
          recommendation: 'Work verified successfully'
        };
      });

      const args1 = {
        status: 'DONE',
        summary: 'First concurrent completion',
        agent: 'concurrent-1'
      };

      const args2 = {
        status: 'DONE',
        summary: 'Second concurrent completion',
        agent: 'concurrent-2'
      };

      const [result1, result2] = await Promise.all([
        markComplete(mockConfig, args1),
        markComplete(mockConfig, args2)
      ]);

      expect(result1.success).toBe(true);
      expect(result2.success).toBe(true);
      expect(mockVerification.verifyAgentWork).toHaveBeenCalledTimes(2);
    });
  });

  describe('Integration with existing reconciliation system', () => {
    it('should run verification before reconciliation logic', async () => {
      // This test ensures verification gate runs first, before plan reconciliation
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: false,
        confidence: 50, // FAIL VERIFICATION
        warnings: ['Low verification confidence'],
        evidence: {
          filesModified: 1,
          testsRun: false,
          mcpProgress: false,
          timeSpent: 200
        },
        recommendation: 'Provide more evidence'
      });

      const args = {
        status: 'DONE',
        summary: 'Work completed',
        agent: 'reconciliation-agent',
        reconciliation_mode: 'auto_complete' // Should not matter - verification fails first
      };

      await expect(markComplete(mockConfig, args))
        .rejects.toThrow(/VERIFICATION FAILED/);

      // Verification should be called
      expect(mockVerification.verifyAgentWork).toHaveBeenCalled();
    });

    it('should allow reconciliation to proceed when verification passes', async () => {
      mockVerification.verifyAgentWork.mockResolvedValue({
        success: true,
        confidence: 80, // PASS VERIFICATION
        warnings: [],
        evidence: {
          filesModified: 3,
          testsRun: true,
          mcpProgress: true,
          timeSpent: 1500
        },
        recommendation: 'Work verified successfully'
      });

      const args = {
        status: 'DONE',
        summary: 'Work completed with reconciliation',
        agent: 'verified-reconciliation-agent',
        reconciliation_mode: 'reconcile',
        reconciliation_explanations: {
          'Unchecked Item': 'Completed via alternative approach'
        }
      };

      // Should not throw - verification passes, reconciliation can proceed
      const result = await markComplete(mockConfig, args);

      expect(result.success).toBe(true);
    });
  });
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jerfowler/agent-comm-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

verification-gate.test.ts•16.7 KiB