/**
* Challenge-Hypothesis Tool E2E Test
*
* Isolated test for the challenge-hypothesis tool functionality.
* Tests hypothesis evaluation with agreement levels and expert feedback.
*/
import { spawn, ChildProcess } from 'child_process';
import { join } from 'path';
import fetch from 'node-fetch';
describe('Challenge-Hypothesis Tool E2E', () => {
// Child process handle for the server under test; assigned in beforeEach and reset to null in afterEach.
let serverProcess: ChildProcess | null = null;
// Server entry script under dist/, resolved relative to this test file's directory.
const serverPath = join(__dirname, '../../../dist/askme-server/main.js');
// Port handed to the server via --port and used to build the HTTP URLs the tests call.
const testPort = 3002; // Use different port
// Id of the challenge-hypothesis request captured from the browser event stream by the first test.
let requestId: string | null = null;
/**
 * Spawns a fresh server process for every test and waits until it reports
 * readiness on stderr.
 *
 * Rejects when the port is already in use, when spawning fails, when the
 * process exits before becoming ready, or when no readiness message is seen
 * within 10 seconds.
 */
beforeEach(async () => {
  const child = spawn('node', [serverPath, '--port', testPort.toString(), '--exit-after-command'], {
    stdio: ['pipe', 'pipe', 'pipe'],
    env: {
      ...process.env,
      ASK_ME_MCP_DEBUG: '1'
    }
  });
  serverProcess = child;

  await new Promise<void>((resolve, reject) => {
    // stderr arrives in arbitrary chunks, so markers are matched against the
    // accumulated output rather than against each chunk in isolation.
    let stderrLog = '';

    // Single exit point: stops the timer, detaches every listener, then settles.
    const settle = (error?: Error): void => {
      clearTimeout(timeout);
      child.stderr!.off('data', onStderr);
      child.off('error', onError);
      child.off('exit', onExit);
      // Keep draining stderr so debug output cannot fill the pipe and stall the server.
      child.stderr!.resume();
      if (error) {
        reject(error);
      } else {
        resolve();
      }
    };

    const onStderr = (data: { toString(): string }): void => {
      stderrLog += data.toString();
      if (stderrLog.includes('Server ready for connections')) {
        settle();
      } else if (stderrLog.includes('EADDRINUSE')) {
        settle(new Error(`Port ${testPort} already in use`));
      }
    };

    const onError = (error: Error): void => settle(error);

    // Fail fast instead of waiting out the timeout when the server dies during startup.
    const onExit = (code: number | null, signal: string | null): void => {
      settle(new Error(`Server exited before becoming ready (code ${code}, signal ${signal})`));
    };

    const timeout = setTimeout(() => {
      settle(new Error('Server failed to start within 10 seconds'));
    }, 10000);

    child.stderr!.on('data', onStderr);
    child.on('error', onError);
    child.on('exit', onExit);
  });
});
/**
 * Stops the server process started for the current test.
 *
 * Sends SIGTERM, waits up to 3 seconds for the process to exit, and escalates
 * to SIGKILL if it is still running. Returns immediately when the process has
 * already exited (the server is started with --exit-after-command).
 */
afterEach(async () => {
  const child = serverProcess;
  serverProcess = null;
  if (!child) {
    return;
  }

  // `child.killed` only records that a signal was sent, not that the process
  // ended, so liveness is derived from exitCode/signalCode instead.
  const hasExited = (): boolean => child.exitCode !== null || child.signalCode !== null;

  if (hasExited()) {
    // 'exit' has already fired and will not fire again; waiting would only burn 3 seconds.
    return;
  }

  await new Promise<void>((resolve) => {
    const forceKillTimer = setTimeout(() => {
      if (!hasExited()) {
        child.kill('SIGKILL');
      }
      resolve();
    }, 3000);

    // Register the listener before signalling so a fast exit cannot be missed.
    child.once('exit', () => {
      clearTimeout(forceKillTimer);
      resolve();
    });

    child.kill('SIGTERM');
  });
});
/**
 * Full round trip: initialize the MCP session over stdio, call the
 * challenge-hypothesis tool with five hypotheses, answer it through the HTTP
 * browser endpoints, and verify the tool result echoes the evaluations.
 */
test('should execute challenge-hypothesis tool with multiple hypotheses', async () => {
  const stdout = serverProcess!.stdout!;
  const stdin = serverProcess!.stdin!;

  /**
   * Resolves with the first JSON-RPC message on stdout whose `id` matches.
   * Output is buffered across chunks so a message split over several 'data'
   * events is still parsed; the listener is removed once the wait settles.
   */
  const waitForResponse = (id: number, timeoutMs: number, timeoutMessage: string): Promise<unknown> =>
    new Promise<unknown>((resolve, reject) => {
      let pending = '';
      const onData = (chunk: { toString(): string }): void => {
        pending += chunk.toString();
        const lines = pending.split('\n');
        // The last element is an unterminated tail; keep it for the next chunk.
        pending = lines.pop() ?? '';
        // The tail is also tried so a complete message without a trailing newline is not missed.
        for (const line of [...lines, pending]) {
          if (!line.trim()) continue;
          try {
            const message = JSON.parse(line);
            if (message.id === id) {
              clearTimeout(timer);
              stdout.off('data', onData);
              resolve(message);
              return;
            }
          } catch {
            /* not JSON (log output) or an incomplete tail */
          }
        }
      };
      const timer = setTimeout(() => {
        stdout.off('data', onData);
        reject(new Error(timeoutMessage));
      }, timeoutMs);
      stdout.on('data', onData);
    });

  // Initialize MCP connection
  const initRequest = {
    jsonrpc: '2.0',
    id: 1,
    method: 'initialize',
    params: {
      protocolVersion: '2024-11-05',
      capabilities: {
        roots: { listChanged: true },
        sampling: {}
      },
      clientInfo: {
        name: 'test-client',
        version: '1.0.0'
      }
    }
  };
  const initPromise = waitForResponse(1, 5000, 'Initialize timeout');
  stdin.write(JSON.stringify(initRequest) + '\n');
  await initPromise;

  // Call challenge-hypothesis tool with technical hypotheses
  const toolRequest = {
    jsonrpc: '2.0',
    id: 2,
    method: 'tools/call',
    params: {
      name: 'challenge-hypothesis',
      arguments: {
        title: 'Q4 Technical Architecture Predictions',
        description: 'Please evaluate these technical predictions for our Q4 development cycle. Consider current market trends, technical feasibility, and resource constraints.',
        hypotheses: [
          'Migrating to microservices architecture will reduce system latency by 30%',
          'Implementing GraphQL will decrease API response times compared to REST',
          'Moving to serverless functions will cut infrastructure costs by 50%',
          'Adopting TypeScript will reduce production bugs by 40%',
          'Using AI-powered code review will catch 80% more security vulnerabilities'
        ]
      }
    }
  };
  const responsePromise = waitForResponse(2, 30000, 'Tool call timeout');

  // Simulated expert evaluation with varied agreement levels and detailed feedback
  const humanResponse = {
    responses: [
      {
        hypothesisIndex: 0,
        agreementLevel: 'somewhat-agree', // 30% latency reduction from microservices
        comment: 'Microservices can reduce latency in some scenarios, but 30% might be optimistic. The benefit depends heavily on current monolith bottlenecks and proper service boundaries. Initial migration overhead could temporarily increase latency.'
      },
      {
        hypothesisIndex: 1,
        agreementLevel: 'neutral', // GraphQL vs REST performance
        comment: 'GraphQL performance compared to REST depends on use case. For complex queries with multiple resources, GraphQL can be faster. For simple CRUD operations, REST might be more efficient. The real benefit is developer experience and reduced over-fetching.'
      },
      {
        hypothesisIndex: 2,
        agreementLevel: 'disagree', // 50% cost reduction with serverless
        comment: 'While serverless can reduce costs for variable workloads, 50% savings is unlikely for most applications. Cold starts, vendor lock-in costs, and need for specialized monitoring tools can offset savings. More realistic expectation is 20-30% for suitable workloads.'
      },
      {
        hypothesisIndex: 3,
        agreementLevel: 'strongly-agree', // TypeScript reducing bugs by 40%
        comment: 'Strong agreement here. TypeScript\'s static typing catches many runtime errors at compile time. Studies show 10-15% of JavaScript bugs are type-related, and TypeScript can prevent most of these. The tooling and IDE support also improve code quality significantly.'
      },
      {
        hypothesisIndex: 4,
        agreementLevel: 'somewhat-agree', // AI code review catching 80% more vulnerabilities
        comment: 'AI-powered tools are improving rapidly and can catch many common security patterns. However, 80% improvement over human review seems high. More realistic expectation is 30-50% improvement in finding common vulnerabilities, while humans remain better at business logic and context-specific security issues.'
      }
    ],
    overallComment: 'These hypotheses show good technical thinking, but expectations should be calibrated. Focus on TypeScript adoption as the highest ROI item. Approach microservices and serverless with careful pilot projects first.',
    expertiseArea: 'Senior Software Architecture'
  };

  type BrowserEvent = { type?: string; data?: { id?: string; type?: string } } | null;

  // Set up browser mock for hypothesis evaluation
  const browserMockPromise = (async () => {
    await new Promise(resolve => setTimeout(resolve, 1000));
    try {
      const response = await fetch(`http://localhost:${testPort}/mcp/browser-events`);
      // node-fetch exposes the body as a Node.js Readable (no getReader()), while a
      // WHATWG fetch exposes a web ReadableStream; async iteration works for both.
      const body: unknown = response.body;
      if (body == null) {
        throw new Error('Browser event stream has no body');
      }
      const decoder = new TextDecoder();
      let buffer = '';
      // Leaving the loop via `return` closes the stream, so the SSE connection is not left open.
      for await (const chunk of body as AsyncIterable<Uint8Array | string>) {
        buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';
        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          let eventData: BrowserEvent;
          try {
            eventData = JSON.parse(line.slice(6)) as BrowserEvent;
          } catch {
            continue; // Malformed event payload; keep reading.
          }
          if (eventData?.type === 'new_request' && eventData.data?.type === 'challenge-hypothesis') {
            requestId = eventData.data.id ?? null;
            const submission = await fetch(`http://localhost:${testPort}/mcp/response`, {
              method: 'POST',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({
                requestId: requestId,
                sessionId: 'demo',
                response: humanResponse
              })
            });
            if (!submission.ok) {
              throw new Error(`Response submission failed with HTTP ${submission.status}`);
            }
            return;
          }
        }
      }
    } catch (error) {
      console.error('Browser mock error:', error);
    }
  })();

  // Execute tool call and browser simulation
  stdin.write(JSON.stringify(toolRequest) + '\n');
  // Awaiting the response together with the mock lets a 'Tool call timeout' fail
  // the test directly instead of becoming an unhandled rejection.
  const [toolResponse] = await Promise.all([
    responsePromise,
    browserMockPromise,
    new Promise(resolve => setTimeout(resolve, 2000))
  ]);

  // Validate response structure
  expect(toolResponse).toHaveProperty('result');
  const result = (toolResponse as any).result;
  expect(result).toHaveProperty('content');
  expect(Array.isArray(result.content)).toBe(true);
  expect(result.content.length).toBeGreaterThan(0);
  const content = result.content[0];
  expect(content).toHaveProperty('type', 'text');
  expect(content).toHaveProperty('text');
  expect(typeof content.text).toBe('string');
  // Validate content includes hypothesis evaluations
  expect(content.text).toContain('microservices');
  expect(content.text).toContain('GraphQL');
  expect(content.text).toContain('serverless');
  expect(content.text).toContain('TypeScript');
  expect(content.text).toContain('AI-powered');
  // Validate agreement levels are included
  expect(content.text).toContain('somewhat-agree');
  expect(content.text).toContain('neutral');
  expect(content.text).toContain('disagree');
  expect(content.text).toContain('strongly-agree');
  // Validate expert comments are preserved
  expect(content.text).toContain('optimistic');
  expect(content.text).toContain('developer experience');
  expect(content.text).toContain('vendor lock-in');
  expect(content.text).toContain('static typing');
  expect(content.text).toContain('business logic');
  // Validate overall assessment is included
  expect(content.text).toContain('highest ROI');
  expect(content.text).toContain('pilot projects');
  expect(content.text).toContain('Senior Software Architecture');
  // Check for original context preservation
  expect(content.text).toContain('Q4 Technical Architecture');
}, 35000);
/**
 * Calls challenge-hypothesis with a single hypothesis, answers the first
 * browser request that appears on the event stream, and checks that the tool
 * result contains the hypothesis, the agreement level and the comment.
 */
test('should handle single hypothesis evaluation', async () => {
  const stdout = serverProcess!.stdout!;
  const stdin = serverProcess!.stdin!;

  /**
   * Resolves with the first JSON-RPC message on stdout whose `id` matches.
   * Output is buffered across chunks so a message split over several 'data'
   * events is still parsed; the listener is removed once the wait settles.
   */
  const waitForResponse = (id: number, timeoutMs: number, timeoutMessage: string): Promise<unknown> =>
    new Promise<unknown>((resolve, reject) => {
      let pending = '';
      const onData = (chunk: { toString(): string }): void => {
        pending += chunk.toString();
        const lines = pending.split('\n');
        // The last element is an unterminated tail; keep it for the next chunk.
        pending = lines.pop() ?? '';
        // The tail is also tried so a complete message without a trailing newline is not missed.
        for (const line of [...lines, pending]) {
          if (!line.trim()) continue;
          try {
            const message = JSON.parse(line);
            if (message.id === id) {
              clearTimeout(timer);
              stdout.off('data', onData);
              resolve(message);
              return;
            }
          } catch {
            /* not JSON (log output) or an incomplete tail */
          }
        }
      };
      const timer = setTimeout(() => {
        stdout.off('data', onData);
        reject(new Error(timeoutMessage));
      }, timeoutMs);
      stdout.on('data', onData);
    });

  // Initialize
  const initRequest = {
    jsonrpc: '2.0',
    id: 1,
    method: 'initialize',
    params: {
      protocolVersion: '2024-11-05',
      capabilities: { roots: { listChanged: true }, sampling: {} },
      clientInfo: { name: 'test-client', version: '1.0.0' }
    }
  };
  // Bounded wait so a silent server fails with a descriptive error.
  const initPromise = waitForResponse(1, 5000, 'Initialize timeout');
  stdin.write(JSON.stringify(initRequest) + '\n');
  await initPromise;

  // Single hypothesis test
  const toolRequest = {
    jsonrpc: '2.0',
    id: 2,
    method: 'tools/call',
    params: {
      name: 'challenge-hypothesis',
      arguments: {
        title: 'Market Prediction',
        hypotheses: [
          'Remote work will remain the primary work model for tech companies through 2025'
        ]
      }
    }
  };
  const responsePromise = waitForResponse(2, 20000, 'Timeout');

  type BrowserEvent = { type?: string; data?: { id?: string; type?: string } } | null;

  // Simple browser mock: runs in the background and answers the first new_request event.
  // It never rejects (errors are logged), so it is intentionally not awaited.
  void (async () => {
    try {
      // Same overall delay as before (500 ms + 1000 ms) before subscribing to events.
      await new Promise(resolve => setTimeout(resolve, 1500));
      const response = await fetch(`http://localhost:${testPort}/mcp/browser-events`);
      // node-fetch exposes the body as a Node.js Readable (no getReader()), while a
      // WHATWG fetch exposes a web ReadableStream; async iteration works for both.
      const body: unknown = response.body;
      if (body == null) {
        throw new Error('Browser event stream has no body');
      }
      const decoder = new TextDecoder();
      let buffer = '';
      // Leaving the loop via `return` closes the stream, so the SSE connection is not left open.
      for await (const chunk of body as AsyncIterable<Uint8Array | string>) {
        buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';
        for (const line of lines) {
          if (!line.startsWith('data: ')) continue;
          let eventData: BrowserEvent;
          try {
            eventData = JSON.parse(line.slice(6)) as BrowserEvent;
          } catch {
            continue; // Malformed event payload; keep reading.
          }
          if (eventData?.type !== 'new_request') continue;
          const submission = await fetch(`http://localhost:${testPort}/mcp/response`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              requestId: eventData.data?.id,
              sessionId: 'demo',
              response: {
                responses: [{
                  hypothesisIndex: 0,
                  agreementLevel: 'agree',
                  comment: 'Remote work has proven effective for most tech roles. Companies that try to force return-to-office are losing talent to remote-first competitors. The trend is likely to continue.'
                }],
                overallComment: 'Strong trend toward permanent remote work model'
              }
            })
          });
          if (!submission.ok) {
            throw new Error(`Response submission failed with HTTP ${submission.status}`);
          }
          return;
        }
      }
    } catch (error) {
      console.error('Mock error:', error);
    }
  })();

  stdin.write(JSON.stringify(toolRequest) + '\n');
  const toolResponse = await responsePromise;
  expect(toolResponse).toHaveProperty('result');
  const result = (toolResponse as any).result;
  expect(result.content[0].text).toContain('Remote work');
  expect(result.content[0].text).toContain('agree');
  expect(result.content[0].text).toContain('return-to-office');
}, 25000);
/**
 * Calls challenge-hypothesis without the `hypotheses` argument and expects a
 * JSON-RPC "Invalid params" (-32602) error that mentions the missing field.
 */
test('should reject invalid hypothesis arguments', async () => {
  const stdout = serverProcess!.stdout!;
  const stdin = serverProcess!.stdin!;

  /**
   * Resolves with the first JSON-RPC message on stdout whose `id` matches.
   * Output is buffered across chunks so a message split over several 'data'
   * events is still parsed; the listener is removed once the wait settles.
   */
  const waitForResponse = (id: number, timeoutMs: number, timeoutMessage: string): Promise<unknown> =>
    new Promise<unknown>((resolve, reject) => {
      let pending = '';
      const onData = (chunk: { toString(): string }): void => {
        pending += chunk.toString();
        const lines = pending.split('\n');
        // The last element is an unterminated tail; keep it for the next chunk.
        pending = lines.pop() ?? '';
        // The tail is also tried so a complete message without a trailing newline is not missed.
        for (const line of [...lines, pending]) {
          if (!line.trim()) continue;
          try {
            const message = JSON.parse(line);
            if (message.id === id) {
              clearTimeout(timer);
              stdout.off('data', onData);
              resolve(message);
              return;
            }
          } catch {
            /* not JSON (log output) or an incomplete tail */
          }
        }
      };
      const timer = setTimeout(() => {
        stdout.off('data', onData);
        reject(new Error(timeoutMessage));
      }, timeoutMs);
      stdout.on('data', onData);
    });

  // Initialize
  const initRequest = {
    jsonrpc: '2.0',
    id: 1,
    method: 'initialize',
    params: {
      protocolVersion: '2024-11-05',
      capabilities: { roots: { listChanged: true }, sampling: {} },
      clientInfo: { name: 'test-client', version: '1.0.0' }
    }
  };
  // Bounded waits so a silent server fails with a descriptive error
  // rather than a generic Jest timeout.
  const initPromise = waitForResponse(1, 5000, 'Initialize timeout');
  stdin.write(JSON.stringify(initRequest) + '\n');
  await initPromise;

  // Invalid arguments - missing required fields
  const toolRequest = {
    jsonrpc: '2.0',
    id: 2,
    method: 'tools/call',
    params: {
      name: 'challenge-hypothesis',
      arguments: {
        title: 'Test Challenge'
        // Missing required 'hypotheses' field
      }
    }
  };
  const responsePromise = waitForResponse(2, 10000, 'Tool call timeout');
  stdin.write(JSON.stringify(toolRequest) + '\n');
  const toolResponse = await responsePromise;

  expect(toolResponse).toHaveProperty('error');
  const error = (toolResponse as any).error;
  expect(error.code).toBe(-32602); // Invalid params
  expect(error.message).toContain('hypotheses');
}, 15000);
/**
 * Calls challenge-hypothesis with an empty `hypotheses` array and expects a
 * JSON-RPC "Invalid params" (-32602) error that mentions the field.
 */
test('should handle empty hypotheses array', async () => {
  const stdout = serverProcess!.stdout!;
  const stdin = serverProcess!.stdin!;

  /**
   * Resolves with the first JSON-RPC message on stdout whose `id` matches.
   * Output is buffered across chunks so a message split over several 'data'
   * events is still parsed; the listener is removed once the wait settles.
   */
  const waitForResponse = (id: number, timeoutMs: number, timeoutMessage: string): Promise<unknown> =>
    new Promise<unknown>((resolve, reject) => {
      let pending = '';
      const onData = (chunk: { toString(): string }): void => {
        pending += chunk.toString();
        const lines = pending.split('\n');
        // The last element is an unterminated tail; keep it for the next chunk.
        pending = lines.pop() ?? '';
        // The tail is also tried so a complete message without a trailing newline is not missed.
        for (const line of [...lines, pending]) {
          if (!line.trim()) continue;
          try {
            const message = JSON.parse(line);
            if (message.id === id) {
              clearTimeout(timer);
              stdout.off('data', onData);
              resolve(message);
              return;
            }
          } catch {
            /* not JSON (log output) or an incomplete tail */
          }
        }
      };
      const timer = setTimeout(() => {
        stdout.off('data', onData);
        reject(new Error(timeoutMessage));
      }, timeoutMs);
      stdout.on('data', onData);
    });

  // Initialize
  const initRequest = {
    jsonrpc: '2.0',
    id: 1,
    method: 'initialize',
    params: {
      protocolVersion: '2024-11-05',
      capabilities: { roots: { listChanged: true }, sampling: {} },
      clientInfo: { name: 'test-client', version: '1.0.0' }
    }
  };
  // Bounded waits so a silent server fails with a descriptive error
  // rather than a generic Jest timeout.
  const initPromise = waitForResponse(1, 5000, 'Initialize timeout');
  stdin.write(JSON.stringify(initRequest) + '\n');
  await initPromise;

  // Empty hypotheses array should be rejected
  const toolRequest = {
    jsonrpc: '2.0',
    id: 2,
    method: 'tools/call',
    params: {
      name: 'challenge-hypothesis',
      arguments: {
        title: 'Empty Challenge',
        hypotheses: []
      }
    }
  };
  const responsePromise = waitForResponse(2, 10000, 'Tool call timeout');
  stdin.write(JSON.stringify(toolRequest) + '\n');
  const toolResponse = await responsePromise;

  expect(toolResponse).toHaveProperty('error');
  const error = (toolResponse as any).error;
  expect(error.code).toBe(-32602); // Invalid params
  expect(error.message).toContain('hypotheses');
}, 15000);
});