#!/usr/bin/env node
// Comprehensive comparison test - RC5 vs Current Server
// All 13 questions from goodanswers.txt
const { spawn } = require('child_process');
const axios = require('axios');
/**
 * Runs the same list of tool calls against two servers and compares the cost
 * figure parsed from each answer:
 *   - "RC5": a child process started from /tmp/release-candidate and driven
 *     with newline-delimited JSON-RPC messages over stdin/stdout;
 *   - the "current server": HTTP endpoints on http://localhost:3000.
 */
class ComprehensiveComparison {
  constructor() {
    this.rc5Process = null;
    // Shared id counter for JSON-RPC requests sent to either server.
    this.messageId = 1;
    // Request id -> callback invoked with the matching JSON-RPC response.
    this.pendingRequests = new Map();
    this.rc5Results = {};
    this.currentResults = {};
    // Tail of the RC5 stdout stream that has not been terminated by '\n' yet.
    this.stdoutBuffer = '';
    // NOTE(review): credentials are committed in source. They are kept here
    // (once, instead of duplicated per method) to preserve behavior; consider
    // moving them to configuration outside the repository.
    this.credentials = {
      username: 'david+saola@umbrellacost.com',
      password: 'Dsamsung1!'
    };
  }

  // RC5 Methods

  /**
   * Spawns the RC5 server and waits until it is ready.
   *
   * Resolves as soon as the startup banner is seen on stderr, or after 3
   * seconds if the banner never shows up (the original fixed wait).
   *
   * @returns {Promise<void>}
   * @throws {Error} if the child process cannot be spawned.
   */
  async startRC5() {
    console.log('🚀 Starting RC5 server...');
    const child = spawn('npx', ['tsx', 'src/index.ts'], {
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: '/tmp/release-candidate'
    });
    this.rc5Process = child;
    this.stdoutBuffer = '';
    // Decode in the stream so multi-byte characters split across chunks survive.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (data) => this.consumeStdout(data));

    await new Promise((resolve, reject) => {
      const fallbackTimer = setTimeout(resolve, 3000);
      child.stderr.on('data', (data) => {
        if (data.toString().includes('Umbrella MCP Server started successfully')) {
          console.log('✅ RC5 ready');
          clearTimeout(fallbackTimer);
          resolve();
        }
      });
      // Without a listener a failed spawn is thrown as an uncaught exception.
      child.on('error', (error) => {
        clearTimeout(fallbackTimer);
        reject(new Error(`Failed to start RC5 server: ${error.message}`));
      });
    });
  }

  /**
   * Feeds a chunk of RC5 stdout into the line buffer and dispatches every
   * complete line. A message split across several chunks is reassembled
   * instead of being dropped.
   *
   * @param {Buffer|string} data - raw chunk from the child's stdout.
   */
  consumeStdout(data) {
    this.stdoutBuffer += data.toString();
    const lines = this.stdoutBuffer.split('\n');
    // The last element is whatever follows the final newline: '' or the start
    // of a message whose remainder has not arrived yet.
    this.stdoutBuffer = lines.pop();
    for (const line of lines) {
      this.dispatchLine(line);
    }
    // A complete message written without a trailing newline is still delivered
    // right away, as it was before buffering was introduced.
    if (this.stdoutBuffer.trim() && this.dispatchLine(this.stdoutBuffer)) {
      this.stdoutBuffer = '';
    }
  }

  /**
   * Parses one stdout line as JSON and hands it to handleMessage.
   *
   * @param {string} line
   * @returns {boolean} true when the line was valid JSON and was dispatched.
   */
  dispatchLine(line) {
    if (!line.trim()) {
      return false;
    }
    let message;
    try {
      message = JSON.parse(line);
    } catch (e) {
      // Not JSON (or not complete yet): deliberately ignored.
      return false;
    }
    this.handleMessage(message);
    return true;
  }

  /**
   * Resolves the pending request whose id matches the incoming message.
   * Messages without a matching pending id are ignored.
   *
   * @param {object} message - parsed JSON-RPC message.
   */
  handleMessage(message) {
    if (message === null || typeof message !== 'object') {
      return;
    }
    const deliver = this.pendingRequests.get(message.id);
    if (!deliver) {
      return;
    }
    this.pendingRequests.delete(message.id);
    deliver(message);
  }

  /**
   * Sends a JSON-RPC request to RC5 over stdin and waits for the response.
   *
   * @param {string} method - JSON-RPC method name.
   * @param {object} params - JSON-RPC params.
   * @returns {Promise<object>} the full JSON-RPC response message.
   * @throws {Error} 'Timeout' if no response arrives within 20 seconds, or
   *   the write error if the request cannot be written.
   */
  async sendRequest(method, params) {
    const id = this.messageId++;
    const request = { jsonrpc: '2.0', id, method, params };
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        if (this.pendingRequests.has(id)) {
          this.pendingRequests.delete(id);
          reject(new Error('Timeout'));
        }
      }, 20000);
      // Clearing the timer on success stops it from keeping the event loop
      // alive for 20 seconds after the last response.
      this.pendingRequests.set(id, (message) => {
        clearTimeout(timer);
        resolve(message);
      });
      try {
        this.rc5Process.stdin.write(JSON.stringify(request) + '\n');
      } catch (error) {
        clearTimeout(timer);
        this.pendingRequests.delete(id);
        reject(error);
      }
    });
  }

  /** Sends the 'initialize' request to RC5. */
  async initializeRC5() {
    await this.sendRequest('initialize', {
      protocolVersion: '2024-11-05',
      capabilities: {},
      clientInfo: { name: 'comprehensive-test', version: '1.0.0' }
    });
  }

  /** Calls the 'authenticate_user' tool on RC5 with the stored credentials. */
  async authenticateRC5() {
    await this.sendRequest('tools/call', {
      name: 'authenticate_user',
      arguments: {
        username: this.credentials.username,
        password: this.credentials.password
      }
    });
  }

  // Current Server Methods

  /**
   * Logs in to the current server.
   *
   * @returns {Promise<*>} the `bearerToken` field of the /auth response body.
   */
  async authenticateCurrentServer() {
    const loginResponse = await axios.post('http://localhost:3000/auth', {
      username: this.credentials.username,
      password: this.credentials.password
    });
    return loginResponse.data.bearerToken;
  }

  /**
   * Calls a tool on the current server through its /mcp endpoint.
   *
   * @param {string} toolName - tool name in the current server's naming.
   * @param {object} args - tool arguments.
   * @param {string} token - bearer token from authenticateCurrentServer.
   * @returns {Promise<string>} text of the first content item, or '' if absent.
   */
  async callCurrentServer(toolName, args, token) {
    const headers = {
      'Authorization': `Bearer ${token}`,
      'Content-Type': 'application/json'
    };
    const result = await axios.post('http://localhost:3000/mcp', {
      jsonrpc: '2.0',
      id: this.messageId++,
      method: 'tools/call',
      params: {
        name: toolName,
        arguments: args
      }
    }, { headers });
    return result.data.result?.content?.[0]?.text || '';
  }

  /**
   * JSON.parse that returns undefined instead of throwing.
   *
   * @param {string} text
   * @returns {*} the parsed value, or undefined when text is not valid JSON.
   */
  tryParseJson(text) {
    try {
      return JSON.parse(text);
    } catch (e) {
      return undefined;
    }
  }

  /**
   * Adds up the `total_cost` fields of parsed JSON data.
   *
   * @param {*} data - an array of records, a single record, or anything else.
   * @returns {number|null} the sum, or null when no `total_cost` is present
   *   or a value is not numeric.
   */
  sumTotalCost(data) {
    const toNumber = (value) => parseFloat(String(value).replace(/,/g, ''));
    if (Array.isArray(data)) {
      const costs = data
        .filter((item) => item != null && item.total_cost != null)
        .map((item) => toNumber(item.total_cost));
      if (costs.length === 0) {
        return null;
      }
      const sum = costs.reduce((acc, cost) => acc + cost, 0);
      return Number.isNaN(sum) ? null : sum;
    }
    if (data !== null && typeof data === 'object' && data.total_cost != null) {
      const value = toNumber(data.total_cost);
      return Number.isNaN(value) ? null : value;
    }
    return null;
  }

  /**
   * Extracts a cost figure from a tool response.
   *
   * Order of attempts:
   *   1. the whole response is JSON carrying `total_cost` fields -> their sum
   *      (otherwise the text patterns below would pick up only the first one);
   *   2. text patterns for a total amount;
   *   3. the first JSON-looking fragment embedded in the text.
   *
   * @param {string} response - response text.
   * @returns {number|null} the cost, or null when none can be determined.
   *   Never returns NaN.
   */
  parseCost(response) {
    if (typeof response !== 'string') {
      return null;
    }

    const structured = this.sumTotalCost(this.tryParseJson(response));
    if (structured !== null) {
      return structured;
    }

    // Try to find total cost in various formats
    const patterns = [
      /TOTAL.*?:\s*\$?([\d,]+\.?\d*)/i,
      /\$?([\d,]+\.?\d*)\s*(?:\(|total)/i,
      /total_cost["\s:]+(\d+\.?\d*)/i
    ];
    for (const pattern of patterns) {
      const match = response.match(pattern);
      if (match) {
        const value = parseFloat(match[1].replace(/,/g, ''));
        // `[\d,]+` can capture a lone comma; skip such matches instead of
        // reporting NaN as a cost.
        if (!Number.isNaN(value)) {
          return value;
        }
      }
    }

    // Try to parse embedded JSON and sum costs
    const embedded = response.match(/\[[\s\S]*?\]/) || response.match(/\{[\s\S]*?\}/);
    if (embedded) {
      const data = this.tryParseJson(embedded[0]);
      if (Array.isArray(data)) {
        // Records without total_cost count as 0, so an empty list yields 0.
        const sum = data.reduce((acc, item) => acc + parseFloat(item?.total_cost || 0), 0);
        return Number.isNaN(sum) ? null : sum;
      }
      if (data && data.total_cost) {
        const value = parseFloat(data.total_cost);
        return Number.isNaN(value) ? null : value;
      }
    }
    return null;
  }

  /**
   * The questions to run: RC5 tool name, arguments, and the expected value
   * shown next to the results.
   *
   * @returns {Array<{id: string, description: string, tool: string, args: object, expectedValue: (number|string)}>}
   */
  getQuestions() {
    return [
      {
        id: 'Q1',
        description: 'MSP customers list',
        tool: 'api___plain_sub_users',
        args: {},
        expectedValue: 'Multiple customers'
      },
      {
        id: 'Q2',
        description: 'Total cost',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-08-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'aws',
          accountId: '932213950603'
        },
        expectedValue: 136045.96
      },
      {
        id: 'Q3',
        description: 'Total AWS cost',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-08-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'aws',
          accountId: '932213950603'
        },
        expectedValue: 136045.96
      },
      {
        id: 'Q4',
        description: 'Total GCP cost',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-08-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'gcp',
          accountId: '932213950603'
        },
        expectedValue: 0.00
      },
      {
        id: 'Q5',
        description: 'Total Azure cost',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-08-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'azure',
          accountId: '932213950603'
        },
        expectedValue: 0.00
      },
      {
        id: 'Q6',
        description: 'Total cost per month (2 months)',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-07-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'aws',
          accountId: '932213950603'
        },
        expectedValue: 320162.54
      },
      {
        id: 'Q7',
        description: 'AWS amortized cost per month (8 months)',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-01-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'aws',
          accountId: '932213950603'
        },
        expectedValue: 1099357.88
      },
      {
        id: 'Q8',
        description: 'Total cost for ALL Azure accounts',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-08-01',
          endDate: '2025-08-27',
          periodGranLevel: 'month',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'azure'
        },
        expectedValue: 0.00
      },
      {
        id: 'Q9',
        description: 'All available accounts',
        tool: 'api___user_management_accounts',
        args: {},
        expectedValue: '20 accounts'
      },
      {
        id: 'Q10',
        description: 'AWS cost recommendations',
        tool: 'api___recommendations_report',
        args: {
          accountId: '932213950603'
        },
        expectedValue: 47239.68
      },
      {
        id: 'Q11',
        description: 'Potential savings per category',
        tool: 'api___recommendations_report',
        args: {
          accountId: '932213950603'
        },
        expectedValue: 47239.68
      },
      {
        id: 'Q12',
        description: 'Anomalies last 10 days for AWS',
        tool: 'api___anomaly_detection',
        args: {
          startDate: '2025-08-17',
          endDate: '2025-08-27',
          cloud_context: 'aws',
          accountId: '932213950603'
        },
        expectedValue: 127.65
      },
      {
        id: 'Q13',
        description: 'CloudWatch 30-day daily costs',
        tool: 'api___invoices_caui',
        args: {
          startDate: '2025-07-28',
          endDate: '2025-08-27',
          periodGranLevel: 'day',
          costType: ['cost'],
          isAmortized: true,
          cloud_context: 'aws',
          accountId: '932213950603',
          service: 'CloudWatch'
        },
        expectedValue: 5501.30
      }
    ];
  }

  /**
   * Maps an RC5 tool name to the current server's tool name.
   *
   * @param {string} tool - RC5 tool name.
   * @returns {string} the mapped name, or `tool` unchanged when unmapped.
   */
  toCurrentToolName(tool) {
    const names = new Map([
      ['api___invoices_caui', 'get_costs'],
      ['api___user_management_accounts', 'get_accounts'],
      ['api___recommendations_report', 'get_recommendations'],
      ['api___anomaly_detection', 'get_anomalies'],
      ['api___plain_sub_users', 'get_msp_customers']
    ]);
    return names.has(tool) ? names.get(tool) : tool;
  }

  /**
   * Formats a stored result for the comparison report.
   *
   * @param {{cost: (number|null), error: (string|undefined)}} entry
   * @returns {string} '$<cost>', the error message, or 'No data'.
   */
  describeResult(entry) {
    if (entry.cost !== null) {
      return `$${entry.cost.toLocaleString()}`;
    }
    return entry.error || 'No data';
  }

  /**
   * Runs every question against RC5 and then against the current server,
   * stores the outcomes in `rc5Results` / `currentResults`, and prints a
   * per-question comparison followed by a summary.
   *
   * Expects startRC5/initializeRC5/authenticateRC5 to have been called.
   * A failing question is recorded and does not stop the run; a failing login
   * to the current server rejects the returned promise.
   *
   * @returns {Promise<void>}
   */
  async runAllTests() {
    const questions = this.getQuestions();
    const total = questions.length;

    // Test RC5
    console.log('\n📊 Testing RC5...');
    for (const q of questions) {
      try {
        const reply = await this.sendRequest('tools/call', {
          name: q.tool,
          arguments: q.args
        });
        const response = reply.result?.content?.[0]?.text || '';
        const cost = this.parseCost(response);
        const entry = { cost, success: Boolean(response) };
        // Keep the JSON-RPC error text so the report can show why it failed.
        if (!response && reply.error?.message) {
          entry.error = reply.error.message;
        }
        this.rc5Results[q.id] = entry;
        console.log(` ${q.id}: ${cost !== null ? '$' + cost.toLocaleString() : 'No data'}`);
      } catch (error) {
        this.rc5Results[q.id] = { cost: null, success: false, error: error.message };
        console.log(` ${q.id}: ❌ Error`);
      }
    }

    // Test Current Server
    console.log('\n📊 Testing Current Server...');
    const token = await this.authenticateCurrentServer();
    for (const q of questions) {
      try {
        const toolName = this.toCurrentToolName(q.tool);
        const response = await this.callCurrentServer(toolName, q.args, token);
        const cost = this.parseCost(response);
        this.currentResults[q.id] = { cost, success: Boolean(response) };
        console.log(` ${q.id}: ${cost !== null ? '$' + cost.toLocaleString() : 'No data'}`);
      } catch (error) {
        this.currentResults[q.id] = { cost: null, success: false, error: error.message };
        console.log(` ${q.id}: ❌ Error`);
      }
    }

    // Compare results
    console.log('\n' + '═'.repeat(80));
    console.log('COMPREHENSIVE COMPARISON RESULTS');
    console.log('═'.repeat(80));
    let matches = 0;
    let mismatches = 0;
    let failures = 0;
    for (const q of questions) {
      const rc5 = this.rc5Results[q.id];
      const current = this.currentResults[q.id];
      const expected = typeof q.expectedValue === 'number'
        ? `$${q.expectedValue.toLocaleString()}`
        : q.expectedValue;
      console.log(`\n${q.id}: ${q.description}`);
      console.log('─'.repeat(60));
      console.log(`Expected (goodanswers.txt): ${expected}`);
      console.log(`RC5: ${this.describeResult(rc5)}`);
      console.log(`Current Server: ${this.describeResult(current)}`);

      if (rc5.cost !== null && current.cost !== null) {
        const diff = current.cost - rc5.cost;
        // Costs are floats: compare with a sub-cent tolerance.
        if (Math.abs(diff) < 0.01) {
          matches++;
          console.log(`Status: ✅ EXACT MATCH`);
        } else {
          mismatches++;
          // A relative difference is undefined against a zero baseline.
          const percentDiff = rc5.cost !== 0
            ? `${(diff / rc5.cost * 100).toFixed(2)}%`
            : 'n/a';
          console.log(`Difference: $${diff.toLocaleString()} (${percentDiff})`);
          console.log(`Status: ❌ MISMATCH`);
        }
        continue;
      }

      // Anything without two comparable costs counts as a failure.
      failures++;
      if (!rc5.success && !current.success) {
        console.log(`Status: ⚠️ Both failed`);
      } else if (!rc5.success) {
        console.log(`Status: ⚠️ RC5 failed`);
      } else if (!current.success) {
        console.log(`Status: ⚠️ Current server failed`);
      } else {
        console.log(`Status: ⚠️ No numeric cost to compare`);
      }
    }

    // Summary
    console.log('\n' + '═'.repeat(80));
    console.log('SUMMARY');
    console.log('═'.repeat(80));
    console.log(`✅ Exact matches: ${matches}/${total}`);
    console.log(`❌ Mismatches: ${mismatches}/${total}`);
    console.log(`⚠️ Failures: ${failures}/${total}`);
    const successRate = (matches / total * 100).toFixed(1);
    console.log(`\n📊 Success rate: ${successRate}%`);
    if (matches === total) {
      console.log('\n🎉 PERFECT! All questions match exactly between RC5 and current server!');
    } else {
      console.log(`\n⚠️ WARNING: Only ${matches} out of ${total} questions match. Investigation needed.`);
    }
  }

  /** Kills the RC5 child process if one was started. */
  async disconnect() {
    if (this.rc5Process) {
      this.rc5Process.kill();
    }
  }
}
/**
 * Script driver: starts RC5, initializes and authenticates against it, runs
 * the full comparison, and always disconnects afterwards.
 *
 * Errors raised by any step are logged rather than rethrown, and the process
 * exit code is set to 1 so that a failed run is visible to the calling shell.
 *
 * @returns {Promise<void>}
 */
async function runComprehensiveComparison() {
  const tester = new ComprehensiveComparison();
  try {
    console.log('🔍 COMPREHENSIVE COMPARISON TEST - RC5 vs CURRENT SERVER');
    console.log('Testing all 13 questions from goodanswers.txt');
    console.log('═'.repeat(80));
    await tester.startRC5();
    await tester.initializeRC5();
    await tester.authenticateRC5();
    await tester.runAllTests();
  } catch (error) {
    console.error('\n❌ Test failed:', error.message);
    console.error(error.stack);
    // Setting exitCode (instead of calling process.exit) lets the finally
    // block below still shut the child process down.
    process.exitCode = 1;
  } finally {
    await tester.disconnect();
  }
}
// Entry point. runComprehensiveComparison() logs errors raised inside its try
// block, but anything thrown before that block (constructing the tester) or
// from the cleanup in its finally block would otherwise be an unhandled
// rejection, so a last-resort handler is attached here.
runComprehensiveComparison().catch((error) => {
  console.error('\n❌ Unexpected failure:', error);
  process.exitCode = 1;
});