test-mcp-tools.js•13.3 kB
// test-mcp-tools.js
require('dotenv').config();
const fs = require('fs/promises');
const path = require('path');
const { format } = require('date-fns');
// Define test cases based on our test plan
const testCases = {
conduct_research: [
{
name: "Basic Query (Default Parameters)",
args: {
query: "What is quantum computing?"
}
},
{
name: "High-Cost Preference",
args: {
query: "Compare supervised and unsupervised machine learning techniques",
costPreference: "high"
}
},
{
name: "Audience Level and Output Format",
args: {
query: "Explain blockchain technology",
audienceLevel: "beginner",
outputFormat: "bullet_points"
}
},
{
name: "Without Sources",
args: {
query: "History of artificial intelligence",
includeSources: false
}
},
{
name: "Max Length Constraint",
args: {
query: "Renewable energy technologies",
maxLength: 1000
}
}
],
research_follow_up: [
{
name: "Related Follow-Up Question",
args: {
originalQuery: "", // To be filled in with result from conduct_research
followUpQuestion: "What are the potential applications of quantum computing in cryptography?"
}
},
{
name: "Unrelated Follow-Up Question",
args: {
originalQuery: "", // To be filled in with result from conduct_research
followUpQuestion: "What is the current weather on Mars?"
}
},
{
name: "High Cost Follow-Up",
args: {
originalQuery: "", // To be filled in with result from conduct_research
followUpQuestion: "How does proof of stake differ from proof of work?",
costPreference: "high"
}
}
],
get_past_research: [
{
name: "Default Parameters",
args: {
query: "quantum computing"
}
},
{
name: "Custom Limit and Similarity",
args: {
query: "machine learning",
limit: 3,
minSimilarity: 0.6
}
},
{
name: "Query With No Matches",
args: {
query: "underwater basket weaving techniques"
}
}
],
rate_research_report: [
{
name: "Basic Rating",
args: {
reportId: "", // To be filled in with result from conduct_research
rating: 5
}
},
{
name: "Rating With Comment",
args: {
reportId: "", // To be filled in with result from conduct_research
rating: 4,
comment: "Very informative but could be more detailed on gradient descent algorithms."
}
},
{
name: "Invalid Report ID",
args: {
reportId: "non-existent-id-12345",
rating: 3
}
},
{
name: "Invalid Rating Value",
args: {
reportId: "", // To be filled in with result from conduct_research
rating: 6
}
}
],
list_research_history: [
{
name: "Default Parameters",
args: {}
},
{
name: "Limited Results",
args: {
limit: 2
}
},
{
name: "Query Filter",
args: {
queryFilter: "quantum"
}
},
{
name: "Non-Matching Filter",
args: {
queryFilter: "nonexistentkeyword123456789"
}
}
],
performance_testing: [
{
name: "Complex Query with Low Cost",
args: {
query: "Provide a comprehensive analysis of the ethical implications of artificial intelligence in healthcare, including issues related to patient privacy, algorithmic bias in diagnosis, responsibility for AI-driven medical errors, and the changing role of healthcare professionals in an increasingly automated environment.",
costPreference: "low"
}
},
{
name: "Complex Query with High Cost",
args: {
query: "Provide a comprehensive analysis of the ethical implications of artificial intelligence in healthcare, including issues related to patient privacy, algorithmic bias in diagnosis, responsibility for AI-driven medical errors, and the changing role of healthcare professionals in an increasingly automated environment.",
costPreference: "high"
}
}
]
};
// Save report IDs and queries from conduct_research to use in follow-up tests
const testState = {
reportIds: [],
originalQueries: []
};
// Function to update the qa-test-results.md file with test results
async function updateTestResults(toolName, testCase, response, timeTaken, status, notes) {
try {
const resultsPath = path.join(__dirname, 'qa-test-results.md');
let content = await fs.readFile(resultsPath, 'utf8');
// Find the section for this tool and test case
const sectionRegex = new RegExp(`### Test Case [\\d.]+: ${testCase.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}[\\s\\S]*?- \\*\\*Notes:\\*\\* \\[Pending\\]`, 'g');
// Format the response for markdown
let responseStr = JSON.stringify(response, null, 2);
if (responseStr.length > 1000) {
responseStr = responseStr.substring(0, 1000) + "... [truncated for readability]";
}
// Create the updated section
const updatedSection = `### Test Case ${testCase.index}: ${testCase.name}
- **Arguments Used:**
\`\`\`json
${JSON.stringify(testCase.args, null, 2)}
\`\`\`
- **Response:**
\`\`\`
${responseStr}
\`\`\`
- **Time Taken:** ${timeTaken}
- **Status:** ${status}
- **Notes:** ${notes}`;
// Replace the section in the content
content = content.replace(sectionRegex, updatedSection);
// Write the updated content back to the file
await fs.writeFile(resultsPath, content, 'utf8');
console.log(`Updated test results for ${toolName} - ${testCase.name}`);
} catch (error) {
console.error(`Error updating test results: ${error.message}`);
}
}
// Function to create formatted date string for logging
function getTimestamp() {
return format(new Date(), 'yyyy-MM-dd HH:mm:ss');
}
// Function to use the MCP tool
async function useMcpTool(serverName, toolName, args) {
console.log(`[${getTimestamp()}] Testing ${toolName} with args:`, JSON.stringify(args, null, 2));
const startTime = Date.now();
let status = 'Success';
let notes = '';
let response;
try {
// In a real implementation, this would make a request to the MCP server
// For this example, we'll log the request that would be made
console.log(`[${getTimestamp()}] Would call use_mcp_tool with server=${serverName}, tool=${toolName}, args=${JSON.stringify(args)}`);
// Simulate a response based on the tool
if (toolName === 'conduct_research') {
// For demo purposes, generate a dummy report ID
const reportId = `report-${Date.now()}`;
testState.reportIds.push(reportId);
testState.originalQueries.push(args.query);
response = {
content: [{
type: 'text',
text: `Research complete. Results streamed. Report ID: ${reportId}`
}]
};
notes = `Generated report ID: ${reportId}`;
} else if (toolName === 'get_past_research') {
if (args.query.includes('underwater')) {
response = {
content: [{
type: 'text',
text: "No sufficiently similar past research reports found."
}]
};
} else {
response = {
content: [{
type: 'text',
text: JSON.stringify([
{
reportId: testState.reportIds[0] || 'mock-report-id-1',
originalQuery: testState.originalQueries[0] || 'What is quantum computing?',
createdAt: new Date().toISOString(),
similarityScore: 0.85,
parameters: { costPreference: 'low', audienceLevel: 'intermediate' },
reportSnippet: 'Quantum computing is a type of computation that harnesses quantum mechanical phenomena...'
}
], null, 2)
}]
};
}
} else if (toolName === 'rate_research_report') {
if (args.reportId === 'non-existent-id-12345') {
response = {
content: [{
type: 'text',
text: `Failed to record feedback. Report ID ${args.reportId} might be invalid or a database error occurred.`
}],
isError: true
};
status = 'Failed';
notes = 'Expected failure with invalid report ID';
} else if (args.rating > 5) {
response = {
content: [{
type: 'text',
text: `An unexpected error occurred while recording feedback: Rating must be between 1 and 5.`
}],
isError: true
};
status = 'Failed';
notes = 'Expected failure with invalid rating';
} else {
response = {
content: [{
type: 'text',
text: `Feedback successfully recorded for report ${args.reportId}.`
}]
};
}
} else if (toolName === 'list_research_history') {
if (args.queryFilter === 'nonexistentkeyword123456789') {
response = {
content: [{
type: 'text',
text: `No recent research reports found matching filter "${args.queryFilter}".`
}]
};
} else {
response = {
content: [{
type: 'text',
text: JSON.stringify([
{
_id: testState.reportIds[0] || 'mock-report-id-1',
originalQuery: testState.originalQueries[0] || 'What is quantum computing?',
createdAt: new Date().toISOString(),
parameters: { costPreference: 'low', audienceLevel: 'intermediate' }
}
], null, 2)
}]
};
}
} else if (toolName === 'research_follow_up') {
const reportId = `followup-report-${Date.now()}`;
testState.reportIds.push(reportId);
response = {
content: [{
type: 'text',
text: `Research complete. Results streamed. Report ID: ${reportId}`
}]
};
notes = `Generated followup report ID: ${reportId}`;
}
const endTime = Date.now();
const timeTaken = `${endTime - startTime}ms`;
// Add additional failure cases for testing
if (args.query && args.query.includes('timeout')) {
await new Promise(resolve => setTimeout(resolve, 5000)); // Simulate a 5 second delay
status = 'Timeout';
notes = 'Simulated timeout after 5 seconds';
} else if (args.query && args.query.includes('error')) {
status = 'Error';
response = {
content: [{
type: 'text',
text: 'Error conducting research: Simulated error for testing.'
}],
isError: true
};
notes = 'Simulated error for testing';
}
return {
response,
timeTaken,
status,
notes
};
} catch (error) {
const endTime = Date.now();
const timeTaken = `${endTime - startTime}ms`;
status = 'Error';
notes = `Unexpected error: ${error.message}`;
return {
response: {
content: [{
type: 'text',
text: `Error: ${error.message}`
}],
isError: true
},
timeTaken,
status,
notes
};
}
}
// Main function to run all tests
async function runTests() {
console.log(`[${getTimestamp()}] Starting OpenRouter Agents MCP Test Suite`);
// Process each tool's test cases
for (const [toolName, tests] of Object.entries(testCases)) {
console.log(`\n[${getTimestamp()}] Testing tool: ${toolName}`);
// Add index to test cases for reference
tests.forEach((test, i) => {
test.index = `${toolName === 'performance_testing' ? '6' : Object.keys(testCases).indexOf(toolName) + 1}.${i + 1}`;
});
for (const testCase of tests) {
console.log(`\n[${getTimestamp()}] Running test case: ${testCase.name}`);
// Fill in any dynamic arguments
if (toolName === 'research_follow_up' && testState.originalQueries.length > 0) {
testCase.args.originalQuery = testState.originalQueries[0];
} else if (toolName === 'rate_research_report' && testState.reportIds.length > 0 && testCase.name !== 'Invalid Report ID') {
testCase.args.reportId = testState.reportIds[0];
}
// Use the appropriate MCP tool
let actualToolName = toolName;
if (toolName === 'performance_testing') {
actualToolName = 'conduct_research';
}
const result = await useMcpTool('openrouterai-research-agents', actualToolName, testCase.args);
// Record the test results
await updateTestResults(
toolName,
testCase,
result.response,
result.timeTaken,
result.status,
result.notes
);
// Give a small delay between tests
await new Promise(resolve => setTimeout(resolve, 500));
}
}
console.log(`\n[${getTimestamp()}] All tests completed. Check qa-test-results.md for detailed results.`);
}
// Run the tests
runTests().catch(error => {
console.error(`Error running tests: ${error.message}`);
process.exit(1);
});