#!/usr/bin/env node
const axios = require('axios');
const https = require('https');
const fs = require('fs');
const path = require('path');
// Shared HTTP client used for every request made by this script.
// NOTE: `rejectUnauthorized: false` turns off TLS certificate verification,
// so self-signed endpoints are accepted; requests time out after 30000 ms.
const axiosInstance = axios.create({
  timeout: 30000,
  httpsAgent: new https.Agent({
    rejectUnauthorized: false,
  }),
});
// Command-line handling: three positional arguments are mandatory
// (<username> <password> <questions-file>); the tunnel URL and the output
// file are optional and fall back to defaults when missing or empty.
const args = process.argv.slice(2);
if (args.length < 3) {
  const usageLines = [
    'Usage: node generic-baseline-extractor.cjs <username> <password> <questions-file> [tunnel-url] [output-file]',
    'Example: node generic-baseline-extractor.cjs user@example.com password123 questions.txt https://tunnel.trycloudflare.com output.txt',
  ];
  for (const usageLine of usageLines) {
    console.error(usageLine);
  }
  process.exit(1);
}
const [USERNAME, PASSWORD, QUESTIONS_FILE] = args;
const MCP_BASE = args[3] || 'https://localhost:8787';
const OUTPUT_FILE = args[4] || 'baseline-values.txt';
// Load the question list: one question per line, blank lines dropped.
// A relative QUESTIONS_FILE is resolved against this script's directory
// (__dirname). Any failure is reported and ends the process with exit code 1.
let questions;
try {
  let resolvedPath = QUESTIONS_FILE;
  if (!path.isAbsolute(resolvedPath)) {
    resolvedPath = path.join(__dirname, resolvedPath);
  }
  const rawLines = fs.readFileSync(resolvedPath, 'utf8').split('\n');
  questions = rawLines.filter((entry) => entry.trim().length > 0);
} catch (error) {
  console.error(`Error reading questions file: ${error.message}`);
  process.exit(1);
}
/**
 * Scans free-form text for integer and decimal literals (with an optional
 * leading minus sign) and returns them as numbers, in order of appearance.
 * Zero values are left out of the result.
 *
 * @param {string} text - Text to scan.
 * @returns {number[]} Non-zero numbers found in the text; empty if none.
 */
function extractAllNumericValues(text) {
  const tokens = text.match(/-?\d+(?:\.\d+)?/g) || [];
  return tokens
    .map((token) => parseFloat(token))
    .filter((parsed) => parsed !== 0 && !isNaN(parsed));
}
/**
 * Recursively walks a parsed JSON value and gathers the numbers it contains.
 *
 * - numbers are collected unless they are 0 or NaN;
 * - strings are scanned with extractAllNumericValues;
 * - arrays and plain objects are traversed depth-first, in order;
 * - null, undefined and every other type contribute nothing.
 *
 * @param {*} obj - Value to inspect.
 * @param {number[]} [values=[]] - Accumulator; it is appended to in place.
 * @returns {number[]} The same accumulator array that was passed in.
 */
function extractValuesFromJSON(obj, values = []) {
  if (obj == null) {
    return values;
  }

  switch (typeof obj) {
    case 'number':
      if (obj !== 0 && !isNaN(obj)) {
        values.push(obj);
      }
      break;
    case 'string':
      values.push(...extractAllNumericValues(obj));
      break;
    case 'object': {
      const children = Array.isArray(obj) ? obj : Object.values(obj);
      for (const child of children) {
        extractValuesFromJSON(child, values);
      }
      break;
    }
    default:
      break;
  }

  return values;
}
/**
 * Converts an axios response body into text for line- and regex-based scanning.
 *
 * axios may hand back an already-parsed object when the body is JSON; calling
 * toString() on that yields "[object Object]", so such bodies are
 * re-serialised with JSON.stringify instead.
 *
 * @param {*} data - The `response.data` value.
 * @returns {string} Textual form of the body ('' when there is no body).
 */
function responseBodyToText(data) {
  if (typeof data === 'string') {
    return data;
  }
  if (data === null || data === undefined) {
    return '';
  }
  if (Buffer.isBuffer(data)) {
    return data.toString('utf8');
  }
  const serialised = JSON.stringify(data);
  return typeof serialised === 'string' ? serialised : '';
}

/**
 * Builds the path of the companion "-simple" file by inserting "-simple"
 * before the extension of the main output file ("out.txt" -> "out-simple.txt",
 * "out.log" -> "out-simple.log", "out" -> "out-simple").
 *
 * Working on the parsed path guarantees the result always differs from the
 * input, so the simple list can never overwrite the detailed report.
 *
 * @param {string} outputFile - Path of the detailed report.
 * @returns {string} Path for the simple value list.
 */
function deriveSimpleOutputPath(outputFile) {
  const { root, dir, name, ext } = path.parse(outputFile);
  return path.format({ root, dir, name: `${name}-simple`, ext });
}

/**
 * Collects the distinct numeric values found in one response body, in order
 * of first appearance, by combining several extraction strategies.
 *
 * @param {*} data - The `response.data` value (string or parsed JSON).
 * @returns {number[]} De-duplicated values found in the body.
 */
function collectResponseValues(data) {
  // A Set keeps first-seen order and avoids spreading huge arrays into push().
  const found = new Set();
  const addAll = (numbers) => {
    for (const number of numbers) {
      found.add(number);
    }
  };

  // Body already parsed by axios: walk the structure directly first.
  if (typeof data === 'object' && data !== null && !Buffer.isBuffer(data)) {
    addAll(extractValuesFromJSON(data));
  }

  const responseText = responseBodyToText(data);

  // Method 1: SSE "data:" lines. The space after the colon is optional.
  for (const line of responseText.split('\n')) {
    if (!line.startsWith('data:')) {
      continue;
    }
    const payload = line.slice(5).trim();
    try {
      addAll(extractValuesFromJSON(JSON.parse(payload)));
    } catch (e) {
      // Payload is not JSON: fall back to scanning it as plain text.
      addAll(extractAllNumericValues(payload));
    }
  }

  // Method 2: innermost {...} / [...] fragments anywhere in the body.
  const jsonFragments = responseText.match(/\{[^{}]*\}|\[[^\[\]]*\]/g) || [];
  for (const fragment of jsonFragments) {
    try {
      addAll(extractValuesFromJSON(JSON.parse(fragment)));
    } catch (e) {
      // Best effort: the fragment only looked like JSON, so skip it.
    }
  }

  // Method 3: every number that appears anywhere in the raw text.
  addAll(extractAllNumericValues(responseText));

  return [...found];
}

/**
 * Sends every question to the MCP endpoint (`${MCP_BASE}/sse`), extracts the
 * numeric values from each response, writes a detailed report to OUTPUT_FILE
 * and a plain list of the baseline values to a sibling "-simple" file.
 *
 * A failing request is logged and recorded with an empty value list; it does
 * not stop the remaining questions from being processed.
 *
 * @returns {Promise<number[]>} The ten smallest unique values found.
 */
async function authenticateAndExtractValues() {
  console.log('Starting generic baseline extraction...\n');

  // Collect all values across all questions
  const allValuesFound = [];
  const valuesByQuestion = [];

  for (let i = 0; i < questions.length; i++) {
    const question = questions[i].trim();
    console.log(`Processing question ${i + 1}/${questions.length}: ${question.substring(0, 50)}...`);
    try {
      // Make request using MCP protocol (JSON-RPC "tools/call")
      const response = await axiosInstance.post(`${MCP_BASE}/sse`, {
        jsonrpc: '2.0',
        method: 'tools/call',
        params: {
          name: 'api__invoices_caui',
          arguments: {
            prompt: question
          }
        },
        id: i + 1
      }, {
        headers: {
          'Authorization': `Bearer ${USERNAME}`,
          'apikey': PASSWORD
        }
      });

      const uniqueForQuestion = collectResponseValues(response.data);
      valuesByQuestion.push({
        question: question,
        values: uniqueForQuestion.slice(0, 10) // Keep first 10 for each question
      });
      for (const value of uniqueForQuestion) {
        allValuesFound.push(value);
      }
    } catch (error) {
      console.log(` Error processing question: ${error.message}`);
      valuesByQuestion.push({
        question: question,
        values: []
      });
    }
  }

  // Unique values across all questions, ascending
  const allUniqueValues = [...new Set(allValuesFound)];
  allUniqueValues.sort((a, b) => a - b);

  // Generate output
  let output = 'GENERIC BASELINE VALUE EXTRACTION\n';
  output += '==================================\n\n';
  output += `Questions processed: ${questions.length}\n`;
  output += `Total values found: ${allValuesFound.length}\n`;
  output += `Unique values found: ${allUniqueValues.length}\n\n`;
  output += 'FIRST 10 UNIQUE VALUES (BASELINE):\n';
  output += '-----------------------------------\n';
  const first10 = allUniqueValues.slice(0, 10);
  first10.forEach((value, index) => {
    output += `${index + 1}. ${value}\n`;
  });
  output += '\n\nVALUES BY QUESTION (First 10 questions shown):\n';
  output += '------------------------------------------------\n';
  valuesByQuestion.slice(0, 10).forEach((item, index) => {
    output += `\nQ${index + 1}: ${item.question.substring(0, 60)}...\n`;
    output += `Values: ${item.values.slice(0, 5).join(', ')}`;
    if (item.values.length > 5) {
      output += ` ... (${item.values.length} total)`;
    }
    output += '\n';
  });

  // Save the detailed report
  fs.writeFileSync(OUTPUT_FILE, output);
  console.log(`\n✓ Baseline extraction complete!`);
  console.log(`✓ Results saved to: ${OUTPUT_FILE}`);

  // Also save just the first 10 values, one per line, in a separate file
  const simpleOutput = first10.join('\n');
  const simpleFile = deriveSimpleOutputPath(OUTPUT_FILE);
  fs.writeFileSync(simpleFile, simpleOutput);
  console.log(`✓ Simple list saved to: ${simpleFile}`);

  return first10;
}
// Script entry point: run the extraction; if it rejects, print the error
// message and terminate with exit code 1.
(async () => {
  try {
    await authenticateAndExtractValues();
  } catch (failure) {
    console.error('Fatal error:', failure.message);
    process.exit(1);
  }
})();