// Gauntlet-Incept MCP
// by Birdsmith
// - src
// - utils
/**
* Test Harness
*
* This file provides utilities for testing the quality control system
* against known good and bad examples.
*/
// Import services
const questionService = require('../services/questionService');
const articleService = require('../services/articleService');
/**
 * Measure how well a grader agrees with hand-labelled examples.
 *
 * Every example is graded one at a time: with `questionService.gradeQuestion`
 * when `contentType` is exactly 'question', and with
 * `articleService.gradeArticle` for any other value. The grader's `pass` flag
 * is compared with the example's `expectedResult` to build a confusion matrix
 * in which "pass" is the positive class.
 *
 * Examples whose `expectedResult` is neither 'pass' nor 'fail' cannot be
 * classified. They are still graded, but they are reported under `skipped`
 * (with a warning) instead of silently disappearing, so that
 * `truePositives + falsePositives + trueNegatives + falseNegatives + skipped`
 * always equals `total`.
 *
 * @param {Array} examples - Examples to test; each is read for `content`,
 *   `tags` and `expectedResult` ('pass' or 'fail').
 * @param {string} contentType - The type of content (question or article)
 * @returns {Promise<Object>} Metrics: `precision`, `recall`, `f1`, the four
 *   confusion-matrix counts, `total` (number of examples supplied),
 *   `accuracy` (correct verdicts / classified examples) and `skipped`.
 *   Any ratio whose denominator is zero is reported as 0.
 * @throws {Error} "Failed to measure accuracy: ..." wrapping any failure; the
 *   original error is attached as `cause`.
 */
async function measureAccuracy(examples, contentType) {
  try {
    console.log(`Measuring accuracy for ${contentType} grader with ${examples.length} examples`);

    let truePositives = 0;
    let falsePositives = 0;
    let trueNegatives = 0;
    let falseNegatives = 0;
    let skipped = 0;

    // Graded sequentially on purpose: the original harness never issued
    // concurrent grader calls, and the graders' tolerance for that is unknown.
    for (const example of examples) {
      const { content, tags, expectedResult } = example;

      const gradeResult = contentType === 'question'
        ? await questionService.gradeQuestion(content, tags)
        : await articleService.gradeArticle(content, tags);
      const passed = Boolean(gradeResult.pass);

      if (expectedResult === 'pass') {
        if (passed) {
          truePositives++;
        } else {
          falseNegatives++;
        }
      } else if (expectedResult === 'fail') {
        if (passed) {
          falsePositives++;
        } else {
          trueNegatives++;
        }
      } else {
        // Unlabelled or mislabelled example: keep it out of the matrix, but
        // make the omission visible.
        skipped++;
        console.warn(
          `Skipping ${contentType} example with unrecognized expectedResult:`,
          expectedResult
        );
      }
    }

    const classified = truePositives + falsePositives + trueNegatives + falseNegatives;

    // `|| 0` turns the NaN produced by a 0/0 division into 0.
    const precision = truePositives / (truePositives + falsePositives) || 0;
    const recall = truePositives / (truePositives + falseNegatives) || 0;
    const f1 = 2 * (precision * recall) / (precision + recall) || 0;
    const accuracy = (truePositives + trueNegatives) / classified || 0;

    const results = {
      precision,
      recall,
      f1,
      truePositives,
      falsePositives,
      trueNegatives,
      falseNegatives,
      total: examples.length,
      accuracy,
      skipped
    };

    console.log(`Accuracy results for ${contentType} grader:`, results);
    return results;
  } catch (error) {
    console.error(`Error measuring accuracy for ${contentType} grader:`, error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to measure accuracy: ${error.message}`, { cause: error });
  }
}
/**
 * Load the test examples for a content type.
 *
 * Placeholder: nothing is read from a database yet. Two hard-coded fixtures
 * about dividing fractions are returned instead: `good_1` (expected to pass)
 * followed by `bad_1` (expected to fail). The question variants are used when
 * `contentType` is exactly 'question'; every other value gets the article
 * variants.
 *
 * @param {string} contentType - The type of content (question or article)
 * @returns {Promise<Array>} The test examples, each with `id`, `content`,
 *   `tags`, `expectedResult` and `expectedScorecard`.
 * @throws {Error} "Failed to load test examples: ..." if anything fails.
 */
async function loadTestExamples(contentType) {
  try {
    // TODO: Implement actual loading logic
    console.log(`Loading test examples for ${contentType}`);

    const forQuestion = contentType === 'question';

    // Each example gets its own tags object; only questions carry a difficulty.
    const makeTags = () => {
      const tags = {
        subject: 'math',
        grade: '6',
        standard: 'CCSS.Math.6.NS.1',
        lesson: 'Division of Fractions'
      };
      if (forQuestion) {
        tags.difficulty = 2;
      }
      return tags;
    };

    let passing;
    let failing;

    if (forQuestion) {
      // NOTE(review): `appropriateCategorizaton` is spelled differently from
      // the article key `appropriateCategorization`. Kept exactly as found;
      // confirm against the keys the question grader actually emits.
      passing = {
        content: 'What is the result of dividing 3/4 by 1/2?',
        scorecard: {
          consistentWithArticle: true,
          appropriateCategorizaton: true,
          allPartsPresent: true,
          accurateCorrectAnswer: true,
          plausibleDistractors: true,
          clearExplanations: true,
          gradeAppropriateLanguage: true,
          grammaticallyCorrect: true,
          properlyFormatted: true
        }
      };
      failing = {
        content: 'What is 3/4 divided by 1/2? A) 3/8 B) 6/4 C) 3/2 D) 1 1/2',
        scorecard: {
          consistentWithArticle: true,
          appropriateCategorizaton: true,
          allPartsPresent: false,
          accurateCorrectAnswer: true,
          plausibleDistractors: true,
          clearExplanations: false,
          gradeAppropriateLanguage: true,
          grammaticallyCorrect: true,
          properlyFormatted: false
        }
      };
    } else {
      passing = {
        content: '# Division of Fractions\n\nIn this lesson, you will learn how to divide fractions...',
        scorecard: {
          appropriateCategorization: true,
          directInstructionStyle: true,
          workedExamples: true,
          stepByStepBreakdowns: true,
          factuallyAccurate: true,
          gradeAppropriateLanguage: true,
          clearAndUnambiguousWording: true,
          properlyFormatted: true,
          consistentExplanations: true
        }
      };
      failing = {
        content: 'Division of Fractions\n\nTo divide fractions, multiply by the reciprocal.',
        scorecard: {
          appropriateCategorization: true,
          directInstructionStyle: false,
          workedExamples: false,
          stepByStepBreakdowns: false,
          factuallyAccurate: true,
          gradeAppropriateLanguage: true,
          clearAndUnambiguousWording: false,
          properlyFormatted: false,
          consistentExplanations: true
        }
      };
    }

    // Placeholder implementation
    return [
      {
        id: 'good_1',
        content: passing.content,
        tags: makeTags(),
        expectedResult: 'pass',
        expectedScorecard: passing.scorecard
      },
      {
        id: 'bad_1',
        content: failing.content,
        tags: makeTags(),
        expectedResult: 'fail',
        expectedScorecard: failing.scorecard
      }
    ];
  } catch (error) {
    console.error(`Error loading test examples for ${contentType}:`, error);
    throw new Error(`Failed to load test examples: ${error.message}`);
  }
}
// Numeric suffix of the most recently issued example ID. Tracking it lets
// addTestExample hand out strictly increasing suffixes even when several
// examples are added within the same millisecond.
let lastTestExampleStamp = 0;

/**
 * Add a new test example.
 *
 * Placeholder: nothing is persisted yet. The example is logged and an ID of
 * the form `<contentType>_example_<integer>` is generated and returned. The
 * integer is the current `Date.now()` value, bumped by one when needed so that
 * no two calls in this process ever receive the same ID.
 *
 * @param {Object} example - The example to add
 * @param {string} contentType - The type of content (question or article)
 * @returns {Promise<string>} The ID of the added example
 * @throws {Error} "Failed to add test example: ..." if anything fails; the
 *   original error is attached as `cause`.
 */
async function addTestExample(example, contentType) {
  try {
    // TODO: Implement actual saving logic
    console.log(`Adding test example for ${contentType}:`, example);

    // Placeholder implementation. Date.now() alone repeats for calls made
    // within one millisecond, so never reuse or go below the last stamp.
    lastTestExampleStamp = Math.max(Date.now(), lastTestExampleStamp + 1);
    const id = `${contentType}_example_${lastTestExampleStamp}`;

    console.log(`Added test example with ID: ${id}`);
    return id;
  } catch (error) {
    console.error(`Error adding test example for ${contentType}:`, error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to add test example: ${error.message}`, { cause: error });
  }
}
// Public API of the test harness: the grader accuracy measurement plus the
// helpers for loading and adding test examples.
module.exports = {
measureAccuracy,
loadTestExamples,
addTestExample
};