Gauntlet-Incept MCP

/** * Test Harness * * This file provides utilities for testing the quality control system * against known good and bad examples. */ // Import services const questionService = require('../services/questionService'); const articleService = require('../services/articleService'); /** * Measure the accuracy of the grading system * * @param {Array} examples - The examples to test * @param {string} contentType - The type of content (question or article) * @returns {Object} The accuracy metrics (precision, recall, F1) */ async function measureAccuracy(examples, contentType) { try { console.log(`Measuring accuracy for ${contentType} grader with ${examples.length} examples`); let truePositives = 0; let falsePositives = 0; let trueNegatives = 0; let falseNegatives = 0; for (const example of examples) { const { content, tags, expectedResult } = example; // Grade the example let gradeResult; if (contentType === 'question') { gradeResult = await questionService.gradeQuestion(content, tags); } else { gradeResult = await articleService.gradeArticle(content, tags); } // Compare with expected result if (expectedResult === 'pass' && gradeResult.pass) { truePositives++; } else if (expectedResult === 'pass' && !gradeResult.pass) { falseNegatives++; } else if (expectedResult === 'fail' && !gradeResult.pass) { trueNegatives++; } else if (expectedResult === 'fail' && gradeResult.pass) { falsePositives++; } } // Calculate metrics const precision = truePositives / (truePositives + falsePositives) || 0; const recall = truePositives / (truePositives + falseNegatives) || 0; const f1 = 2 * (precision * recall) / (precision + recall) || 0; const results = { precision: precision, recall: recall, f1: f1, truePositives: truePositives, falsePositives: falsePositives, trueNegatives: trueNegatives, falseNegatives: falseNegatives, total: examples.length }; console.log(`Accuracy results for ${contentType} grader:`, results); return results; } catch (error) { console.error(`Error measuring accuracy for ${contentType} grader:`, error); throw new Error(`Failed to measure accuracy: ${error.message}`); } } /** * Load test examples from the database * * @param {string} contentType - The type of content (question or article) * @returns {Array} The test examples */ async function loadTestExamples(contentType) { try { // TODO: Implement actual loading logic console.log(`Loading test examples for ${contentType}`); // Placeholder implementation const examples = [ { id: 'good_1', content: contentType === 'question' ? 'What is the result of dividing 3/4 by 1/2?' : '# Division of Fractions\n\nIn this lesson, you will learn how to divide fractions...', tags: { subject: 'math', grade: '6', standard: 'CCSS.Math.6.NS.1', lesson: 'Division of Fractions', ...(contentType === 'question' && { difficulty: 2 }) }, expectedResult: 'pass', expectedScorecard: { ...(contentType === 'question' ? { consistentWithArticle: true, appropriateCategorizaton: true, allPartsPresent: true, accurateCorrectAnswer: true, plausibleDistractors: true, clearExplanations: true, gradeAppropriateLanguage: true, grammaticallyCorrect: true, properlyFormatted: true } : { appropriateCategorization: true, directInstructionStyle: true, workedExamples: true, stepByStepBreakdowns: true, factuallyAccurate: true, gradeAppropriateLanguage: true, clearAndUnambiguousWording: true, properlyFormatted: true, consistentExplanations: true }) } }, { id: 'bad_1', content: contentType === 'question' ? 'What is 3/4 divided by 1/2? A) 3/8 B) 6/4 C) 3/2 D) 1 1/2' : 'Division of Fractions\n\nTo divide fractions, multiply by the reciprocal.', tags: { subject: 'math', grade: '6', standard: 'CCSS.Math.6.NS.1', lesson: 'Division of Fractions', ...(contentType === 'question' && { difficulty: 2 }) }, expectedResult: 'fail', expectedScorecard: { ...(contentType === 'question' ? { consistentWithArticle: true, appropriateCategorizaton: true, allPartsPresent: false, accurateCorrectAnswer: true, plausibleDistractors: true, clearExplanations: false, gradeAppropriateLanguage: true, grammaticallyCorrect: true, properlyFormatted: false } : { appropriateCategorization: true, directInstructionStyle: false, workedExamples: false, stepByStepBreakdowns: false, factuallyAccurate: true, gradeAppropriateLanguage: true, clearAndUnambiguousWording: false, properlyFormatted: false, consistentExplanations: true }) } } ]; return examples; } catch (error) { console.error(`Error loading test examples for ${contentType}:`, error); throw new Error(`Failed to load test examples: ${error.message}`); } } /** * Add a new test example to the database * * @param {Object} example - The example to add * @param {string} contentType - The type of content (question or article) * @returns {string} The ID of the added example */ async function addTestExample(example, contentType) { try { // TODO: Implement actual saving logic console.log(`Adding test example for ${contentType}:`, example); // Placeholder implementation const id = `${contentType}_example_${Date.now()}`; console.log(`Added test example with ID: ${id}`); return id; } catch (error) { console.error(`Error adding test example for ${contentType}:`, error); throw new Error(`Failed to add test example: ${error.message}`); } } module.exports = { measureAccuracy, loadTestExamples, addTestExample };