Skip to main content
Glama
multi-dimensional-assessor.test.ts23.1 kB
/** * Tests for Multi-Dimensional Confidence Assessor * * Tests the confidence assessment system that evaluates reasoning quality * across five dimensions: evidence quality, reasoning coherence, completeness, * uncertainty level, and bias freedom. */ import { beforeEach, describe, expect, it } from "vitest"; import { MultiDimensionalConfidenceAssessor } from "../../../confidence/multi-dimensional-assessor"; import type { Problem, ReasoningContext } from "../../../reasoning/types"; describe("MultiDimensionalConfidenceAssessor", () => { let assessor: MultiDimensionalConfidenceAssessor; beforeEach(() => { assessor = new MultiDimensionalConfidenceAssessor(); }); describe("Overall Confidence Scoring", () => { it("should return confidence score between 0 and 1", async () => { const context = createMockContext({ evidence: ["Evidence 1", "Evidence 2", "Evidence 3"], constraints: ["Constraint 1"], goals: ["Goal 1"], }); const assessment = await assessor.assessConfidence(context); expect(assessment.overallConfidence).toBeGreaterThanOrEqual(0); expect(assessment.overallConfidence).toBeLessThanOrEqual(1); }); it("should return higher confidence for high-quality reasoning", async () => { const highQualityContext = createMockContext({ evidence: [ "Strong evidence 1", "Strong evidence 2", "Strong evidence 3", "Strong evidence 4", ], constraints: ["Clear constraint"], goals: ["Well-defined goal"], }); const lowQualityContext = createMockContext({ evidence: ["Weak evidence"], constraints: [], goals: [], }); const highAssessment = await assessor.assessConfidence(highQualityContext); const lowAssessment = await assessor.assessConfidence(lowQualityContext); expect(highAssessment.overallConfidence).toBeGreaterThan(lowAssessment.overallConfidence); }); it("should include timestamp and processing time", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); expect(assessment.timestamp).toBeInstanceOf(Date); expect(assessment.processingTime).toBeGreaterThan(0); }); }); describe("Evidence Quality Assessment", () => { it("should assess evidence quality between 0 and 1", async () => { const context = createMockContext({ evidence: ["Evidence 1", "Evidence 2"], }); const assessment = await assessor.assessConfidence(context); expect(assessment.evidenceQuality).toBeGreaterThanOrEqual(0); expect(assessment.evidenceQuality).toBeLessThanOrEqual(1); }); it("should give higher score for more evidence", async () => { const manyEvidenceContext = createMockContext({ evidence: ["E1", "E2", "E3", "E4", "E5"], }); const fewEvidenceContext = createMockContext({ evidence: ["E1"], }); const manyAssessment = await assessor.assessConfidence(manyEvidenceContext); const fewAssessment = await assessor.assessConfidence(fewEvidenceContext); expect(manyAssessment.evidenceQuality).toBeGreaterThan(fewAssessment.evidenceQuality); }); it("should handle empty evidence array", async () => { const context = createMockContext({ evidence: [], }); const assessment = await assessor.assessConfidence(context); expect(assessment.evidenceQuality).toBe(0); }); it("should not increase indefinitely with more evidence", async () => { const moderateContext = createMockContext({ evidence: Array(5).fill("Evidence"), }); const excessiveContext = createMockContext({ evidence: Array(50).fill("Evidence"), }); const moderateAssessment = await assessor.assessConfidence(moderateContext); const excessiveAssessment = await assessor.assessConfidence(excessiveContext); // Should plateau or have diminishing returns const difference = excessiveAssessment.evidenceQuality - moderateAssessment.evidenceQuality; expect(difference).toBeLessThan(0.3); // Not a huge difference }); }); describe("Reasoning Coherence Assessment", () => { it("should assess reasoning coherence between 0 and 1", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); expect(assessment.reasoningCoherence).toBeGreaterThanOrEqual(0); expect(assessment.reasoningCoherence).toBeLessThanOrEqual(1); }); it("should give higher score for well-structured reasoning", async () => { const wellStructuredContext = createMockContext({ evidence: ["E1", "E2", "E3"], constraints: ["C1", "C2"], goals: ["G1", "G2"], }); const poorlyStructuredContext = createMockContext({ problem: { id: "test", description: "", context: "", }, evidence: [], constraints: [], goals: [], }); const wellAssessment = await assessor.assessConfidence(wellStructuredContext); const poorAssessment = await assessor.assessConfidence(poorlyStructuredContext); expect(wellAssessment.reasoningCoherence).toBeGreaterThan(poorAssessment.reasoningCoherence); }); }); describe("Completeness Assessment", () => { it("should assess completeness between 0 and 1", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); expect(assessment.completeness).toBeGreaterThanOrEqual(0); expect(assessment.completeness).toBeLessThanOrEqual(1); }); it("should give higher score when goals are addressed", async () => { const completeContext = createMockContext({ goals: ["Goal 1", "Goal 2"], evidence: ["Evidence for goal 1", "Evidence for goal 2"], }); const incompleteContext = createMockContext({ goals: ["Goal 1", "Goal 2", "Goal 3"], evidence: ["Evidence for goal 1"], }); const completeAssessment = await assessor.assessConfidence(completeContext); const incompleteAssessment = await assessor.assessConfidence(incompleteContext); expect(completeAssessment.completeness).toBeGreaterThan(incompleteAssessment.completeness); }); it("should handle context with no goals", async () => { const context = createMockContext({ goals: [], }); const assessment = await assessor.assessConfidence(context); // Should still provide a completeness score expect(assessment.completeness).toBeGreaterThanOrEqual(0); expect(assessment.completeness).toBeLessThanOrEqual(1); }); }); describe("Uncertainty Classification", () => { it("should classify uncertainty type", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); expect(assessment.uncertaintyType).toBeDefined(); expect(["epistemic", "aleatory", "ambiguity"]).toContain(assessment.uncertaintyType); }); it("should classify as epistemic when evidence is lacking", async () => { const context = createMockContext({ evidence: [], }); const assessment = await assessor.assessConfidence(context); expect(assessment.uncertaintyType).toBe("epistemic"); }); it("should classify as ambiguity when multiple interpretations exist", async () => { const context = createMockContext({ evidence: ["Evidence suggesting A", "Evidence suggesting B", "Evidence suggesting C"], problem: { id: "test", description: "Ambiguous problem with multiple valid solutions", context: "Multiple perspectives", }, }); const assessment = await assessor.assessConfidence(context); // Should detect ambiguity from multiple evidence items expect(["ambiguity", "epistemic"]).toContain(assessment.uncertaintyType); }); it("should provide uncertainty level between 0 and 1", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); expect(assessment.uncertaintyLevel).toBeGreaterThanOrEqual(0); expect(assessment.uncertaintyLevel).toBeLessThanOrEqual(1); }); }); describe("Performance Requirements", () => { it("should complete assessment in less than 100ms", async () => { const context = createMockContext({ evidence: Array(10).fill("Evidence"), constraints: Array(5).fill("Constraint"), goals: Array(3).fill("Goal"), }); const startTime = performance.now(); await assessor.assessConfidence(context); const endTime = performance.now(); const duration = endTime - startTime; expect(duration).toBeLessThan(100); }); it("should handle large contexts efficiently", async () => { const largeContext = createMockContext({ evidence: Array(100).fill("Evidence"), constraints: Array(50).fill("Constraint"), goals: Array(20).fill("Goal"), }); const startTime = performance.now(); await assessor.assessConfidence(largeContext); const endTime = performance.now(); const duration = endTime - startTime; expect(duration).toBeLessThan(100); }); }); describe("Edge Cases", () => { it("should handle null evidence gracefully", async () => { const context = createMockContext({ evidence: null as any, }); const assessment = await assessor.assessConfidence(context); expect(assessment).toBeDefined(); expect(assessment.evidenceQuality).toBe(0); }); it("should handle undefined constraints", async () => { const context = createMockContext({ constraints: undefined as any, }); const assessment = await assessor.assessConfidence(context); expect(assessment).toBeDefined(); }); it("should handle empty problem description", async () => { const context = createMockContext({ problem: { id: "test", description: "", context: "", }, }); const assessment = await assessor.assessConfidence(context); expect(assessment).toBeDefined(); expect(assessment.overallConfidence).toBeGreaterThanOrEqual(0); }); it("should handle minimal context", async () => { const minimalContext: ReasoningContext = { problem: { id: "test", description: "Test", context: "", }, evidence: [], constraints: [], goals: [], }; const assessment = await assessor.assessConfidence(minimalContext); expect(assessment).toBeDefined(); expect(assessment.overallConfidence).toBeGreaterThanOrEqual(0); expect(assessment.overallConfidence).toBeLessThanOrEqual(1); }); }); describe("Confidence Factors", () => { it("should provide detailed factor breakdown", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); expect(assessment.factors).toBeDefined(); expect(Array.isArray(assessment.factors)).toBe(true); expect(assessment.factors.length).toBeGreaterThan(0); }); it("should include all five dimensions in factors", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); const dimensionNames = assessment.factors.map((f: any) => f.dimension); expect(dimensionNames).toContain("evidence"); expect(dimensionNames).toContain("coherence"); expect(dimensionNames).toContain("completeness"); expect(dimensionNames).toContain("uncertainty"); }); it("should provide explanations for each factor", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); assessment.factors.forEach((factor: any) => { expect(factor.explanation).toBeDefined(); expect(typeof factor.explanation).toBe("string"); expect(factor.explanation.length).toBeGreaterThan(0); }); }); it("should include weights for each factor", async () => { const context = createMockContext(); const assessment = await assessor.assessConfidence(context); assessment.factors.forEach((factor: any) => { expect(factor.weight).toBeGreaterThan(0); expect(factor.weight).toBeLessThanOrEqual(1); }); // Weights should sum to approximately 1 const totalWeight = assessment.factors.reduce((sum: number, f: any) => sum + f.weight, 0); expect(totalWeight).toBeCloseTo(1, 1); }); }); describe("Integration with Reasoning Context", () => { it("should work with complete reasoning context", async () => { const context: ReasoningContext = { problem: { id: "test-problem", description: "Complex problem requiring analysis", context: "Important business decision", constraints: ["Budget limit", "Time constraint"], goals: ["Maximize ROI", "Minimize risk"], complexity: "complex", urgency: "high", }, evidence: ["Market research data", "Historical performance", "Expert opinions"], constraints: ["Budget limit", "Time constraint", "Resource limit"], goals: ["Maximize ROI", "Minimize risk", "Ensure quality"], framework: "systems-thinking", emotionalState: { valence: 0.5, arousal: 0.7, dominance: 0.3, }, }; const assessment = await assessor.assessConfidence(context); expect(assessment).toBeDefined(); expect(assessment.overallConfidence).toBeGreaterThan(0); }); it("should consider framework selection in assessment", async () => { const withFramework = createMockContext({ framework: "scientific-method", }); const withoutFramework = createMockContext({ framework: undefined, }); const withAssessment = await assessor.assessConfidence(withFramework); const withoutAssessment = await assessor.assessConfidence(withoutFramework); // Having a framework should generally increase confidence expect(withAssessment.overallConfidence).toBeGreaterThanOrEqual( withoutAssessment.overallConfidence ); }); it("should consider emotional state if present", async () => { const context = createMockContext({ emotionalState: { valence: -0.8, // Negative emotion arousal: 0.9, // High arousal dominance: 0.2, // Low dominance }, }); const assessment = await assessor.assessConfidence(context); // Should still provide valid assessment expect(assessment).toBeDefined(); expect(assessment.overallConfidence).toBeGreaterThanOrEqual(0); }); }); describe("Calibration Model", () => { it("should allow setting a calibration model", () => { const calibrationModel = { domain: "test", slope: 0.9, intercept: 0.05, sampleSize: 100, calibrationError: 0.05, lastUpdated: new Date(), }; assessor.setCalibrationModel(calibrationModel); const retrieved = assessor.getCalibrationModel(); expect(retrieved).toEqual(calibrationModel); }); it("should return undefined when no calibration model is set", () => { const retrieved = assessor.getCalibrationModel(); expect(retrieved).toBeUndefined(); }); it("should apply calibration when model is set", async () => { const context = createMockContext({ evidence: ["E1", "E2", "E3"], constraints: ["C1"], goals: ["G1"], }); // Get baseline confidence without calibration const baselineAssessment = await assessor.assessConfidence(context); const baselineConfidence = baselineAssessment.overallConfidence; // Set calibration model that adjusts scores const calibrationModel = { domain: "test", slope: 0.8, intercept: 0.1, sampleSize: 100, calibrationError: 0.05, lastUpdated: new Date(), }; assessor.setCalibrationModel(calibrationModel); // Get calibrated confidence const calibratedAssessment = await assessor.assessConfidence(context); const calibratedConfidence = calibratedAssessment.overallConfidence; // Calibrated confidence should be different from baseline // Expected: calibrated = 0.8 * baseline + 0.1 const expectedCalibrated = Math.min(1.0, Math.max(0, 0.8 * baselineConfidence + 0.1)); expect(calibratedConfidence).toBeCloseTo(expectedCalibrated, 2); }); it("should clamp calibrated confidence to valid range", async () => { const context = createMockContext({ evidence: Array(10).fill("Evidence"), constraints: Array(5).fill("Constraint"), goals: Array(3).fill("Goal"), }); // Set calibration model that could produce values outside [0, 1] const calibrationModel = { domain: "test", slope: 2.0, // Could push values above 1 intercept: 0.5, sampleSize: 100, calibrationError: 0.05, lastUpdated: new Date(), }; assessor.setCalibrationModel(calibrationModel); const assessment = await assessor.assessConfidence(context); // Should be clamped to valid range expect(assessment.overallConfidence).toBeGreaterThanOrEqual(0); expect(assessment.overallConfidence).toBeLessThanOrEqual(1); }); it("should handle negative calibration results", async () => { const context = createMockContext({ evidence: [], constraints: [], goals: [], }); // Set calibration model that could produce negative values const calibrationModel = { domain: "test", slope: 0.5, intercept: -0.5, // Could push values below 0 sampleSize: 100, calibrationError: 0.05, lastUpdated: new Date(), }; assessor.setCalibrationModel(calibrationModel); const assessment = await assessor.assessConfidence(context); // Should be clamped to 0 expect(assessment.overallConfidence).toBeGreaterThanOrEqual(0); }); }); describe("Explanation Quality", () => { it("should provide different explanations for different evidence scores", async () => { const contexts = [ createMockContext({ evidence: [] }), // Score 0 createMockContext({ evidence: ["E1"] }), // Low score createMockContext({ evidence: ["E1", "E2", "E3"] }), // Medium score createMockContext({ evidence: Array(10).fill("E") }), // High score ]; const assessments = await Promise.all(contexts.map((c) => assessor.assessConfidence(c))); // Each should have a unique explanation const explanations = assessments.map((a) => { const evidenceFactor = a.factors.find((f: any) => f.dimension === "evidence"); return evidenceFactor?.explanation; }); // All explanations should be defined and different expect(explanations.every((e) => e && e.length > 0)).toBe(true); expect(new Set(explanations).size).toBe(explanations.length); }); it("should provide different explanations for different coherence scores", async () => { const contexts = [ createMockContext({ problem: { id: "1", description: "", context: "" }, evidence: [], constraints: [], goals: [], }), // Low coherence createMockContext({ evidence: ["E1"], constraints: ["C1"], goals: ["G1"], }), // Medium coherence createMockContext({ evidence: ["E1", "E2", "E3"], constraints: ["C1", "C2"], goals: ["G1", "G2"], framework: "scientific-method", }), // High coherence ]; const assessments = await Promise.all(contexts.map((c) => assessor.assessConfidence(c))); // Each should have a unique explanation const explanations = assessments.map((a) => { const coherenceFactor = a.factors.find((f: any) => f.dimension === "coherence"); return coherenceFactor?.explanation; }); // All explanations should be defined expect(explanations.every((e) => e && e.length > 0)).toBe(true); }); it("should provide different explanations for different completeness scores", async () => { const contexts = [ createMockContext({ goals: [], evidence: [] }), // Low completeness createMockContext({ goals: ["G1", "G2"], evidence: ["E1"] }), // Medium completeness createMockContext({ goals: ["G1", "G2"], evidence: ["E1", "E2", "E3"] }), // High completeness ]; const assessments = await Promise.all(contexts.map((c) => assessor.assessConfidence(c))); // Each should have a unique explanation const explanations = assessments.map((a) => { const completenessFactor = a.factors.find((f: any) => f.dimension === "completeness"); return completenessFactor?.explanation; }); // All explanations should be defined expect(explanations.every((e) => e && e.length > 0)).toBe(true); }); it("should provide different explanations for different uncertainty levels", async () => { const contexts = [ createMockContext({ evidence: Array(10).fill("E") }), // Low uncertainty createMockContext({ evidence: ["E1", "E2"] }), // Medium uncertainty createMockContext({ evidence: [] }), // High uncertainty ]; const assessments = await Promise.all(contexts.map((c) => assessor.assessConfidence(c))); // Each should have a unique explanation const explanations = assessments.map((a) => { const uncertaintyFactor = a.factors.find((f: any) => f.dimension === "uncertainty"); return uncertaintyFactor?.explanation; }); // All explanations should be defined expect(explanations.every((e) => e && e.length > 0)).toBe(true); }); }); }); // Helper function to create mock reasoning contexts function createMockContext(overrides: Partial<ReasoningContext> = {}): ReasoningContext { const defaultProblem: Problem = { id: "test-problem", description: "Test problem description", context: "Test context", }; return { problem: overrides.problem || defaultProblem, evidence: overrides.evidence !== undefined ? overrides.evidence : ["Default evidence"], constraints: overrides.constraints !== undefined ? overrides.constraints : ["Default constraint"], goals: overrides.goals !== undefined ? overrides.goals : ["Default goal"], framework: overrides.framework, emotionalState: overrides.emotionalState, }; }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server