Elenchus MCP Server

Overview Schema Related Servers Score Discussions

elenchus-mcp
src
llm-evaluators

edge-case-evaluator.ts•7.84 KiB

/**
 * LLM-based Edge Case Evaluator
 *
 * Replaces keyword detection with actual analysis of whether
 * edge cases were genuinely examined and adequately covered.
 */

import type {
  SamplingFunction,
  EdgeCaseEvaluation,
  EvaluatorContext,
  LLMEvaluatorConfig,
} from './types.js';
import { DEFAULT_EVALUATOR_CONFIG } from './types.js';

/** System prompt describing what counts as genuine edge case analysis. */
const EDGE_CASE_SYSTEM_PROMPT = `You are an expert at evaluating test coverage and edge case analysis.

Your task is to assess whether edge cases were GENUINELY analyzed, not just mentioned.

TRUE edge case analysis includes:
1. Identifying specific scenarios that could cause failures
2. Explaining WHY each scenario is problematic
3. Describing expected vs actual behavior
4. Proposing mitigations or test cases

FALSE edge case analysis (just mentioning):
- "Edge cases were considered ✓"
- "Boundary conditions checked"
- Listing categories without specific scenarios
- Using keywords without actual analysis

Categories to evaluate:
- NULL/UNDEFINED: Empty inputs, missing parameters
- BOUNDARY: Min/max values, overflow/underflow
- CONCURRENCY: Race conditions, deadlocks
- ERROR PATHS: Exception handling, failure modes
- DATA INTEGRITY: Type mismatches, encoding issues
- RESOURCE LIMITS: Memory, file handles, connections
- TIMING: Timeouts, delays, async operations
- SECURITY: Injection, XSS, privilege escalation

Respond with a JSON evaluation.`;

/**
 * User prompt template. `{targetFiles}`, `{projectType}` and
 * `{verificationOutputs}` are substituted by `fillTemplate`.
 */
const EDGE_CASE_EVALUATION_PROMPT = `Evaluate the edge case coverage in this verification:

## Target Files
{targetFiles}

## Code Type/Domain
{projectType}

## Verification Outputs
{verificationOutputs}

---

Analyze:
1. Were edge cases GENUINELY analyzed or just mentioned?
2. What specific edge cases were examined with actual reasoning?
3. What important edge cases are MISSING for this type of code?
4. What's the overall coverage quality (0-100)?

Important: Look for EVIDENCE of actual analysis, not just keywords!

Respond with ONLY a valid JSON object:
{
  "passed": boolean,
  "confidence": "high" | "medium" | "low" | "uncertain",
  "reasoning": "detailed explanation of coverage quality",
  "evidence": ["specific evidence of actual edge case analysis"],
  "analyzedCases": [
    {
      "description": "specific edge case that was actually analyzed",
      "category": "NULL|BOUNDARY|CONCURRENCY|ERROR|DATA|RESOURCE|TIMING|SECURITY",
      "adequatelyHandled": boolean
    }
  ],
  "missingCases": [
    {
      "description": "specific edge case that should be considered",
      "category": "category name",
      "importance": "critical" | "important" | "nice-to-have"
    }
  ],
  "coverageScore": number (0-100)
}`;

/** Maximum number of characters of verification output embedded in the prompt. */
const MAX_VERIFICATION_OUTPUT_CHARS = 6000;

/** Maximum number of file paths listed in the recommendation prompt. */
const MAX_RECOMMENDATION_FILES = 10;

/** Token budget requested for the recommendation call. */
const RECOMMENDATION_MAX_TOKENS = 1500;

/** Marker inserted where the middle of an over-long text was removed. */
const TRUNCATION_MARKER = '\n\n... [truncated] ...\n\n';

/** First Markdown code fence (optionally tagged `json`); group 1 is its content. */
const FENCED_BLOCK_PATTERN = /```(?:json)?\s*([\s\S]*?)```/;

/** Outermost `{ ... }` span in a response. */
const BARE_OBJECT_PATTERN = /(\{[\s\S]*\})/;

/** Outermost `[ ... ]` span in a response. */
const BARE_ARRAY_PATTERN = /(\[[\s\S]*\])/;

const CONFIDENCE_LEVELS = ['high', 'medium', 'low', 'uncertain'] as const;
type ConfidenceLevel = (typeof CONFIDENCE_LEVELS)[number];

const IMPORTANCE_LEVELS = ['critical', 'important', 'nice-to-have'] as const;
type ImportanceLevel = (typeof IMPORTANCE_LEVELS)[number];

/** Normalized shape shared by `missingCases` entries and recommended edge cases. */
interface NormalizedMissingCase {
  description: string;
  category: string;
  importance: ImportanceLevel;
}

/**
 * Evaluate edge case coverage using LLM reasoning.
 *
 * @param context - Supplies `targetFiles`, `projectType` and `allOutputs` for the prompt.
 * @param samplingFn - Function that performs the LLM call.
 * @param config - Supplies `maxTokens` and `temperature` for the call.
 * @returns The normalized evaluation. If the sampling call throws, a failed
 *   evaluation (`passed: false`, `coverageScore: 0`) describing the error is
 *   returned instead of rejecting.
 */
export async function evaluateEdgeCasesLLM(
  context: EvaluatorContext,
  samplingFn: SamplingFunction,
  config: LLMEvaluatorConfig = DEFAULT_EVALUATOR_CONFIG
): Promise<EdgeCaseEvaluation> {
  const prompt = fillTemplate(
    EDGE_CASE_EVALUATION_PROMPT,
    new Map([
      ['targetFiles', context.targetFiles.join('\n')],
      ['projectType', context.projectType || 'general'],
      [
        'verificationOutputs',
        truncateForContext(context.allOutputs, MAX_VERIFICATION_OUTPUT_CHARS),
      ],
    ])
  );

  try {
    const response = await samplingFn({
      messages: [{ role: 'user', content: { type: 'text', text: prompt } }],
      systemPrompt: EDGE_CASE_SYSTEM_PROMPT,
      maxTokens: config.maxTokens,
      temperature: config.temperature,
    });

    const content =
      typeof response.content === 'string' ? response.content : response.content.text;

    return parseEdgeCaseResponse(content);
  } catch (error) {
    return {
      passed: false,
      confidence: 'low',
      reasoning: `LLM evaluation failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      evidence: [],
      analyzedCases: [],
      missingCases: [
        {
          description: 'Unable to evaluate edge cases - LLM unavailable',
          category: 'UNKNOWN',
          importance: 'critical',
        },
      ],
      coverageScore: 0,
    };
  }
}

/**
 * Get recommended edge cases for a specific code type.
 *
 * @param codeType - Label of the code type, interpolated into the prompt.
 * @param filePaths - File paths; only the first 10 are listed in the prompt.
 * @param samplingFn - Function that performs the LLM call.
 * @param config - Supplies `temperature`; the token budget is fixed at 1500.
 * @returns Normalized recommendations. Returns an empty array when the call
 *   fails, no JSON can be extracted, or the JSON is not an array.
 */
export async function getRecommendedEdgeCases(
  codeType: string,
  filePaths: string[],
  samplingFn: SamplingFunction,
  config: LLMEvaluatorConfig = DEFAULT_EVALUATOR_CONFIG
): Promise<Array<{ description: string; category: string; importance: string }>> {
  const prompt = `For code of type "${codeType}" with files:
${filePaths.slice(0, MAX_RECOMMENDATION_FILES).join('\n')}

List the most important edge cases to verify. Focus on:
- Domain-specific failure modes
- Common pitfalls for this code type
- Security-relevant edge cases

Respond with ONLY a JSON array:
[
  { "description": "specific scenario", "category": "category", "importance": "critical|important|nice-to-have" }
]`;

  try {
    const response = await samplingFn({
      messages: [{ role: 'user', content: { type: 'text', text: prompt } }],
      systemPrompt: 'You are an expert at identifying edge cases for different types of code.',
      maxTokens: RECOMMENDATION_MAX_TOKENS,
      temperature: config.temperature,
    });

    const content =
      typeof response.content === 'string' ? response.content : response.content.text;

    // Validate instead of returning raw JSON.parse output: the model may
    // return a non-array or entries with missing / wrong-typed fields.
    return normalizeMissingCases(extractJson(content, BARE_ARRAY_PATTERN));
  } catch {
    // Best effort: recommendations are optional, so any failure yields none.
    return [];
  }
}

/**
 * Substitute `{name}` placeholders in a single pass.
 *
 * A replacer function is used so `$&`, `$'`, `$1`, ... inside a value are
 * inserted literally, and already-inserted text is never re-scanned for
 * placeholders. Placeholders with no entry in `values` are left untouched.
 */
function fillTemplate(template: string, values: ReadonlyMap<string, string>): string {
  return template.replace(/\{(\w+)\}/g, (placeholder: string, key: string) => {
    const replacement = values.get(key);
    return replacement === undefined ? placeholder : replacement;
  });
}

/**
 * Extract and parse the JSON payload of an LLM response.
 *
 * Tries the first fenced code block, then the outermost span matched by
 * `barePattern`, and returns the first candidate that parses.
 *
 * @throws Error('No JSON found in response') when neither pattern matches.
 * @throws The first `JSON.parse` error when no candidate parses.
 */
function extractJson(content: string, barePattern: RegExp): unknown {
  const candidates: string[] = [];
  for (const pattern of [FENCED_BLOCK_PATTERN, barePattern]) {
    const captured = pattern.exec(content)?.[1];
    if (captured !== undefined) {
      candidates.push(captured);
    }
  }

  if (candidates.length === 0) {
    throw new Error('No JSON found in response');
  }

  let firstError: unknown;
  let sawError = false;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate.trim()) as unknown;
    } catch (error) {
      if (!sawError) {
        firstError = error;
        sawError = true;
      }
    }
  }
  throw firstError;
}

/**
 * Convert raw LLM text into a normalized `EdgeCaseEvaluation`.
 *
 * Never throws: any extraction or parse failure yields a failed evaluation
 * whose `reasoning` carries the error message.
 */
function parseEdgeCaseResponse(content: string): EdgeCaseEvaluation {
  try {
    const parsed = extractJson(content, BARE_OBJECT_PATTERN);
    if (!isRecord(parsed)) {
      throw new Error('Response JSON is not an object');
    }

    return {
      passed: toBoolean(parsed.passed),
      confidence: validateConfidence(parsed.confidence),
      reasoning: toText(parsed.reasoning, 'No reasoning provided'),
      evidence: validateEvidence(parsed.evidence),
      analyzedCases: validateAnalyzedCases(parsed.analyzedCases),
      missingCases: validateMissingCases(parsed.missingCases),
      // `|| 0` maps NaN (non-numeric score) to 0 before clamping to 0..100.
      coverageScore: Math.min(100, Math.max(0, Number(parsed.coverageScore) || 0)),
    };
  } catch (error) {
    return {
      passed: false,
      confidence: 'low',
      reasoning: `Failed to parse response: ${error instanceof Error ? error.message : 'Parse error'}`,
      evidence: [],
      analyzedCases: [],
      missingCases: [],
      coverageScore: 0,
    };
  }
}

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Coerce a model-supplied flag to boolean.
 *
 * Strings are true only when they read "true" (case-insensitive), so a
 * stringified "false" is not mistaken for a pass; other values use
 * standard truthiness.
 */
function toBoolean(value: unknown): boolean {
  if (typeof value === 'string') {
    return value.trim().toLowerCase() === 'true';
  }
  return Boolean(value);
}

/**
 * Coerce a model-supplied value to display text.
 *
 * Empty / nullish / `false` / `0` values yield `fallback`; objects and arrays
 * are serialized as JSON rather than "[object Object]".
 */
function toText(value: unknown, fallback: string): string {
  if (typeof value === 'string') {
    return value.length > 0 ? value : fallback;
  }
  if (value === null || value === undefined || value === false || value === 0) {
    return fallback;
  }
  if (typeof value === 'object') {
    return JSON.stringify(value);
  }
  return String(value);
}

/** Keep only non-empty evidence entries, each coerced to text. */
function validateEvidence(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  return value
    .map((item: unknown) => toText(item, ''))
    .filter(item => item.length > 0);
}

/** Normalize a confidence label; anything unrecognized becomes 'uncertain'. */
function validateConfidence(value: unknown): 'high' | 'medium' | 'low' | 'uncertain' {
  if (typeof value !== 'string') return 'uncertain';
  const normalized = value.trim().toLowerCase();
  const matched: ConfidenceLevel | undefined = CONFIDENCE_LEVELS.find(
    level => level === normalized
  );
  return matched ?? 'uncertain';
}

/** Normalize the `analyzedCases` array, dropping entries that are not objects. */
function validateAnalyzedCases(cases: unknown): EdgeCaseEvaluation['analyzedCases'] {
  if (!Array.isArray(cases)) return [];
  return cases.filter(isRecord).map(entry => ({
    description: toText(entry.description, 'Unknown'),
    category: toText(entry.category, 'UNKNOWN'),
    adequatelyHandled: toBoolean(entry.adequatelyHandled),
  }));
}

/** Normalize the `missingCases` array, dropping entries that are not objects. */
function validateMissingCases(cases: unknown): EdgeCaseEvaluation['missingCases'] {
  return normalizeMissingCases(cases);
}

/** Shared normalizer for missing-case and recommended-case lists. */
function normalizeMissingCases(cases: unknown): NormalizedMissingCase[] {
  if (!Array.isArray(cases)) return [];
  return cases.filter(isRecord).map(entry => ({
    description: toText(entry.description, 'Unknown'),
    category: toText(entry.category, 'UNKNOWN'),
    importance: validateImportance(entry.importance),
  }));
}

/** Normalize an importance label; anything unrecognized becomes 'important'. */
function validateImportance(val: unknown): 'critical' | 'important' | 'nice-to-have' {
  if (typeof val !== 'string') return 'important';
  const normalized = val.trim().toLowerCase();
  const matched: ImportanceLevel | undefined = IMPORTANCE_LEVELS.find(
    level => level === normalized
  );
  return matched ?? 'important';
}

/**
 * Shorten `text` to at most `maxLength` characters by keeping its head and
 * tail and replacing the middle with a truncation marker.
 *
 * Text that already fits is returned unchanged. When `maxLength` is too small
 * to hold the marker, the text is simply cut to `maxLength`; a non-positive
 * (or NaN) limit yields an empty string.
 */
function truncateForContext(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text;
  if (!(maxLength > 0)) return '';

  const limit = Math.floor(maxLength);
  if (limit <= TRUNCATION_MARKER.length) {
    return text.slice(0, limit);
  }

  const budget = limit - TRUNCATION_MARKER.length;
  const headLength = Math.ceil(budget / 2);
  const tailLength = budget - headLength;
  // slice(-0) would return the whole string, so guard the zero-length tail.
  const tail = tailLength > 0 ? text.slice(-tailLength) : '';

  return text.slice(0, headLength) + TRUNCATION_MARKER + tail;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jhlee0409/elenchus-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

edge-case-evaluator.ts•7.84 KiB