Skip to main content
Glama
haasonsaas

Deep Code Reasoning MCP Server

by haasonsaas
HypothesisTournamentService.ts (25.1 kB)
import type {
  ClaudeCodeContext,
  HypothesisDefinition,
  HypothesisExplorationResult,
  TournamentResult,
  TournamentRound,
  Evidence,
  Finding,
  Action,
} from '../models/types.js';
import { ConversationalGeminiService } from './ConversationalGeminiService.js';
import { ConversationManager } from './ConversationManager.js';
import { SecureCodeReader } from '../utils/SecureCodeReader.js';
import { v4 as uuidv4 } from 'uuid';

/** Tunable limits for a hypothesis tournament. */
interface TournamentConfig {
  /** Maximum number of hypotheses requested from (and kept after) generation. */
  maxHypotheses: number;
  /** Maximum number of tournament rounds. */
  maxRounds: number;
  /** Confidence below this gets eliminated. */
  eliminationThreshold: number;
  /** Max concurrent conversations. */
  parallelSessions: number;
  /** Whether insights are shared between sessions after each round. */
  crossPollinationEnabled: boolean;
}

// Keyword heuristics used to classify lines of a model response.
// None of these use the `g` flag, so `.test()` is stateless and they can be shared.
const SUPPORTING_PATTERNS: readonly RegExp[] = [
  /confirm|validate|support|consistent with|aligns with|indicates/i,
  /found|discovered|identified|observed/i,
];
const CONTRADICTING_PATTERNS: readonly RegExp[] = [
  /contradict|disprove|inconsistent|rules out|unlikely/i,
  /no evidence|not found|absence of/i,
];
const INSIGHT_PATTERNS: readonly RegExp[] = [
  /key finding:|important:|notable:|significant:/i,
  /this suggests|this indicates|this means/i,
  /pattern:|observation:/i,
];

/** Render each item as its own `- ` bullet line. */
function formatBullets(items: readonly string[]): string {
  return items.map(item => `- ${item}`).join('\n');
}

/**
 * Runs a "tournament" between competing root-cause hypotheses: hypotheses are
 * generated, explored in parallel conversational sessions, scored from the
 * evidence found in the responses, and progressively eliminated round by round.
 */
export class HypothesisTournamentService {
  private conversationalGemini: ConversationalGeminiService;
  private conversationManager: ConversationManager;
  private codeReader: SecureCodeReader;
  private config: TournamentConfig;

  /**
   * @param geminiApiKey API key handed to the conversational Gemini service.
   * @param config Optional overrides for the default tournament limits.
   */
  constructor(
    geminiApiKey: string,
    config: Partial<TournamentConfig> = {},
  ) {
    this.conversationalGemini = new ConversationalGeminiService(geminiApiKey);
    this.conversationManager = new ConversationManager();
    this.codeReader = new SecureCodeReader();
    this.config = {
      maxHypotheses: 6,
      maxRounds: 3,
      eliminationThreshold: 0.3,
      parallelSessions: 4,
      crossPollinationEnabled: true,
      ...config,
    };
  }

  /**
   * Run a hypothesis tournament to find the root cause of an issue.
   *
   * @param context Analysis context (focus area, previous attempts, partial findings).
   * @param issue Description of the issue being investigated.
   * @returns Rounds played, winner / runner-up of the last round, de-duplicated
   *   findings, recommendations and timing metrics.
   */
  async runTournament(
    context: ClaudeCodeContext,
    issue: string,
  ): Promise<TournamentResult> {
    const startTime = Date.now();

    // Generate initial hypotheses
    const hypotheses = await this.generateHypotheses(context, issue);

    const rounds: TournamentRound[] = [];
    let remainingHypotheses = [...hypotheses];
    const collectedFindings: Finding[] = [];

    // Run tournament rounds
    for (let roundNum = 1; roundNum <= this.config.maxRounds; roundNum++) {
      // The first round runs even for a single hypothesis so that it is at
      // least explored once; later rounds need two or more contenders.
      const minimumContenders = roundNum === 1 ? 1 : 2;
      if (remainingHypotheses.length < minimumContenders) break;

      const round = await this.runRound(
        roundNum,
        remainingHypotheses,
        context,
        issue,
        rounds,
      );
      rounds.push(round);
      collectedFindings.push(...this.extractFindingsFromRound(round));

      // Eliminate low-confidence hypotheses, then keep at most the top half
      const survivorLimit = Math.ceil(remainingHypotheses.length / 2);
      remainingHypotheses = round.results
        .filter(r => r.overallConfidence >= this.config.eliminationThreshold)
        .sort((a, b) => b.overallConfidence - a.overallConfidence)
        .slice(0, survivorLimit)
        .map(r => r.hypothesis);

      // Record everything that actually left the tournament this round,
      // including hypotheses cut by the halving step (not only those below
      // the confidence threshold).
      const survivorIds = new Set(remainingHypotheses.map(h => h.id));
      round.eliminatedHypotheses = round.hypotheses
        .filter(h => !survivorIds.has(h.id))
        .map(h => h.id);

      // Share insights across sessions if enabled
      if (this.config.crossPollinationEnabled && remainingHypotheses.length > 1) {
        await this.crossPollinateInsights(round.results);
      }
    }

    // Determine winner and runner-up from a sorted copy so the stored round
    // results keep their original order.
    const finalResults = rounds[rounds.length - 1]?.results ?? [];
    const sortedResults = [...finalResults].sort(
      (a, b) => b.overallConfidence - a.overallConfidence,
    );
    const winner = sortedResults[0];
    const runnerUp = sortedResults[1];

    // Calculate metrics. The estimate "hypotheses * average round time / total
    // time" reduces to hypotheses / rounds; computing it in that form avoids
    // NaN / Infinity when no round ran or the run took 0 ms.
    const duration = Date.now() - startTime;
    const parallelEfficiency = rounds.length > 0 ? hypotheses.length / rounds.length : 1;

    const allFindings = this.dedupeFindings(collectedFindings);

    return {
      issue,
      totalHypotheses: hypotheses.length,
      rounds,
      winner,
      runnerUp,
      allFindings,
      recommendations: this.generateRecommendations(winner, runnerUp, allFindings),
      duration,
      parallelEfficiency,
    };
  }

  /**
   * Generate initial hypotheses based on the issue description.
   *
   * @returns At most `config.maxHypotheses` hypotheses parsed from the model response.
   */
  private async generateHypotheses(
    context: ClaudeCodeContext,
    issue: string,
  ): Promise<HypothesisDefinition[]> {
    // Read relevant code files
    const codeFiles = await this.codeReader.readCodeFiles(context.focusArea);

    // Use Gemini to generate hypotheses
    const sessionId = this.conversationManager.createSession(context);

    const prompt = [
      `Given this issue: "${issue}"`,
      '',
      'And considering:',
      `- Previous attempts: ${context.attemptedApproaches.join(', ')}`,
      `- Partial findings: ${context.partialFindings.map(f => f.description).join(', ')}`,
      '',
      `Generate ${this.config.maxHypotheses} distinct hypotheses for what might be causing this issue.`,
      'For each hypothesis, provide:',
      '1. A clear theory about the root cause',
      '2. A specific approach to test it',
      '3. A category (performance/bug/security/architecture/integration)',
      '4. A priority score (0-1) based on likelihood',
      '',
      'Focus on diverse hypotheses that cover different aspects of the system.',
    ].join('\n');

    try {
      const { response } = await this.conversationalGemini.startConversation(
        sessionId,
        context,
        'hypothesis_generation',
        codeFiles,
        prompt,
      );

      // Parse hypotheses from response
      return this.parseHypothesesFromResponse(response).slice(0, this.config.maxHypotheses);
    } finally {
      // Clean up the generation session even when the request fails
      this.conversationManager.releaseLock(sessionId);
    }
  }

  /**
   * Run a single tournament round: one session per hypothesis, explored in
   * batches of at most `config.parallelSessions`.
   */
  private async runRound(
    roundNumber: number,
    hypotheses: HypothesisDefinition[],
    context: ClaudeCodeContext,
    issue: string,
    previousRounds: TournamentRound[],
  ): Promise<TournamentRound> {
    // Create sessions for each hypothesis
    const sessions = hypotheses.map(h => ({
      hypothesis: h,
      sessionId: this.conversationManager.createSession({
        ...context,
        stuckPoints: [...context.stuckPoints, `Testing: ${h.theory}`],
      }),
    }));

    // Read code files once for all sessions
    const codeFiles = await this.codeReader.readCodeFiles(context.focusArea);

    // A batch size of 0 (or a negative / fractional value) would otherwise
    // make the loop below never advance.
    const batchSize = Math.max(1, Math.floor(this.config.parallelSessions) || 1);

    // Explore hypotheses in parallel (respecting parallelism limit)
    const results: HypothesisExplorationResult[] = [];
    for (let i = 0; i < sessions.length; i += batchSize) {
      const batch = sessions.slice(i, i + batchSize);
      const batchResults = await Promise.all(
        batch.map(({ hypothesis, sessionId }) =>
          this.exploreHypothesis(
            sessionId,
            hypothesis,
            issue,
            codeFiles,
            roundNumber,
            previousRounds,
          ),
        ),
      );
      results.push(...batchResults);
    }

    // Extract cross-hypothesis insights
    const insights = this.extractCrossHypothesisInsights(results);

    const passingIds = new Set(
      results
        .filter(r => r.overallConfidence >= this.config.eliminationThreshold)
        .map(r => r.hypothesis.id),
    );

    return {
      roundNumber,
      hypotheses,
      results,
      eliminatedHypotheses: hypotheses
        .filter(h => !passingIds.has(h.id))
        .map(h => h.id),
      insights,
    };
  }

  /**
   * Explore a single hypothesis through conversational analysis.
   *
   * Never rejects: any error is logged and converted into a low-confidence
   * (0.1) result so one failing session does not abort the whole round.
   */
  private async exploreHypothesis(
    sessionId: string,
    hypothesis: HypothesisDefinition,
    issue: string,
    codeFiles: Map<string, string>,
    roundNumber: number,
    previousRounds: TournamentRound[],
  ): Promise<HypothesisExplorationResult> {
    const evidence: Evidence[] = [];
    const keyInsights: string[] = [];
    let explorationDepth = 0;
    let reproductionSteps: string[] | undefined;

    try {
      const session = this.conversationManager.getSession(sessionId);
      if (!session) {
        throw new Error(`Session ${sessionId} not found`);
      }

      // Initial prompt based on round
      const initialPrompt = this.buildExplorationPrompt(
        hypothesis,
        issue,
        roundNumber,
        previousRounds,
      );

      // Start the exploration
      const { response: initialResponse, suggestedFollowUps } =
        await this.conversationalGemini.startConversation(
          sessionId,
          session.context,
          'hypothesis_test',
          codeFiles,
          initialPrompt,
        );
      explorationDepth++;

      // Extract initial evidence
      evidence.push(...this.extractEvidenceFromResponse(initialResponse, 'initial'));
      keyInsights.push(...this.extractInsightsFromResponse(initialResponse));

      // Follow up based on initial findings (only from the second round on)
      if (suggestedFollowUps.length > 0 && roundNumber > 1) {
        const followUpPrompt = [
          `Based on your initial analysis, please investigate: ${suggestedFollowUps[0]}`,
          '',
          'Focus on finding concrete evidence that either supports or contradicts the hypothesis.',
        ].join('\n');

        const { response: followUpResponse } =
          await this.conversationalGemini.continueConversation(
            sessionId,
            followUpPrompt,
            true,
          );
        explorationDepth++;
        evidence.push(...this.extractEvidenceFromResponse(followUpResponse, 'followup'));
      }

      // Try to reproduce the issue if we have enough confidence
      if (this.calculateConfidence(evidence) > 0.5) {
        const reproduction = await this.attemptReproduction(
          sessionId,
          hypothesis,
          evidence,
        );
        explorationDepth++;

        if (reproduction.success) {
          evidence.push({
            type: 'supporting',
            description: 'Successfully reproduced the issue',
            confidence: 0.9,
            discoveredAt: Date.now(),
          });
          // Keep the steps: generateRecommendations looks at reproductionSteps.
          if (reproduction.steps && reproduction.steps.length > 0) {
            reproductionSteps = reproduction.steps;
          }
        }
      }

      // Finalize and get structured results
      const finalResult = await this.conversationalGemini.finalizeConversation(
        sessionId,
        'actionable',
      );

      const relatedFindings = finalResult.findings.rootCauses
        .filter(rc => rc.confidence < 0.5) // Lower confidence findings might be unrelated
        .map(rc => ({
          type: 'bug' as const,
          severity: 'medium' as const,
          location: rc.evidence[0] || { file: 'unknown', line: 0 },
          description: rc.description,
          evidence: [rc.description],
        }));

      return {
        hypothesis,
        sessionId,
        evidence,
        overallConfidence: this.calculateConfidence(evidence),
        explorationDepth,
        keyInsights,
        relatedFindings: relatedFindings.length > 0 ? relatedFindings : undefined,
        reproductionSteps,
      };
    } catch (error) {
      console.error(`Error exploring hypothesis ${hypothesis.id}:`, error);

      // Return low-confidence result on error
      return {
        hypothesis,
        sessionId,
        evidence: [{
          type: 'contradicting',
          description: `Exploration failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
          confidence: 0.1,
          discoveredAt: Date.now(),
        }],
        overallConfidence: 0.1,
        explorationDepth,
        keyInsights: ['Exploration encountered errors'],
      };
    }
  }

  /**
   * Build exploration prompt based on round and previous findings.
   *
   * From the second round on, theories eliminated earlier and insights from
   * previous rounds are appended as bullet lists (each section only when it
   * has at least one entry).
   */
  private buildExplorationPrompt(
    hypothesis: HypothesisDefinition,
    issue: string,
    roundNumber: number,
    previousRounds: TournamentRound[],
  ): string {
    const sections: string[] = [
      [
        `We're investigating this issue: "${issue}"`,
        '',
        `Current hypothesis: "${hypothesis.theory}"`,
        `Test approach: ${hypothesis.testApproach}`,
        `Category: ${hypothesis.category}`,
        '',
        'Please explore this hypothesis by:',
        '1. Looking for evidence that supports or contradicts it',
        '2. Examining the relevant code sections',
        '3. Considering edge cases and boundary conditions',
        '4. Checking for patterns that match the symptoms',
      ].join('\n'),
    ];

    // Add context from previous rounds
    if (roundNumber > 1 && previousRounds.length > 0) {
      const theoryById = new Map<string, string>();
      for (const h of previousRounds.flatMap(r => r.hypotheses)) {
        // Keep the first occurrence, matching a find() over the flattened list
        if (!theoryById.has(h.id)) theoryById.set(h.id, h.theory);
      }

      const eliminatedTheories = previousRounds
        .flatMap(r => r.eliminatedHypotheses)
        .map(id => theoryById.get(id))
        .filter((theory): theory is string => Boolean(theory));

      if (eliminatedTheories.length > 0) {
        sections.push(`Previously eliminated theories:\n${formatBullets(eliminatedTheories)}`);
      }

      const previousInsights = previousRounds.flatMap(r => r.insights);
      if (previousInsights.length > 0) {
        sections.push(`Insights from previous rounds:\n${formatBullets(previousInsights)}`);
      }
    }

    return sections.join('\n\n');
  }

  /**
   * Extract evidence from AI response using keyword heuristics, one candidate
   * per line.
   *
   * A line matching both keyword sets is treated as contradicting: the
   * contradicting phrases ("not found", "inconsistent with", ...) contain
   * supporting keywords ("found", "consistent with"), so giving support
   * precedence would make them unreachable.
   *
   * @param response Raw model response text.
   * @param phase Label of the exploration phase; currently not used by the extraction.
   */
  private extractEvidenceFromResponse(response: string, phase: string): Evidence[] {
    const evidence: Evidence[] = [];
    const timestamp = Date.now();

    for (const line of response.split('\n')) {
      const isSupporting = SUPPORTING_PATTERNS.some(p => p.test(line));
      const isContradicting = CONTRADICTING_PATTERNS.some(p => p.test(line));
      if (!isSupporting && !isContradicting) continue;

      // Extract code references of the form "file.ext:123" if present
      const codeRef = line.match(/(\w+\.\w+):(\d+)/);

      evidence.push({
        type: isContradicting ? 'contradicting' : 'supporting',
        description: line.trim(),
        location: codeRef
          ? { file: codeRef[1], line: parseInt(codeRef[2], 10) }
          : undefined,
        confidence: this.estimateConfidenceFromText(line),
        discoveredAt: timestamp,
      });
    }

    return evidence;
  }

  /**
   * Extract key insights from response: every line containing an insight
   * marker, joined with the following line when that line is non-empty and is
   * not itself an insight line.
   */
  private extractInsightsFromResponse(response: string): string[] {
    const insights: string[] = [];
    const isInsightLine = (text: string): boolean => INSIGHT_PATTERNS.some(p => p.test(text));

    const lines = response.split('\n');
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (!isInsightLine(line)) continue;

      // Get this line and potentially the next one
      let insight = line.trim();
      const next = i + 1 < lines.length ? lines[i + 1] : undefined;
      if (next !== undefined && next.trim() && !isInsightLine(next)) {
        insight += ' ' + next.trim();
      }
      insights.push(insight);
    }

    return insights;
  }

  /**
   * Attempt to reproduce the issue based on current evidence.
   *
   * @returns `success: true` plus the list-like lines of the response when the
   *   model indicates the issue is reproducible; `success: false` otherwise.
   */
  private async attemptReproduction(
    sessionId: string,
    hypothesis: HypothesisDefinition,
    currentEvidence: Evidence[],
  ): Promise<{ success: boolean; steps?: string[] }> {
    const supportingEvidence = currentEvidence
      .filter(e => e.type === 'supporting')
      .map(e => `- ${e.description}`)
      .join('\n');

    const prompt = [
      `Based on the evidence we've gathered for the hypothesis "${hypothesis.theory}", can you provide specific steps to reproduce this issue?`,
      '',
      'Current evidence:',
      supportingEvidence,
      '',
      "Please provide concrete reproduction steps if possible, or explain why reproduction isn't feasible.",
    ].join('\n');

    const { response } = await this.conversationalGemini.continueConversation(
      sessionId,
      prompt,
      false,
    );

    // Simple check for reproduction success
    const successPatterns = /can be reproduced|reproduction steps:|to reproduce:|reproducible/i;
    const failurePatterns = /cannot reproduce|unable to reproduce|not reproducible/i;

    // Failure must be checked first: "not reproducible" also contains the
    // success keyword "reproducible".
    if (failurePatterns.test(response)) {
      return { success: false };
    }

    if (successPatterns.test(response)) {
      // Extract steps: numbered items, dash bullets, or lines starting with "step"
      const steps = response
        .split('\n')
        .map(line => line.trim())
        .filter(line => /^\d+\.|^-|^step/i.test(line));

      return { success: true, steps };
    }

    return { success: false };
  }

  /**
   * Calculate overall confidence from evidence.
   *
   * Supporting evidence counts +confidence, contradicting -confidence, neutral
   * 0; the signed sum is normalised into the 0-1 range. Returns 0 for no
   * evidence and 0.5 when nothing carries any weight.
   */
  private calculateConfidence(evidence: Evidence[]): number {
    if (evidence.length === 0) return 0;

    const weights = { supporting: 1, contradicting: -1, neutral: 0 };

    let weightedSum = 0;
    let maxPossible = 0;
    for (const e of evidence) {
      const contribution = weights[e.type] * e.confidence;
      weightedSum += contribution;
      maxPossible += Math.abs(contribution);
    }

    // Normalize to 0-1 range
    return maxPossible > 0
      ? (weightedSum + maxPossible) / (2 * maxPossible)
      : 0.5;
  }

  /**
   * Estimate confidence from text content based on hedging vocabulary.
   *
   * Whole-word matching keeps negated forms ("unlikely", "unconfirmed",
   * "uncertainly") from being mistaken for their positive counterparts.
   * Note the result includes random jitter within each tier, so repeated
   * calls on the same text can differ.
   */
  private estimateConfidenceFromText(text: string): number {
    const highConfidenceWords = /\b(?:definitely|certainly|clearly|obviously|confirmed)\b/i;
    const mediumConfidenceWords = /\b(?:likely|probably|suggests|indicates|appears)\b/i;
    const lowConfidenceWords = /\b(?:possibly|might|could|maybe|uncertain)\b/i;

    if (highConfidenceWords.test(text)) return 0.8 + Math.random() * 0.2;
    if (mediumConfidenceWords.test(text)) return 0.5 + Math.random() * 0.3;
    if (lowConfidenceWords.test(text)) return 0.2 + Math.random() * 0.3;

    return 0.5; // Default medium confidence
  }

  /**
   * Parse hypotheses from Gemini's response.
   *
   * Primary format: a numbered list ("1. ...", "2. ..."). If that yields
   * nothing, falls back to scanning for "hypothesis"/"theory" lines followed
   * by an "approach"/"test"/"method" line.
   */
  private parseHypothesesFromResponse(response: string): HypothesisDefinition[] {
    const hypotheses: HypothesisDefinition[] = [];

    // Look for numbered hypotheses; index 0 is the text before the first number
    const hypothesisBlocks = response.split(/\d+\.\s+/);

    for (let i = 1; i < hypothesisBlocks.length; i++) {
      const block = hypothesisBlocks[i];

      // Extract theory (usually first line)
      const lines = block.split('\n').filter(l => l.trim());
      if (lines.length === 0) continue;

      hypotheses.push({
        id: `h${i}`,
        theory: lines[0].replace(/theory:|hypothesis:/i, '').trim(),
        testApproach:
          this.extractField(block, /approach:|test:|method:/i) ||
          'Investigate through code analysis',
        category: this.extractCategory(block),
        priority: this.extractPriority(block),
      });
    }

    if (hypotheses.length > 0) return hypotheses;

    // Fallback parsing logic when no numbered format was found
    const defaultApproach = 'Investigate through code analysis';
    const addFallback = (theory: string, testApproach: string): void => {
      hypotheses.push({
        id: `h${hypotheses.length + 1}`,
        theory,
        testApproach,
        category: 'bug',
        priority: 0.5,
      });
    };

    let pendingTheory: string | null = null;
    for (const line of response.split('\n')) {
      if (pendingTheory === null && /hypothesis|theory/i.test(line)) {
        pendingTheory = line.replace(/.*?:/, '').trim();
      } else if (pendingTheory !== null && /approach|test|method/i.test(line)) {
        addFallback(pendingTheory, line.replace(/.*?:/, '').trim());
        pendingTheory = null;
      }
    }

    // A trailing theory without an approach line is still a usable hypothesis
    if (pendingTheory !== null) {
      addFallback(pendingTheory, defaultApproach);
    }

    return hypotheses;
  }

  /**
   * Return the text following the first label matched by `pattern` (up to the
   * end of that line), or undefined when no label is present.
   *
   * The label alternation is wrapped in a non-capturing group; without it the
   * trailing capture group would bind only to the last alternative.
   */
  private extractField(text: string, pattern: RegExp): string | undefined {
    const match = text.match(new RegExp(`(?:${pattern.source})\\s*(.+)`, 'i'));
    return match?.[1]?.trim();
  }

  /** Guess a hypothesis category from keywords; defaults to 'bug'. */
  private extractCategory(text: string): HypothesisDefinition['category'] {
    if (/performance|slow|latency|speed/i.test(text)) return 'performance';
    if (/security|vulnerability|exploit|injection/i.test(text)) return 'security';
    if (/architecture|design|structure|pattern/i.test(text)) return 'architecture';
    if (/integration|external|api|service/i.test(text)) return 'integration';
    return 'bug';
  }

  /**
   * Extract a priority in the 0-1 range.
   *
   * Accepts "priority: 0.8", "likelihood: 80", "priority: 80%" or a bare
   * "80%". Labeled values above 1 are interpreted as percentages. Falls back
   * to keyword-based estimation (with random jitter) and finally to 0.5.
   */
  private extractPriority(text: string): number {
    // Group 1: labeled value, group 2: optional '%' after it, group 3: bare percentage
    const match = text.match(
      /(?:priority|likelihood):?\s*(\d*\.?\d+)\s*(%)?|(\d+(?:\.\d+)?)\s*%/i,
    );
    if (match) {
      const isPercent = match[2] !== undefined || match[3] !== undefined;
      const value = parseFloat(match[1] ?? match[3]);
      if (Number.isFinite(value)) {
        const scaled = isPercent || value > 1 ? value / 100 : value;
        return Math.min(1, Math.max(0, scaled));
      }
    }

    // Estimate from confidence words. Word boundaries keep "unlikely" /
    // "improbable" out of the high tier and "slow" / "follow" out of the low tier.
    if (/\b(?:high(?:ly)?|likely|probable)\b/i.test(text)) return 0.7 + Math.random() * 0.2;
    if (/\b(?:medium|possible|moderate)\b/i.test(text)) return 0.4 + Math.random() * 0.2;
    if (/\b(?:low|unlikely|improbable)\b/i.test(text)) return 0.1 + Math.random() * 0.2;

    return 0.5;
  }

  /**
   * Share insights between active sessions: system-wide sounding insights from
   * high-confidence results (> 0.6) are sent to the sessions of low-confidence
   * results (< 0.5). Failures are logged and skipped (best effort).
   */
  private async crossPollinateInsights(results: HypothesisExplorationResult[]): Promise<void> {
    // Find significant insights that could help other hypotheses
    const significantInsights = results
      .filter(r => r.overallConfidence > 0.6)
      .flatMap(r => r.keyInsights)
      .filter(insight =>
        /pattern|common|related|connected|affects all|system-wide/i.test(insight),
      );

    if (significantInsights.length === 0) return;

    const insightList = formatBullets(significantInsights);

    // Share with lower-confidence hypotheses
    const strugglingHypotheses = results.filter(r => r.overallConfidence < 0.5);

    for (const result of strugglingHypotheses) {
      try {
        const prompt = [
          'New insights from parallel investigations:',
          insightList,
          '',
          `Do any of these insights change your analysis of the hypothesis "${result.hypothesis.theory}"?`,
        ].join('\n');

        await this.conversationalGemini.continueConversation(
          result.sessionId,
          prompt,
          false,
        );
      } catch (error) {
        console.warn(`Failed to cross-pollinate to session ${result.sessionId}:`, error);
      }
    }
  }

  /**
   * Extract findings from a tournament round, de-duplicated by description
   * and file.
   */
  private extractFindingsFromRound(round: TournamentRound): Finding[] {
    return this.dedupeFindings(round.results.flatMap(r => r.relatedFindings || []));
  }

  /** Keep the first finding for each (file, description) pair, preserving order. */
  private dedupeFindings(findings: Finding[]): Finding[] {
    const seen = new Set<string>();
    return findings.filter(finding => {
      const key = `${finding.location.file}\u0000${finding.description}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  }

  /**
   * Extract insights that span multiple hypotheses: insights reported by two
   * or more hypotheses (compared case- and punctuation-insensitively), plus a
   * strong-vs-weak summary when both extremes are present.
   */
  private extractCrossHypothesisInsights(results: HypothesisExplorationResult[]): string[] {
    const insights: string[] = [];
    const normalize = (text: string): string => text.toLowerCase().replace(/[^\w\s]/g, '');

    // Count each insight once per hypothesis so "across N hypotheses" is accurate
    const occurrences = new Map<string, { original: string; count: number }>();
    for (const result of results) {
      const seenInThisResult = new Set<string>();
      for (const insight of result.keyInsights) {
        const key = normalize(insight);
        if (seenInThisResult.has(key)) continue;
        seenInThisResult.add(key);

        const entry = occurrences.get(key);
        if (entry) {
          entry.count++;
        } else {
          occurrences.set(key, { original: insight, count: 1 });
        }
      }
    }

    // Insights that appear in multiple hypotheses
    for (const { original, count } of occurrences.values()) {
      if (count >= 2) {
        insights.push(`Common finding across ${count} hypotheses: ${original}`);
      }
    }

    // Contradictory findings
    const highConfidence = results.filter(r => r.overallConfidence > 0.7);
    const lowConfidence = results.filter(r => r.overallConfidence < 0.3);

    if (highConfidence.length > 0 && lowConfidence.length > 0) {
      insights.push(
        `Strong evidence for: ${highConfidence.map(r => r.hypothesis.theory).join(', ')}. ` +
        `Weak evidence for: ${lowConfidence.map(r => r.hypothesis.theory).join(', ')}.`,
      );
    }

    return insights;
  }

  /**
   * Generate recommendations from tournament results.
   *
   * @param winner Highest-confidence result of the final round, if any.
   * @param runnerUp Second-highest result of the final round, if any.
   * @param allFindings Side findings collected during the tournament.
   */
  private generateRecommendations(
    winner: HypothesisExplorationResult | undefined,
    runnerUp: HypothesisExplorationResult | undefined,
    allFindings: Finding[],
  ): TournamentResult['recommendations'] {
    const primary: Action[] = [];
    const secondary: Action[] = [];

    if (winner && winner.overallConfidence > 0.7) {
      // High confidence winner
      primary.push({
        type: 'fix',
        description: `Address root cause: ${winner.hypothesis.theory}`,
        priority: 'critical',
        estimatedEffort: '2-4 hours',
      });

      // Add reproduction steps if available
      if (winner.reproductionSteps && winner.reproductionSteps.length > 0) {
        primary.push({
          type: 'investigate',
          description: 'Verify issue using reproduction steps',
          priority: 'high',
          estimatedEffort: '30 minutes',
        });
      }
    } else if (winner) {
      // Lower confidence winner
      primary.push({
        type: 'investigate',
        description: `Further investigate: ${winner.hypothesis.theory}`,
        priority: 'high',
        estimatedEffort: '1-2 hours',
      });
    }

    if (runnerUp && runnerUp.overallConfidence > 0.5) {
      secondary.push({
        type: 'investigate',
        description: `Also consider: ${runnerUp.hypothesis.theory}`,
        priority: 'medium',
        estimatedEffort: '1-2 hours',
      });
    }

    // Add recommendations for serendipitous findings
    const criticalFindings = allFindings.filter(
      f => f.severity === 'critical' || f.severity === 'high',
    );
    for (const finding of criticalFindings) {
      secondary.push({
        type: 'fix',
        description: `Unrelated issue found: ${finding.description}`,
        priority: finding.severity === 'critical' ? 'high' : 'medium',
        estimatedEffort: '1-3 hours',
      });
    }

    // Add monitoring if performance-related
    if (winner?.hypothesis.category === 'performance') {
      primary.push({
        type: 'monitor',
        description: 'Set up performance monitoring for affected areas',
        priority: 'medium',
        estimatedEffort: '1 hour',
      });
    }

    return { primary, secondary };
  }
}

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/haasonsaas/deep-code-reasoning-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.