mcp-reasoner

MIT License
143
Overview InspectNew Schema Related Servers Reviews Score
import { v4 as uuidv4 } from 'uuid';
import { ThoughtNode, ReasoningRequest, ReasoningResponse, CONFIG } from '../../types.js';
import { MCTS002AlphaStrategy } from './mcts-002-alpha.js';

// Queue implementation for bidirectional search
class Queue<T> {
  private items: T[];

  constructor() {
    this.items = [];
  }

  enqueue(item: T): void {
    this.items.push(item);
  }

  dequeue(): T | undefined {
    return this.items.shift();
  }

  isEmpty(): boolean {
    return this.items.length === 0;
  }

  size(): number {
    return this.items.length;
  }
}

// Extend PolicyGuidedNode to include both A* and bidirectional properties
interface BidirectionalPolicyNode extends ThoughtNode {
  visits: number;
  totalReward: number;
  untriedActions?: string[];
  g: number;  // A* cost from start
  h: number;  // A* heuristic to goal
  f: number;  // A* f = g + h
  policyScore: number;
  valueEstimate: number;
  priorActionProbs: Map<string, number>;
  puct?: number;
  actionHistory?: string[];
  noveltyScore?: number;
  parent?: string;  // For path reconstruction
  direction?: 'forward' | 'backward';
  searchDepth?: number;
  meetingPoint?: boolean;
}

export class MCTS002AltAlphaStrategy extends MCTS002AlphaStrategy {
  private startNode: BidirectionalPolicyNode | null = null;
  private goalNode: BidirectionalPolicyNode | null = null;
  private bidirectionalStats: {
    forwardExplorationRate: number;
    backwardExplorationRate: number;
    meetingPoints: number;
    pathQuality: number;
  };

  constructor(stateManager: any, numSimulations: number = CONFIG.numSimulations) {
    super(stateManager, numSimulations);
    this.bidirectionalStats = {
      forwardExplorationRate: Math.sqrt(2),
      backwardExplorationRate: Math.sqrt(2),
      meetingPoints: 0,
      pathQuality: 0
    };
  }

  public async processThought(request: ReasoningRequest): Promise<ReasoningResponse> {
    // Get base response first to ensure proper MCTS initialization
    const baseResponse = await super.processThought(request);

    const nodeId = uuidv4();
    const parentNode = request.parentId ? 
      await this.getNode(request.parentId) as BidirectionalPolicyNode : undefined;

    const node: BidirectionalPolicyNode = {
      id: nodeId,
      thought: request.thought,
      depth: request.thoughtNumber - 1,
      score: 0,
      children: [],
      parentId: request.parentId,
      isComplete: !request.nextThoughtNeeded,
      visits: 0,
      totalReward: 0,
      untriedActions: [],
      g: parentNode ? parentNode.g + 1 : 0,
      h: 0,
      f: 0,
      policyScore: 0,
      valueEstimate: 0,
      priorActionProbs: new Map(),
      actionHistory: parentNode ? 
        [...(parentNode.actionHistory || []), this.getActionKey(request.thought)] :
        [this.getActionKey(request.thought)],
      searchDepth: 0,
      direction: parentNode ? parentNode.direction : 'forward'
    };

    // Track start and goal nodes for bidirectional search
    if (!parentNode) {
      this.startNode = node;
      node.direction = 'forward';
    }
    if (node.isComplete) {
      this.goalNode = node;
      node.direction = 'backward';
    }

    // Run bidirectional search if we have both endpoints
    if (this.startNode && this.goalNode) {
      const path = await this.bidirectionalSearch(this.startNode, this.goalNode);
      if (path.length > 0) {
        await this.updatePathWithPolicyGuidance(path);
      }
    }

    // Calculate enhanced path statistics
    const currentPath = await this.stateManager.getPath(nodeId);
    const enhancedScore = this.calculateBidirectionalPolicyScore(currentPath);

    return {
      ...baseResponse,
      score: enhancedScore,
      bestScore: Math.max(baseResponse.bestScore || 0, enhancedScore)
    };
  }

  protected getActionKey(thought: string): string {
    // Simple action extraction based on first few words
    return thought.split(/\s+/).slice(0, 3).join('_').toLowerCase();
  }

  private async searchLevel(
    queue: Queue<BidirectionalPolicyNode>,
    visited: Map<string, BidirectionalPolicyNode>,
    otherVisited: Map<string, BidirectionalPolicyNode>,
    direction: 'forward' | 'backward'
  ): Promise<BidirectionalPolicyNode | null> {
    const levelSize = queue.size();
    
    for (let i = 0; i < levelSize; i++) {
      const current = queue.dequeue();
      if (!current) continue;

      // Check if we've found a meeting point
      if (otherVisited.has(current.id)) {
        current.meetingPoint = true;
        this.bidirectionalStats.meetingPoints++;
        await this.saveNode(current);
        return current;
      }

      // Get neighbors based on direction and policy scores
      const neighbors = direction === 'forward' ?
        await Promise.all(current.children.map(id => this.getNode(id))) :
        await Promise.all([current.parentId].filter((id): id is string => !!id).map(id => this.getNode(id)));

      const validNeighbors = neighbors.filter((n): n is BidirectionalPolicyNode => !!n)
        .sort((a, b) => b.policyScore - a.policyScore); // Use policy scores for neighbor selection

      for (const neighbor of validNeighbors) {
        if (!visited.has(neighbor.id)) {
          visited.set(neighbor.id, neighbor);
          neighbor.parent = current.id;
          neighbor.direction = direction;
          neighbor.searchDepth = (current.searchDepth || 0) + 1;
          await this.saveNode(neighbor);
          queue.enqueue(neighbor);
        }
      }
    }

    return null;
  }

  private async bidirectionalSearch(
    start: BidirectionalPolicyNode, 
    goal: BidirectionalPolicyNode
  ): Promise<BidirectionalPolicyNode[]> {
    const forwardQueue = new Queue<BidirectionalPolicyNode>();
    const backwardQueue = new Queue<BidirectionalPolicyNode>();
    const forwardVisited = new Map<string, BidirectionalPolicyNode>();
    const backwardVisited = new Map<string, BidirectionalPolicyNode>();
    
    forwardQueue.enqueue(start);
    backwardQueue.enqueue(goal);
    forwardVisited.set(start.id, start);
    backwardVisited.set(goal.id, goal);
    
    while (!forwardQueue.isEmpty() && !backwardQueue.isEmpty()) {
      // Search from both directions with policy guidance
      const meetingPoint = await this.searchLevel(
        forwardQueue, 
        forwardVisited, 
        backwardVisited,
        'forward'
      );
      
      if (meetingPoint) {
        const path = this.reconstructPath(
          meetingPoint, 
          forwardVisited, 
          backwardVisited
        );
        this.updateBidirectionalStats(path);
        return path;
      }

      const backMeetingPoint = await this.searchLevel(
        backwardQueue,
        backwardVisited,
        forwardVisited,
        'backward'
      );

      if (backMeetingPoint) {
        const path = this.reconstructPath(
          backMeetingPoint,
          forwardVisited,
          backwardVisited
        );
        this.updateBidirectionalStats(path);
        return path;
      }

      // Adapt exploration rates based on progress
      this.adaptBidirectionalExploration(forwardVisited, backwardVisited);
    }
    
    return [];
  }

  private reconstructPath(
    meetingPoint: BidirectionalPolicyNode,
    forwardVisited: Map<string, BidirectionalPolicyNode>,
    backwardVisited: Map<string, BidirectionalPolicyNode>
  ): BidirectionalPolicyNode[] {
    const path: BidirectionalPolicyNode[] = [meetingPoint];
    
    // Reconstruct forward path
    let current = meetingPoint;
    while (current.parent && forwardVisited.has(current.parent)) {
      current = forwardVisited.get(current.parent)!;
      path.unshift(current);
    }
    
    // Reconstruct backward path
    current = meetingPoint;
    while (current.parent && backwardVisited.has(current.parent)) {
      current = backwardVisited.get(current.parent)!;
      path.push(current);
    }
    
    return path;
  }

  private async updatePathWithPolicyGuidance(path: BidirectionalPolicyNode[]): Promise<void> {
    const pathBonus = 0.2;
    
    for (const node of path) {
      // Boost both policy and value estimates for nodes along the path
      node.policyScore += pathBonus;
      node.valueEstimate = (node.valueEstimate + 1) / 2;
      
      // Update action probabilities with path information
      if (node.parentId) {
        const parentNode = await this.getNode(node.parentId) as BidirectionalPolicyNode;
        const actionKey = this.getActionKey(node.thought);
        const currentProb = parentNode.priorActionProbs.get(actionKey) || 0;
        const newProb = Math.max(currentProb, 0.8); // Strong preference for path actions
        parentNode.priorActionProbs.set(actionKey, newProb);
        await this.saveNode(parentNode);
      }

      await this.saveNode(node);
    }

    // Update path quality metric
    this.bidirectionalStats.pathQuality = path.reduce((acc, node) => 
      acc + node.policyScore + node.valueEstimate, 0) / (path.length * 2);
  }

  private adaptBidirectionalExploration(
    forwardVisited: Map<string, BidirectionalPolicyNode>,
    backwardVisited: Map<string, BidirectionalPolicyNode>
  ): void {
    // Adjust exploration rates based on search progress
    const forwardProgress = Array.from(forwardVisited.values())
      .reduce((acc, node) => acc + node.policyScore, 0) / forwardVisited.size;
    const backwardProgress = Array.from(backwardVisited.values())
      .reduce((acc, node) => acc + node.policyScore, 0) / backwardVisited.size;

    // Increase exploration in the direction making less progress
    if (forwardProgress > backwardProgress) {
      this.bidirectionalStats.backwardExplorationRate *= 1.05;
      this.bidirectionalStats.forwardExplorationRate *= 0.95;
    } else {
      this.bidirectionalStats.forwardExplorationRate *= 1.05;
      this.bidirectionalStats.backwardExplorationRate *= 0.95;
    }
  }

  private updateBidirectionalStats(path: BidirectionalPolicyNode[]): void {
    const forwardNodes = path.filter(n => n.direction === 'forward');
    const backwardNodes = path.filter(n => n.direction === 'backward');
    
    // Update exploration rates based on path composition
    const forwardQuality = forwardNodes.reduce((acc, n) => acc + n.policyScore, 0) / forwardNodes.length;
    const backwardQuality = backwardNodes.reduce((acc, n) => acc + n.policyScore, 0) / backwardNodes.length;
    
    this.bidirectionalStats.pathQuality = (forwardQuality + backwardQuality) / 2;
  }

  private calculateBidirectionalPolicyScore(path: ThoughtNode[]): number {
    if (path.length === 0) return 0;
    
    return path.reduce((acc, node) => {
      const biNode = node as BidirectionalPolicyNode;
      const baseScore = node.score;
      const policyBonus = biNode.policyScore || 0;
      const valueBonus = biNode.valueEstimate || 0;
      const meetingPointBonus = biNode.meetingPoint ? 0.2 : 0;
      const directionBonus = biNode.direction === 'forward' ? 
        this.bidirectionalStats.forwardExplorationRate * 0.1 :
        this.bidirectionalStats.backwardExplorationRate * 0.1;
      
      return acc + (
        baseScore + 
        policyBonus + 
        valueBonus + 
        meetingPointBonus +
        directionBonus
      ) / 5;
    }, 0) / path.length;
  }

  public async getMetrics(): Promise<any> {
    const baseMetrics = await super.getMetrics();
    const nodes = await this.stateManager.getAllNodes() as BidirectionalPolicyNode[];
    
    const forwardNodes = nodes.filter(n => n.direction === 'forward');
    const backwardNodes = nodes.filter(n => n.direction === 'backward');
    const meetingPoints = nodes.filter(n => n.meetingPoint);

    const bidirectionalMetrics = {
      forwardSearch: {
        nodesExplored: forwardNodes.length,
        averagePolicyScore: forwardNodes.reduce((sum, n) => sum + n.policyScore, 0) / forwardNodes.length,
        explorationRate: this.bidirectionalStats.forwardExplorationRate
      },
      backwardSearch: {
        nodesExplored: backwardNodes.length,
        averagePolicyScore: backwardNodes.reduce((sum, n) => sum + n.policyScore, 0) / backwardNodes.length,
        explorationRate: this.bidirectionalStats.backwardExplorationRate
      },
      meetingPoints: {
        count: this.bidirectionalStats.meetingPoints,
        averageDepth: meetingPoints.reduce((sum, n) => sum + n.depth, 0) / (meetingPoints.length || 1)
      },
      pathQuality: this.bidirectionalStats.pathQuality
    };

    return {
      ...baseMetrics,
      name: 'MCTS-002Alt-Alpha (Bidirectional + Policy Enhanced)',
      hasStartNode: !!this.startNode,
      hasGoalNode: !!this.goalNode,
      bidirectionalMetrics
    };
  }

  public async clear(): Promise<void> {
    await super.clear();
    this.startNode = null;
    this.goalNode = null;
    this.bidirectionalStats = {
      forwardExplorationRate: Math.sqrt(2),
      backwardExplorationRate: Math.sqrt(2),
      meetingPoints: 0,
      pathQuality: 0
    };
  }
}