// learning-service.ts • 20.9 kB
import { SemanticEngine } from '../engines/semantic-engine.js';
import { PatternEngine } from '../engines/pattern-engine.js';
import { SQLiteDatabase } from '../storage/sqlite-db.js';
import { SemanticVectorDB } from '../storage/vector-db.js';
import { config } from '../config/config.js';
import { nanoid } from 'nanoid';
/**
 * Outcome of a `LearningService.learnFromCodebase` run.
 */
export interface LearningResult {
  /** `true` when learning completed or existing intelligence was reused; `false` when an error was caught. */
  success: boolean;
  /** Number of semantic concepts learned (or already stored, when existing intelligence is reused). */
  conceptsLearned: number;
  /** Number of coding patterns learned (or already stored, when existing intelligence is reused). */
  patternsLearned: number;
  /** Number of feature maps stored (or already stored, when existing intelligence is reused). */
  featuresLearned: number;
  /** Human-readable progress and summary messages, in the order they were produced. */
  insights: string[];
  /** Wall-clock duration of the call in milliseconds. */
  timeElapsed: number;
  /** Project blueprint summary; only populated by a full learning run (absent on reuse and on failure). */
  blueprint?: {
    /** Frameworks reported by the codebase analysis. */
    techStack: string[];
    /** Entry point file path keyed by entry point type. */
    entryPoints: Record<string, string>;
    /** Directory path keyed by directory type. */
    keyDirectories: Record<string, string>;
    /** Architecture label inferred from frameworks and key directories. */
    architecture: string;
  };
}
/**
 * Options accepted by `LearningService.learnFromCodebase`.
 */
export interface LearningOptions {
  /** When truthy, skip the existing-intelligence check and re-learn from scratch. */
  force?: boolean;
  /** Progress reporter forwarded to the semantic and pattern engines while they learn. */
  progressCallback?: (current: number, total: number, message: string) => void;
}
/**
 * Shared learning service used by both the CLI and the MCP tools, so that
 * every interface runs exactly the same learning pipeline.
 *
 * All members are static; the class is never instantiated.
 */
export class LearningService {
  /**
   * Learn from a codebase - shared implementation for CLI and MCP.
   *
   * Opens a project-specific SQLite database and vector database, then runs
   * the learning phases in order: codebase analysis, concept learning,
   * pattern learning, relationship analysis, storage, semantic indexing,
   * project metadata, feature mapping and blueprint storage. Unless
   * `options.force` is set, previously stored concepts short-circuit the run.
   *
   * @param path - Root path of the project to learn from.
   * @param options - Optional `force` flag and progress callback.
   * @returns A result describing what was learned. Errors raised inside the
   *   pipeline are caught and reported as `success: false` rather than thrown.
   */
  static async learnFromCodebase(
    path: string,
    options: LearningOptions = {}
  ): Promise<LearningResult> {
    const startTime = Date.now();
    const insights: string[] = [];

    // Create project-specific database and engines
    const projectDbPath = config.getDatabasePath(path);
    const projectDatabase = new SQLiteDatabase(projectDbPath);
    const projectVectorDB = new SemanticVectorDB(process.env.OPENAI_API_KEY);
    const projectSemanticEngine = new SemanticEngine(projectDatabase, projectVectorDB);
    const projectPatternEngine = new PatternEngine(projectDatabase);

    try {
      console.error(`🗄️ Using project database: ${projectDbPath}`);

      // Check if already learned and not forcing re-learn
      if (!options.force) {
        const existingIntelligence = await this.checkExistingIntelligence(projectDatabase, path);
        if (existingIntelligence && existingIntelligence.concepts > 0) {
          return {
            success: true,
            conceptsLearned: existingIntelligence.concepts,
            patternsLearned: existingIntelligence.patterns,
            featuresLearned: existingIntelligence.features,
            insights: ['Using existing intelligence (use force: true to re-learn)'],
            timeElapsed: Date.now() - startTime
          };
        }
      }

      // Phase 1: Comprehensive codebase analysis
      insights.push('🔍 Phase 1: Analyzing codebase structure...');
      const codebaseAnalysis = await projectSemanticEngine.analyzeCodebase(path);
      insights.push(`   ✅ Detected languages: ${codebaseAnalysis.languages?.join(', ') || 'unknown'}`);
      insights.push(`   ✅ Found frameworks: ${codebaseAnalysis.frameworks?.join(', ') || 'none detected'}`);
      if (codebaseAnalysis.complexity) {
        insights.push(`   ✅ Complexity: ${codebaseAnalysis.complexity.cyclomatic?.toFixed(1) || 'N/A'} cyclomatic, ${codebaseAnalysis.complexity.cognitive?.toFixed(1) || 'N/A'} cognitive`);
      }

      // Phase 2: Deep semantic learning
      insights.push('🧠 Phase 2: Learning semantic concepts...');
      const concepts = await projectSemanticEngine.learnFromCodebase(path, options.progressCallback);

      // Analyze concept distribution
      const conceptTypes: Record<string, number> = {};
      for (const concept of concepts) {
        const type = concept.type || 'unknown';
        conceptTypes[type] = (conceptTypes[type] || 0) + 1;
      }
      insights.push(`   ✅ Extracted ${concepts.length} semantic concepts:`);
      for (const [type, count] of Object.entries(conceptTypes)) {
        insights.push(`     - ${count} ${type}${count > 1 ? 's' : ''}`);
      }

      // Phase 3: Pattern discovery and learning
      insights.push('🔄 Phase 3: Discovering coding patterns...');
      const patterns = await projectPatternEngine.learnFromCodebase(path, options.progressCallback);

      // Analyze pattern distribution; the category is the part of the type
      // before the first underscore.
      const patternTypes: Record<string, number> = {};
      for (const pattern of patterns) {
        const patternType = pattern.type || 'unknown';
        const category = patternType.split('_')[0];
        patternTypes[category] = (patternTypes[category] || 0) + 1;
      }
      insights.push(`   ✅ Identified ${patterns.length} coding patterns:`);
      for (const [category, count] of Object.entries(patternTypes)) {
        insights.push(`     - ${count} ${category} pattern${count > 1 ? 's' : ''}`);
      }

      // Phase 4: Relationship and dependency analysis
      insights.push('🔗 Phase 4: Analyzing relationships and dependencies...');
      const relationships = await this.analyzeCodebaseRelationships(concepts, patterns);
      insights.push(`   ✅ Built ${relationships.conceptRelationships} concept relationships`);
      insights.push(`   ✅ Identified ${relationships.dependencyPatterns} dependency patterns`);

      // Phase 5: Intelligence synthesis and storage
      insights.push('💾 Phase 5: Synthesizing and storing intelligence...');
      await this.storeIntelligence(projectDatabase, path, concepts, patterns);

      // Generate learning insights
      const learningInsights = await this.generateLearningInsights(concepts, patterns, codebaseAnalysis);
      insights.push('🎯 Learning Summary:');
      for (const insight of learningInsights) {
        insights.push(`   ${insight}`);
      }

      // Phase 6: Vector embeddings for semantic search
      insights.push('🔍 Phase 6: Building semantic search index...');
      const vectorCount = await this.buildSemanticIndex(projectVectorDB, concepts, patterns);
      insights.push(`   ✅ Created ${vectorCount} vector embeddings for semantic search`);

      // Phase 6.5: Ensure project metadata exists (required for foreign keys),
      // so it must run before the feature maps of Phase 7 are inserted.
      insights.push('📋 Phase 6.5: Ensuring project metadata...');
      const existingMetadata = projectDatabase.getProjectMetadata(path);
      if (!existingMetadata) {
        projectDatabase.insertProjectMetadata({
          projectId: nanoid(),
          projectPath: path,
          projectName: this.deriveProjectName(path),
          languagePrimary: codebaseAnalysis.languages?.[0],
          languagesDetected: codebaseAnalysis.languages || [],
          frameworkDetected: codebaseAnalysis.frameworks || [],
          intelligenceVersion: '0.5.5',
          lastFullScan: new Date()
        });
        insights.push(`   ✅ Created project metadata for ${path}`);
      } else {
        insights.push(`   ✅ Project metadata already exists`);
      }

      // Phase 7: Feature mapping for intelligent file routing
      insights.push('🗺️  Phase 7: Building feature map for file routing...');
      const featureMaps = await projectPatternEngine.buildFeatureMap(path);

      // Debug: Log what we got from buildFeatureMap
      if (featureMaps.length > 0) {
        insights.push(`   🔍 Debug: Received ${featureMaps.length} feature maps from buildFeatureMap`);
        const sample = featureMaps[0];
        insights.push(`   🔍 Debug: Sample feature map: id=${sample?.id}, featureName=${sample?.featureName}, primaryFiles=${sample?.primaryFiles?.length || 0}`);
      }

      // Store feature maps in database (filter out invalid ones)
      let validFeatureMaps = 0;
      let skippedDueToMissingId = 0;
      let skippedDueToMissingName = 0;
      // Only files belonging to feature maps that were actually stored are
      // counted, so the summary line below matches what is in the database.
      let totalFiles = 0;
      for (const featureMap of featureMaps) {
        // Skip feature maps with missing required fields
        if (!featureMap.id) {
          skippedDueToMissingId++;
          continue;
        }
        if (!featureMap.featureName) {
          skippedDueToMissingName++;
          continue;
        }
        const primaryFiles = featureMap.primaryFiles || [];
        const relatedFiles = featureMap.relatedFiles || [];
        projectDatabase.insertFeatureMap({
          id: featureMap.id,
          projectPath: path,
          featureName: featureMap.featureName,
          primaryFiles,
          relatedFiles,
          dependencies: featureMap.dependencies || [],
          status: 'active'
        });
        validFeatureMaps++;
        totalFiles += primaryFiles.length + relatedFiles.length;
      }

      const totalSkipped = skippedDueToMissingId + skippedDueToMissingName;
      if (totalSkipped > 0) {
        insights.push(`   ⚠️  Skipped ${totalSkipped} invalid feature maps (${skippedDueToMissingId} missing ID, ${skippedDueToMissingName} missing name)`);
      }
      insights.push(`   ✅ Mapped ${validFeatureMaps} features to ${totalFiles} files`);

      // Phase 8: Store blueprint data
      insights.push('💾 Phase 8: Storing project blueprint...');
      await this.storeProjectBlueprint(path, codebaseAnalysis, projectDatabase);

      const timeElapsed = Date.now() - startTime;
      insights.push(`⚡ Learning completed in ${timeElapsed}ms`);

      // Build blueprint summary. When several entries share a type, the last
      // one wins because the maps are keyed by type.
      const entryPoints: Record<string, string> = {};
      for (const entryPoint of codebaseAnalysis.entryPoints || []) {
        entryPoints[entryPoint.type] = entryPoint.filePath;
      }
      const keyDirectories: Record<string, string> = {};
      for (const directory of codebaseAnalysis.keyDirectories || []) {
        keyDirectories[directory.type] = directory.path;
      }
      const blueprint = {
        techStack: codebaseAnalysis.frameworks || [],
        entryPoints,
        keyDirectories,
        architecture: this.inferArchitecturePattern(codebaseAnalysis)
      };

      return {
        success: true,
        conceptsLearned: concepts.length,
        patternsLearned: patterns.length,
        featuresLearned: validFeatureMaps,
        insights,
        timeElapsed,
        blueprint
      };
    } catch (error) {
      return {
        success: false,
        conceptsLearned: 0,
        patternsLearned: 0,
        featuresLearned: 0,
        insights: [`❌ Learning failed: ${error instanceof Error ? error.message : error}`],
        timeElapsed: Date.now() - startTime
      };
    } finally {
      // Clean up project-specific resources. Engine cleanup and vector DB
      // shutdown are isolated so that a failure in either one cannot prevent
      // the SQLite handle from being closed.
      try {
        projectSemanticEngine.cleanup();
      } catch (error) {
        console.warn('Warning: Failed to clean up project semantic engine:', error);
      }
      try {
        await projectVectorDB.close();
      } catch (error) {
        console.warn('Warning: Failed to close project vector database:', error);
      }
      projectDatabase.close();
    }
  }

  /**
   * Derive a display name for the project from its path.
   *
   * Uses the last non-empty path segment, accepting both `/` and `\` as
   * separators and ignoring trailing separators.
   *
   * @param projectPath - Project root path.
   * @returns The last path segment, or `'unknown'` when there is none.
   */
  private static deriveProjectName(projectPath: string): string {
    const segments = projectPath.split(/[\\/]+/).filter(segment => segment.length > 0);
    return segments[segments.length - 1] || 'unknown';
  }

  /**
   * Count the intelligence already stored in the project database.
   *
   * @param database - Project database to inspect.
   * @param path - Project path used to look up feature maps.
   * @returns The stored counts when at least one concept or pattern exists,
   *   otherwise `null` (after logging a warning).
   */
  private static async checkExistingIntelligence(
    database: SQLiteDatabase,
    path: string
  ): Promise<{ concepts: number; patterns: number; features: number } | null> {
    const concepts = database.getSemanticConcepts().length;
    const patterns = database.getDeveloperPatterns().length;
    const features = database.getFeatureMaps(path).length;

    if (concepts > 0 || patterns > 0) {
      return { concepts, patterns, features };
    }

    console.warn('⚠️  No existing intelligence found in project database - starting fresh analysis');
    return null;
  }

  /**
   * Persist learned concepts and patterns to the project database.
   *
   * @param database - Project database to write to.
   * @param path - Project path (currently not used by this method).
   * @param concepts - Concepts returned by the semantic engine.
   * @param patterns - Patterns returned by the pattern engine.
   */
  private static async storeIntelligence(
    database: SQLiteDatabase,
    path: string,
    concepts: any[],
    patterns: any[]
  ): Promise<void> {
    // Store concepts
    for (const concept of concepts) {
      database.insertSemanticConcept({
        id: concept.id,
        conceptName: concept.name,
        conceptType: concept.type,
        confidenceScore: concept.confidence,
        relationships: concept.relationships,
        evolutionHistory: {},
        filePath: concept.filePath,
        lineRange: concept.lineRange
      });
    }

    // Store patterns
    for (const pattern of patterns) {
      database.insertDeveloperPattern({
        patternId: pattern.id,
        patternType: pattern.type,
        patternContent: pattern.content,
        frequency: pattern.frequency,
        contexts: pattern.contexts,
        examples: pattern.examples,
        confidence: pattern.confidence
      });
    }
  }

  /**
   * Count intra-file concept relationships and dependency-like patterns.
   *
   * Every unordered pair of concepts found in the same file counts as one
   * relationship (deduplicated by the pair of concept ids). A pattern counts
   * as a dependency pattern when its type contains `dependency`, `import` or
   * `organization`.
   *
   * @param concepts - Learned concepts; grouped by `filePath` / `file_path`.
   * @param patterns - Learned patterns.
   * @returns The number of distinct relationships and dependency patterns.
   */
  private static async analyzeCodebaseRelationships(
    concepts: any[],
    patterns: any[]
  ): Promise<{ conceptRelationships: number; dependencyPatterns: number }> {
    // Group concepts by file. A Map avoids collisions with Object.prototype
    // keys that a plain object used as a dictionary would have.
    const conceptsByFile = new Map<string, any[]>();
    for (const concept of concepts) {
      const filePath = String(concept.filePath || concept.file_path || 'unknown');
      const bucket = conceptsByFile.get(filePath);
      if (bucket) {
        bucket.push(concept);
      } else {
        conceptsByFile.set(filePath, [concept]);
      }
    }

    // Find relationships within files
    const conceptRelationships = new Set<string>();
    for (const fileConcepts of conceptsByFile.values()) {
      for (let i = 0; i < fileConcepts.length; i++) {
        for (let j = i + 1; j < fileConcepts.length; j++) {
          conceptRelationships.add(`${fileConcepts[i].id}-${fileConcepts[j].id}`);
        }
      }
    }

    // Analyze dependency patterns
    const dependencyKeywords = ['dependency', 'import', 'organization'];
    const dependencyPatterns = new Set<string>();
    for (const pattern of patterns) {
      const patternType = pattern.type || '';
      if (dependencyKeywords.some(keyword => patternType.includes(keyword))) {
        dependencyPatterns.add(pattern.id);
      }
    }

    return {
      conceptRelationships: conceptRelationships.size,
      dependencyPatterns: dependencyPatterns.size
    };
  }

  /**
   * Produce the human-readable summary lines shown under "Learning Summary".
   *
   * @param concepts - Learned concepts (only the count is used).
   * @param patterns - Learned patterns, classified by substrings of `type`.
   * @param codebaseAnalysis - Analysis result; `complexity`, `languages` and
   *   `frameworks` are read when present.
   * @returns Summary lines, possibly empty.
   */
  private static async generateLearningInsights(
    concepts: any[],
    patterns: any[],
    codebaseAnalysis: any
  ): Promise<string[]> {
    const insights: string[] = [];

    // Analyze codebase characteristics
    const totalLines = codebaseAnalysis?.complexity?.lines || 0;
    if (totalLines > 0) {
      const conceptDensity = (concepts.length / totalLines * 1000).toFixed(2);
      insights.push(`📊 Concept density: ${conceptDensity} concepts per 1000 lines`);
    }

    // Analyze pattern distribution
    const namingPatterns = patterns.filter(p => p.type?.includes('naming'));
    const structuralPatterns = patterns.filter(p => p.type?.includes('organization') || p.type?.includes('structure'));
    const implementationPatterns = patterns.filter(p => p.type?.includes('implementation'));

    if (namingPatterns.length > 0) {
      insights.push(`✨ Strong naming conventions detected (${namingPatterns.length} patterns)`);
    }
    if (structuralPatterns.length > 0) {
      insights.push(`🏗️ Organized code structure found (${structuralPatterns.length} patterns)`);
    }
    if (implementationPatterns.length > 0) {
      insights.push(`⚙️ Design patterns in use (${implementationPatterns.length} patterns)`);
    }

    // Analyze complexity
    const complexity = codebaseAnalysis?.complexity;
    if (complexity && typeof complexity.cyclomatic === 'number') {
      if (complexity.cyclomatic < 10) {
        insights.push('🟢 Low complexity codebase - easy to maintain');
      } else if (complexity.cyclomatic < 30) {
        insights.push('🟡 Moderate complexity - consider refactoring high-complexity areas');
      } else {
        insights.push('🔴 High complexity detected - refactoring recommended');
      }
    }

    // Analyze language and framework usage
    const languages = codebaseAnalysis?.languages || [];
    const frameworks = codebaseAnalysis?.frameworks || [];

    if (languages.length === 1) {
      insights.push(`🎯 Single-language codebase (${languages[0]}) - consistent technology stack`);
    } else if (languages.length > 1) {
      insights.push(`🌐 Multi-language codebase (${languages.join(', ')}) - consider integration patterns`);
    }
    if (frameworks.length > 0) {
      insights.push(`🔧 Framework usage: ${frameworks.join(', ')}`);
    }

    return insights;
  }

  /**
   * Store one embedding per concept and per pattern in the vector database.
   *
   * Embeddings are written one at a time, concepts first. Any error is logged
   * and reported as a count of `0`, even if some embeddings were already
   * stored before the failure.
   *
   * @param vectorDB - Vector database to initialize and write to.
   * @param concepts - Learned concepts.
   * @param patterns - Learned patterns.
   * @returns Number of embeddings stored, or `0` on failure.
   */
  private static async buildSemanticIndex(
    vectorDB: SemanticVectorDB,
    concepts: any[],
    patterns: any[]
  ): Promise<number> {
    try {
      await vectorDB.initialize('in-memoria-intelligence');

      let vectorCount = 0;

      // Create embeddings for semantic concepts
      for (const concept of concepts) {
        const conceptType = concept.type || 'unknown';
        const text = `${concept.name} ${conceptType}`;
        await vectorDB.storeCodeEmbedding(text, {
          id: concept.id,
          filePath: concept.filePath,
          functionName: conceptType === 'function' ? concept.name : undefined,
          className: conceptType === 'class' ? concept.name : undefined,
          language: 'unknown',
          complexity: 1,
          lineCount: 1,
          lastModified: new Date()
        });
        vectorCount++;
      }

      // Create embeddings for patterns
      for (const pattern of patterns) {
        const patternType = pattern.type || 'unknown';
        const text = `${patternType} ${pattern.content?.description || ''}`;
        await vectorDB.storeCodeEmbedding(text, {
          id: pattern.id,
          filePath: `pattern-${patternType}`,
          language: 'pattern',
          complexity: pattern.frequency || 1,
          lineCount: 1,
          lastModified: new Date()
        });
        vectorCount++;
      }

      return vectorCount;
    } catch (error) {
      console.warn('Failed to build semantic index:', error);
      return 0;
    }
  }

  /**
   * Persist the entry points and key directories found by the analysis.
   *
   * Entries missing a required field (`type`/`filePath` for entry points,
   * `path`/`type` for directories) are skipped silently.
   *
   * @param projectPath - Project path recorded on every stored row.
   * @param codebaseAnalysis - Analysis result; `entryPoints` and
   *   `keyDirectories` are used only when they are arrays.
   * @param database - Project database to write to.
   */
  private static async storeProjectBlueprint(
    projectPath: string,
    codebaseAnalysis: any,
    database: SQLiteDatabase
  ): Promise<void> {
    // Store entry points (filter out invalid ones)
    if (Array.isArray(codebaseAnalysis.entryPoints)) {
      for (const entryPoint of codebaseAnalysis.entryPoints) {
        // Skip entry points with missing required fields
        if (!entryPoint.type || !entryPoint.filePath) {
          continue;
        }
        database.insertEntryPoint({
          id: nanoid(),
          projectPath,
          entryType: entryPoint.type,
          filePath: entryPoint.filePath,
          description: entryPoint.description,
          framework: entryPoint.framework
        });
      }
    }

    // Store key directories (filter out invalid ones)
    if (Array.isArray(codebaseAnalysis.keyDirectories)) {
      for (const directory of codebaseAnalysis.keyDirectories) {
        // Skip directories with missing required fields
        if (!directory.path || !directory.type) {
          continue;
        }
        database.insertKeyDirectory({
          id: nanoid(),
          projectPath,
          directoryPath: directory.path,
          directoryType: directory.type,
          fileCount: directory.fileCount || 0,
          description: directory.description
        });
      }
    }
  }

  /**
   * Infer a coarse architecture label from detected frameworks and key
   * directory types.
   *
   * Framework matches take precedence over directory matches, and the first
   * matching rule wins. The MVC rule requires both a `models` and a `views`
   * directory to be present.
   *
   * @param codebaseAnalysis - Analysis result; `frameworks` and
   *   `keyDirectories` are read when present.
   * @returns The architecture label, `'Modular'` when nothing matches.
   */
  private static inferArchitecturePattern(codebaseAnalysis: any): string {
    const frameworks: string[] = codebaseAnalysis?.frameworks || [];
    const directories: any[] = codebaseAnalysis?.keyDirectories || [];

    const usesFramework = (needle: string): boolean =>
      frameworks.some(framework => framework.toLowerCase().includes(needle));
    const hasDirectory = (type: string): boolean =>
      directories.some(directory => directory.type === type);

    if (usesFramework('react')) {
      return 'Component-Based (React)';
    }
    if (usesFramework('express')) {
      return 'REST API (Express)';
    }
    if (usesFramework('fastapi')) {
      return 'REST API (FastAPI)';
    }
    if (hasDirectory('services')) {
      return 'Service-Oriented';
    }
    if (hasDirectory('components')) {
      return 'Component-Based';
    }
    // Models and views live in different directory entries, so each must be
    // looked up separately; a single entry can never have both types.
    if (hasDirectory('models') && hasDirectory('views')) {
      return 'MVC Pattern';
    }
    return 'Modular';
  }
}