Skip to main content
Glama
FosterG4

Code Reference Optimizer MCP Server

by FosterG4
RelevanceScorer.ts27.1 kB
import * as fs from 'fs/promises'; import * as path from 'path'; import type { SymbolInfo } from '../types/index.js'; import type { SemanticContext } from '../analysis/SemanticAnalyzer.js'; import type { CrossReference } from '../analysis/CrossReferenceAnalyzer.js'; import { SemanticAnalyzer } from '../analysis/SemanticAnalyzer.js'; export interface EmbeddingVector { values: number[]; dimensions: number; magnitude: number; } export interface SymbolEmbedding { symbol: string; file: string; embedding: EmbeddingVector; features: SymbolFeatures; lastUpdated: number; } export interface SymbolFeatures { // Structural features type: string; lineCount: number; complexity: number; dependencyCount: number; usageCount: number; // Semantic features nameTokens: string[]; commentTokens: string[]; contextTokens: string[]; // Relationship features isPublicAPI: boolean; isDeprecated: boolean; inheritanceDepth: number; couplingScore: number; // Usage patterns crossFileUsage: number; recentUsage: number; changeFrequency: number; testCoverage: number; } export interface SimilarityResult { symbol: string; file: string; score: number; explanation: string; features: { structural: number; semantic: number; contextual: number; usage: number; }; } export interface RelevanceQuery { text: string; context?: { currentFile?: string; currentFunction?: string; recentSymbols?: string[]; projectContext?: string[]; }; filters?: { symbolTypes?: string[]; files?: string[]; minUsageCount?: number; maxComplexity?: number; isPublicAPI?: boolean; }; options?: { maxResults?: number; threshold?: number; includeExplanation?: boolean; weightFactors?: { semantic: number; structural: number; contextual: number; usage: number; }; }; } export interface MLModel { name: string; version: string; type: 'embedding' | 'classification' | 'ranking'; parameters: Record<string, any>; accuracy?: number; lastTrained?: number; } export interface TrainingData { queries: string[]; relevantSymbols: string[][]; irrelevantSymbols: string[][]; userFeedback: Array<{ query: string; symbol: string; relevance: number; // 0-1 timestamp: number; }>; } export interface ModelMetrics { precision: number; recall: number; f1Score: number; meanReciprocalRank: number; ndcg: number; // Normalized Discounted Cumulative Gain userSatisfaction: number; } export class RelevanceScorer { private semanticAnalyzer: SemanticAnalyzer; private symbolEmbeddings: Map<string, SymbolEmbedding> = new Map(); private models: Map<string, MLModel> = new Map(); private trainingData: TrainingData; private vocabularyIndex: Map<string, number> = new Map(); private idfScores: Map<string, number> = new Map(); private embeddingCache: Map<string, EmbeddingVector> = new Map(); constructor() { this.semanticAnalyzer = new SemanticAnalyzer(); this.trainingData = { queries: [], relevantSymbols: [], irrelevantSymbols: [], userFeedback: [] }; this.initializeModels(); } /** * Initialize and load ML models */ private initializeModels(): void { // Initialize embedding model this.models.set('embedding', { name: 'symbol-embedding', version: '1.0.0', type: 'embedding', parameters: { dimensions: 256, windowSize: 5, minCount: 2, learningRate: 0.025 } }); // Initialize ranking model this.models.set('ranking', { name: 'relevance-ranking', version: '1.0.0', type: 'ranking', parameters: { features: ['semantic', 'structural', 'contextual', 'usage'], weights: [0.4, 0.2, 0.2, 0.2], learningRate: 0.01, regularization: 0.001 } }); } /** * Build embeddings for all symbols in the codebase */ async buildEmbeddings(filePaths: string[]): Promise<void> { console.log('Building symbol embeddings...'); // First pass: extract features and build vocabulary const allFeatures: SymbolFeatures[] = []; for (const filePath of filePaths) { const features = await this.extractFileFeatures(filePath); allFeatures.push(...features); } // Build vocabulary and IDF scores this.buildVocabulary(allFeatures); this.calculateIdfScores(allFeatures); // Second pass: generate embeddings for (const filePath of filePaths) { await this.generateFileEmbeddings(filePath); } console.log(`Generated embeddings for ${this.symbolEmbeddings.size} symbols`); } /** * Find relevant symbols based on a query */ async findRelevantSymbols(query: RelevanceQuery): Promise<SimilarityResult[]> { const queryEmbedding = await this.generateQueryEmbedding(query.text); const candidates = this.filterCandidates(query.filters); const results: SimilarityResult[] = []; for (const [symbolKey, symbolEmbedding] of candidates) { const similarity = this.calculateSimilarity(queryEmbedding, symbolEmbedding, query); if (similarity.score >= (query.options?.threshold || 0.1)) { results.push(similarity); } } // Sort by relevance score results.sort((a, b) => b.score - a.score); // Apply context boosting this.applyContextBoosting(results, query.context); // Limit results const maxResults = query.options?.maxResults || 20; return results.slice(0, maxResults); } /** * Get similar symbols to a given symbol */ async findSimilarSymbols(symbol: string, filePath: string, maxResults: number = 10): Promise<SimilarityResult[]> { const symbolKey = `${path.resolve(filePath)}:${symbol}`; const targetEmbedding = this.symbolEmbeddings.get(symbolKey); if (!targetEmbedding) { throw new Error(`Symbol ${symbol} not found in ${filePath}`); } const results: SimilarityResult[] = []; for (const [key, embedding] of this.symbolEmbeddings) { if (key === symbolKey) continue; const similarity = this.calculateCosineSimilarity( targetEmbedding.embedding, embedding.embedding ); if (similarity > 0.1) { results.push({ symbol: embedding.symbol, file: embedding.file, score: similarity, explanation: this.generateSimilarityExplanation(targetEmbedding, embedding, similarity), features: { structural: this.calculateStructuralSimilarity(targetEmbedding.features, embedding.features), semantic: this.calculateSemanticSimilarity(targetEmbedding.features, embedding.features), contextual: this.calculateContextualSimilarity(targetEmbedding.features, embedding.features), usage: this.calculateUsageSimilarity(targetEmbedding.features, embedding.features) } }); } } return results.sort((a, b) => b.score - a.score).slice(0, maxResults); } /** * Train the relevance model with user feedback */ async trainModel(trainingData: TrainingData): Promise<ModelMetrics> { console.log('Training relevance model...'); // Merge with existing training data this.trainingData.queries.push(...trainingData.queries); this.trainingData.relevantSymbols.push(...trainingData.relevantSymbols); this.trainingData.irrelevantSymbols.push(...trainingData.irrelevantSymbols); this.trainingData.userFeedback.push(...trainingData.userFeedback); // Generate training examples const trainingExamples = this.generateTrainingExamples(); // Train embedding model await this.trainEmbeddingModel(trainingExamples); // Train ranking model await this.trainRankingModel(trainingExamples); // Evaluate model performance const metrics = await this.evaluateModel(); console.log('Model training completed:', metrics); return metrics; } /** * Add user feedback to improve model */ addUserFeedback(query: string, symbol: string, relevance: number): void { this.trainingData.userFeedback.push({ query, symbol, relevance, timestamp: Date.now() }); // Trigger incremental learning if enough feedback accumulated if (this.trainingData.userFeedback.length % 100 === 0) { this.incrementalLearning(); } } /** * Get model performance metrics */ getModelMetrics(): ModelMetrics { // Calculate metrics based on recent performance return { precision: 0.85, // Placeholder - would calculate from actual data recall: 0.78, f1Score: 0.81, meanReciprocalRank: 0.72, ndcg: 0.79, userSatisfaction: 0.83 }; } /** * Export trained model */ async exportModel(modelPath: string): Promise<void> { const modelData = { models: Object.fromEntries(this.models), embeddings: Array.from(this.symbolEmbeddings.entries()), vocabulary: Object.fromEntries(this.vocabularyIndex), idfScores: Object.fromEntries(this.idfScores), trainingData: this.trainingData, metadata: { version: '1.0.0', createdAt: Date.now(), symbolCount: this.symbolEmbeddings.size } }; await fs.writeFile(modelPath, JSON.stringify(modelData, null, 2)); console.log(`Model exported to ${modelPath}`); } /** * Import trained model */ async importModel(modelPath: string): Promise<void> { const modelData = JSON.parse(await fs.readFile(modelPath, 'utf-8')); this.models = new Map(Object.entries(modelData.models)); this.symbolEmbeddings = new Map(modelData.embeddings); this.vocabularyIndex = new Map(Object.entries(modelData.vocabulary)); this.idfScores = new Map(Object.entries(modelData.idfScores)); this.trainingData = modelData.trainingData; console.log(`Model imported from ${modelPath}`); console.log(`Loaded ${this.symbolEmbeddings.size} symbol embeddings`); } private async extractFileFeatures(filePath: string): Promise<SymbolFeatures[]> { const context = await this.semanticAnalyzer.analyzeFile(filePath); const features: SymbolFeatures[] = []; for (const [symbol, symbolInfo] of context.symbols) { const symbolFeatures = this.extractSymbolFeatures(symbol, symbolInfo, context); features.push(symbolFeatures); } return features; } private extractSymbolFeatures(symbol: string, symbolInfo: SymbolInfo, context: SemanticContext): SymbolFeatures { const nameTokens = this.tokenize(symbol); const commentTokens = this.extractCommentTokens(symbolInfo); const contextTokens = this.extractContextTokens(symbol, context); return { type: symbolInfo.type, lineCount: (symbolInfo.endLine || 0) - (symbolInfo.startLine || 0), complexity: this.calculateComplexity(symbolInfo), dependencyCount: symbolInfo.dependencies?.length || 0, usageCount: this.calculateUsageCount(symbol, context), nameTokens, commentTokens, contextTokens, isPublicAPI: symbolInfo.exports === true, isDeprecated: this.isDeprecated(symbol), inheritanceDepth: this.calculateInheritanceDepth(symbol, context), couplingScore: this.calculateCouplingScore(symbol, context), crossFileUsage: 0, // Would be calculated from cross-reference analysis recentUsage: 0, // Would be calculated from usage history changeFrequency: 0, // Would be calculated from git history testCoverage: 0 // Would be calculated from test analysis }; } private buildVocabulary(allFeatures: SymbolFeatures[]): void { const tokenCounts = new Map<string, number>(); for (const features of allFeatures) { const allTokens = [ ...features.nameTokens, ...features.commentTokens, ...features.contextTokens ]; for (const token of allTokens) { tokenCounts.set(token, (tokenCounts.get(token) || 0) + 1); } } // Filter tokens by minimum frequency let index = 0; for (const [token, count] of tokenCounts) { if (count >= 2) { // Minimum frequency threshold this.vocabularyIndex.set(token, index++); } } console.log(`Built vocabulary with ${this.vocabularyIndex.size} tokens`); } private calculateIdfScores(allFeatures: SymbolFeatures[]): void { const documentFrequency = new Map<string, number>(); const totalDocuments = allFeatures.length; for (const features of allFeatures) { const uniqueTokens = new Set([ ...features.nameTokens, ...features.commentTokens, ...features.contextTokens ]); for (const token of uniqueTokens) { documentFrequency.set(token, (documentFrequency.get(token) || 0) + 1); } } for (const [token, df] of documentFrequency) { const idf = Math.log(totalDocuments / df); this.idfScores.set(token, idf); } } private async generateFileEmbeddings(filePath: string): Promise<void> { const features = await this.extractFileFeatures(filePath); for (const feature of features) { const embedding = this.generateEmbedding(feature); const symbolKey = `${path.resolve(filePath)}:${feature.nameTokens.join('')}`; this.symbolEmbeddings.set(symbolKey, { symbol: feature.nameTokens.join(''), file: path.resolve(filePath), embedding, features: feature, lastUpdated: Date.now() }); } } private generateEmbedding(features: SymbolFeatures): EmbeddingVector { const dimensions = 256; const values = new Array(dimensions).fill(0); // Combine different feature types into embedding this.addTokenEmbeddings(values, features.nameTokens, 0.4); this.addTokenEmbeddings(values, features.commentTokens, 0.3); this.addTokenEmbeddings(values, features.contextTokens, 0.2); this.addStructuralFeatures(values, features, 0.1); // Normalize the embedding const magnitude = Math.sqrt(values.reduce((sum, val) => sum + val * val, 0)); if (magnitude > 0) { for (let i = 0; i < values.length; i++) { values[i] /= magnitude; } } return { values, dimensions, magnitude }; } private addTokenEmbeddings(embedding: number[], tokens: string[], weight: number): void { for (const token of tokens) { const index = this.vocabularyIndex.get(token); if (index !== undefined && index < embedding.length) { const idf = this.idfScores.get(token) || 1; embedding[index] += weight * idf; } } } private addStructuralFeatures(embedding: number[], features: SymbolFeatures, weight: number): void { // Add structural features to specific dimensions const baseIndex = embedding.length - 20; // Use last 20 dimensions for structural features if (baseIndex > 0) { embedding[baseIndex] += weight * (features.lineCount / 100); embedding[baseIndex + 1] += weight * (features.complexity / 10); embedding[baseIndex + 2] += weight * (features.dependencyCount / 20); embedding[baseIndex + 3] += weight * (features.usageCount / 50); embedding[baseIndex + 4] += weight * (features.isPublicAPI ? 1 : 0); embedding[baseIndex + 5] += weight * (features.inheritanceDepth / 5); embedding[baseIndex + 6] += weight * (features.couplingScore / 10); } } private async generateQueryEmbedding(query: string): Promise<EmbeddingVector> { const cacheKey = `query:${query}`; const cached = this.embeddingCache.get(cacheKey); if (cached) return cached; const tokens = this.tokenize(query); const dimensions = 256; const values = new Array(dimensions).fill(0); this.addTokenEmbeddings(values, tokens, 1.0); // Normalize const magnitude = Math.sqrt(values.reduce((sum, val) => sum + val * val, 0)); if (magnitude > 0) { for (let i = 0; i < values.length; i++) { values[i] /= magnitude; } } const embedding = { values, dimensions, magnitude }; this.embeddingCache.set(cacheKey, embedding); return embedding; } private filterCandidates(filters?: RelevanceQuery['filters']): Map<string, SymbolEmbedding> { if (!filters) return this.symbolEmbeddings; const filtered = new Map<string, SymbolEmbedding>(); for (const [key, embedding] of this.symbolEmbeddings) { let include = true; if (filters.symbolTypes && !filters.symbolTypes.includes(embedding.features.type)) { include = false; } if (filters.files && !filters.files.some(f => embedding.file.includes(f))) { include = false; } if (filters.minUsageCount && embedding.features.usageCount < filters.minUsageCount) { include = false; } if (filters.maxComplexity && embedding.features.complexity > filters.maxComplexity) { include = false; } if (filters.isPublicAPI !== undefined && embedding.features.isPublicAPI !== filters.isPublicAPI) { include = false; } if (include) { filtered.set(key, embedding); } } return filtered; } private calculateSimilarity( queryEmbedding: EmbeddingVector, symbolEmbedding: SymbolEmbedding, query: RelevanceQuery ): SimilarityResult { const weights = query.options?.weightFactors || { semantic: 0.4, structural: 0.2, contextual: 0.2, usage: 0.2 }; const semantic = this.calculateCosineSimilarity(queryEmbedding, symbolEmbedding.embedding); const structural = this.calculateStructuralRelevance(query, symbolEmbedding.features); const contextual = this.calculateContextualRelevance(query, symbolEmbedding); const usage = this.calculateUsageRelevance(symbolEmbedding.features); const score = ( semantic * weights.semantic + structural * weights.structural + contextual * weights.contextual + usage * weights.usage ); return { symbol: symbolEmbedding.symbol, file: symbolEmbedding.file, score, explanation: query.options?.includeExplanation ? this.generateRelevanceExplanation(query, symbolEmbedding, { semantic, structural, contextual, usage }) : '', features: { semantic, structural, contextual, usage } }; } private calculateCosineSimilarity(a: EmbeddingVector, b: EmbeddingVector): number { if (a.dimensions !== b.dimensions) return 0; let dotProduct = 0; for (let i = 0; i < a.dimensions; i++) { dotProduct += a.values[i] * b.values[i]; } return dotProduct; // Already normalized vectors } private calculateStructuralRelevance(query: RelevanceQuery, features: SymbolFeatures): number { let score = 0; // Prefer simpler symbols for general queries if (features.complexity < 5) score += 0.2; // Prefer public APIs if (features.isPublicAPI) score += 0.3; // Prefer frequently used symbols score += Math.min(features.usageCount / 100, 0.3); // Avoid deprecated symbols if (features.isDeprecated) score -= 0.4; return Math.max(0, Math.min(1, score)); } private calculateContextualRelevance(query: RelevanceQuery, embedding: SymbolEmbedding): number { let score = 0; if (query.context?.currentFile && embedding.file === query.context.currentFile) { score += 0.4; // Same file bonus } if (query.context?.recentSymbols?.includes(embedding.symbol)) { score += 0.3; // Recently used bonus } return Math.min(1, score); } private calculateUsageRelevance(features: SymbolFeatures): number { let score = 0; // Usage frequency score += Math.min(features.usageCount / 50, 0.4); // Cross-file usage score += Math.min(features.crossFileUsage / 10, 0.3); // Recent usage score += Math.min(features.recentUsage, 0.3); return Math.min(1, score); } private applyContextBoosting(results: SimilarityResult[], context?: RelevanceQuery['context']): void { if (!context) return; for (const result of results) { let boost = 0; if (context.currentFile && result.file === context.currentFile) { boost += 0.1; } if (context.recentSymbols?.includes(result.symbol)) { boost += 0.15; } result.score = Math.min(1, result.score + boost); } // Re-sort after boosting results.sort((a, b) => b.score - a.score); } private generateRelevanceExplanation( query: RelevanceQuery, embedding: SymbolEmbedding, scores: { semantic: number; structural: number; contextual: number; usage: number } ): string { const explanations: string[] = []; if (scores.semantic > 0.7) { explanations.push('High semantic similarity to query'); } if (scores.structural > 0.6) { explanations.push('Good structural match (complexity, API status)'); } if (scores.contextual > 0.5) { explanations.push('Relevant to current context'); } if (scores.usage > 0.6) { explanations.push('Frequently used symbol'); } return explanations.join('; '); } private generateSimilarityExplanation( target: SymbolEmbedding, candidate: SymbolEmbedding, similarity: number ): string { const explanations: string[] = []; if (target.features.type === candidate.features.type) { explanations.push(`Same type (${target.features.type})`); } if (Math.abs(target.features.complexity - candidate.features.complexity) < 2) { explanations.push('Similar complexity'); } if (target.features.isPublicAPI === candidate.features.isPublicAPI) { explanations.push('Same API visibility'); } return explanations.join('; '); } // Helper methods for feature extraction private tokenize(text: string): string[] { return text .toLowerCase() .replace(/[^a-zA-Z0-9]/g, ' ') .split(/\s+/) .filter(token => token.length > 1); } private extractCommentTokens(symbolInfo: SymbolInfo): string[] { // Would extract from actual comments in the code return []; } private extractContextTokens(symbol: string, context: SemanticContext): string[] { const tokens: string[] = []; // Extract tokens from related symbols const relationships = context.relationships.get(symbol) || []; for (const rel of relationships) { tokens.push(...this.tokenize(rel.target)); } return tokens; } private calculateComplexity(symbolInfo: SymbolInfo): number { // Simplified complexity calculation const lineCount = (symbolInfo.endLine || 0) - (symbolInfo.startLine || 0); const dependencyCount = symbolInfo.dependencies?.length || 0; return lineCount / 10 + dependencyCount; } private calculateUsageCount(symbol: string, context: SemanticContext): number { // Would calculate from cross-reference analysis return 0; } private isDeprecated(symbol: string): boolean { return symbol.includes('deprecated') || symbol.includes('legacy'); } private calculateInheritanceDepth(symbol: string, context: SemanticContext): number { // Would calculate from inheritance analysis return 0; } private calculateCouplingScore(symbol: string, context: SemanticContext): number { // Would calculate from dependency analysis return 0; } private calculateStructuralSimilarity(a: SymbolFeatures, b: SymbolFeatures): number { let score = 0; if (a.type === b.type) score += 0.3; const complexityDiff = Math.abs(a.complexity - b.complexity); score += Math.max(0, 0.2 - complexityDiff / 10); const lineDiff = Math.abs(a.lineCount - b.lineCount); score += Math.max(0, 0.2 - lineDiff / 50); if (a.isPublicAPI === b.isPublicAPI) score += 0.3; return Math.min(1, score); } private calculateSemanticSimilarity(a: SymbolFeatures, b: SymbolFeatures): number { const aTokens = new Set([...a.nameTokens, ...a.commentTokens]); const bTokens = new Set([...b.nameTokens, ...b.commentTokens]); const intersection = new Set([...aTokens].filter(x => bTokens.has(x))); const union = new Set([...aTokens, ...bTokens]); return union.size > 0 ? intersection.size / union.size : 0; } private calculateContextualSimilarity(a: SymbolFeatures, b: SymbolFeatures): number { const aTokens = new Set(a.contextTokens); const bTokens = new Set(b.contextTokens); const intersection = new Set([...aTokens].filter(x => bTokens.has(x))); const union = new Set([...aTokens, ...bTokens]); return union.size > 0 ? intersection.size / union.size : 0; } private calculateUsageSimilarity(a: SymbolFeatures, b: SymbolFeatures): number { const usageDiff = Math.abs(a.usageCount - b.usageCount); const maxUsage = Math.max(a.usageCount, b.usageCount); return maxUsage > 0 ? 1 - (usageDiff / maxUsage) : 1; } // Training methods (simplified implementations) private generateTrainingExamples(): any[] { // Would generate training examples from user feedback return []; } private async trainEmbeddingModel(examples: any[]): Promise<void> { // Would implement actual embedding model training console.log('Training embedding model...'); } private async trainRankingModel(examples: any[]): Promise<void> { // Would implement actual ranking model training console.log('Training ranking model...'); } private async evaluateModel(): Promise<ModelMetrics> { // Would evaluate model on test set return this.getModelMetrics(); } private async incrementalLearning(): Promise<void> { // Would implement incremental learning from recent feedback console.log('Performing incremental learning...'); } /** * Clear all cached data */ clearCache(): void { this.embeddingCache.clear(); this.semanticAnalyzer.clearContexts(); } /** * Get embedding statistics */ getEmbeddingStats(): { totalEmbeddings: number; vocabularySize: number; averageEmbeddingMagnitude: number; memoryUsage: number; } { const totalEmbeddings = this.symbolEmbeddings.size; const vocabularySize = this.vocabularyIndex.size; let totalMagnitude = 0; for (const [, embedding] of this.symbolEmbeddings) { totalMagnitude += embedding.embedding.magnitude; } const averageEmbeddingMagnitude = totalEmbeddings > 0 ? totalMagnitude / totalEmbeddings : 0; const memoryUsage = totalEmbeddings * 256 * 8; // Rough estimate in bytes return { totalEmbeddings, vocabularySize, averageEmbeddingMagnitude, memoryUsage }; } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/FosterG4/mcpsaver'

If you have feedback or need assistance with the MCP directory API, please join our Discord server