Local Search MCP Server

RecommendationEngine.ts•13 KiB

import { DocumentChunkOptimized, TfidfAnalysisResult, SearchRecommendation, SuggestionStrategy } from '../types/index.js'; import { log } from './Logger.js'; export interface RecommendationEngineConfig { maxQueryTerms?: number; maxAnalysisDocuments?: number; } /** * TF-IDF based contextual search recommendation engine * Analyzes low-confidence search results and generates intelligent query suggestions */ export class RecommendationEngine { private static readonly DEFAULT_MAX_ANALYSIS_DOCUMENTS = 5; private static readonly DEFAULT_MAX_QUERY_TERMS = 8; private static readonly TFIDF_FORMULA = 'score = TF × log((docCount + 1) / (DF + 1))'; // Default expiry for generated recommendations (30 days) private static readonly RECOMMENDATION_EXPIRY_MS = 30 * 24 * 60 * 60 * 1000; private readonly maxQueryTerms: number; private readonly maxAnalysisDocuments: number; constructor(config: RecommendationEngineConfig = {}) { this.maxQueryTerms = config.maxQueryTerms ?? RecommendationEngine.DEFAULT_MAX_QUERY_TERMS; this.maxAnalysisDocuments = config.maxAnalysisDocuments ?? RecommendationEngine.DEFAULT_MAX_ANALYSIS_DOCUMENTS; // Validate configuration if (this.maxQueryTerms < 1 || this.maxQueryTerms > 100) { throw new Error(`maxQueryTerms must be between 1 and 100, got ${this.maxQueryTerms}`); } if (this.maxAnalysisDocuments < 1 || this.maxAnalysisDocuments > 50) { throw new Error(`maxAnalysisDocuments must be between 1 and 50, got ${this.maxAnalysisDocuments}`); } log.debug('RecommendationEngine initialized', { maxQueryTerms: this.maxQueryTerms, maxAnalysisDocuments: this.maxAnalysisDocuments }); } /** * Analyze low-confidence search results and generate contextual recommendations * @param query Original user query * @param results Search results (presumed low-confidence) * @param totalDocuments Total documents in the index * @param currentThreshold Current TF-IDF threshold from learning system * @returns Analysis results with suggested query modifications */ async analyzeForRecommendations( query: string, results: DocumentChunkOptimized[], totalDocuments: number, currentThreshold: number ): Promise<Omit<SearchRecommendation, 'id'> | null> { const timer = log.time('tfidf-analysis'); try { // Limit analysis to top results for performance const analysisResults = results.slice(0, this.maxAnalysisDocuments); if (analysisResults.length === 0) { log.debug('No results to analyze for recommendations'); return null; } // Extract and tokenize query terms const queryTerms = this.tokenizeQuery(query); if (queryTerms.length === 0 || queryTerms.length > this.maxQueryTerms) { log.debug('Query has no terms or too many terms for analysis', { termCount: queryTerms.length, maxAllowed: this.maxQueryTerms }); return null; } log.debug('Starting TF-IDF analysis', { query: query.substring(0, 50), resultCount: analysisResults.length, queryTerms: queryTerms.length, totalDocuments, currentThreshold }); // Calculate TF-IDF scores for each term across the result documents const tfidfResults = this.calculateTfidfAnalysis(queryTerms, analysisResults, totalDocuments); // Generate recommendations based on TF-IDF analysis const recommendation = this.generateRecommendation(query, queryTerms, tfidfResults, currentThreshold); timer(); if (recommendation) { log.info('Generated contextual recommendation', { strategy: recommendation.suggestionStrategy, confidence: recommendation.confidence, suggestedTermCount: recommendation.suggestedTerms.length }); } else { log.debug('No recommendation generated for query'); } return recommendation; } catch (error: any) { log.error('TF-IDF analysis failed', error, { query: query.substring(0, 50) }); return null; } } /** * Tokenize query into individual terms, preserving quoted phrases * @param query The search query * @returns Array of query terms */ private tokenizeQuery(query: string): string[] { const terms: string[] = []; const regex = /"([^"]*)"|(\S+)/g; let match; while ((match = regex.exec(query.toLowerCase())) !== null) { // Use quoted content if present, otherwise the unquoted term const term = match[1] || match[2]; if (term && term.length > 1) { // Skip single characters terms.push(term); } } return [...new Set(terms)]; // Remove duplicates } /** * Calculate TF-IDF analysis for query terms across result documents * @param queryTerms Tokenized query terms * @param results Top search results to analyze * @param totalDocuments Total documents in index * @returns TF-IDF analysis results for each term */ private calculateTfidfAnalysis( queryTerms: string[], results: DocumentChunkOptimized[], totalDocuments: number ): TfidfAnalysisResult[] { const termAnalysis: TfidfAnalysisResult[] = []; for (const term of queryTerms) { // Calculate document frequency (DF): how many result documents contain this term let df = 0; let totalTf = 0; for (const result of results) { const content = result.content.toLowerCase(); const occurrences = (content.match(new RegExp(`\\b${this.escapeRegex(term)}\\b`, 'g')) || []).length; if (occurrences > 0) { df++; totalTf += occurrences; } } // Calculate average TF across documents that contain the term const avgTf = df > 0 ? totalTf / df : 0; // Apply TF-IDF formula: TF × log((N + 1) / (DF + 1)) // Using +1 smoothing to avoid division by zero and reduce impact of rare terms const tfidf = avgTf * Math.log((totalDocuments + 1) / (df + 1)); // Determine if term should be removed based on TF-IDF score const shouldRemove = tfidf < 1.0; // Conservative threshold const confidence = Math.max(0, Math.min(1, tfidf / 5.0)); // Normalize confidence to 0-1 termAnalysis.push({ term, tf: avgTf, df, tfidf, shouldRemove, confidence }); } // Sort by TF-IDF score (lowest first - most likely to cause issues) return termAnalysis.sort((a, b) => a.tfidf - b.tfidf); } /** * Generate a contextual recommendation based on TF-IDF analysis * @param originalQuery The original query * @param queryTerms Tokenized terms * @param tfidfResults TF-IDF analysis results * @param currentThreshold Current learning threshold * @returns Recommendation or null if no good suggestions */ private generateRecommendation( originalQuery: string, queryTerms: string[], tfidfResults: TfidfAnalysisResult[], currentThreshold: number ): Omit<SearchRecommendation, 'id'> | null { // Safety check: never remove all terms const removableTerms = tfidfResults.filter(r => r.shouldRemove && r.tfidf < currentThreshold && !this.isEssentialTerm(r.term, originalQuery) ); if (removableTerms.length === 0) { return null; } // Find the most problematic term (lowest TF-IDF score) const mostProblematicTerm = removableTerms[0]; // Strategy 1: Term removal - safest approach if (this.canSafelyRemoveTerm(mostProblematicTerm.term, queryTerms)) { const suggestedTerms = queryTerms.filter(t => t !== mostProblematicTerm.term); return { query: originalQuery, suggestedTerms, suggestionStrategy: SuggestionStrategy.TERM_REMOVAL, tfidfThreshold: currentThreshold, confidence: mostProblematicTerm.confidence, generatedAt: new Date(), expiresAt: new Date(Date.now() + RecommendationEngine.RECOMMENDATION_EXPIRY_MS), // 30 days totalDocuments: 0, // Will be set by caller analyzedDocuments: tfidfResults.length }; } // Strategy 2: Term refinement - replace with more specific term const refinedSuggestion = this.generateRefinedTerm(mostProblematicTerm.term, tfidfResults); if (refinedSuggestion) { return { query: originalQuery, suggestedTerms: [refinedSuggestion], suggestionStrategy: SuggestionStrategy.TERM_REFINEMENT, tfidfThreshold: currentThreshold, confidence: 0.7, // Moderate confidence for refinements generatedAt: new Date(), expiresAt: new Date(Date.now() + RecommendationEngine.RECOMMENDATION_EXPIRY_MS), totalDocuments: 0, analyzedDocuments: tfidfResults.length }; } // Strategy 3: Contextual addition - add clarifying terms const contextualTerms = this.suggestContextualTerms(mostProblematicTerm.term, tfidfResults); if (contextualTerms.length > 0) { const combinedTerms = [...queryTerms, ...contextualTerms]; return { query: originalQuery, suggestedTerms: combinedTerms, suggestionStrategy: SuggestionStrategy.CONTEXTUAL_ADDITION, tfidfThreshold: currentThreshold, confidence: 0.6, // Lower confidence for additions generatedAt: new Date(), expiresAt: new Date(Date.now() + RecommendationEngine.RECOMMENDATION_EXPIRY_MS), totalDocuments: 0, analyzedDocuments: tfidfResults.length }; } return null; } /** * Check if a term is essential and shouldn't be removed * @param term The term to check * @param originalQuery The original query string * @returns true if term is essential */ private isEssentialTerm(term: string, originalQuery: string): boolean { // Keep terms that are in quotes (phrase searches) if (originalQuery.includes(`"${term}"`)) { return true; } // Keep very common programming terms const essentialTerms = ['function', 'class', 'method', 'variable', 'import', 'export', 'async', 'await', 'return', 'const', 'let', 'var']; return essentialTerms.includes(term.toLowerCase()); } /** * Check if a term can be safely removed without making the query meaningless * @param termToRemove The term being considered for removal * @param allTerms All query terms * @returns true if safe to remove */ private canSafelyRemoveTerm(termToRemove: string, allTerms: string[]): boolean { // Keep at least 2 terms to maintain query specificity return allTerms.length > 2; } /** * Generate a refined version of a problematic term * @param problematicTerm The term causing issues * @param tfidfResults All TF-IDF results for context * @returns Refined term suggestion or null */ private generateRefinedTerm(problematicTerm: string, tfidfResults: TfidfAnalysisResult[]): string | null { // Look for better-performing related terms in the results const betterTerms = tfidfResults .filter(r => !r.shouldRemove && r.tfidf > 2.0) // Good TF-IDF scores .filter(r => this.areTermsRelated(r.term, problematicTerm)) .sort((a, b) => b.tfidf - a.tfidf); return betterTerms.length > 0 ? betterTerms[0].term : null; } /** * Suggest contextual terms to add for clarification * @param problematicTerm The term causing ambiguity * @param tfidfResults All TF-IDF results * @returns Array of contextual terms to add */ private suggestContextualTerms(problematicTerm: string, tfidfResults: TfidfAnalysisResult[]): string[] { // Find terms that appear in multiple highly-ranked results const highScoringTerms = tfidfResults .filter(r => r.tfidf > 3.0 && r.df >= 2) // Good TF-IDF and appears in multiple docs .filter(r => r.term !== problematicTerm) .slice(0, 2); // Limit to 2 contextual terms return highScoringTerms.map(r => r.term); } /** * Check if two terms are semantically related * @param term1 First term * @param term2 Second term * @returns true if terms are related */ private areTermsRelated(term1: string, term2: string): boolean { // Simple heuristic: check for common prefixes/suffixes or substring relationships const lower1 = term1.toLowerCase(); const lower2 = term2.toLowerCase(); // Direct substring relationship if (lower1.includes(lower2) || lower2.includes(lower1)) { return lower1 !== lower2; // Not identical } // Common programming term variations const commonMappings: Record<string, string[]> = { 'function': ['method', 'func'], 'class': ['object', 'instance'], 'variable': ['var', 'const', 'let'], 'import': ['require', 'module'], 'async': ['await', 'promise'], }; for (const [key, variants] of Object.entries(commonMappings)) { if ((lower1 === key && variants.includes(lower2)) || (lower2 === key && variants.includes(lower1))) { return true; } } return false; } /** * Escape special regex characters * @param string String to escape * @returns Escaped string */ private escapeRegex(string: string): string { return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PatrickRuddiman/local-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

RecommendationEngine.ts•13 KiB