Local Search MCP Server

SearchService.ts•19.4 KiB

import { EmbeddingService, EmbeddingConfig } from './EmbeddingService.js';
import { VectorIndex } from './VectorIndex.js';
import { RecommendationService } from './RecommendationService.js';
import { RecommendationRepository } from './RecommendationRepository.js';
import { RecommendationEngine } from './RecommendationEngine.js';
import { LearningAlgorithm } from './LearningAlgorithm.js';
import { DatabaseSchema } from './DatabaseSchema.js';
import { DomainExtractor } from './DomainExtractor.js';
import { log } from './Logger.js';
import {
  DocumentChunkOptimized,
  SearchResult,
  SearchOptions,
  SearchRecommendation,
  RecommendationEffectiveness,
  EnhancedSearchOptions,
  QueryIntent,
  DomainMatch
} from '../types/index.js';
import * as path from 'path';

/** Lifetime of an analyzed query intent (24 hours), in milliseconds. */
const QUERY_INTENT_TTL_MS = 24 * 60 * 60 * 1000;

/** Confidence used when no domain is detected or intent analysis fails. */
const DEFAULT_INTENT_CONFIDENCE = 0.5;

/** Maximum number of query characters included in log entries. */
const LOG_QUERY_PREVIEW_LENGTH = 50;

/** Shorten a query for logging so log entries stay small. */
function previewQuery(query: string): string {
  return query.substring(0, LOG_QUERY_PREVIEW_LENGTH);
}

/** Construction options for {@link SearchService}. */
export interface SearchServiceConfig {
  /** Configuration forwarded to `EmbeddingService.getInstance`. */
  embeddingConfig?: EmbeddingConfig;
}

/**
 * Semantic search facade: embeds queries, looks up similar chunks in the
 * vector index, optionally boosts/filters results by content metadata, and
 * attaches cached search recommendations and "next step" hints.
 */
export class SearchService {
  private embeddingService: EmbeddingService | null;
  private vectorIndex: VectorIndex;
  private recommendationService: RecommendationService;
  private recommendationEngine: RecommendationEngine;
  private learningAlgorithm: LearningAlgorithm;
  private domainExtractor: DomainExtractor;
  private config: Required<SearchServiceConfig>;

  /**
   * Wire up the index, recommendation components and domain extractor on a
   * shared database schema.
   *
   * @param config Optional service configuration; a missing or `undefined`
   *   `embeddingConfig` falls back to `{}`.
   */
  constructor(config: SearchServiceConfig = {}) {
    // Apply the default last so an explicit `embeddingConfig: undefined`
    // cannot override it and violate the Required<> type.
    this.config = { ...config, embeddingConfig: config.embeddingConfig ?? {} };
    this.embeddingService = null; // Will be initialized async

    // Create shared database schema for all repositories
    const schema = new DatabaseSchema();
    const recommendationRepository = new RecommendationRepository(schema.getDatabase());
    this.recommendationService = new RecommendationService(recommendationRepository);
    this.vectorIndex = new VectorIndex(schema, recommendationRepository);

    // Initialize recommendation system components with configurable parameters
    this.recommendationEngine = new RecommendationEngine({
      maxQueryTerms: 8, // More practical limit than the previous 50
      maxAnalysisDocuments: 5
    });
    this.learningAlgorithm = new LearningAlgorithm();

    // Initialize domain extractor with database for vocabulary lookups
    this.domainExtractor = new DomainExtractor(schema.getDatabase());

    // Fire-and-forget: the method handles its own errors, and until it
    // completes the default LearningAlgorithm created above is used.
    void this.initializeLearningParams();

    log.info('SearchService initialized with recommendation system and domain detection');
  }

  /**
   * Initialize learning parameters from persistent storage.
   *
   * Replaces the default learning algorithm when persisted parameters exist;
   * failures are logged and the defaults are kept.
   */
  private async initializeLearningParams(): Promise<void> {
    try {
      const persistedParams = await this.vectorIndex.getLearningParameters();
      if (persistedParams) {
        this.learningAlgorithm = LearningAlgorithm.fromJSON(persistedParams as any);
        log.debug('Learning parameters loaded from database', {
          threshold: persistedParams.currentTfidfThreshold,
          historyLength: persistedParams.effectivenessHistory.length
        });
      }
    } catch (error) {
      log.warn('Failed to load learning parameters, using defaults', error as any);
    }
  }

  /**
   * Ensure the EmbeddingService singleton is initialized.
   *
   * @returns The embedding service, obtained lazily on first use.
   */
  private async ensureEmbeddingService(): Promise<EmbeddingService> {
    if (!this.embeddingService) {
      this.embeddingService = await EmbeddingService.getInstance(this.config.embeddingConfig);
    }
    return this.embeddingService;
  }

  /**
   * Tokenize a query into unique lowercase terms, preserving quoted phrases.
   *
   * @param query The search query
   * @returns De-duplicated terms; single-character and empty terms are dropped
   */
  private tokenizeQuery(query: string): string[] {
    const terms = new Set<string>();
    // Group 1: content of a "quoted phrase"; group 2: a bare whitespace-delimited word.
    const termPattern = /"([^"]*)"|(\S+)/g;

    for (const match of query.toLowerCase().matchAll(termPattern)) {
      // Use quoted content if present, otherwise the unquoted term
      const term = match[1] || match[2];
      if (term && term.length > 1) {
        // Skip single characters
        terms.add(term);
      }
    }

    return [...terms];
  }

  /**
   * Build a query intent record with the standard hash and expiry.
   *
   * @param query Search query the intent belongs to
   * @param detectedDomains Domains detected for the query (may be empty)
   * @param confidence Overall confidence for the intent
   */
  private buildQueryIntent(query: string, detectedDomains: DomainMatch[], confidence: number): QueryIntent {
    return {
      // NOTE: this is the base64 encoding of the query, not a digest.
      queryHash: Buffer.from(query).toString('base64'),
      detectedDomains,
      confidence,
      expiresAt: new Date(Date.now() + QUERY_INTENT_TTL_MS)
    };
  }

  /**
   * Analyze query intent and detect relevant domains.
   *
   * @param query Search query
   * @returns Query intent with detected domains; on failure, an intent with
   *   no domains and the default confidence (this method does not throw)
   */
  async analyzeQueryIntent(query: string): Promise<QueryIntent> {
    try {
      log.debug('Analyzing query intent', { query: previewQuery(query) });

      // Use domain extractor to detect technology domains in query
      const detectedDomains = await this.domainExtractor.classifyQueryIntent(query);

      // Overall confidence is the mean of the domain confidences, or the
      // default for queries without clear domain signals.
      const confidence = detectedDomains.length > 0
        ? detectedDomains.reduce((sum: number, domain: DomainMatch) => sum + domain.confidence, 0) / detectedDomains.length
        : DEFAULT_INTENT_CONFIDENCE;

      const queryIntent = this.buildQueryIntent(query, detectedDomains, confidence);

      log.debug('Query intent analyzed', {
        query: previewQuery(query),
        domainsDetected: detectedDomains.length,
        confidence,
        domains: detectedDomains.map((d: DomainMatch) => d.domain)
      });

      return queryIntent;
    } catch (error: any) {
      log.warn('Failed to analyze query intent', error as any);
      // Return default intent
      return this.buildQueryIntent(query, [], DEFAULT_INTENT_CONFIDENCE);
    }
  }

  /**
   * Enhanced search with domain-aware result boosting and metadata filtering.
   *
   * Runs the standard semantic search, boosts results whose domain tags match
   * the detected query domains, then applies any metadata filters from
   * `options`. If anything in this pipeline throws, falls back to the
   * standard search.
   *
   * @param query Natural language query
   * @param options Enhanced search configuration
   * @returns Search results with domain-based ranking
   */
  async searchDocumentsEnhanced(query: string, options: EnhancedSearchOptions = {}): Promise<SearchResult> {
    const startTime = Date.now();

    try {
      log.debug('Executing enhanced semantic search', { query: previewQuery(query), options });

      // Analyze query intent to detect relevant domains
      const queryIntent = await this.analyzeQueryIntent(query);

      // Perform standard semantic search
      const searchResult = await this.searchDocuments(query, options);

      // Boosting only has an effect when domains were detected, so only
      // run (and log) it in that case.
      if (queryIntent.detectedDomains.length > 0) {
        const boostedResults = this.applyDomainBoosting(searchResult.results, queryIntent.detectedDomains);
        searchResult.results = boostedResults;

        log.debug('Applied domain-based result boosting', {
          query: previewQuery(query),
          detectedDomains: queryIntent.detectedDomains.length,
          domainFiltersApplied: options.domainFilter?.length || 0,
          resultsCount: boostedResults.length
        });
      }

      // Apply enhanced filtering if any filter criterion is specified,
      // including the minimum quality/authority thresholds.
      const hasFilters =
        Boolean(options.domainFilter) ||
        Boolean(options.contentTypeFilter) ||
        Boolean(options.languageFilter) ||
        options.minQualityScore !== undefined ||
        options.minAuthorityScore !== undefined;

      if (hasFilters) {
        // Capture the pre-filter count before the results are replaced.
        const originalCount = searchResult.results.length;
        const filteredResults = this.applyEnhancedFiltering(searchResult.results, options);
        searchResult.results = filteredResults;
        searchResult.totalResults = filteredResults.length;

        log.debug('Applied enhanced filtering', {
          query: previewQuery(query),
          originalCount,
          filteredCount: filteredResults.length,
          filters: {
            domains: options.domainFilter?.length || 0,
            contentTypes: options.contentTypeFilter?.length || 0,
            languages: options.languageFilter?.length || 0
          }
        });
      }

      // Report the time for the whole enhanced pipeline, not just the inner search.
      searchResult.searchTime = Date.now() - startTime;

      return searchResult;
    } catch (error: any) {
      log.error('Enhanced search failed', error, { query: previewQuery(query), options });
      // Fallback to standard search
      return this.searchDocuments(query, options);
    }
  }

  /**
   * Apply domain-based boosting to search results.
   *
   * Each result's score is multiplied by `1 + boostFactor * confidence` of
   * the strongest matching domain (no stacking across domains); results are
   * then re-sorted by descending score.
   *
   * @param results Original search results
   * @param domainMatches Detected domain matches with boost factors
   * @returns Results with adjusted scores; the input array itself when there
   *   are no domain matches
   */
  private applyDomainBoosting(results: DocumentChunkOptimized[], domainMatches: DomainMatch[]): DocumentChunkOptimized[] {
    if (domainMatches.length === 0) return results;

    return results
      .map(result => {
        let boostFactor = 1.0;

        // Check if result has domain tags that match query intent
        const contentMetadata = result.contentMetadata;
        if (contentMetadata?.domainTags) {
          for (const domainMatch of domainMatches) {
            if (contentMetadata.domainTags.includes(domainMatch.domain)) {
              // Apply boost factor (weighted by domain confidence)
              const boost = domainMatch.boostFactor * domainMatch.confidence;
              boostFactor = Math.max(boostFactor, 1.0 + boost);
            }
          }
        }

        // Apply boost to score
        const originalScore = result.score || 0;
        return { ...result, score: originalScore * boostFactor };
      })
      .sort((a, b) => (b.score || 0) - (a.score || 0)); // Re-sort by boosted scores
  }

  /**
   * Apply enhanced filtering based on content metadata.
   *
   * Results without content metadata are always kept.
   *
   * @param results Search results to filter
   * @param options Enhanced search options with filtering criteria
   * @returns Filtered results
   */
  private applyEnhancedFiltering(results: DocumentChunkOptimized[], options: EnhancedSearchOptions): DocumentChunkOptimized[] {
    const { domainFilter, contentTypeFilter, languageFilter, minQualityScore, minAuthorityScore } = options;

    return results.filter(result => {
      const metadata = result.contentMetadata;
      if (!metadata) return true; // Include if no metadata available

      // Filter by domain tags
      if (domainFilter && domainFilter.length > 0) {
        const hasMatchingDomain = metadata.domainTags?.some(tag => domainFilter.includes(tag));
        if (!hasMatchingDomain) return false;
      }

      // Filter by content type
      if (contentTypeFilter && contentTypeFilter.length > 0) {
        if (!contentTypeFilter.includes(metadata.contentType)) return false;
      }

      // Filter by programming language
      if (languageFilter && languageFilter.length > 0) {
        if (!languageFilter.includes(metadata.language)) return false;
      }

      // Filter by minimum quality score
      if (minQualityScore !== undefined) {
        if (metadata.qualityScore < minQualityScore) return false;
      }

      // Filter by minimum authority score
      if (minAuthorityScore !== undefined) {
        if (metadata.sourceAuthority < minAuthorityScore) return false;
      }

      return true;
    });
  }

  /**
   * Search for documents using semantic similarity.
   *
   * @param query Natural language query
   * @param options Search configuration (`limit` defaults to 10, `minScore` to 0)
   * @returns Search results with optimized response (no embeddings); an empty
   *   result set if the search fails (this method does not throw)
   */
  async searchDocuments(query: string, options: SearchOptions = {}): Promise<SearchResult> {
    const startTime = Date.now();

    try {
      log.debug('Executing semantic search query', { query: previewQuery(query), options });

      // Generate embedding for the query
      const embeddingService = await this.ensureEmbeddingService();
      const queryEmbedding = await embeddingService.embedQuery(query);

      // Search for similar chunks in the vector index
      const rawResults = await this.vectorIndex.searchSimilar(
        queryEmbedding,
        options.limit || 10,
        options.minScore || 0.0
      );

      const searchTime = Date.now() - startTime;
      const totalResults = rawResults.length;

      // Remove embedding arrays to save context window space
      const optimizedResults: DocumentChunkOptimized[] = rawResults.map(chunk => {
        const { embedding, ...chunkWithoutEmbedding } = chunk;
        return chunkWithoutEmbedding as DocumentChunkOptimized;
      });

      // Calculate average score for recommendation analysis
      const averageScore = optimizedResults.length > 0
        ? optimizedResults.reduce((sum, r) => sum + (r.score || 0), 0) / optimizedResults.length
        : 0;

      // Tokenize query for recommendation analysis
      const queryTerms = this.tokenizeQuery(query);

      // Check if search should trigger recommendation analysis
      const shouldAnalyzeRecommendations = this.learningAlgorithm.shouldAnalyzeForRecommendations(
        totalResults,
        averageScore,
        queryTerms.length
      );

      let contextualRecommendations: SearchRecommendation | undefined;
      const nextSteps: string[] = [];

      // Only a previously cached recommendation is used here; a failed
      // lookup is logged and the search continues without one.
      if (shouldAnalyzeRecommendations) {
        try {
          const cachedRec = await this.vectorIndex.getRecommendation(query);
          if (cachedRec) {
            contextualRecommendations = cachedRec;
            log.debug('Using cached recommendation', {
              recommendationId: cachedRec.id,
              strategy: cachedRec.suggestionStrategy,
              confidence: cachedRec.confidence
            });
          }
        } catch (error) {
          log.warn('Failed to check cached recommendations', error as any);
        }
      }

      // Generate next steps guidance for LLM workflow
      if (optimizedResults.length > 0) {
        const topResult = optimizedResults[0];
        nextSteps.push(
          `For full content of top result: get_file_details({filePath: "${topResult.filePath}", chunkIndex: ${topResult.chunkIndex}})`
        );

        if (optimizedResults.length > 1) {
          // Suggest whole-file retrieval for the distinct files among the top three results.
          const uniqueFiles = [...new Set(optimizedResults.slice(0, 3).map(r => r.filePath))];
          uniqueFiles.forEach(filePath => {
            nextSteps.push(
              `For all chunks in "${path.basename(filePath)}": get_file_details({filePath: "${filePath}"})`
            );
          });
        }

        // Add contextual recommendation next steps (placed first in the list)
        if (contextualRecommendations) {
          const rec = contextualRecommendations;
          const suggestedQuery = rec.suggestedTerms.join(' ');
          nextSteps.unshift(
            `Recommended refined search: "${suggestedQuery}" (TF-IDF refinement, confidence: ${(rec.confidence * 100).toFixed(1)}%)`
          );
        }
      }

      log.debug('Search query completed', {
        query: previewQuery(query),
        totalResults,
        searchTime,
        topScore: optimizedResults[0]?.score || 0,
        recommendationAnalyzed: shouldAnalyzeRecommendations
      });

      return {
        query,
        results: optimizedResults,
        totalResults,
        searchTime,
        options,
        nextSteps,
        recommendation: contextualRecommendations
      };
    } catch (error: any) {
      log.error('Search query failed', error, { query: previewQuery(query), options });

      // Return empty result set on error
      return {
        query,
        results: [],
        totalResults: 0,
        searchTime: Date.now() - startTime,
        options,
        nextSteps: []
      };
    }
  }

  /**
   * Get file details and content with context.
   *
   * @param filePath Absolute file path
   * @param chunkIndex Optional specific chunk index; when given, only that
   *   chunk and its neighbours are returned
   * @param contextSize Number of surrounding chunks to include on each side
   *   (default 3); negative values are treated as 0
   * @returns File chunks without embeddings; an empty array when the file or
   *   chunk is not found or retrieval fails
   */
  async getFileDetails(filePath: string, chunkIndex?: number, contextSize: number = 3): Promise<DocumentChunkOptimized[]> {
    try {
      log.debug('Retrieving file details', { filePath, chunkIndex, contextSize });

      if (chunkIndex !== undefined) {
        // Get specific chunk with surrounding context
        const allChunks = await this.vectorIndex.getFileChunks(filePath);

        if (allChunks.length === 0) {
          log.debug('No chunks found for file', { filePath });
          return [];
        }

        // Find the target chunk
        const targetChunkIndex = allChunks.findIndex(chunk => chunk.chunkIndex === chunkIndex);
        if (targetChunkIndex === -1) {
          log.debug('Target chunk not found', { filePath, chunkIndex });
          return [];
        }

        // A negative context size would invert the window and drop the
        // target chunk itself, so clamp it to zero.
        const contextRadius = Math.max(0, contextSize);

        // Calculate context window
        const startIndex = Math.max(0, targetChunkIndex - contextRadius);
        const endIndex = Math.min(allChunks.length - 1, targetChunkIndex + contextRadius);

        // Get context chunks
        const contextChunks = allChunks.slice(startIndex, endIndex + 1);

        log.debug('Retrieved file details with context', {
          filePath,
          chunkIndex,
          totalChunks: allChunks.length,
          returnedChunks: contextChunks.length,
          contextWindow: [startIndex, endIndex]
        });

        // Remove embeddings and return optimized chunks
        return contextChunks.map(chunk => {
          const { embedding, ...chunkWithoutEmbedding } = chunk;
          return chunkWithoutEmbedding as DocumentChunkOptimized;
        });
      } else {
        // Get all chunks for the file (without embeddings)
        const chunks = await this.vectorIndex.getFileChunks(filePath);

        log.debug('Retrieved all file details', { filePath, totalChunks: chunks.length });

        return chunks.map(chunk => {
          const { embedding, ...chunkWithoutEmbedding } = chunk;
          return chunkWithoutEmbedding as DocumentChunkOptimized;
        });
      }
    } catch (error: any) {
      log.error('Failed to get file details', error, { filePath, chunkIndex });
      return [];
    }
  }

  /**
   * Get index statistics.
   *
   * @returns Index statistics from the vector index, or zeroed placeholder
   *   statistics if retrieval fails
   */
  async getStatistics() {
    try {
      log.debug('Retrieving index statistics');
      const stats = await this.vectorIndex.getStatistics();

      log.debug('Index statistics retrieved', {
        totalChunks: stats.totalChunks,
        totalFiles: stats.totalFiles,
        totalTokens: stats.totalTokens
      });

      return stats;
    } catch (error: any) {
      log.error('Failed to get statistics', error);
      return {
        totalChunks: 0,
        totalFiles: 0,
        totalTokens: 0,
        embeddingModel: 'unknown',
        lastUpdated: new Date(),
        dbSize: 0
      };
    }
  }

  /**
   * Check if the embedding service is ready, initializing it if necessary.
   *
   * @returns true if the embedding service could be obtained, false otherwise
   */
  async isReady(): Promise<boolean> {
    try {
      await this.ensureEmbeddingService();
      return true;
    } catch (error: any) {
      log.error('SearchService not ready', error);
      return false;
    }
  }

  /**
   * Get search service configuration.
   *
   * @returns The service's live configuration object (not a copy)
   */
  getConfig(): SearchServiceConfig {
    return this.config;
  }

  /**
   * Get the vector index instance for administrative operations.
   */
  get vectorIndexInstance(): VectorIndex {
    return this.vectorIndex;
  }

  /**
   * Close all resources.
   *
   * Closes the vector index and drops the reference to the embedding
   * service. Errors are logged rather than thrown.
   */
  dispose(): void {
    try {
      log.debug('Disposing SearchService resources');
      this.vectorIndex.close();

      if (this.embeddingService) {
        // Note: Don't dispose the singleton EmbeddingService here
        this.embeddingService = null;
      }

      log.info('SearchService disposed');
    } catch (error: any) {
      log.error('Error disposing SearchService resources', error);
    }
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PatrickRuddiman/local-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

SearchService.ts•19.4 KiB