/**
* RetrievalService
*
 * Service for fetching context components for conversation initialization
 * and other context-aware operations: project structure summaries, recent
 * conversation topics, key architecture documents, FTS-based snippets, and
 * multi-source relevant-context retrieval.
*/
import * as dbQueries from "../db/queries.js";
import { KEY_ARCHITECTURE_DOCUMENT_PATHS } from "../config.js";
import CompressionService from "./compression.service.js";
/**
* RetrievalService class for handling context retrieval operations
*/
class RetrievalService {
/**
* Constructor for RetrievalService
* @param {Object} dependencies - Service dependencies
* @param {Object} dependencies.dbClient - Database client instance
* @param {Object} dependencies.logger - Logger instance
* @param {Object} dependencies.configService - Configuration service instance
* @param {Object} dependencies.compressionService - Compression service instance
* @param {Object} dependencies.relationshipManager - Relationship manager instance
*/
constructor({
dbClient,
logger,
configService,
compressionService,
relationshipManager,
}) {
this.dbClient = dbClient;
this.logger = logger;
this.configService = configService;
this.compressionService = compressionService;
this.relationshipManager = relationshipManager;
// Log successful initialization
this.logger.info("RetrievalService initialized successfully", {
hasDbClient: !!this.dbClient,
hasLogger: !!this.logger,
hasConfigService: !!this.configService,
hasCompressionService: !!this.compressionService,
hasRelationshipManager: !!this.relationshipManager,
});
}
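  // Minimal wiring sketch (hypothetical variable names; actual construction
  // happens wherever the application assembles its dependencies):
  //
  //   const retrievalService = new RetrievalService({
  //     dbClient,
  //     logger,
  //     configService,
  //     compressionService,
  //     relationshipManager,
  //   });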
// ===========================================
// PROJECT STRUCTURE SUMMARY METHODS
// ===========================================
/**
* Retrieves and assembles the project structure summary
   * Calls the relevant DB query functions and transforms the results into a structured format
* @returns {Promise<Object>} Project structure object with counts and summary
*/
async getProjectStructureSummary() {
try {
this.logger.debug("Starting project structure summary retrieval");
// Call all DB query functions for counts
const [
langCounts,
typeCounts,
entityAiStatusCounts,
docTypeCounts,
docAiStatusCounts,
relTypeCounts,
] = await Promise.all([
dbQueries.getCodeEntityCountsByLanguage(this.dbClient),
dbQueries.getCodeEntityCountsByType(this.dbClient),
dbQueries.getCodeEntityCountsByAiStatus(this.dbClient),
dbQueries.getProjectDocumentCountsByType(this.dbClient),
dbQueries.getProjectDocumentCountsByAiStatus(this.dbClient),
dbQueries.getCodeRelationshipCountsByType(this.dbClient),
]);
this.logger.debug("Retrieved all project structure counts", {
langCountsLength: langCounts.length,
typeCountsLength: typeCounts.length,
entityAiStatusCountsLength: entityAiStatusCounts.length,
docTypeCountsLength: docTypeCounts.length,
docAiStatusCountsLength: docAiStatusCounts.length,
relTypeCountsLength: relTypeCounts.length,
});
// Transform arrays of objects into map/record format
const entityCountsByLanguage = this._transformCountsToMap(
langCounts,
"language"
);
const entityCountsByType = this._transformCountsToMap(
typeCounts,
"entity_type"
);
const documentCountsByType = this._transformCountsToMap(
docTypeCounts,
"file_type"
);
const relationshipTypeCounts = this._transformCountsToMap(
relTypeCounts,
"relationship_type"
);
// Transform AI status counts for nested structure
const entityAiStatus = this._transformCountsToMap(
entityAiStatusCounts,
"ai_status"
);
const docAiStatus = this._transformCountsToMap(
docAiStatusCounts,
"ai_status"
);
// Calculate totals for dynamic summary
const totalEntities = this._sumCounts(entityCountsByLanguage);
const totalDocuments = this._sumCounts(documentCountsByType);
const totalRelationships = this._sumCounts(relationshipTypeCounts);
      const primaryLanguages = Object.entries(entityCountsByLanguage)
        .sort(([, countA], [, countB]) => countB - countA)
        .slice(0, 3)
        .map(([language]) => language); // Top 3 languages by entity count
// Generate dynamic summary
const summary = this._generateProjectSummary({
totalEntities,
totalDocuments,
totalRelationships,
primaryLanguages,
});
// Construct the project structure object
const projectStructure = {
summary,
entityCountsByLanguage,
entityCountsByType,
documentCountsByType,
aiProcessingStatus: {
codeEntities: entityAiStatus,
projectDocuments: docAiStatus,
},
relationshipTypeCounts,
};
this.logger.info("Project structure summary assembled successfully", {
totalEntities,
totalDocuments,
totalRelationships,
languageCount: Object.keys(entityCountsByLanguage).length,
documentTypeCount: Object.keys(documentCountsByType).length,
});
return projectStructure;
} catch (error) {
this.logger.error("Error retrieving project structure summary", {
error: error.message,
stack: error.stack,
});
// Return a fallback structure with error indication
return {
summary:
"Error retrieving project structure. Some data may be unavailable.",
entityCountsByLanguage: {},
entityCountsByType: {},
documentCountsByType: {},
aiProcessingStatus: {
codeEntities: {},
projectDocuments: {},
},
relationshipTypeCounts: {},
error: error.message,
};
}
}
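  // Illustrative success-path return value (all counts are made up):
  //
  //   {
  //     summary: "Project context summary: 120 code entities, ...",
  //     entityCountsByLanguage: { javascript: 100, python: 20 },
  //     entityCountsByType: { function_declaration: 80, class_definition: 40 },
  //     documentCountsByType: { markdown: 15 },
  //     aiProcessingStatus: {
  //       codeEntities: { completed: 90, pending: 30 },
  //       projectDocuments: { completed: 15 },
  //     },
  //     relationshipTypeCounts: { imports: 300 },
  //   }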
/**
* Retrieves and formats recent conversation topics for context
* @param {string|null} initialQueryString - Optional initial query for filtering/prioritization
* @returns {Promise<Object>} Object with topics array
*/
async getRecentConversationTopicsSummary(initialQueryString = null) {
try {
this.logger.debug("Retrieving recent conversation topics summary", {
hasInitialQuery: !!initialQueryString,
initialQueryLength: initialQueryString?.length || 0,
});
// Define limit for topics
const topicsLimit = 5;
// Tokenize initialQueryString into terms if provided
let initialQueryTerms = [];
if (initialQueryString && typeof initialQueryString === "string") {
initialQueryTerms = initialQueryString
.toLowerCase()
.split(/\s+/)
.filter((term) => term.length > 2); // Filter out very short terms
}
// Fetch more topics if we need to filter, otherwise just fetch the limit
const fetchLimit =
initialQueryTerms.length > 0 ? topicsLimit * 2 : topicsLimit;
// Call the database query function
const recentTopicsFromDb = await dbQueries.getRecentConversationTopics(
this.dbClient,
fetchLimit,
initialQueryTerms
);
let selectedTopics = recentTopicsFromDb;
// Optional filtering/prioritization if initialQueryString exists
if (initialQueryTerms.length > 0 && recentTopicsFromDb.length > 0) {
this.logger.debug("Applying relevance filtering for recent topics", {
initialQueryTerms,
topicsToFilter: recentTopicsFromDb.length,
});
// Score topics based on relevance to initial query terms
const scoredTopics = recentTopicsFromDb.map((topic) => {
let relevanceScore = 0;
// Check summary for matches (case-insensitive)
if (topic.summary) {
const summaryLower = topic.summary.toLowerCase();
for (const term of initialQueryTerms) {
if (summaryLower.includes(term)) {
relevanceScore += 2; // Higher weight for summary matches
}
}
}
// Check keywords for matches
if (topic.keywords) {
try {
const keywordsArray = JSON.parse(topic.keywords);
if (Array.isArray(keywordsArray)) {
for (const keyword of keywordsArray) {
const keywordLower = keyword.toLowerCase();
for (const term of initialQueryTerms) {
if (keywordLower.includes(term)) {
relevanceScore += 1; // Lower weight for keyword matches
}
}
}
}
} catch (error) {
// If keywords is not valid JSON, treat as string and search
const keywordsLower = topic.keywords.toLowerCase();
for (const term of initialQueryTerms) {
if (keywordsLower.includes(term)) {
relevanceScore += 1;
}
}
}
}
return {
...topic,
relevanceScore,
};
});
        // Sort by relevance score (descending); Array.prototype.sort is stable,
        // so ties keep the recency order returned by the DB
        scoredTopics.sort((a, b) => b.relevanceScore - a.relevanceScore);
// Take top topics after scoring
selectedTopics = scoredTopics.slice(0, topicsLimit);
this.logger.debug("Applied relevance filtering", {
originalCount: recentTopicsFromDb.length,
filteredCount: selectedTopics.length,
hasRelevantTopics: selectedTopics.some((t) => t.relevanceScore > 0),
});
} else {
// No filtering needed, just take the limit
selectedTopics = recentTopicsFromDb.slice(0, topicsLimit);
}
// Format the final selected topics
const formattedTopics = selectedTopics.map((topic) => ({
topicId: topic.topicId,
summary: topic.summary || "",
purposeTag: topic.purposeTag || null,
}));
this.logger.info("Recent conversation topics summary retrieved", {
topicsCount: formattedTopics.length,
requestedLimit: topicsLimit,
hadInitialQuery: !!initialQueryString,
});
return {
topics: formattedTopics,
};
} catch (error) {
this.logger.error("Error retrieving recent conversation topics summary", {
error: error.message,
stack: error.stack,
initialQueryString,
});
// Return empty topics array on error
return {
topics: [],
};
}
}
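  // Illustrative return value (topic data is made up):
  //
  //   {
  //     topics: [
  //       {
  //         topicId: "topic-42",
  //         summary: "Discussed refactoring the retrieval pipeline",
  //         purposeTag: "refactoring",
  //       },
  //     ],
  //   }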
/**
* Helper method to transform array of count objects to map format
* @param {Array} countsArray - Array of objects with key and count properties
* @param {string} keyField - The field name to use as the map key
* @returns {Object} Map with key -> count pairs
*/
_transformCountsToMap(countsArray, keyField) {
const map = {};
if (Array.isArray(countsArray)) {
for (const item of countsArray) {
if (item[keyField] && typeof item.count === "number") {
map[item[keyField]] = item.count;
}
}
}
return map;
}
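  // Example (assumed row shape from the counts queries):
  //   _transformCountsToMap(
  //     [
  //       { language: "javascript", count: 100 },
  //       { language: "python", count: 20 },
  //     ],
  //     "language"
  //   ) // → { javascript: 100, python: 20 }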
/**
* Helper method to sum all values in a counts map
* @param {Object} countsMap - Map with count values
* @returns {number} Total sum of all counts
*/
_sumCounts(countsMap) {
return Object.values(countsMap).reduce((sum, count) => sum + count, 0);
}
/**
* Helper method to generate dynamic project summary text
* @param {Object} stats - Project statistics
* @returns {string} Generated summary text
*/
_generateProjectSummary({
totalEntities,
totalDocuments,
totalRelationships,
primaryLanguages,
}) {
const parts = [];
if (totalEntities > 0) {
parts.push(`${totalEntities} code entities`);
}
if (totalDocuments > 0) {
parts.push(`${totalDocuments} documents`);
}
if (totalRelationships > 0) {
parts.push(`${totalRelationships} relationships`);
}
if (primaryLanguages.length > 0) {
parts.push(`Primary languages: ${primaryLanguages.join(", ")}`);
}
if (parts.length === 0) {
return "Project context summary: No data available or project not yet analyzed.";
}
return `Project context summary: ${parts.join(", ")}.`;
}
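  // Example: _generateProjectSummary({
  //   totalEntities: 120,
  //   totalDocuments: 15,
  //   totalRelationships: 300,
  //   primaryLanguages: ["javascript", "python"],
  // }) returns "Project context summary: 120 code entities, 15 documents,
  // 300 relationships, Primary languages: javascript, python."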
// ===========================================
// ARCHITECTURE CONTEXT METHODS
// ===========================================
/**
* Retrieves and formats key architecture documents for context
* Fetches documents defined in KEY_ARCHITECTURE_DOCUMENT_PATHS, prioritizing AI summaries
* @returns {Promise<Object>} Object with keyDocuments array and optional overallProjectGoalHint
*/
async getArchitectureContextSummary() {
try {
this.logger.debug("Retrieving architecture context summary", {
documentPathsCount: KEY_ARCHITECTURE_DOCUMENT_PATHS.length,
documentPaths: KEY_ARCHITECTURE_DOCUMENT_PATHS,
});
// Initialize array for key documents data
const keyDocumentsData = [];
let overallProjectGoalHint = null;
// Iterate through each configured document path
for (const docPath of KEY_ARCHITECTURE_DOCUMENT_PATHS) {
try {
this.logger.debug(`Fetching architecture document: ${docPath}`);
// Fetch the document from the database
const doc = await dbQueries.getProjectDocumentByFilePath(
this.dbClient,
docPath
);
if (doc) {
this.logger.debug(`Found architecture document: ${docPath}`, {
documentId: doc.document_id,
aiStatus: doc.ai_status,
hasSummary: !!doc.summary,
hasContent: !!doc.raw_content,
});
// Determine summarySnippet based on AI status and available content
let summarySnippet;
if (
doc.ai_status === "completed" &&
doc.summary &&
doc.summary.trim()
) {
// Use AI summary if available and completed
summarySnippet = doc.summary.trim();
this.logger.debug(`Using AI summary for ${docPath}`);
} else if (doc.raw_content && doc.raw_content.trim()) {
// Use raw content snippet if AI summary not available
const content = doc.raw_content.trim();
const maxSnippetLength = 500;
if (content.length <= maxSnippetLength) {
summarySnippet = content;
} else {
// Take first 500 characters and add ellipsis
summarySnippet = content.substring(0, maxSnippetLength) + "...";
}
this.logger.debug(`Using raw content snippet for ${docPath}`, {
originalLength: content.length,
snippetLength: summarySnippet.length,
});
} else {
// No content available
summarySnippet = "Content not available or not summarized.";
this.logger.debug(`No content available for ${docPath}`);
}
// Add formatted document data
keyDocumentsData.push({
filePath: doc.file_path,
aiStatus: doc.ai_status,
summarySnippet,
});
// Optional: Set overallProjectGoalHint from primary goal documents
if (
!overallProjectGoalHint &&
(docPath === "README.md" || docPath === "docs/prd.md") &&
summarySnippet &&
summarySnippet !== "Content not available or not summarized."
) {
// Use first part of the summary as goal hint (max 200 chars)
const maxHintLength = 200;
if (summarySnippet.length <= maxHintLength) {
overallProjectGoalHint = summarySnippet;
} else {
overallProjectGoalHint =
summarySnippet.substring(0, maxHintLength) + "...";
}
this.logger.debug(`Set overallProjectGoalHint from ${docPath}`, {
hintLength: overallProjectGoalHint.length,
});
}
} else {
// Document not found in the database
this.logger.debug(
`Key architecture document not found: ${docPath}`
);
// Optionally add an entry for missing documents
keyDocumentsData.push({
filePath: docPath,
aiStatus: "not_found",
summarySnippet: "Document not found in index.",
});
}
} catch (docError) {
this.logger.error(
`Error fetching architecture document: ${docPath}`,
{
error: docError.message,
stack: docError.stack,
docPath,
}
);
// Add an error entry for this document
keyDocumentsData.push({
filePath: docPath,
aiStatus: "error",
summarySnippet: `Error retrieving document: ${docError.message}`,
});
}
}
this.logger.info("Architecture context summary retrieved successfully", {
keyDocumentsCount: keyDocumentsData.length,
documentsFound: keyDocumentsData.filter(
(doc) => doc.aiStatus !== "not_found" && doc.aiStatus !== "error"
).length,
hasProjectGoalHint: !!overallProjectGoalHint,
});
// Return the assembled architecture context
const result = {
keyDocuments: keyDocumentsData,
};
if (overallProjectGoalHint) {
result.overallProjectGoalHint = overallProjectGoalHint;
}
return result;
} catch (error) {
this.logger.error("Error retrieving architecture context summary", {
error: error.message,
stack: error.stack,
});
// Return fallback structure on error
return {
keyDocuments: [],
error: error.message,
};
}
}
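  // Illustrative return value (document data is made up; README.md and
  // docs/prd.md are the configured goal-hint sources):
  //
  //   {
  //     keyDocuments: [
  //       {
  //         filePath: "README.md",
  //         aiStatus: "completed",
  //         summarySnippet: "A context service for coding agents that ...",
  //       },
  //       {
  //         filePath: "docs/prd.md",
  //         aiStatus: "not_found",
  //         summarySnippet: "Document not found in index.",
  //       },
  //     ],
  //     overallProjectGoalHint: "A context service for coding agents that ...",
  //   }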
// ===========================================
// FTS QUERY PREPARATION METHODS
// ===========================================
/**
* Prepares an FTS query string from natural language query text
* Converts user input into a format suitable for SQLite FTS5 MATCH operations
* @param {string} naturalLanguageQuery - The user's natural language query
* @returns {string} FTS5-compatible query string
* @private
*/
_prepareFtsQueryString(naturalLanguageQuery) {
try {
this.logger.debug("Preparing FTS query string", {
originalQuery: naturalLanguageQuery,
queryLength: naturalLanguageQuery?.length || 0,
});
// Handle null, undefined, or empty queries
if (!naturalLanguageQuery || typeof naturalLanguageQuery !== "string") {
this.logger.debug("Invalid or empty query provided");
return "";
}
// Convert to lowercase for consistent processing
let processedQuery = naturalLanguageQuery.toLowerCase().trim();
// Basic tokenization - split by spaces and punctuation
// Remove special characters but keep alphanumeric and basic punctuation
let tokens = processedQuery
.split(/[\s\.,;:!?\-\(\)\[\]{}'"]+/)
.filter((token) => token.length > 0);
this.logger.debug("Initial tokenization completed", {
tokenCount: tokens.length,
tokens: tokens.slice(0, 10), // Log first 10 tokens for debugging
});
// Filter out very short tokens and common stop words
      const stopWords = new Set([
        "a",
        "an",
        "and",
        "are",
        "as",
        "at",
        "be",
        "by",
        "for",
        "from",
        "has",
        "he",
        "in",
        "is",
        "it",
        "its",
        "of",
        "on",
        "that",
        "the",
        "to",
        "was",
        "will",
        "with",
        "this",
        "they",
        "them",
        "their",
        "what",
        "where",
        "when",
        "why",
        "how",
        "i",
        "you",
        "we",
        "me",
        "my",
        "your",
        "our",
      ]);
const filteredTokens = tokens.filter((token) => {
// Keep tokens that are:
// - At least 2 characters long
// - Not common stop words
// - Contain at least one letter (to avoid pure punctuation)
return (
token.length >= 2 && !stopWords.has(token) && /[a-zA-Z]/.test(token)
);
});
this.logger.debug("Token filtering completed", {
originalTokenCount: tokens.length,
filteredTokenCount: filteredTokens.length,
filteredTokens: filteredTokens,
});
// If no valid tokens remain, return empty string
if (filteredTokens.length === 0) {
this.logger.debug("No valid tokens found after filtering");
return "";
}
      // Quote each token so FTS5 treats embedded special characters
      // (* ^ ~ etc.) literally; FTS5 has no backslash escaping, so any
      // embedded double quote is doubled instead
      const escapedTokens = filteredTokens.map((token) => {
        return `"${token.replace(/"/g, '""')}"`;
      });
// Join tokens with OR for broader matching in initial snippets
// This casts a wider net to find relevant content
const ftsQueryString = escapedTokens.join(" OR ");
this.logger.debug("FTS query string preparation completed", {
originalQuery: naturalLanguageQuery,
finalQueryString: ftsQueryString,
tokenCount: escapedTokens.length,
});
return ftsQueryString;
} catch (error) {
this.logger.error("Error preparing FTS query string", {
error: error.message,
stack: error.stack,
naturalLanguageQuery,
});
// Return empty string on error to avoid breaking FTS queries
return "";
}
}
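  // Example: _prepareFtsQueryString("How is the retrieval service initialized?")
  // lowercases and tokenizes the text to ["how", "is", "the", "retrieval",
  // "service", "initialized"], drops stop words and short tokens, quotes each
  // surviving token, and returns: "retrieval" OR "service" OR "initialized"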
// ===========================================
// FTS SNIPPET RETRIEVAL METHODS
// ===========================================
/**
* Retrieves FTS-based context snippets for an initial query
* Orchestrates full-text search across code entities and project documents
* @param {string} initialQueryString - The user's initial query text
* @param {number} limit - Maximum number of snippets to return (default: 3)
* @returns {Promise<Array>} Array of formatted snippet objects
*/
async getFtsSnippetsForInitialQuery(initialQueryString, limit = 3) {
try {
this.logger.debug("Starting FTS snippets retrieval for initial query", {
initialQueryString,
limit,
queryLength: initialQueryString?.length || 0,
});
// Initialize empty results array
const initialQueryContextSnippets = [];
// Validate input
if (
!initialQueryString ||
typeof initialQueryString !== "string" ||
initialQueryString.trim() === ""
) {
this.logger.debug(
"No valid initial query provided, returning empty snippets"
);
return initialQueryContextSnippets;
}
// Prepare FTS query string using the helper method from Task 166
const ftsQueryString = this._prepareFtsQueryString(initialQueryString);
if (!ftsQueryString || ftsQueryString.trim() === "") {
this.logger.debug(
"No valid FTS query string generated, returning empty snippets"
);
return initialQueryContextSnippets;
}
this.logger.debug("FTS query string prepared for snippet retrieval", {
originalQuery: initialQueryString,
ftsQueryString,
limit,
});
// Task 168: Perform FTS calls for code entities and documents
let codeEntityHits = [];
let documentHits = [];
try {
// Fetch more than the limit to allow for merging and re-ranking
const ftsLimit = limit * 2;
this.logger.debug("Executing FTS queries", {
ftsQueryString,
ftsLimit,
});
// Execute FTS queries in parallel for better performance
const [codeEntityResults, documentResults] = await Promise.allSettled([
dbQueries.searchCodeEntitiesFts(
this.dbClient,
ftsQueryString,
ftsLimit
),
dbQueries.searchProjectDocumentsFts(
this.dbClient,
ftsQueryString,
ftsLimit
),
]);
// Handle code entity FTS results
if (codeEntityResults.status === "fulfilled") {
codeEntityHits = codeEntityResults.value || [];
this.logger.debug("Code entity FTS query completed successfully", {
hitCount: codeEntityHits.length,
ftsQueryString,
});
} else {
this.logger.error("Code entity FTS query failed", {
error: codeEntityResults.reason?.message || "Unknown error",
ftsQueryString,
});
// Continue with empty results for code entities
}
// Handle document FTS results
if (documentResults.status === "fulfilled") {
documentHits = documentResults.value || [];
this.logger.debug("Document FTS query completed successfully", {
hitCount: documentHits.length,
ftsQueryString,
});
} else {
this.logger.error("Document FTS query failed", {
error: documentResults.reason?.message || "Unknown error",
ftsQueryString,
});
// Continue with empty results for documents
}
this.logger.info("FTS queries completed", {
codeEntityHitsCount: codeEntityHits.length,
documentHitsCount: documentHits.length,
totalHits: codeEntityHits.length + documentHits.length,
originalQuery: initialQueryString,
});
// If both queries failed or returned no results, return empty array
if (codeEntityHits.length === 0 && documentHits.length === 0) {
this.logger.debug("No FTS hits found from either source", {
ftsQueryString,
originalQuery: initialQueryString,
});
return initialQueryContextSnippets;
}
} catch (error) {
this.logger.error("Unexpected error during FTS queries", {
error: error.message,
stack: error.stack,
ftsQueryString,
originalQuery: initialQueryString,
});
// Return empty array on unexpected error
return initialQueryContextSnippets;
}
// Task 169: Combine and rank FTS results
let allHits = [];
try {
this.logger.debug("Starting to combine and rank FTS results", {
codeEntityHitsCount: codeEntityHits.length,
documentHitsCount: documentHits.length,
});
// Transform code entity hits to common structure
const transformedCodeEntityHits = codeEntityHits.map((hit) => ({
id: hit.entity_id,
type: "code_entity",
rank: hit.rank,
ftsSnippet: hit.highlight_snippet,
}));
// Transform document hits to common structure
const transformedDocumentHits = documentHits.map((hit) => ({
id: hit.document_id,
type: "project_document",
rank: hit.rank,
ftsSnippet: hit.highlight_snippet,
}));
// Combine transformed hits into single array
allHits = [...transformedCodeEntityHits, ...transformedDocumentHits];
this.logger.debug("FTS hits transformed and combined", {
codeEntityTransformed: transformedCodeEntityHits.length,
documentTransformed: transformedDocumentHits.length,
totalCombined: allHits.length,
});
// Sort by FTS rank - lower rank is better in SQLite FTS5
allHits.sort((a, b) => a.rank - b.rank);
this.logger.info("FTS results combined and ranked successfully", {
totalCombinedHits: allHits.length,
topHitRank: allHits.length > 0 ? allHits[0].rank : null,
topHitType: allHits.length > 0 ? allHits[0].type : null,
rankRange:
allHits.length > 0
? {
best: allHits[0].rank,
worst: allHits[allHits.length - 1].rank,
}
: null,
});
// Log sample of top hits for debugging
if (allHits.length > 0) {
const sampleSize = Math.min(3, allHits.length);
const topHitsSample = allHits.slice(0, sampleSize).map((hit) => ({
id: hit.id,
type: hit.type,
rank: hit.rank,
            snippetPreview: hit.ftsSnippet
              ? hit.ftsSnippet.substring(0, 50) + "..."
              : null,
}));
this.logger.debug("Top FTS hits sample", {
sampleSize,
topHits: topHitsSample,
});
}
} catch (error) {
this.logger.error("Error combining and ranking FTS results", {
error: error.message,
stack: error.stack,
codeEntityHitsCount: codeEntityHits.length,
documentHitsCount: documentHits.length,
});
// Continue with empty allHits array
allHits = [];
}
// If no hits after combining, return empty array
if (allHits.length === 0) {
this.logger.debug("No combined FTS hits available for processing", {
originalQuery: initialQueryString,
ftsQueryString,
});
return initialQueryContextSnippets;
}
// Task 170: Fetch full records for top FTS hits
let hydratedResults = [];
try {
// Take the top N hits (limit the number of results we process)
const topHits = allHits.slice(0, limit);
this.logger.debug("Starting to fetch full records for top FTS hits", {
totalHitsAvailable: allHits.length,
topHitsSelected: topHits.length,
limit,
});
// Process each top hit to fetch its full record
for (let i = 0; i < topHits.length; i++) {
const hit = topHits[i];
try {
this.logger.debug(
`Fetching full record for hit ${i + 1}/${topHits.length}`,
{
hitId: hit.id,
hitType: hit.type,
rank: hit.rank,
}
);
let record = null;
// Fetch the appropriate record based on hit type
if (hit.type === "code_entity") {
record = await dbQueries.getCodeEntityById(this.dbClient, hit.id);
} else if (hit.type === "project_document") {
record = await dbQueries.getProjectDocumentById(
this.dbClient,
hit.id
);
} else {
this.logger.error("Unknown hit type encountered", {
hitId: hit.id,
hitType: hit.type,
rank: hit.rank,
});
continue; // Skip this hit
}
// Check if record was found
if (record) {
// Successfully fetched record, add to hydrated results
hydratedResults.push({
hit,
record,
ftsSnippet: hit.ftsSnippet,
});
this.logger.debug("Successfully fetched full record", {
hitId: hit.id,
hitType: hit.type,
recordFound: true,
hasFilePath: !!record.file_path,
hasAiStatus: !!record.ai_status,
});
} else {
// Record not found - this indicates FTS data inconsistency
this.logger.warn("FTS hit points to non-existent record", {
hitId: hit.id,
hitType: hit.type,
rank: hit.rank,
message: "FTS index may be out of sync with main tables",
});
              // Skip this hit and drop the stale FTS entry
}
} catch (recordError) {
this.logger.error("Error fetching full record for FTS hit", {
error: recordError.message,
stack: recordError.stack,
hitId: hit.id,
hitType: hit.type,
rank: hit.rank,
});
// Skip this hit on error
}
}
this.logger.info("Full record fetching completed", {
topHitsProcessed: topHits.length,
recordsSuccessfullyFetched: hydratedResults.length,
recordsFailed: topHits.length - hydratedResults.length,
successRate:
topHits.length > 0
? Math.round((hydratedResults.length / topHits.length) * 100)
: 0,
});
// Log sample of successfully hydrated results
if (hydratedResults.length > 0) {
const sampleSize = Math.min(2, hydratedResults.length);
const hydratedSample = hydratedResults
.slice(0, sampleSize)
.map((result) => ({
hitId: result.hit.id,
hitType: result.hit.type,
rank: result.hit.rank,
filePath: result.record.file_path,
aiStatus: result.record.ai_status,
hasContent: !!result.record.raw_content,
hasSummary: !!result.record.summary,
}));
this.logger.debug("Sample of hydrated results", {
sampleSize,
hydratedSample,
});
}
} catch (error) {
this.logger.error("Error during full record fetching process", {
error: error.message,
stack: error.stack,
allHitsCount: allHits.length,
limit,
});
// Continue with empty hydrated results
hydratedResults = [];
}
// If no records were successfully fetched, return empty array
if (hydratedResults.length === 0) {
this.logger.debug("No full records successfully fetched", {
topHitsAttempted: Math.min(allHits.length, limit),
originalQuery: initialQueryString,
});
return initialQueryContextSnippets;
}
// Task 171: Format final snippets for initialQueryContextSnippets
try {
this.logger.debug("Starting to format final snippets", {
hydratedResultsCount: hydratedResults.length,
originalQuery: initialQueryString,
});
// Process each hydrated result into the final snippet format
for (let i = 0; i < hydratedResults.length; i++) {
const { hit, record, ftsSnippet } = hydratedResults[i];
try {
this.logger.debug(
`Formatting snippet ${i + 1}/${hydratedResults.length}`,
{
hitId: hit.id,
hitType: hit.type,
filePath: record.file_path,
aiStatus: record.ai_status,
}
);
// Construct snippet object as per Story 3.5 schema
const snippetObject = {
filePath: record.file_path,
type:
hit.type === "code_entity"
? record.entity_type
: record.file_type,
aiStatus: record.ai_status,
};
// Add entityName if it's a code entity with a name
if (hit.type === "code_entity" && record.name) {
snippetObject.entityName = record.name;
}
// Determine the best snippet content (priority: AI summary → FTS snippet → raw content)
let snippetContent = null;
if (
record.ai_status === "completed" &&
record.summary &&
record.summary.trim()
) {
// Use AI summary if available and completed
snippetContent = record.summary.trim();
this.logger.debug(`Using AI summary for snippet ${i + 1}`, {
summaryLength: snippetContent.length,
});
} else if (ftsSnippet && ftsSnippet.trim()) {
// Use FTS highlighted snippet
snippetContent = ftsSnippet.trim();
this.logger.debug(`Using FTS snippet for snippet ${i + 1}`, {
ftsSnippetLength: snippetContent.length,
});
} else if (record.raw_content && record.raw_content.trim()) {
// Fallback to truncated raw content
const rawContent = record.raw_content.trim();
const maxFallbackLength = 300;
if (rawContent.length <= maxFallbackLength) {
snippetContent = rawContent;
} else {
snippetContent =
rawContent.substring(0, maxFallbackLength) + "...";
}
this.logger.debug(
`Using raw content fallback for snippet ${i + 1}`,
{
originalLength: rawContent.length,
truncatedLength: snippetContent.length,
}
);
} else {
// No usable content available
snippetContent = "No content available for this result.";
this.logger.warn(`No usable content for snippet ${i + 1}`, {
hitId: hit.id,
hitType: hit.type,
hasAiSummary: !!record.summary,
hasFtsSnippet: !!ftsSnippet,
hasRawContent: !!record.raw_content,
});
}
// Add the snippet content to the object
snippetObject.snippet = snippetContent;
// Add the formatted snippet to the final array
initialQueryContextSnippets.push(snippetObject);
this.logger.debug(`Successfully formatted snippet ${i + 1}`, {
filePath: snippetObject.filePath,
type: snippetObject.type,
hasEntityName: !!snippetObject.entityName,
snippetLength: snippetContent.length,
aiStatus: snippetObject.aiStatus,
});
} catch (snippetError) {
this.logger.error(`Error formatting snippet ${i + 1}`, {
error: snippetError.message,
stack: snippetError.stack,
hitId: hit.id,
hitType: hit.type,
});
// Skip this snippet on error, continue with others
}
}
this.logger.info("Final snippet formatting completed", {
totalHydratedResults: hydratedResults.length,
successfullyFormattedSnippets: initialQueryContextSnippets.length,
formattingSuccessRate:
hydratedResults.length > 0
? Math.round(
(initialQueryContextSnippets.length /
hydratedResults.length) *
100
)
: 0,
originalQuery: initialQueryString,
});
// Log sample of formatted snippets for debugging
if (initialQueryContextSnippets.length > 0) {
const sampleSize = Math.min(2, initialQueryContextSnippets.length);
const snippetSample = initialQueryContextSnippets
.slice(0, sampleSize)
.map((snippet) => ({
filePath: snippet.filePath,
type: snippet.type,
aiStatus: snippet.aiStatus,
hasEntityName: !!snippet.entityName,
              snippetPreview: snippet.snippet
                ? snippet.snippet.substring(0, 100) + "..."
                : null,
}));
this.logger.debug("Sample of formatted snippets", {
sampleSize,
snippetSample,
});
}
} catch (error) {
this.logger.error("Error during final snippet formatting", {
error: error.message,
stack: error.stack,
hydratedResultsCount: hydratedResults.length,
originalQuery: initialQueryString,
});
// Continue with whatever snippets were successfully formatted
// initialQueryContextSnippets will contain partial results
}
this.logger.info("FTS snippets retrieval completed successfully", {
queryProvided: !!initialQueryString,
snippetsCount: initialQueryContextSnippets.length,
limit,
});
return initialQueryContextSnippets;
} catch (error) {
this.logger.error("Error retrieving FTS snippets for initial query", {
error: error.message,
stack: error.stack,
initialQueryString,
limit,
});
// Return empty array on error to avoid breaking the context initialization
return [];
}
}
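  // Illustrative element of the returned snippets array (values are made up):
  //
  //   {
  //     filePath: "docs/architecture.md",
  //     type: "markdown",
  //     aiStatus: "completed",
  //     snippet: "The retrieval service assembles context from FTS and ...",
  //   }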
// ===========================================
// CONTEXT RETRIEVAL METHODS
// ===========================================
/**
* Retrieves relevant context snippets based on a query within a conversation session
   * Originally stubbed for Story 4.1; now orchestrates multi-source retrieval
   * (FTS, keyword, conversation history/topics, and Git searches) added in
   * subsequent stories
* @param {string} query - The agent's query for context
* @param {string} conversationId - The active conversation session ID
* @param {number} tokenBudget - Maximum desired token count for returned snippets
* @param {Object} retrievalParameters - Additional retrieval parameters
* @returns {Promise<Object>} Object with contextSnippets and retrievalSummary
*/
async getRelevantContext(
query,
conversationId,
tokenBudget,
retrievalParameters
) {
// Constants for result limits
const MAX_FTS_CANDIDATES_PER_SOURCE = 20;
const MAX_KEYWORD_CANDIDATES = 20;
    const MAX_CONVO_HISTORY_CANDIDATES = 10; // Conversation history search limit
    const MAX_CONVO_TOPIC_CANDIDATES = 5; // Conversation topics search limit
    const MAX_GIT_COMMIT_CANDIDATES = 10; // Git commit search limit
    const MAX_GIT_FILE_CHANGE_CANDIDATES = 15; // Git file change search limit
this.logger.debug("getRelevantContext invoked", {
query: query,
conversationId: conversationId,
tokenBudget: tokenBudget,
hasRetrievalParameters: !!retrievalParameters,
retrievalParameters: retrievalParameters,
});
try {
// Step 1: Get processed search terms using the helper method from Task 182
const searchTerms = this._getSearchTerms(query);
this.logger.debug("Processed search terms from query", {
originalQuery: query,
searchTerms: searchTerms,
searchTermsCount: searchTerms.length,
});
      // Step 2: Prepare FTS query string from search terms
      // _prepareFtsQueryString expects natural language text, so rejoin the
      // processed search terms into a single string first
      const reconstructedQuery = searchTerms.join(" ");
const ftsQueryString = this._prepareFtsQueryString(reconstructedQuery);
this.logger.debug("Prepared FTS query string", {
reconstructedQuery: reconstructedQuery,
ftsQueryString: ftsQueryString,
});
// Step 3: Perform FTS search on code_entities_fts
let codeEntityHits = [];
if (ftsQueryString && ftsQueryString.trim() !== "") {
try {
codeEntityHits = await dbQueries.searchCodeEntitiesFts(
this.dbClient,
ftsQueryString,
MAX_FTS_CANDIDATES_PER_SOURCE
);
this.logger.debug("FTS search completed for code entities", {
ftsQueryString: ftsQueryString,
rawHitsCount: codeEntityHits.length,
limit: MAX_FTS_CANDIDATES_PER_SOURCE,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: FTS search complete, ${codeEntityHits.length} code entity candidates.`,
{
conversationId: conversationId,
stage: "fts_code_entities",
candidatesFound: codeEntityHits.length,
}
);
} catch (ftsError) {
this.logger.error("Error during FTS search on code_entities_fts", {
error: ftsError.message,
stack: ftsError.stack,
ftsQueryString: ftsQueryString,
});
// Initialize to empty array on error
codeEntityHits = [];
}
} else {
this.logger.debug(
"FTS query string is empty, skipping code entities FTS search",
{
originalQuery: query,
searchTerms: searchTerms,
}
);
codeEntityHits = [];
}
// Step 4: Perform FTS search on project_documents_fts
let documentHits = [];
if (ftsQueryString && ftsQueryString.trim() !== "") {
try {
documentHits = await dbQueries.searchProjectDocumentsFts(
this.dbClient,
ftsQueryString,
MAX_FTS_CANDIDATES_PER_SOURCE
);
this.logger.debug("FTS search completed for project documents", {
ftsQueryString: ftsQueryString,
rawHitsCount: documentHits.length,
limit: MAX_FTS_CANDIDATES_PER_SOURCE,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: Document FTS search complete, ${documentHits.length} document candidates.`,
{
conversationId: conversationId,
stage: "fts_documents",
candidatesFound: documentHits.length,
}
);
} catch (ftsError) {
this.logger.error(
"Error during FTS search on project_documents_fts",
{
error: ftsError.message,
stack: ftsError.stack,
ftsQueryString: ftsQueryString,
}
);
// Initialize to empty array on error
documentHits = [];
}
} else {
this.logger.debug(
"FTS query string is empty, skipping project documents FTS search",
{
originalQuery: query,
searchTerms: searchTerms,
}
);
documentHits = [];
}
// Step 5: Perform keyword search on entity_keywords table
let keywordMatchedEntities = [];
if (searchTerms && searchTerms.length > 0) {
try {
keywordMatchedEntities = await dbQueries.searchEntityKeywords(
this.dbClient,
searchTerms,
MAX_KEYWORD_CANDIDATES
);
this.logger.debug("Keyword search completed on entity_keywords", {
searchTerms: searchTerms,
entityIdsFound: keywordMatchedEntities.length,
limit: MAX_KEYWORD_CANDIDATES,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: Keyword search complete, ${keywordMatchedEntities.length} candidates.`,
{
conversationId: conversationId,
stage: "keyword_search",
candidatesFound: keywordMatchedEntities.length,
}
);
} catch (keywordError) {
this.logger.error("Error during keyword search on entity_keywords", {
error: keywordError.message,
stack: keywordError.stack,
searchTerms: searchTerms,
});
// Initialize to empty array on error
keywordMatchedEntities = [];
}
} else {
this.logger.debug(
"No search terms available, skipping keyword search",
{
originalQuery: query,
searchTerms: searchTerms,
}
);
keywordMatchedEntities = [];
}
// Step 6: Fetch full records for code entities from FTS and keyword search hits
const retrievedCodeEntities = {};
try {
this.logger.debug("Starting to fetch full code entity records", {
codeEntityHitsCount: codeEntityHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
});
// Collect all unique entity_id values
const uniqueEntityIds = new Set();
// Add entity IDs from FTS code entity hits
for (const hit of codeEntityHits) {
if (hit.entity_id) {
uniqueEntityIds.add(hit.entity_id);
}
}
        // Add entity IDs from keyword matches (these may be code entities or
        // documents); try fetching them as code entities first
for (const match of keywordMatchedEntities) {
if (match.entity_id) {
uniqueEntityIds.add(match.entity_id);
}
}
this.logger.debug(
"Collected unique entity IDs for code entity fetching",
{
uniqueEntityIdsCount: uniqueEntityIds.size,
fromCodeEntityHits: codeEntityHits.length,
fromKeywordMatches: keywordMatchedEntities.length,
}
);
// Fetch full code entity records for each unique ID
let successfullyFetchedCount = 0;
for (const entityId of uniqueEntityIds) {
try {
const entityRecord = await dbQueries.getCodeEntityById(
this.dbClient,
entityId
);
if (entityRecord) {
// Successfully fetched, store in the map
retrievedCodeEntities[entityId] = entityRecord;
successfullyFetchedCount++;
this.logger.debug("Successfully fetched code entity record", {
entityId: entityId,
entityName: entityRecord.name,
filePath: entityRecord.file_path,
entityType: entityRecord.entity_type,
language: entityRecord.language,
});
} else {
// Entity ID doesn't correspond to a code entity (could be a document ID from keywords)
this.logger.debug("Entity ID not found in code_entities table", {
entityId: entityId,
note: "This could be a project document ID from keyword search",
});
}
} catch (fetchError) {
this.logger.error("Error fetching code entity record", {
error: fetchError.message,
stack: fetchError.stack,
entityId: entityId,
});
// Skip this entity on error
}
}
this.logger.info("Code entity record fetching completed", {
uniqueEntityIdsProcessed: uniqueEntityIds.size,
codeEntitiesSuccessfullyFetched: successfullyFetchedCount,
fetchSuccessRate:
uniqueEntityIds.size > 0
? Math.round(
(successfullyFetchedCount / uniqueEntityIds.size) * 100
)
: 0,
});
} catch (error) {
this.logger.error("Error during code entity record fetching", {
error: error.message,
stack: error.stack,
codeEntityHitsCount: codeEntityHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
});
// Continue with empty retrievedCodeEntities map
}
// Step 7: Fetch full records for project documents from FTS and keyword search hits
const retrievedProjectDocuments = {};
try {
this.logger.debug("Starting to fetch full project document records", {
documentHitsCount: documentHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
});
// Collect all unique document_id values
const uniqueDocumentIds = new Set();
// Add document IDs from FTS document hits
for (const hit of documentHits) {
if (hit.document_id) {
uniqueDocumentIds.add(hit.document_id);
}
}
// Add entity IDs from keyword matches that weren't found as code entities
// These could potentially be project document IDs
for (const match of keywordMatchedEntities) {
if (match.entity_id && !retrievedCodeEntities[match.entity_id]) {
// Only try fetching as document if it wasn't found as a code entity
uniqueDocumentIds.add(match.entity_id);
}
}
this.logger.debug(
"Collected unique document IDs for project document fetching",
{
uniqueDocumentIdsCount: uniqueDocumentIds.size,
fromDocumentHits: documentHits.length,
fromKeywordMatches: keywordMatchedEntities.filter(
(match) => !retrievedCodeEntities[match.entity_id]
).length,
}
);
// Fetch full project document records for each unique ID
let successfullyFetchedCount = 0;
for (const documentId of uniqueDocumentIds) {
try {
const documentRecord = await dbQueries.getProjectDocumentById(
this.dbClient,
documentId
);
if (documentRecord) {
// Successfully fetched, store in the map
retrievedProjectDocuments[documentId] = documentRecord;
successfullyFetchedCount++;
this.logger.debug(
"Successfully fetched project document record",
{
documentId: documentId,
filePath: documentRecord.file_path,
fileType: documentRecord.file_type,
aiStatus: documentRecord.ai_status,
}
);
} else {
// Document ID not found in project_documents table
this.logger.debug(
"Document ID not found in project_documents table",
{
documentId: documentId,
note: "This ID may not correspond to a valid project document",
}
);
}
} catch (fetchError) {
this.logger.error("Error fetching project document record", {
error: fetchError.message,
stack: fetchError.stack,
documentId: documentId,
});
// Skip this document on error
}
}
this.logger.info("Project document record fetching completed", {
uniqueDocumentIdsProcessed: uniqueDocumentIds.size,
projectDocumentsSuccessfullyFetched: successfullyFetchedCount,
fetchSuccessRate:
uniqueDocumentIds.size > 0
? Math.round(
(successfullyFetchedCount / uniqueDocumentIds.size) * 100
)
: 0,
});
} catch (error) {
this.logger.error("Error during project document record fetching", {
error: error.message,
stack: error.stack,
documentHitsCount: documentHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
});
// Continue with empty retrievedProjectDocuments map
}
// Step 8: Construct candidate code entity snippets from FTS/Keyword results
/**
* @typedef {Object} CandidateSnippet
* @property {string} id - Unique ID of the source item (e.g., entity_id, document_id, message_id, commit_hash, composite IDs)
* @property {'code_entity_fts' | 'code_entity_keyword' | 'project_document_fts' | 'project_document_keyword' | 'conversation_message' | 'conversation_topic' | 'git_commit' | 'git_commit_file_change' | 'code_entity_related'} sourceType - The origin/source type of this snippet
* @property {string} contentSnippet - The actual text content to be potentially shown to the agent (AI summary, FTS highlight, raw content excerpt, etc.)
* @property {number} initialScore - Relevance score from its source retrieval (e.g., FTS rank-based score, keyword match score, conversation relevance, Git relevance)
* @property {string} [filePath] - File path if applicable (for code entities, project documents, Git file changes)
* @property {string} [entityName] - Name of the code entity, if applicable (function name, class name, etc.)
* @property {string} [entityType] - Type of entity or document (e.g., 'function_declaration', 'class_definition', 'markdown', 'javascript', etc.)
* @property {string} [language] - Programming language, if code entity (e.g., 'javascript', 'python', 'typescript')
* @property {string} [aiStatus] - AI processing status ('pending', 'completed', 'failed', etc.), if applicable
* @property {string} [timestamp] - Timestamp for time-sensitive items like conversation messages or Git commits (ISO string format)
* @property {Object} [metadata] - Source-specific metadata object containing additional context
* @property {string} [metadata.role] - Message role for conversation messages ('user', 'assistant', 'system')
* @property {string} [metadata.conversationId] - Conversation ID for conversation messages
* @property {string} [metadata.purposeTag] - Purpose tag for conversation topics
* @property {string[]} [metadata.keywords] - Parsed keywords array for conversation topics
* @property {string} [metadata.commitHash] - Git commit hash for Git-related snippets
* @property {string} [metadata.authorName] - Git commit author name
* @property {string} [metadata.commitDate] - Git commit date (ISO string)
* @property {string} [metadata.status] - Git file change status ('added', 'modified', 'deleted')
* @property {string} [metadata.commitMessage] - Full Git commit message for file changes
* @property {string} [metadata.commitAuthor] - Git commit author for file changes
* @property {Object} [relationshipContext] - For snippets from relationship expansion (future Story 4.7) - contains relationship type and context
* @property {number} [consolidatedScore] - Final calculated score after applying ranking factors (populated by ranking logic in Story 4.5)
*/
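      // Illustrative CandidateSnippet (all values are made up):
      //
      //   {
      //     id: "entity-123",
      //     sourceType: "code_entity_fts",
      //     contentSnippet: "Ranks retrieved snippets by relevance ...",
      //     initialScore: 0.86,
      //     filePath: "src/services/retrieval.service.js",
      //     entityName: "rankSnippets",
      //     entityType: "function_declaration",
      //     language: "javascript",
      //     aiStatus: "completed",
      //   }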
/** @type {CandidateSnippet[]} */
const candidateSnippets = [];
const processedEntityIds = new Set(); // Track processed entities to avoid duplicates
      // Helper function to calculate score from FTS rank (lower rank = higher score)
      // Declared outside the try block so both the code entity (Step 8) and
      // project document (Step 9) construction blocks can reach it; as a
      // block-scoped const inside this try it would be out of scope in Step 9
      const calculateScoreFromFtsRank = (rank) => {
        // FTS rank is lower for better matches; clamp to >= 0 so Math.log
        // never sees an argument below 1 (SQLite bm25 ranks can be
        // negative), then invert on a logarithmic scale to a 0-1 score
        const safeRank = Math.max(0, rank);
        return Math.max(0, 1 - Math.log(safeRank + 1) / 10);
      };
      // Helper function to calculate score from keyword matches, also shared
      // with Step 9
      const calculateScoreFromKeywordMatches = (totalWeight, matchCount) => {
        // Combine match count and total weight for scoring
        // Normalize to a 0-1 scale similar to FTS scores
        const weightScore = Math.min(totalWeight / 10, 1); // Cap at 1
        const countScore = Math.min(matchCount / 5, 1); // Cap at 1
        return (weightScore + countScore) / 2; // Average the two components
      };
      try {
        this.logger.debug(
          "Starting to construct candidate code entity snippets",
          {
            codeEntityHitsCount: codeEntityHits.length,
            keywordMatchedEntitiesCount: keywordMatchedEntities.length,
            retrievedCodeEntitiesCount: Object.keys(retrievedCodeEntities)
              .length,
          }
        );
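        // Worked scoring example (assumed inputs): an FTS hit with rank 3
        // scores 1 - ln(4) / 10 ≈ 0.861, while a keyword match with
        // total_weight 6 and match_count 2 scores (0.6 + 0.4) / 2 = 0.5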
// Helper function to determine content snippet
const determineContentSnippet = (entityRecord, ftsHighlight = null) => {
// Priority: AI summary → FTS highlight → truncated raw content
if (
entityRecord.ai_status === "completed" &&
entityRecord.summary &&
entityRecord.summary.trim()
) {
return entityRecord.summary.trim();
}
if (ftsHighlight && ftsHighlight.trim()) {
return ftsHighlight.trim();
}
if (entityRecord.raw_content && entityRecord.raw_content.trim()) {
const rawContent = entityRecord.raw_content.trim();
const maxLength = 300;
if (rawContent.length <= maxLength) {
return rawContent;
}
return rawContent.substring(0, maxLength) + "...";
}
return "No content available for this code entity.";
};
// Process FTS code entity hits
for (const hit of codeEntityHits) {
if (!hit.entity_id) continue;
const entityRecord = retrievedCodeEntities[hit.entity_id];
if (!entityRecord) {
this.logger.debug("Skipping FTS hit - entity record not found", {
entityId: hit.entity_id,
rank: hit.rank,
});
continue;
}
// Determine content snippet
const contentSnippet = determineContentSnippet(
entityRecord,
hit.highlight_snippet
);
// Calculate initial score from FTS rank
const initialScore = calculateScoreFromFtsRank(hit.rank);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "code_entity_fts",
id: entityRecord.entity_id,
filePath: entityRecord.file_path,
entityName: entityRecord.name || null,
entityType: entityRecord.entity_type,
language: entityRecord.language,
aiStatus: entityRecord.ai_status,
contentSnippet: contentSnippet,
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
processedEntityIds.add(hit.entity_id);
this.logger.debug("Added FTS code entity candidate snippet", {
entityId: entityRecord.entity_id,
entityName: entityRecord.name,
sourceType: "code_entity_fts",
initialScore: initialScore,
contentSnippetLength: contentSnippet.length,
});
}
// Process keyword matched entities that are code entities
for (const match of keywordMatchedEntities) {
if (!match.entity_id) continue;
const entityRecord = retrievedCodeEntities[match.entity_id];
if (!entityRecord) {
// This entity ID wasn't found as a code entity (likely a document ID)
continue;
}
// Check if we already processed this entity from FTS
if (processedEntityIds.has(match.entity_id)) {
this.logger.debug(
"Skipping keyword match - entity already processed from FTS",
{
entityId: match.entity_id,
entityName: entityRecord.name,
}
);
continue;
}
// Determine content snippet (no FTS highlight for keyword matches)
const contentSnippet = determineContentSnippet(entityRecord);
// Calculate initial score from keyword match data
const initialScore = calculateScoreFromKeywordMatches(
match.total_weight,
match.match_count
);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "code_entity_keyword",
id: entityRecord.entity_id,
filePath: entityRecord.file_path,
entityName: entityRecord.name || null,
entityType: entityRecord.entity_type,
language: entityRecord.language,
aiStatus: entityRecord.ai_status,
contentSnippet: contentSnippet,
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
processedEntityIds.add(match.entity_id);
this.logger.debug("Added keyword code entity candidate snippet", {
entityId: entityRecord.entity_id,
entityName: entityRecord.name,
sourceType: "code_entity_keyword",
initialScore: initialScore,
totalWeight: match.total_weight,
matchCount: match.match_count,
contentSnippetLength: contentSnippet.length,
});
}
// Filter candidate snippets to only code entities
const codeEntityCandidateSnippets = candidateSnippets.filter(
(snippet) =>
snippet.sourceType === "code_entity_fts" ||
snippet.sourceType === "code_entity_keyword"
);
this.logger.info(
"Code entity candidate snippets construction completed",
{
codeEntityFtsSnippets: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_fts"
).length,
codeEntityKeywordSnippets: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_keyword"
).length,
totalCodeEntitySnippets: codeEntityCandidateSnippets.length,
processedEntityIds: processedEntityIds.size,
}
);
} catch (error) {
this.logger.error(
"Error during code entity candidate snippets construction",
{
error: error.message,
stack: error.stack,
codeEntityHitsCount: codeEntityHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
}
);
// Continue with whatever snippets were successfully constructed
}
// Step 9: Construct candidate project document snippets from FTS/Keyword results
const processedDocumentIds = new Set(); // Track processed documents to avoid duplicates
try {
this.logger.debug(
"Starting to construct candidate project document snippets",
{
documentHitsCount: documentHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
retrievedProjectDocumentsCount: Object.keys(
retrievedProjectDocuments
).length,
}
);
// Helper function to determine content snippet for documents
const determineDocumentContentSnippet = (
documentRecord,
ftsHighlight = null
) => {
// Priority: AI summary → FTS highlight → truncated raw content
if (
documentRecord.ai_status === "completed" &&
documentRecord.summary &&
documentRecord.summary.trim()
) {
return documentRecord.summary.trim();
}
if (ftsHighlight && ftsHighlight.trim()) {
return ftsHighlight.trim();
}
if (documentRecord.raw_content && documentRecord.raw_content.trim()) {
const rawContent = documentRecord.raw_content.trim();
const maxLength = 300;
if (rawContent.length <= maxLength) {
return rawContent;
}
return rawContent.substring(0, maxLength) + "...";
}
return "No content available for this project document.";
};
// Process FTS document hits
for (const hit of documentHits) {
if (!hit.document_id) continue;
const documentRecord = retrievedProjectDocuments[hit.document_id];
if (!documentRecord) {
this.logger.debug(
"Skipping FTS document hit - document record not found",
{
documentId: hit.document_id,
rank: hit.rank,
}
);
continue;
}
// Determine content snippet
const contentSnippet = determineDocumentContentSnippet(
documentRecord,
hit.highlight_snippet
);
// Calculate initial score from FTS rank (reuse the same function as code entities)
const initialScore = calculateScoreFromFtsRank(hit.rank);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "project_document_fts",
id: documentRecord.document_id,
filePath: documentRecord.file_path,
entityType: documentRecord.file_type, // Using entityType for consistency
aiStatus: documentRecord.ai_status,
contentSnippet: contentSnippet,
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
processedDocumentIds.add(hit.document_id);
this.logger.debug("Added FTS project document candidate snippet", {
documentId: documentRecord.document_id,
filePath: documentRecord.file_path,
sourceType: "project_document_fts",
initialScore: initialScore,
contentSnippetLength: contentSnippet.length,
});
}
// Process keyword matched entities that are project documents
for (const match of keywordMatchedEntities) {
if (!match.entity_id) continue;
const documentRecord = retrievedProjectDocuments[match.entity_id];
if (!documentRecord) {
// This entity ID wasn't found as a project document (likely a code entity ID)
continue;
}
// Check if we already processed this document from FTS
if (processedDocumentIds.has(match.entity_id)) {
this.logger.debug(
"Skipping keyword match - document already processed from FTS",
{
documentId: match.entity_id,
filePath: documentRecord.file_path,
}
);
continue;
}
// Determine content snippet (no FTS highlight for keyword matches)
const contentSnippet =
determineDocumentContentSnippet(documentRecord);
// Calculate initial score from keyword match data (reuse the same function as code entities)
const initialScore = calculateScoreFromKeywordMatches(
match.total_weight,
match.match_count
);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "project_document_keyword",
id: documentRecord.document_id,
filePath: documentRecord.file_path,
entityType: documentRecord.file_type, // Using entityType for consistency
aiStatus: documentRecord.ai_status,
contentSnippet: contentSnippet,
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
processedDocumentIds.add(match.entity_id);
this.logger.debug(
"Added keyword project document candidate snippet",
{
documentId: documentRecord.document_id,
filePath: documentRecord.file_path,
sourceType: "project_document_keyword",
initialScore: initialScore,
totalWeight: match.total_weight,
matchCount: match.match_count,
contentSnippetLength: contentSnippet.length,
}
);
}
// Filter candidate snippets to only project documents
const documentCandidateSnippets = candidateSnippets.filter(
(snippet) =>
snippet.sourceType === "project_document_fts" ||
snippet.sourceType === "project_document_keyword"
);
this.logger.info(
"Project document candidate snippets construction completed",
{
projectDocumentFtsSnippets: candidateSnippets.filter(
(s) => s.sourceType === "project_document_fts"
).length,
projectDocumentKeywordSnippets: candidateSnippets.filter(
(s) => s.sourceType === "project_document_keyword"
).length,
totalProjectDocumentSnippets: documentCandidateSnippets.length,
processedDocumentIds: processedDocumentIds.size,
}
);
} catch (error) {
this.logger.error(
"Error during project document candidate snippets construction",
{
error: error.message,
stack: error.stack,
documentHitsCount: documentHits.length,
keywordMatchedEntitiesCount: keywordMatchedEntities.length,
}
);
// Continue with whatever snippets were successfully constructed
}
// Step 10: Search for relevant conversation history
let matchedMessages = [];
try {
this.logger.debug("Starting conversation history search", {
conversationId: conversationId,
searchTermsCount: searchTerms.length,
searchTerms: searchTerms,
limit: MAX_CONVO_HISTORY_CANDIDATES,
});
if (searchTerms && searchTerms.length > 0) {
matchedMessages = await dbQueries.searchConversationHistoryByTerms(
this.dbClient,
conversationId,
searchTerms,
MAX_CONVO_HISTORY_CANDIDATES
);
this.logger.debug("Conversation history search completed", {
conversationId: conversationId,
messagesFound: matchedMessages.length,
searchTermsCount: searchTerms.length,
limit: MAX_CONVO_HISTORY_CANDIDATES,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: Conversation history search complete, ${matchedMessages.length} candidates.`,
{
conversationId: conversationId,
stage: "conversation_history",
candidatesFound: matchedMessages.length,
}
);
} else {
this.logger.debug(
"No search terms available for conversation history search",
{
conversationId: conversationId,
}
);
}
} catch (error) {
this.logger.error("Error searching conversation history", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
searchTermsCount: searchTerms?.length || 0,
});
// Continue with empty matched messages array
matchedMessages = [];
}
// Step 11: Search for relevant conversation topics
let matchedTopics = [];
try {
this.logger.debug("Starting conversation topics search", {
conversationId: conversationId,
searchTermsCount: searchTerms.length,
searchTerms: searchTerms,
limit: MAX_CONVO_TOPIC_CANDIDATES,
});
if (searchTerms && searchTerms.length > 0) {
matchedTopics = await dbQueries.searchConversationTopicsByTerms(
this.dbClient,
searchTerms,
MAX_CONVO_TOPIC_CANDIDATES
);
this.logger.debug("Conversation topics search completed", {
conversationId: conversationId,
topicsFound: matchedTopics.length,
searchTermsCount: searchTerms.length,
limit: MAX_CONVO_TOPIC_CANDIDATES,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: Conversation topics search complete, ${matchedTopics.length} candidates.`,
{
conversationId: conversationId,
stage: "conversation_topics",
candidatesFound: matchedTopics.length,
}
);
} else {
this.logger.debug(
"No search terms available for conversation topics search",
{
conversationId: conversationId,
}
);
}
} catch (error) {
this.logger.error("Error searching conversation topics", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
searchTermsCount: searchTerms?.length || 0,
});
// Continue with empty matched topics array
matchedTopics = [];
}
// Step 12: Construct candidate snippets from matched conversation history messages
try {
this.logger.debug(
"Starting to construct conversation message snippets",
{
conversationId: conversationId,
matchedMessagesCount: matchedMessages.length,
}
);
// Helper function to calculate score for conversation messages
const calculateScoreForMessage = (
message,
queryTerms,
currentConversationId
) => {
let score = 0;
// Base score for message from current conversation (higher priority)
if (message.conversation_id === currentConversationId) {
score += 0.5; // Flat +0.5 boost for messages in the current conversation
}
// Recency score (newer messages get higher scores)
// Convert timestamp to Date and calculate days ago
try {
const messageDate = new Date(message.timestamp);
const now = new Date();
const daysAgo = (now - messageDate) / (1000 * 60 * 60 * 24);
// Guard against invalid timestamps: new Date() does not throw on bad
// input, it yields NaN, which would otherwise propagate into the score
if (!Number.isNaN(daysAgo)) {
// Recency score: newer messages score higher, decaying exponentially
// with a 7-day time constant (maximum 0.3 points for same-day messages)
const recencyScore = Math.max(0, 0.3 * Math.exp(-daysAgo / 7));
score += recencyScore;
}
} catch (dateError) {
this.logger.debug("Error parsing message timestamp for scoring", {
messageId: message.message_id,
timestamp: message.timestamp,
error: dateError.message,
});
// Continue without recency score if timestamp parsing fails
}
// Relevance score based on query terms found in message content
if (queryTerms && queryTerms.length > 0 && message.content) {
const contentLower = message.content.toLowerCase();
let matchCount = 0;
for (const term of queryTerms) {
if (contentLower.includes(term.toLowerCase())) {
matchCount++;
}
}
// Relevance score: up to 0.2 points based on term matches
const relevanceScore = Math.min(
0.2,
(matchCount / queryTerms.length) * 0.2
);
score += relevanceScore;
}
// Ensure score is between 0 and 1
return Math.min(1, Math.max(0, score));
};
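// Worked example (illustrative): a 3-day-old message from the current
// conversation matching 2 of 4 query terms scores
// 0.5 + 0.3 * e^(-3 / 7) + (2 / 4) * 0.2 ≈ 0.5 + 0.195 + 0.1 = 0.795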
// Process each matched message into a candidate snippet
for (const message of matchedMessages) {
try {
// Calculate initial score for this message
const initialScore = calculateScoreForMessage(
message,
searchTerms,
conversationId
);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "conversation_message",
id: message.message_id,
contentSnippet: message.content, // Full message content as snippet
metadata: {
role: message.role,
timestamp: message.timestamp, // ISO string
conversationId: message.conversation_id,
},
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
this.logger.debug("Added conversation message candidate snippet", {
messageId: message.message_id,
role: message.role,
conversationId: message.conversation_id,
sourceType: "conversation_message",
initialScore: initialScore,
contentSnippetLength: message.content?.length || 0,
});
} catch (snippetError) {
this.logger.error(
"Error constructing conversation message snippet",
{
error: snippetError.message,
stack: snippetError.stack,
messageId: message.message_id,
conversationId: conversationId,
}
);
// Skip this message on error
}
}
// Filter candidate snippets to only conversation messages for logging
const conversationMessageSnippets = candidateSnippets.filter(
(snippet) => snippet.sourceType === "conversation_message"
);
this.logger.info(
"Conversation message snippets construction completed",
{
conversationId: conversationId,
messagesProcessed: matchedMessages.length,
conversationMessageSnippets: conversationMessageSnippets.length,
}
);
} catch (error) {
this.logger.error(
"Error during conversation message snippets construction",
{
error: error.message,
stack: error.stack,
conversationId: conversationId,
matchedMessagesCount: matchedMessages.length,
}
);
// Continue with whatever snippets were successfully constructed
}
// Step 13: Construct candidate snippets from matched conversation topics
try {
this.logger.debug("Starting to construct conversation topic snippets", {
conversationId: conversationId,
matchedTopicsCount: matchedTopics.length,
});
// Helper function to calculate score for conversation topics
const calculateScoreForTopic = (topic, queryTerms) => {
let score = 0;
// Relevance score based on query terms found in topic summary
if (queryTerms && queryTerms.length > 0 && topic.summary) {
const summaryLower = topic.summary.toLowerCase();
let summaryMatchCount = 0;
for (const term of queryTerms) {
if (summaryLower.includes(term.toLowerCase())) {
summaryMatchCount++;
}
}
// Summary matches get higher weight (up to 0.6 points)
const summaryScore = Math.min(
0.6,
(summaryMatchCount / queryTerms.length) * 0.6
);
score += summaryScore;
}
// Additional relevance score based on query terms found in keywords
if (queryTerms && queryTerms.length > 0 && topic.keywords) {
try {
const parsedKeywords = JSON.parse(topic.keywords || "[]");
if (Array.isArray(parsedKeywords)) {
let keywordMatchCount = 0;
for (const keyword of parsedKeywords) {
const keywordLower = keyword.toLowerCase();
for (const term of queryTerms) {
if (keywordLower.includes(term.toLowerCase())) {
keywordMatchCount++;
break; // Count each keyword at most once, even if multiple terms match it
}
}
}
// Keyword matches get moderate weight (up to 0.4 points)
const keywordScore = Math.min(
0.4,
(keywordMatchCount / Math.max(parsedKeywords.length, 1)) * 0.4
);
score += keywordScore;
}
} catch (keywordParseError) {
this.logger.debug("Error parsing topic keywords for scoring", {
topicId: topic.topic_id,
keywords: topic.keywords,
error: keywordParseError.message,
});
// Continue without keyword scoring if parsing fails
}
}
// Ensure score is between 0 and 1
return Math.min(1, Math.max(0, score));
};
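// Worked example (illustrative): a topic whose summary matches 2 of 4 query
// terms, and 2 of whose 4 keywords match a query term, scores
// (2 / 4) * 0.6 + (2 / 4) * 0.4 = 0.3 + 0.2 = 0.5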
// Process each matched topic into a candidate snippet
for (const topic of matchedTopics) {
try {
// Parse keywords safely
let parsedKeywords = [];
try {
parsedKeywords = JSON.parse(topic.keywords || "[]");
if (!Array.isArray(parsedKeywords)) {
parsedKeywords = [];
}
} catch (keywordParseError) {
this.logger.debug("Error parsing topic keywords", {
topicId: topic.topic_id,
keywords: topic.keywords,
error: keywordParseError.message,
});
parsedKeywords = [];
}
// Calculate initial score for this topic
const initialScore = calculateScoreForTopic(topic, searchTerms);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "conversation_topic",
id: topic.topic_id,
contentSnippet: topic.summary, // Topic summary as the snippet
metadata: {
purposeTag: topic.purpose_tag,
keywords: parsedKeywords, // Parse keywords JSON string from DB
},
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
this.logger.debug("Added conversation topic candidate snippet", {
topicId: topic.topic_id,
purposeTag: topic.purpose_tag,
sourceType: "conversation_topic",
initialScore: initialScore,
contentSnippetLength: topic.summary?.length || 0,
keywordsCount: parsedKeywords.length,
});
} catch (snippetError) {
this.logger.error("Error constructing conversation topic snippet", {
error: snippetError.message,
stack: snippetError.stack,
topicId: topic.topic_id,
conversationId: conversationId,
});
// Skip this topic on error
}
}
// Filter candidate snippets to only conversation topics for logging
const conversationTopicSnippets = candidateSnippets.filter(
(snippet) => snippet.sourceType === "conversation_topic"
);
this.logger.info("Conversation topic snippets construction completed", {
conversationId: conversationId,
topicsProcessed: matchedTopics.length,
conversationTopicSnippets: conversationTopicSnippets.length,
});
} catch (error) {
this.logger.error(
"Error during conversation topic snippets construction",
{
error: error.message,
stack: error.stack,
conversationId: conversationId,
matchedTopicsCount: matchedTopics.length,
}
);
// Continue with whatever snippets were successfully constructed
}
// Step 14: Search for relevant Git commits
let matchedGitCommits = [];
try {
this.logger.debug("Starting Git commit search", {
conversationId: conversationId,
searchTermsCount: searchTerms.length,
searchTerms: searchTerms,
limit: MAX_GIT_COMMIT_CANDIDATES,
});
// Run the Git history heuristic for this query (currently informational only; see below)
const isGitRelevantQuery = this._isGitHistoryQuery(query, searchTerms);
this.logger.debug("Git history relevance check completed", {
query: query,
isGitRelevantQuery: isGitRelevantQuery,
searchTermsCount: searchTerms.length,
});
// Perform the Git commit search whenever search terms are available.
// The Git relevance heuristic currently affects only the logging below; it is
// retained so Git searches can later be gated or prioritized for targeted queries.
if (searchTerms && searchTerms.length > 0) {
if (isGitRelevantQuery) {
this.logger.debug(
"Query identified as Git-relevant, performing Git commit search",
{
searchTermsCount: searchTerms.length,
limit: MAX_GIT_COMMIT_CANDIDATES,
}
);
} else {
this.logger.debug(
"Query not specifically Git-relevant, but performing Git commit search with search terms",
{
searchTermsCount: searchTerms.length,
limit: MAX_GIT_COMMIT_CANDIDATES,
}
);
}
matchedGitCommits = await dbQueries.searchGitCommitsByTerms(
this.dbClient,
searchTerms,
MAX_GIT_COMMIT_CANDIDATES
);
this.logger.debug("Git commit search completed", {
conversationId: conversationId,
commitsFound: matchedGitCommits.length,
searchTermsCount: searchTerms.length,
limit: MAX_GIT_COMMIT_CANDIDATES,
isGitRelevantQuery: isGitRelevantQuery,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: Git commits search complete, ${matchedGitCommits.length} candidates.`,
{
conversationId: conversationId,
stage: "git_commits",
candidatesFound: matchedGitCommits.length,
}
);
} else {
this.logger.debug("No search terms available for Git commit search", {
conversationId: conversationId,
isGitRelevantQuery: isGitRelevantQuery,
});
}
} catch (error) {
this.logger.error("Error searching Git commits", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
searchTermsCount: searchTerms?.length || 0,
query: query,
});
// Continue with empty matched commits array
matchedGitCommits = [];
}
// Step 14.5: Construct candidate snippets from matched Git commits
try {
this.logger.debug(
"Starting to construct Git commit candidate snippets",
{
conversationId: conversationId,
matchedGitCommitsCount: matchedGitCommits.length,
}
);
// Helper function to calculate score for Git commits
const calculateScoreForGitCommit = (commit, queryTerms) => {
let score = 0;
// Base relevance score based on query terms found in commit message
if (queryTerms && queryTerms.length > 0 && commit.message) {
const messageLower = commit.message.toLowerCase();
let matchCount = 0;
for (const term of queryTerms) {
if (messageLower.includes(term.toLowerCase())) {
matchCount++;
}
}
// Message matches get moderate weight (up to 0.5 points)
const messageScore = Math.min(
0.5,
(matchCount / queryTerms.length) * 0.5
);
score += messageScore;
}
// Author relevance score based on query terms found in author name
if (queryTerms && queryTerms.length > 0 && commit.author_name) {
const authorLower = commit.author_name.toLowerCase();
let authorMatchCount = 0;
for (const term of queryTerms) {
if (authorLower.includes(term.toLowerCase())) {
authorMatchCount++;
}
}
// Author matches get lower weight (up to 0.2 points)
const authorScore = Math.min(
0.2,
(authorMatchCount / queryTerms.length) * 0.2
);
score += authorScore;
}
// Recency score (newer commits get higher scores)
try {
const commitDate = new Date(commit.commit_date);
const now = new Date();
const daysAgo = (now - commitDate) / (1000 * 60 * 60 * 24);
// Guard against invalid dates: new Date() yields NaN rather than throwing
if (!Number.isNaN(daysAgo)) {
// Recency score: newer commits score higher, decaying exponentially
// with a 30-day time constant (maximum 0.3 points for same-day commits)
const recencyScore = Math.max(0, 0.3 * Math.exp(-daysAgo / 30));
score += recencyScore;
}
} catch (dateError) {
this.logger.debug("Error parsing commit date for scoring", {
commitHash: commit.commit_hash,
commitDate: commit.commit_date,
error: dateError.message,
});
// Continue without recency score if date parsing fails
}
// Ensure score is between 0 and 1
return Math.min(1, Math.max(0, score));
};
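// Worked example (illustrative): a 15-day-old commit whose message matches
// 2 of 4 query terms (no author match) scores
// (2 / 4) * 0.5 + 0.3 * e^(-15 / 30) ≈ 0.25 + 0.182 = 0.432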
// Process each matched Git commit into a candidate snippet
for (const commit of matchedGitCommits) {
try {
// Calculate initial score for this commit
const initialScore = calculateScoreForGitCommit(
commit,
searchTerms
);
// Create candidate snippet object
const candidateSnippet = {
sourceType: "git_commit",
id: commit.commit_hash,
contentSnippet: commit.message, // Commit message as the snippet
metadata: {
commitHash: commit.commit_hash,
authorName: commit.author_name,
commitDate: commit.commit_date, // ISO string or Date object
// Note: List of changed files could be added here if available from the query
},
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
this.logger.debug("Added Git commit candidate snippet", {
commitHash: commit.commit_hash,
authorName: commit.author_name,
sourceType: "git_commit",
initialScore: initialScore,
contentSnippetLength: commit.message?.length || 0,
});
} catch (snippetError) {
this.logger.error("Error constructing Git commit snippet", {
error: snippetError.message,
stack: snippetError.stack,
commitHash: commit.commit_hash,
conversationId: conversationId,
});
// Skip this commit on error
}
}
// Filter candidate snippets to only Git commits for logging
const gitCommitSnippets = candidateSnippets.filter(
(snippet) => snippet.sourceType === "git_commit"
);
this.logger.info("Git commit snippets construction completed", {
conversationId: conversationId,
commitsProcessed: matchedGitCommits.length,
gitCommitSnippets: gitCommitSnippets.length,
});
} catch (error) {
this.logger.error("Error during Git commit snippets construction", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
matchedGitCommitsCount: matchedGitCommits.length,
});
// Continue with whatever snippets were successfully constructed
}
// Step 15: Search for relevant Git commit file changes
let matchedCommitFiles = [];
try {
this.logger.debug("Starting Git commit file change search", {
conversationId: conversationId,
searchTermsCount: searchTerms.length,
searchTerms: searchTerms,
limit: MAX_GIT_FILE_CHANGE_CANDIDATES,
});
// Use the same Git history heuristic as for commits
const isGitRelevantQuery = this._isGitHistoryQuery(query, searchTerms);
this.logger.debug(
"Git history relevance check for file changes completed",
{
query: query,
isGitRelevantQuery: isGitRelevantQuery,
searchTermsCount: searchTerms.length,
}
);
// Extract terms that are likely file paths or use all search terms
// File path terms are those containing "/" or ending with common file extensions
let pathSearchTerms = [];
if (searchTerms && searchTerms.length > 0) {
// Filter for path-like terms (contains "/" or ends with file extensions)
const fileExtensions = [
".js",
".ts",
".jsx",
".tsx",
".py",
".java",
".cpp",
".c",
".h",
".cs",
".php",
".rb",
".go",
".rs",
".swift",
".kt",
".scala",
".html",
".css",
".scss",
".sass",
".json",
".xml",
".yaml",
".yml",
".md",
".txt",
".sql",
];
pathSearchTerms = searchTerms.filter((term) => {
// Check if term contains forward slash (path separator)
if (term.includes("/")) {
return true;
}
// Check if term ends with common file extension
return fileExtensions.some((ext) =>
term.toLowerCase().endsWith(ext)
);
});
// If no path-like terms found, use all search terms
if (pathSearchTerms.length === 0) {
pathSearchTerms = searchTerms;
this.logger.debug(
"No path-like terms found, using all search terms for file change search",
{
searchTermsCount: searchTerms.length,
}
);
} else {
this.logger.debug("Found path-like terms for file change search", {
pathSearchTerms: pathSearchTerms,
pathTermsCount: pathSearchTerms.length,
totalSearchTerms: searchTerms.length,
});
}
// Perform Git commit file change search
if (isGitRelevantQuery) {
this.logger.debug(
"Query identified as Git-relevant, performing Git commit file change search",
{
pathSearchTermsCount: pathSearchTerms.length,
limit: MAX_GIT_FILE_CHANGE_CANDIDATES,
}
);
} else {
this.logger.debug(
"Query not specifically Git-relevant, but performing Git commit file change search with path terms",
{
pathSearchTermsCount: pathSearchTerms.length,
limit: MAX_GIT_FILE_CHANGE_CANDIDATES,
}
);
}
matchedCommitFiles = await dbQueries.searchGitCommitFilesByTerms(
this.dbClient,
pathSearchTerms,
MAX_GIT_FILE_CHANGE_CANDIDATES
);
this.logger.debug("Git commit file change search completed", {
conversationId: conversationId,
fileChangesFound: matchedCommitFiles.length,
searchTermsCount: searchTerms.length,
pathSearchTermsCount: pathSearchTerms.length,
limit: MAX_GIT_FILE_CHANGE_CANDIDATES,
isGitRelevantQuery: isGitRelevantQuery,
});
// Task 241: INFO level logging for stage completion
this.logger.info(
`Retrieval: Git commit files search complete, ${matchedCommitFiles.length} candidates.`,
{
conversationId: conversationId,
stage: "git_commit_files",
candidatesFound: matchedCommitFiles.length,
}
);
} else {
this.logger.debug(
"No search terms available for Git commit file change search",
{
conversationId: conversationId,
isGitRelevantQuery: isGitRelevantQuery,
}
);
}
} catch (error) {
this.logger.error("Error searching Git commit file changes", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
searchTermsCount: searchTerms?.length || 0,
query: query,
});
// Continue with empty matched commit files array
matchedCommitFiles = [];
}
// Step 15.5: Construct candidate snippets from matched Git commit file changes
try {
this.logger.debug(
"Starting to construct Git commit file change candidate snippets",
{
conversationId: conversationId,
matchedCommitFilesCount: matchedCommitFiles.length,
}
);
// Helper function to calculate score for Git commit file changes
const calculateScoreForFileChange = (change, queryTerms) => {
let score = 0;
// File path relevance score based on query terms found in file_path
if (queryTerms && queryTerms.length > 0 && change.file_path) {
const filePathLower = change.file_path.toLowerCase();
let pathMatchCount = 0;
for (const term of queryTerms) {
if (filePathLower.includes(term.toLowerCase())) {
pathMatchCount++;
}
}
// File path matches get high weight (up to 0.6 points)
const pathScore = Math.min(
0.6,
(pathMatchCount / queryTerms.length) * 0.6
);
score += pathScore;
}
// Commit message relevance score based on query terms found in commit message
if (queryTerms && queryTerms.length > 0 && change.commit_message) {
const messageLower = change.commit_message.toLowerCase();
let messageMatchCount = 0;
for (const term of queryTerms) {
if (messageLower.includes(term.toLowerCase())) {
messageMatchCount++;
}
}
// Message matches get moderate weight (up to 0.3 points)
const messageScore = Math.min(
0.3,
(messageMatchCount / queryTerms.length) * 0.3
);
score += messageScore;
}
// Change status bonus (some statuses might be more relevant)
if (change.status) {
const statusLower = change.status.toLowerCase();
if (statusLower === "modified" || statusLower === "added") {
score += 0.05; // Small bonus for modified/added files
} else if (statusLower === "deleted") {
score += 0.02; // Smaller bonus for deleted files
}
}
// Recency score based on commit date (newer commits get higher scores)
try {
const commitDate = new Date(change.commit_date);
const now = new Date();
const daysAgo = (now - commitDate) / (1000 * 60 * 60 * 24);
// Guard against invalid dates: new Date() yields NaN rather than throwing
if (!Number.isNaN(daysAgo)) {
// Recency score: newer commits score higher, decaying exponentially
// with a 30-day time constant (maximum 0.2 points for same-day commits)
const recencyScore = Math.max(0, 0.2 * Math.exp(-daysAgo / 30));
score += recencyScore;
}
} catch (dateError) {
this.logger.debug(
"Error parsing commit date for file change scoring",
{
commitHash: change.commit_hash,
filePath: change.file_path,
commitDate: change.commit_date,
error: dateError.message,
}
);
// Continue without recency score if date parsing fails
}
// Ensure score is between 0 and 1
return Math.min(1, Math.max(0, score));
};
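// Worked example (illustrative): a "modified" file whose path matches 1 of 2
// query terms in a 30-day-old commit (no message match) scores
// (1 / 2) * 0.6 + 0.05 + 0.2 * e^(-30 / 30) ≈ 0.3 + 0.05 + 0.074 = 0.424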
// Process each matched Git commit file change into a candidate snippet
for (const change of matchedCommitFiles) {
try {
// Calculate initial score for this file change
const initialScore = calculateScoreForFileChange(
change,
searchTerms
);
// Create a unique ID for this file change (composite of commit hash and file path)
const uniqueId = `${change.commit_hash}_${change.file_path}`;
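// e.g. "a1b2c3d_src/services/retrieval.service.js" (hash and path illustrative)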
// Create informative content snippet
const truncatedMessage =
change.commit_message && change.commit_message.length > 100
? change.commit_message.substring(0, 100) + "..."
: change.commit_message || "No commit message";
const contentSnippet = `File '${change.file_path}' was ${change.status}. Commit: ${truncatedMessage}`;
// Create candidate snippet object
const candidateSnippet = {
sourceType: "git_commit_file_change",
id: uniqueId,
contentSnippet: contentSnippet,
metadata: {
filePath: change.file_path,
status: change.status,
commitHash: change.commit_hash,
commitMessage: change.commit_message,
commitAuthor: change.commit_author,
commitDate: change.commit_date,
},
initialScore: initialScore,
};
candidateSnippets.push(candidateSnippet);
this.logger.debug(
"Added Git commit file change candidate snippet",
{
filePath: change.file_path,
status: change.status,
commitHash: change.commit_hash,
sourceType: "git_commit_file_change",
initialScore: initialScore,
contentSnippetLength: contentSnippet.length,
}
);
} catch (snippetError) {
this.logger.error(
"Error constructing Git commit file change snippet",
{
error: snippetError.message,
stack: snippetError.stack,
commitHash: change.commit_hash,
filePath: change.file_path,
conversationId: conversationId,
}
);
// Skip this file change on error
}
}
// Filter candidate snippets to only Git commit file changes for logging
const gitCommitFileChangeSnippets = candidateSnippets.filter(
(snippet) => snippet.sourceType === "git_commit_file_change"
);
this.logger.info(
"Git commit file change snippets construction completed",
{
conversationId: conversationId,
commitFilesProcessed: matchedCommitFiles.length,
gitCommitFileChangeSnippets: gitCommitFileChangeSnippets.length,
}
);
} catch (error) {
this.logger.error(
"Error during Git commit file change snippets construction",
{
error: error.message,
stack: error.stack,
conversationId: conversationId,
matchedCommitFilesCount: matchedCommitFiles.length,
}
);
// Continue with whatever snippets were successfully constructed
}
// Step 15a: Identify seed entities for relationship expansion (Task 235)
const seedEntities = [];
try {
this.logger.debug(
"Starting seed entity identification for relationship expansion",
{
conversationId: conversationId,
totalCandidateSnippets: candidateSnippets.length,
}
);
// Import the maximum seed entity count from configuration
// (named export, matching the static import style used elsewhere in this file)
const { MAX_SEED_ENTITIES_FOR_EXPANSION: maxSeedEntities } = await import(
"../config.js"
);
// Filter candidate snippets to get only code_entity types
const codeEntitySnippets = candidateSnippets.filter(
(snippet) =>
snippet.sourceType === "code_entity_fts" ||
snippet.sourceType === "code_entity_keyword"
);
this.logger.debug("Filtered candidate snippets to code entities", {
totalCandidateSnippets: candidateSnippets.length,
codeEntitySnippets: codeEntitySnippets.length,
});
if (codeEntitySnippets.length === 0) {
this.logger.debug(
"No code entity snippets found for seed entity identification",
{
conversationId: conversationId,
}
);
} else {
// Sort code entity snippets by their initialScore in descending order
const sortedCodeEntitySnippets = [...codeEntitySnippets].sort(
(a, b) => (b.initialScore || 0) - (a.initialScore || 0)
);
// Select the top N snippets as seed entities
const selectedSeedSnippets = sortedCodeEntitySnippets.slice(
0,
maxSeedEntities
);
// Extract seed entity information
for (const snippet of selectedSeedSnippets) {
const seedEntity = {
id: snippet.id,
seedEntityScore: snippet.initialScore || 0,
entityName: snippet.entityName,
sourceType: snippet.sourceType,
filePath: snippet.filePath,
entityType: snippet.entityType,
};
seedEntities.push(seedEntity);
this.logger.debug(
"Selected seed entity for relationship expansion",
{
entityId: seedEntity.id,
entityName: seedEntity.entityName,
seedEntityScore: seedEntity.seedEntityScore,
sourceType: seedEntity.sourceType,
filePath: seedEntity.filePath,
}
);
}
this.logger.info("Seed entity identification completed", {
conversationId: conversationId,
totalCodeEntitySnippets: codeEntitySnippets.length,
maxSeedEntities: maxSeedEntities,
seedEntitiesIdentified: seedEntities.length,
seedEntityIds: seedEntities.map((se) => se.id),
});
}
} catch (error) {
this.logger.error("Error during seed entity identification", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
});
// Continue without seed entities if there's an error
}
// Step 15b: Perform relationship expansion for seed entities (Task 236) and merge results (Task 237)
let relationshipDerivedSnippets = []; // Collect all relationship-derived snippets
try {
this.logger.debug("Starting relationship expansion for seed entities", {
conversationId: conversationId,
seedEntitiesCount: seedEntities.length,
hasRelationshipManager: !!this.relationshipManager,
});
if (seedEntities.length > 0 && this.relationshipManager) {
// Process each seed entity for relationship expansion
for (let i = 0; i < seedEntities.length; i++) {
const seedEntity = seedEntities[i];
try {
this.logger.debug(
`Processing seed entity ${i + 1}/${
seedEntities.length
} for relationship expansion`,
{
seedEntityId: seedEntity.id,
seedEntityName: seedEntity.entityName,
seedEntityScore: seedEntity.seedEntityScore,
conversationId: conversationId,
}
);
// Call RelationshipManager to get related entities
const relatedSnippets =
await this.relationshipManager.getRelatedEntities(
seedEntity.id,
searchTerms, // Pass the search terms for query relevance scoring
seedEntity.seedEntityScore // Seed entities store their score as seedEntityScore (set in Step 15a)
);
this.logger.debug(
"Relationship expansion completed for seed entity",
{
seedEntityId: seedEntity.id,
seedEntityName: seedEntity.entityName,
relatedSnippetsFound: relatedSnippets.length,
conversationId: conversationId,
}
);
// Collect relationship-derived snippets for later merging
if (relatedSnippets.length > 0) {
relationshipDerivedSnippets.push(...relatedSnippets);
this.logger.debug(
"Collected relationship-derived snippets for merging",
{
seedEntityId: seedEntity.id,
addedSnippetsCount: relatedSnippets.length,
totalRelationshipSnippetsCollected:
relationshipDerivedSnippets.length,
conversationId: conversationId,
}
);
}
} catch (seedEntityError) {
this.logger.error(
"Error processing seed entity for relationship expansion",
{
error: seedEntityError.message,
stack: seedEntityError.stack,
seedEntityId: seedEntity.id,
seedEntityName: seedEntity.entityName,
conversationId: conversationId,
}
);
// Continue with the next seed entity
}
}
this.logger.info(
"Relationship expansion completed for all seed entities",
{
conversationId: conversationId,
seedEntitiesProcessed: seedEntities.length,
relationshipDerivedSnippetsCollected:
relationshipDerivedSnippets.length,
relationshipExpansionComplete: true,
}
);
} else {
this.logger.debug("Skipping relationship expansion", {
conversationId: conversationId,
reason:
seedEntities.length === 0
? "No seed entities identified"
: "RelationshipManager not available",
seedEntitiesCount: seedEntities.length,
hasRelationshipManager: !!this.relationshipManager,
});
}
} catch (error) {
this.logger.error("Error during relationship expansion", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
seedEntitiesCount: seedEntities.length,
});
// Continue with empty relationship-derived snippets
relationshipDerivedSnippets = [];
}
// Task 237: Merge relationship-derived snippets into main candidate list with duplicate handling
try {
this.logger.debug("Starting merge of relationship-derived snippets", {
conversationId: conversationId,
existingCandidateSnippets: candidateSnippets.length,
relationshipDerivedSnippets: relationshipDerivedSnippets.length,
});
if (relationshipDerivedSnippets.length > 0) {
// Use Map to handle duplicates based on entity ID for code entities
const candidateSnippetsMap = new Map();
// First, add all existing candidate snippets to the map
for (const snippet of candidateSnippets) {
let mapKey;
// For code entities, use entity ID as the key to detect duplicates
if (
(snippet.sourceType === "code_entity_fts" ||
snippet.sourceType === "code_entity_keyword") &&
snippet.id
) {
mapKey = `entity_${snippet.id}`;
} else {
// For other types (documents, conversations, git), use their specific IDs
mapKey = `${snippet.sourceType}_${snippet.id}`;
}
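// Example keys (illustrative IDs): "entity_42" for code entities,
// "conversation_message_17" or "git_commit_a1b2c3d" for other source types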
candidateSnippetsMap.set(mapKey, snippet);
}
this.logger.debug("Added existing candidate snippets to merge map", {
conversationId: conversationId,
candidateSnippetsInMap: candidateSnippetsMap.size,
});
// Process relationship-derived snippets and handle duplicates
let mergedCount = 0;
let duplicatesHandled = 0;
for (const relationshipSnippet of relationshipDerivedSnippets) {
// For relationship-derived snippets (which should be code entities),
// use entity ID as the key
const mapKey = `entity_${relationshipSnippet.id}`;
if (candidateSnippetsMap.has(mapKey)) {
// Handle duplicate - compare scores and keep the better one
const existingSnippet = candidateSnippetsMap.get(mapKey);
const existingScore = existingSnippet.initialScore || 0;
const relationshipScore = relationshipSnippet.initialScore || 0;
if (relationshipScore > existingScore) {
// Relationship snippet has better score, replace the existing one
// But preserve any existing relationship context if both exist
if (
existingSnippet.relationshipContext &&
relationshipSnippet.relationshipContext
) {
// Keep the existing relationship context on the replacement
// snippet and log the conflict
this.logger.debug(
"Multiple relationship contexts found for entity, keeping existing",
{
entityId: relationshipSnippet.id,
existingRelationshipType:
existingSnippet.relationshipContext.relationshipType,
newRelationshipType:
relationshipSnippet.relationshipContext
.relationshipType,
conversationId: conversationId,
}
);
relationshipSnippet.relationshipContext =
existingSnippet.relationshipContext;
}
candidateSnippetsMap.set(mapKey, relationshipSnippet);
duplicatesHandled++;
this.logger.debug(
"Replaced existing snippet with higher-scoring relationship snippet",
{
entityId: relationshipSnippet.id,
existingScore: existingScore,
relationshipScore: relationshipScore,
conversationId: conversationId,
}
);
} else {
// Existing snippet has better score, but add relationship context if missing
if (
!existingSnippet.relationshipContext &&
relationshipSnippet.relationshipContext
) {
existingSnippet.relationshipContext =
relationshipSnippet.relationshipContext;
this.logger.debug(
"Added relationship context to existing higher-scoring snippet",
{
entityId: relationshipSnippet.id,
existingScore: existingScore,
relationshipScore: relationshipScore,
relationshipType:
relationshipSnippet.relationshipContext
.relationshipType,
conversationId: conversationId,
}
);
}
duplicatesHandled++;
}
} else {
// No duplicate, add the relationship-derived snippet
candidateSnippetsMap.set(mapKey, relationshipSnippet);
mergedCount++;
}
}
// Convert the map back to an array
candidateSnippets = Array.from(candidateSnippetsMap.values());
this.logger.info(
"Relationship-derived snippets merged successfully",
{
conversationId: conversationId,
relationshipSnippetsProcessed: relationshipDerivedSnippets.length,
newSnippetsMerged: mergedCount,
duplicatesHandled: duplicatesHandled,
finalCandidateSnippetsCount: candidateSnippets.length,
}
);
// Task 241: INFO level logging for all sources merged stage completion
this.logger.info(
`Retrieval: Merged all sources, ${candidateSnippets.length} total candidates.`,
{
conversationId: conversationId,
stage: "merged_all_sources",
totalCandidates: candidateSnippets.length,
sourceBreakdown: {
code_entity_fts: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_fts"
).length,
code_entity_keyword: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_keyword"
).length,
project_document_fts: candidateSnippets.filter(
(s) => s.sourceType === "project_document_fts"
).length,
project_document_keyword: candidateSnippets.filter(
(s) => s.sourceType === "project_document_keyword"
).length,
conversation_message: candidateSnippets.filter(
(s) => s.sourceType === "conversation_message"
).length,
conversation_topic: candidateSnippets.filter(
(s) => s.sourceType === "conversation_topic"
).length,
git_commit: candidateSnippets.filter(
(s) => s.sourceType === "git_commit"
).length,
git_commit_file_change: candidateSnippets.filter(
(s) => s.sourceType === "git_commit_file_change"
).length,
code_entity_related: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_related"
).length,
},
}
);
} else {
this.logger.debug("No relationship-derived snippets to merge", {
conversationId: conversationId,
existingCandidateSnippets: candidateSnippets.length,
});
}
} catch (mergeError) {
this.logger.error("Error during relationship snippets merging", {
error: mergeError.message,
stack: mergeError.stack,
conversationId: conversationId,
relationshipDerivedSnippetsCount: relationshipDerivedSnippets.length,
existingCandidateSnippetsCount: candidateSnippets.length,
});
// Fallback: simple concatenation if merging fails
if (relationshipDerivedSnippets.length > 0) {
candidateSnippets.push(...relationshipDerivedSnippets);
this.logger.debug(
"Applied fallback merge strategy (simple concatenation)",
{
conversationId: conversationId,
finalCandidateSnippetsCount: candidateSnippets.length,
}
);
}
}
// Step 16: Calculate consolidated scores for all candidate snippets
// Apply multi-factor ranking using source type weights, AI status weights, and recency factors
try {
// Task 213: Log number of candidate snippets before ranking
this.logger.debug("Number of candidate snippets before ranking", {
conversationId: conversationId,
candidateSnippetsBeforeRanking: candidateSnippets.length,
});
this.logger.debug(
"Starting multi-factor score calculation for candidate snippets",
{
conversationId: conversationId,
totalCandidateSnippets: candidateSnippets.length,
}
);
// Import ranking factor weights from configuration
const { RANKING_FACTOR_WEIGHTS } = await import("../config.js");
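// Assumed shape, inferred from how the weights are used below (the numeric
// values here are illustrative, not the project's actual configuration):
// RANKING_FACTOR_WEIGHTS = {
//   sourceType: { code_entity_fts: 1.2, conversation_message: 0.8, ... },
//   aiStatus: { completed: 1.1, pending: 0.9, ... },
//   relationshipType: { CALLS_FUNCTION: 1.15, ... },
//   recency: { maxBoost: 0.2, decayRateHours: 72, minAgeForDecay: 1, maxAgeForBoost: 720 },
// };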
// Helper function to calculate recency boost for time-sensitive snippets
const calculateRecencyBoost = (timestampString) => {
if (!timestampString) {
return 0; // No recency boost if no timestamp
}
try {
const itemDate = new Date(timestampString);
// Guard against invalid timestamps: new Date() does not throw, it yields
// an "Invalid Date" whose arithmetic produces NaN
if (Number.isNaN(itemDate.getTime())) {
return 0;
}
const now = new Date();
const ageInMillis = now.getTime() - itemDate.getTime();
const ageInHours = ageInMillis / (1000 * 60 * 60);
const { maxBoost, decayRateHours, minAgeForDecay, maxAgeForBoost } =
RANKING_FACTOR_WEIGHTS.recency;
// No boost if item is too old
if (ageInHours > maxAgeForBoost) {
return 0;
}
// No decay if item is very recent
if (ageInHours <= minAgeForDecay) {
return maxBoost;
}
// Exponential decay based on age
const decayFactor = Math.exp(-ageInHours / decayRateHours);
return maxBoost * decayFactor;
} catch (dateError) {
this.logger.debug(
"Error parsing timestamp for recency calculation",
{
timestamp: timestampString,
error: dateError.message,
}
);
return 0; // No boost if timestamp is invalid
}
};
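// Worked example (illustrative, assuming maxBoost = 0.2, decayRateHours = 72,
// minAgeForDecay = 1, maxAgeForBoost = 720): a 24-hour-old item receives
// 0.2 * e^(-24 / 72) ≈ 0.143; anything older than 720 hours receives 0.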
// Process each candidate snippet to calculate consolidated score
let scoreCalculationCount = 0;
let scoreCalculationErrors = 0;
let relationshipSnippetsProcessed = 0; // Task 238: Track relationship-derived snippets
for (const snippet of candidateSnippets) {
try {
// Start with normalized initial score
let consolidatedScore = snippet.initialScore || 0;
// Apply source type weight
const sourceTypeWeight =
RANKING_FACTOR_WEIGHTS.sourceType[snippet.sourceType] || 1.0;
consolidatedScore *= sourceTypeWeight;
// Apply AI status weight if applicable
if (snippet.aiStatus) {
const aiStatusWeight =
RANKING_FACTOR_WEIGHTS.aiStatus[snippet.aiStatus] || 1.0;
consolidatedScore *= aiStatusWeight;
}
// Task 238: Special handling for relationship-derived snippets
let relationshipBoost = 0;
let relationshipTypeWeight = 1.0;
if (snippet.relationshipContext) {
relationshipSnippetsProcessed++;
// Apply relationship type-specific weight if available
const relationshipType =
snippet.relationshipContext.relationshipType;
relationshipTypeWeight =
RANKING_FACTOR_WEIGHTS.relationshipType[relationshipType] ||
1.0;
// Apply relationship type weight as a multiplier
consolidatedScore *= relationshipTypeWeight;
// Add relationship context boost for being derived from a relevant relationship
// This provides an additional boost beyond just the source type weight
relationshipBoost = 0.1; // Base relationship context boost
// Additional boost for high-priority relationship types
const highPriorityTypes = [
"CALLS_FUNCTION",
"CALLS_METHOD",
"IMPLEMENTS_INTERFACE",
"EXTENDS_CLASS",
];
if (highPriorityTypes.includes(relationshipType)) {
relationshipBoost += 0.05; // Extra boost for high-priority relationships
}
consolidatedScore += relationshipBoost;
this.logger.debug("Applied relationship context scoring", {
snippetId: snippet.id,
relationshipType: relationshipType,
relationshipDirection: snippet.relationshipContext.direction,
relationshipTypeWeight: relationshipTypeWeight,
relationshipBoost: relationshipBoost,
relatedToSeedEntityId:
snippet.relationshipContext.relatedToSeedEntityId,
});
}
// Apply recency boost for time-sensitive snippets
let recencyBoost = 0;
if (
snippet.timestamp ||
snippet.metadata?.timestamp ||
snippet.metadata?.commitDate
) {
const timestampToUse =
snippet.timestamp ||
snippet.metadata?.timestamp ||
snippet.metadata?.commitDate;
recencyBoost = calculateRecencyBoost(timestampToUse);
consolidatedScore += recencyBoost;
}
// Ensure consolidated score stays within reasonable bounds (0-2.0 max to account for boosts)
consolidatedScore = Math.min(2.0, Math.max(0.0, consolidatedScore));
// Store the consolidated score on the snippet
snippet.consolidatedScore = consolidatedScore;
scoreCalculationCount++;
// Log detailed calculation for a sample of snippets for debugging/tuning
if (scoreCalculationCount <= 5) {
this.logger.debug("Detailed score calculation sample", {
snippetId: snippet.id,
sourceType: snippet.sourceType,
initialScore: snippet.initialScore,
sourceTypeWeight: sourceTypeWeight,
aiStatus: snippet.aiStatus,
aiStatusWeight: snippet.aiStatus
? RANKING_FACTOR_WEIGHTS.aiStatus[snippet.aiStatus]
: "N/A",
relationshipTypeWeight: relationshipTypeWeight, // Task 238: Log relationship weight
relationshipBoost: relationshipBoost, // Task 238: Log relationship boost
recencyBoost: recencyBoost,
consolidatedScore: consolidatedScore,
hasTimestamp: !!(
snippet.timestamp ||
snippet.metadata?.timestamp ||
snippet.metadata?.commitDate
),
hasRelationshipContext: !!snippet.relationshipContext, // Task 238: Log relationship context presence
});
// Task 213: Enhanced ranking details in specific format for tuning
const aiStatusWeightValue = snippet.aiStatus
? RANKING_FACTOR_WEIGHTS.aiStatus[snippet.aiStatus] || 1.0
: 1.0;
// Task 238: Enhanced logging to include relationship context effects
if (snippet.relationshipContext) {
this.logger.debug(
`Ranking snippet ${snippet.id} (type: ${snippet.sourceType}): initial=${snippet.initialScore}, sourceWeight=${sourceTypeWeight}, aiStatusWeight=${aiStatusWeightValue}, relationshipTypeWeight=${relationshipTypeWeight}, relationshipBoost=${relationshipBoost}, recencyBoost=${recencyBoost} => consolidated=${consolidatedScore}`
);
} else {
this.logger.debug(
`Ranking snippet ${snippet.id} (type: ${snippet.sourceType}): initial=${snippet.initialScore}, sourceWeight=${sourceTypeWeight}, aiStatusWeight=${aiStatusWeightValue}, recencyBoost=${recencyBoost} => consolidated=${consolidatedScore}`
);
}
}
} catch (snippetError) {
this.logger.error(
"Error calculating consolidated score for snippet",
{
error: snippetError.message,
stack: snippetError.stack,
snippetId: snippet.id,
sourceType: snippet.sourceType,
conversationId: conversationId,
}
);
// Set a fallback score to prevent breaking the entire process
snippet.consolidatedScore = snippet.initialScore || 0;
scoreCalculationErrors++;
}
}
this.logger.info("Multi-factor score calculation completed", {
conversationId: conversationId,
snippetsProcessed: scoreCalculationCount,
relationshipSnippetsProcessed: relationshipSnippetsProcessed, // Task 238: Log relationship snippet count
calculationErrors: scoreCalculationErrors,
successRate:
scoreCalculationCount + scoreCalculationErrors > 0
? Math.round(
(scoreCalculationCount /
(scoreCalculationCount + scoreCalculationErrors)) *
100
)
: 0,
});
// Task 241: INFO level logging for ranking stage completion
this.logger.info(`Retrieval: Ranking complete.`, {
conversationId: conversationId,
stage: "ranking_complete",
snippetsRanked: scoreCalculationCount,
rankingErrors: scoreCalculationErrors,
});
// Calculate score distribution after consolidation for comparison
const consolidatedScores = candidateSnippets
.map((s) => s.consolidatedScore)
.filter((score) => typeof score === "number" && !isNaN(score));
if (consolidatedScores.length > 0) {
const sortedScores = [...consolidatedScores].sort((a, b) => a - b);
const consolidatedStats = {
count: consolidatedScores.length,
min: sortedScores[0],
max: sortedScores[sortedScores.length - 1],
avg:
consolidatedScores.reduce((sum, s) => sum + s, 0) /
consolidatedScores.length,
median: sortedScores[Math.floor(sortedScores.length / 2)],
};
this.logger.debug("Consolidated score distribution", {
consolidatedStats: consolidatedStats,
scoreRange: consolidatedStats.max - consolidatedStats.min,
});
}
} catch (error) {
this.logger.error("Error during multi-factor score calculation", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
candidateSnippetsCount: candidateSnippets.length,
});
// Set fallback consolidated scores to prevent breaking the process
for (const snippet of candidateSnippets) {
if (typeof snippet.consolidatedScore !== "number") {
snippet.consolidatedScore = snippet.initialScore || 0;
}
}
}
// Step 17: Sort candidate snippets by consolidated score (highest first)
try {
this.logger.debug(
"Starting to sort candidate snippets by consolidated score",
{
conversationId: conversationId,
totalCandidateSnippets: candidateSnippets.length,
}
);
// Sort the candidateSnippets array in descending order by consolidatedScore
candidateSnippets.sort(
(a, b) => b.consolidatedScore - a.consolidatedScore
);
this.logger.info("Candidate snippets sorted by consolidated score", {
conversationId: conversationId,
totalRankedSnippets: candidateSnippets.length,
topSnippetScore:
candidateSnippets.length > 0
? candidateSnippets[0].consolidatedScore
: null,
bottomSnippetScore:
candidateSnippets.length > 0
? candidateSnippets[candidateSnippets.length - 1]
.consolidatedScore
: null,
});
// Log top few snippets for debugging/tuning
if (candidateSnippets.length > 0) {
const topSnippetsCount = Math.min(5, candidateSnippets.length);
const topSnippetsSample = candidateSnippets
.slice(0, topSnippetsCount)
.map((snippet, index) => ({
rank: index + 1,
id: snippet.id,
sourceType: snippet.sourceType,
consolidatedScore: snippet.consolidatedScore,
initialScore: snippet.initialScore,
}));
this.logger.debug("Top ranked snippets after sorting", {
conversationId: conversationId,
topSnippetsSample: topSnippetsSample,
});
}
} catch (error) {
this.logger.error("Error during candidate snippets sorting", {
error: error.message,
stack: error.stack,
conversationId: conversationId,
candidateSnippetsCount: candidateSnippets.length,
});
// Continue without sorting if there's an error
}
// Analyze score distribution across all candidate snippets for debugging/tuning
this._analyzeScoreDistribution(candidateSnippets);
// Log overall candidate snippet statistics including normalization status
this.logger.info("Candidate snippet collection and analysis completed", {
conversationId: conversationId,
totalCandidateSnippets: candidateSnippets.length,
candidateSnippetsBySource: {
code_entity_fts: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_fts"
).length,
code_entity_keyword: candidateSnippets.filter(
(s) => s.sourceType === "code_entity_keyword"
).length,
project_document_fts: candidateSnippets.filter(
(s) => s.sourceType === "project_document_fts"
).length,
project_document_keyword: candidateSnippets.filter(
(s) => s.sourceType === "project_document_keyword"
).length,
conversation_message: candidateSnippets.filter(
(s) => s.sourceType === "conversation_message"
).length,
conversation_topic: candidateSnippets.filter(
(s) => s.sourceType === "conversation_topic"
).length,
git_commit: candidateSnippets.filter(
(s) => s.sourceType === "git_commit"
).length,
git_commit_file_change: candidateSnippets.filter(
(s) => s.sourceType === "git_commit_file_change"
).length,
code_entity_related: candidateSnippets.filter(
// Relationship-derived snippets use sourceType "code_entity_related"
(s) => s.sourceType === "code_entity_related"
).length,
},
normalizationAnalysisComplete: true,
});
// Task 222: Apply compression to ranked candidate snippets using CompressionService
this.logger.info("Starting context compression process", {
conversationId: conversationId,
rankedCandidateSnippets: candidateSnippets.length,
tokenBudget: tokenBudget,
});
let compressionResult;
try {
// Call CompressionService with the ranked snippets and token budget
compressionResult = this.compressionService.compressSnippets(
candidateSnippets,
tokenBudget
);
this.logger.info("Context compression completed successfully", {
conversationId: conversationId,
snippetsFoundBeforeCompression:
compressionResult.summaryStats.snippetsFoundBeforeCompression,
snippetsReturnedAfterCompression:
compressionResult.summaryStats.snippetsReturnedAfterCompression,
estimatedTokensIn: compressionResult.summaryStats.estimatedTokensIn,
estimatedTokensOut: compressionResult.summaryStats.estimatedTokensOut,
tokenBudgetGiven: compressionResult.summaryStats.tokenBudgetGiven,
tokenBudgetRemaining:
compressionResult.summaryStats.tokenBudgetRemaining,
});
// Task 241: INFO level logging for compression stage completion
this.logger.info(`Retrieval: Compression complete.`, {
conversationId: conversationId,
stage: "compression_complete",
snippetsBeforeCompression:
compressionResult.summaryStats.snippetsFoundBeforeCompression,
snippetsAfterCompression:
compressionResult.summaryStats.snippetsReturnedAfterCompression,
tokenBudgetUsed:
compressionResult.summaryStats.tokenBudgetGiven -
compressionResult.summaryStats.tokenBudgetRemaining,
});
} catch (compressionError) {
this.logger.error("Error during context compression", {
error: compressionError.message,
stack: compressionError.stack,
conversationId: conversationId,
candidateSnippetsCount: candidateSnippets.length,
tokenBudget: tokenBudget,
});
// Create fallback result if compression fails
compressionResult = {
finalSnippets: [],
summaryStats: {
snippetsFoundBeforeCompression: candidateSnippets.length,
snippetsReturnedAfterCompression: 0,
estimatedTokensIn: 0,
estimatedTokensOut: 0,
tokenBudgetGiven: tokenBudget,
tokenBudgetRemaining: tokenBudget,
error: compressionError.message,
},
};
}
// Return the compressed context snippets together with retrieval summary statistics
this.logger.info("getRelevantContext processing completed", {
conversationId: conversationId,
codeEntityHitsFound: codeEntityHits.length,
documentHitsFound: documentHits.length,
keywordMatchedEntitiesFound: keywordMatchedEntities.length,
codeEntitiesFetched: Object.keys(retrievedCodeEntities).length,
projectDocumentsFetched: Object.keys(retrievedProjectDocuments).length,
codeEntityCandidateSnippets: candidateSnippets.filter(
(s) =>
s.sourceType === "code_entity_fts" ||
s.sourceType === "code_entity_keyword"
).length,
projectDocumentCandidateSnippets: candidateSnippets.filter(
(s) =>
s.sourceType === "project_document_fts" ||
s.sourceType === "project_document_keyword"
).length,
totalCandidateSnippets: candidateSnippets.length,
conversationHistorySnippets: matchedMessages.length,
conversationTopicsSnippets: matchedTopics.length,
conversationMessageCandidateSnippets: candidateSnippets.filter(
(s) => s.sourceType === "conversation_message"
).length,
conversationTopicCandidateSnippets: candidateSnippets.filter(
(s) => s.sourceType === "conversation_topic"
).length,
gitCommitsFound: matchedGitCommits.length, // New: Git commit search results
gitCommitFilesFound: matchedCommitFiles.length, // New: Git commit file change search results
seedEntitiesIdentified: seedEntities.length, // New: Seed entities for relationship expansion
relationshipDerivedSnippets: candidateSnippets.filter(
// Relationship-derived snippets use sourceType "code_entity_related"
(s) => s.sourceType === "code_entity_related"
).length,
finalContextSnippets: compressionResult.finalSnippets.length, // New: Final compressed snippets
// Will be expanded with snippet processing in subsequent tasks
});
return {
contextSnippets: compressionResult.finalSnippets,
retrievalSummary: compressionResult.summaryStats,
// processedOk is handled by the handler based on whether this throws
};
} catch (error) {
this.logger.error("Error in getRelevantContext", {
error: error.message,
stack: error.stack,
query: query,
conversationId: conversationId,
});
// Re-throw the error to let the handler deal with it
throw error;
}
}
/**
* Private helper method to tokenize and normalize a query string for FTS and keyword searches
* @param {string} queryString - The raw query string from the agent
* @returns {string[]} Array of processed search terms
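* @example
* // Illustrative: stop words are dropped, remaining tokens are kept
* // this._getSearchTerms("How is the RetrievalService initialized?")
* // => ["how", "retrievalservice", "initialized"]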
* @private
*/
_getSearchTerms(queryString) {
if (!queryString || typeof queryString !== "string") {
return [];
}
// Convert to lowercase
const lowerQuery = queryString.toLowerCase();
// Define common English stop words to filter out
const stopWords = new Set([
"the",
"is",
"a",
"an",
"to",
"of",
"for",
"and",
"or",
"but",
"in",
"on",
"at",
"with",
"by",
"from",
"as",
"be",
"are",
"was",
"were",
"been",
"have",
"has",
"had",
"do",
"does",
"did",
"will",
"would",
"could",
"should",
"may",
"might",
"can",
"this",
"that",
"these",
"those",
"i",
"you",
"he",
"she",
"it",
"we",
"they",
"me",
"him",
"her",
"us",
"them",
"my",
"your",
"his",
"her",
"its",
"our",
"their",
]);
// Define significant short terms that should NOT be filtered out
// (checked before stop words, so overlapping entries such as "or" and "and" are kept)
const significantShortTerms = new Set([
"go",
"js",
"ts",
"py",
"c#",
"cs",
"cc",
"c++",
"sql",
"xml",
"css",
"dom",
"api",
"url",
"uri",
"id",
"ui",
"ux",
"ai",
"ml",
"db",
"os",
"io",
"if",
"or",
"and",
"not",
]);
// Split by spaces and common punctuation, keeping alphanumeric sequences
// The regex splits on whitespace and punctuation but preserves alphanumerics,
// underscores, and hyphens. Note that "#" and "+" are split characters, so the
// "c#" and "c++" entries in significantShortTerms can never be produced by
// this tokenizer and are currently inert.
const tokens = lowerQuery
.split(/[\s\.,\(\)\{\}\[\]:;!@#$%^&*+=<>?/\\|"'`~]+/)
.filter((token) => token.length > 0); // Remove empty strings
// Filter tokens based on criteria
const processedTerms = tokens.filter((token) => {
// Keep significant short terms regardless of length
if (significantShortTerms.has(token)) {
return true;
}
// Filter out stop words
if (stopWords.has(token)) {
return false;
}
// Filter out single-character tokens (significant short terms were already kept above)
if (token.length < 2) {
return false;
}
return true;
});
return processedTerms;
}
// ===========================================
// GIT HISTORY HEURISTICS
// ===========================================
/**
* Analyzes the input query for terms or patterns that strongly suggest Git history relevance
* Used to determine whether to prioritize or gate Git searches in context retrieval
* @param {string} queryString - Original query string
* @param {Array<string>} searchTerms - Tokenized search terms
* @returns {boolean} True if Git-related terms/patterns are found, false otherwise
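* @example
* // Illustrative: "changed" contains the Git keyword "change" and "auth.js" looks like a file path
* // this._isGitHistoryQuery("who changed auth.js recently", ["changed", "auth.js", "recently"])
* // => true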
*/
_isGitHistoryQuery(queryString, searchTerms) {
try {
this.logger.debug("Analyzing query for Git history relevance", {
queryString: queryString,
searchTermsCount: searchTerms?.length || 0,
searchTerms: searchTerms,
});
// Ensure we have valid inputs
if (!queryString || typeof queryString !== "string") {
this.logger.debug("Invalid query string provided", {
queryString: queryString,
type: typeof queryString,
});
return false;
}
if (!searchTerms || !Array.isArray(searchTerms)) {
this.logger.debug("Invalid search terms provided", {
searchTerms: searchTerms,
isArray: Array.isArray(searchTerms),
});
return false;
}
const lowerQuery = queryString.toLowerCase();
// Git-related keywords to check for
const gitKeywords = [
"commit",
"commits",
"history",
"change",
"changes",
"changed",
"log",
"logs",
"author",
"authors",
"blame",
"version",
"versions",
"branch",
"branches",
"merge",
"merged",
"diff",
"diffs",
"revision",
"revisions",
"checkout",
"pull",
"push",
"repository",
"repo",
];
// Check for Git keywords in the original query
let hasGitKeywords = false;
for (const keyword of gitKeywords) {
if (lowerQuery.includes(keyword)) {
hasGitKeywords = true;
this.logger.debug("Found Git keyword in query", {
keyword: keyword,
queryString: queryString,
});
break;
}
}
// Check if any search terms look like file paths
// File paths typically contain forward slashes or have common file extensions
const fileExtensions = [
".js",
".ts",
".jsx",
".tsx",
".py",
".java",
".cpp",
".c",
".h",
".cs",
".php",
".rb",
".go",
".rs",
".swift",
".kt",
".scala",
".clj",
".ml",
".hs",
".elm",
".dart",
".vue",
".svelte",
".html",
".css",
".scss",
".sass",
".less",
".json",
".xml",
".yaml",
".yml",
".toml",
".ini",
".cfg",
".conf",
".md",
".txt",
".sql",
];
let hasFilePaths = false;
for (const term of searchTerms) {
// Check if term contains forward slash (path separator)
if (term.includes("/")) {
hasFilePaths = true;
this.logger.debug("Found file path pattern in search terms", {
term: term,
pattern: "contains forward slash",
});
break;
}
// Check if term ends with common file extension
for (const extension of fileExtensions) {
if (term.toLowerCase().endsWith(extension)) {
hasFilePaths = true;
this.logger.debug("Found file extension pattern in search terms", {
term: term,
extension: extension,
});
break;
}
}
if (hasFilePaths) break;
}
// Check for patterns resembling commit hashes (7+ hex characters; note this
// also matches long digit-only runs such as issue or build numbers)
const commitHashPattern = /\b[a-f0-9]{7,}\b/i;
let hasCommitHashes = false;
// Check in original query
if (commitHashPattern.test(lowerQuery)) {
hasCommitHashes = true;
this.logger.debug("Found commit hash pattern in query", {
queryString: queryString,
pattern: "7+ hex characters",
});
}
// Check in search terms
if (!hasCommitHashes) {
for (const term of searchTerms) {
if (commitHashPattern.test(term)) {
hasCommitHashes = true;
this.logger.debug("Found commit hash pattern in search terms", {
term: term,
pattern: "7+ hex characters",
});
break;
}
}
}
// Determine if query suggests Git history relevance
const isGitHistoryRelevant =
hasGitKeywords || hasFilePaths || hasCommitHashes;
this.logger.debug("Git history relevance analysis completed", {
queryString: queryString,
searchTermsCount: searchTerms.length,
hasGitKeywords: hasGitKeywords,
hasFilePaths: hasFilePaths,
hasCommitHashes: hasCommitHashes,
isGitHistoryRelevant: isGitHistoryRelevant,
});
return isGitHistoryRelevant;
} catch (error) {
this.logger.error("Error analyzing query for Git history relevance", {
error: error.message,
stack: error.stack,
queryString: queryString,
searchTermsCount: searchTerms?.length || 0,
});
// Return false on error to be safe
return false;
}
}
// ===========================================
// SCORE NORMALIZATION
// ===========================================
/**
* Analyzes and optionally normalizes initial scores from different sources to ensure comparability.
* All current scoring functions are designed to return scores in the 0-1 range, so this method
* primarily provides centralized analysis and logging, with a hook for future normalization if needed.
* @param {number} score - The initial score to analyze/normalize
* @param {string} sourceType - The source type of the snippet (e.g., 'code_entity_fts', 'conversation_message')
* @param {Object} scoreProperties - Additional properties about the score calculation (e.g., rank, matchCount)
* @returns {number} Normalized score in 0-1 range
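* @example
* // Illustrative behavior (input values hypothetical):
* // this._normalizeScore(0.75, "code_entity_fts") -> 0.75 (already in range)
* // this._normalizeScore(1.3, "git_commit") -> 1.0 (clamped to upper bound)
* // this._normalizeScore(NaN, "conversation_message") -> 0.0 (invalid input)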
* @private
*/
_normalizeScore(score, sourceType, scoreProperties = {}) {
try {
// Validate input score
if (typeof score !== "number" || isNaN(score)) {
this.logger.debug("Invalid score provided for normalization", {
score: score,
sourceType: sourceType,
scoreProperties: scoreProperties,
});
return 0.0; // Return minimum score for invalid input
}
// Current analysis: All scoring functions are designed to return 0-1 range
// FTS: calculateScoreFromFtsRank uses logarithmic scale, returns 0-1
// Keyword: calculateScoreFromKeywordMatches normalizes to 0-1
// Conversation: calculateScoreForMessage/Topic ensures 0-1 range
// Git: calculateScoreForGitCommit/FileChange ensures 0-1 range
// Since all sources already normalize to 0-1, we primarily validate and log
const normalizedScore = Math.min(1.0, Math.max(0.0, score));
// Log score distribution for analysis and tuning
this.logger.debug("Score normalization analysis", {
sourceType: sourceType,
originalScore: score,
normalizedScore: normalizedScore,
scoreProperties: scoreProperties,
wasNormalizationNeeded: score !== normalizedScore,
});
// Future enhancement: If different sources show vastly different effective ranges,
// source-specific normalization could be implemented here:
/*
switch (sourceType) {
case 'code_entity_fts':
// FTS scores based on rank - currently well normalized
break;
case 'code_entity_keyword':
// Keyword scores based on weight/count - currently well normalized
break;
case 'conversation_message':
// Message scores with recency/relevance - currently well normalized
break;
case 'git_commit':
// Git commit scores with recency/relevance - currently well normalized
break;
// Add other cases as needed
}
*/
return normalizedScore;
} catch (error) {
this.logger.error("Error during score normalization", {
error: error.message,
stack: error.stack,
score: score,
sourceType: sourceType,
scoreProperties: scoreProperties,
});
// Return a safe default score on error
return 0.0;
}
}
/**
* Analyzes score distribution across all candidate snippets for debugging and tuning
* @param {Array<Object>} candidateSnippets - Array of candidate snippet objects
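* @example
* // Snippets need only the two fields read below (shape per this method):
* // this._analyzeScoreDistribution([
* //   { sourceType: "code_entity_fts", initialScore: 0.82 },
* //   { sourceType: "git_commit", initialScore: 0.41 },
* // ]);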
* @private
*/
_analyzeScoreDistribution(candidateSnippets) {
if (!candidateSnippets || candidateSnippets.length === 0) {
return;
}
try {
// Group scores by source type for analysis
const scoresBySource = {};
const allScores = [];
for (const snippet of candidateSnippets) {
const sourceType = snippet.sourceType;
const score = snippet.initialScore;
if (typeof score === "number" && !isNaN(score)) {
if (!scoresBySource[sourceType]) {
scoresBySource[sourceType] = [];
}
scoresBySource[sourceType].push(score);
allScores.push(score);
}
}
// Calculate distribution statistics
const calculateStats = (scores) => {
if (scores.length === 0) return null;
const sorted = [...scores].sort((a, b) => a - b);
return {
count: scores.length,
min: sorted[0],
max: sorted[sorted.length - 1],
avg: scores.reduce((sum, s) => sum + s, 0) / scores.length,
median: sorted[Math.floor(sorted.length / 2)],
};
};
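// e.g. calculateStats([0.2, 0.5, 0.9]) ->
//   { count: 3, min: 0.2, max: 0.9, avg: 0.533..., median: 0.5 }
// For even-length inputs the upper-middle element is reported as the median.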
// Log overall distribution
const overallStats = calculateStats(allScores);
this.logger.debug("Overall score distribution analysis", {
totalSnippets: candidateSnippets.length,
scoredSnippets: allScores.length,
stats: overallStats,
});
// Log per-source distribution
for (const [sourceType, scores] of Object.entries(scoresBySource)) {
const sourceStats = calculateStats(scores);
this.logger.debug(`Score distribution for ${sourceType}`, {
sourceType: sourceType,
stats: sourceStats,
});
}
// Log potential normalization concerns
if (overallStats && overallStats.max - overallStats.min > 0.8) {
this.logger.debug(
"Wide score range detected - normalization effectiveness confirmed",
{
scoreRange: overallStats.max - overallStats.min,
minScore: overallStats.min,
maxScore: overallStats.max,
}
);
}
} catch (error) {
this.logger.error("Error during score distribution analysis", {
error: error.message,
stack: error.stack,
candidateSnippetsCount: candidateSnippets.length,
});
}
}
// ===========================================
// TODO SECTIONS FOR FUTURE IMPLEMENTATION
// ===========================================
/**
* TODO: Implement recent development activity context retrieval
* Will support Story 3.3: Recent development activity context
*
* Methods to be implemented:
* - getRecentCommits()
* - getRecentFileChanges()
* - getRecentConversations()
*/
/**
* TODO: Implement relevant conversation history retrieval
* Will support Story 3.4: Relevant conversation history context
*
* Methods to be implemented:
* - getRelevantConversations()
* - searchConversationsByQuery()
* - getConversationsByTopic()
*/
/**
* TODO: Implement smart entity and document recommendations
* Will support Story 3.5: Smart entity and document recommendations
*
* Methods to be implemented:
* - getRecommendedEntities()
* - getRecommendedDocuments()
* - getSemanticMatches()
*/
/**
* TODO: Implement comprehensive context assembly
* Will support assembling all context components into structured response
*
* Methods to be implemented:
* - assembleComprehensiveContext()
* - prioritizeContextComponents()
* - enforceTokenLimits()
*/
// ===========================================
// RECENCY BOOST
// ===========================================
/**
* Calculates a recency boost score based on the age of an item
* More recent items receive higher boosts, with exponential decay over time
* @param {string|Date} itemTimestampStringOrDate - Timestamp from git commits, conversation messages, etc.
* @returns {Promise<number>} Recency boost value (additive to consolidated score)
* @private
*/
async _calculateRecencyBoost(itemTimestampStringOrDate) {
try {
// Return 0 if no timestamp provided
if (!itemTimestampStringOrDate) {
this.logger.debug("No timestamp provided for recency calculation");
return 0;
}
// Convert input to Date object if it's a string
let itemDate;
if (typeof itemTimestampStringOrDate === "string") {
itemDate = new Date(itemTimestampStringOrDate);
} else if (itemTimestampStringOrDate instanceof Date) {
itemDate = itemTimestampStringOrDate;
} else {
this.logger.debug("Invalid timestamp type for recency calculation", {
timestamp: itemTimestampStringOrDate,
type: typeof itemTimestampStringOrDate,
});
return 0;
}
// Validate the parsed date
if (isNaN(itemDate.getTime())) {
this.logger.debug("Invalid date for recency calculation", {
timestamp: itemTimestampStringOrDate,
parsedDate: itemDate,
});
return 0;
}
// Calculate age of the item
const now = new Date();
const ageInMillis = now.getTime() - itemDate.getTime();
const ageInHours = ageInMillis / (1000 * 60 * 60);
// Get recency configuration from ranking factor weights
let maxBoost = 0.2;
let decayRateHours = 24;
let minAgeForDecay = 1;
let maxAgeForBoost = 168; // 1 week
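// Assumed config shape (overrides the defaults above when present):
// RANKING_FACTOR_WEIGHTS.recency = {
//   maxBoost: 0.2, decayRateHours: 24, minAgeForDecay: 1, maxAgeForBoost: 168,
// };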
try {
// Import configuration dynamically to avoid circular dependencies
const { RANKING_FACTOR_WEIGHTS } = await import("../config.js");
if (RANKING_FACTOR_WEIGHTS.recency) {
// Use ?? rather than || so that explicit zero values in config are
// respected instead of being silently replaced by the defaults above
maxBoost = RANKING_FACTOR_WEIGHTS.recency.maxBoost ?? maxBoost;
decayRateHours =
RANKING_FACTOR_WEIGHTS.recency.decayRateHours ?? decayRateHours;
minAgeForDecay =
RANKING_FACTOR_WEIGHTS.recency.minAgeForDecay ?? minAgeForDecay;
maxAgeForBoost =
RANKING_FACTOR_WEIGHTS.recency.maxAgeForBoost ?? maxAgeForBoost;
}
} catch (configError) {
this.logger.debug(
"Could not load recency configuration, using defaults",
{
error: configError.message,
}
);
// Continue with default values
}
// Calculate recency boost based on age
let recencyBoost = 0;
// No boost if item is too old
if (ageInHours > maxAgeForBoost) {
recencyBoost = 0;
}
// Maximum boost if item is very recent
else if (ageInHours <= minAgeForDecay) {
recencyBoost = maxBoost;
}
// Exponential decay for items between minAgeForDecay and maxAgeForBoost
else {
const decayFactor = Math.exp(-ageInHours / decayRateHours);
recencyBoost = maxBoost * decayFactor;
}
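// Worked example with the defaults: an item 24h old gets
// 0.2 * e^(-24/24) ≈ 0.074; at 72h, 0.2 * e^(-3) ≈ 0.010; under 1h, the
// full 0.2; beyond 168h, no boost.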
this.logger.debug("Recency boost calculated", {
timestamp: itemTimestampStringOrDate,
ageInHours: ageInHours,
recencyBoost: recencyBoost,
maxBoost: maxBoost,
decayRateHours: decayRateHours,
});
return recencyBoost;
} catch (error) {
this.logger.error("Error calculating recency boost", {
error: error.message,
stack: error.stack,
timestamp: itemTimestampStringOrDate,
});
// Return 0 boost on error to avoid breaking the scoring process
return 0;
}
}
}
export default RetrievalService;