Skip to main content
Glama
full-text-search-engine.ts (11.8 kB)
/**
 * Full-Text Search Engine
 *
 * PostgreSQL ts_vector-based full-text search with query parsing,
 * result ranking, highlighting, and caching.
 *
 * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5
 */

import type { DatabaseConnectionManager } from "../database/connection-manager";
import { MemoryValidationError } from "../memory/types";
import { QueryParser } from "./query-parser";
import { ResultCache } from "./result-cache";
import {
  DEFAULT_SEARCH_CONFIG,
  SearchQueryError,
  SearchValidationError,
  type FullTextSearchQuery,
  type FullTextSearchResponse,
  type FullTextSearchResult,
  type SearchConfig,
  type SearchStatistics,
} from "./types";

/**
 * Full-text search engine using PostgreSQL ts_vector.
 *
 * Each search validates the request, optionally serves it from an in-memory
 * result cache, and otherwise runs three statements against the `memories`
 * table: the ranked page query, a COUNT of all matches, and an EXPLAIN used
 * to report whether the search-vector index appears in the plan.
 */
export class FullTextSearchEngine {
  private readonly db: DatabaseConnectionManager;
  private readonly config: SearchConfig;
  private readonly queryParser: QueryParser;
  private readonly cache: ResultCache;

  /**
   * @param db - Connection manager used to borrow and release connections
   * @param config - Optional overrides merged on top of DEFAULT_SEARCH_CONFIG
   */
  constructor(db: DatabaseConnectionManager, config?: Partial<SearchConfig>) {
    this.db = db;
    this.config = { ...DEFAULT_SEARCH_CONFIG, ...config };
    this.queryParser = new QueryParser(this.config.maxQueryLength);
    this.cache = new ResultCache({
      maxSize: 1000, // Cache up to 1000 unique queries
      ttl: this.config.cacheTTL,
    });
  }

  /**
   * Execute full-text search
   *
   * @param query - Search query parameters
   * @returns Search results with statistics
   * @throws SearchValidationError if query is invalid
   * @throws SearchQueryError if database query fails
   */
  async search(query: FullTextSearchQuery): Promise<FullTextSearchResponse> {
    // Validate query
    this.validateQuery(query);

    // Check cache if enabled
    if (this.config.enableCache) {
      const cacheKey = this.cache.generateKey(query);
      const cached = this.cache.get(cacheKey);

      if (cached) {
        // Apply pagination to cached results, using the same page-size
        // clamp as the SQL path so both paths return the same page.
        const start = query.offset ?? 0;
        const end = start + this.resolveMaxResults(query);

        return {
          results: cached.results.slice(start, end),
          statistics: {
            ...cached.statistics,
            searchTime: 0, // Cached result, no search time
          },
        };
      }
    }

    const startTime = Date.now();

    try {
      // Parse query
      const language = query.language ?? this.config.defaultLanguage;
      const parsedQuery = this.queryParser.parse(query.query, language);

      // Build and execute search SQL
      const { sql, params } = this.buildSearchSQL(query, parsedQuery, language);

      // Pass original query for proper matchedTerms extraction (excludes NOT terms)
      const results = await this.executeSearch(sql, params, query.query);

      // Get total count
      const totalResults = await this.getTotalCount(query, parsedQuery, language);

      // Verify index usage
      const indexUsed = await this.verifyIndexUsage(query, parsedQuery, language);

      const searchTime = Date.now() - startTime;

      const statistics: SearchStatistics = {
        totalResults,
        searchTime,
        indexUsed,
      };

      const response: FullTextSearchResponse = {
        results,
        statistics,
      };

      // Cache result if enabled (cache full result set, not paginated)
      if (this.config.enableCache) {
        const cacheKey = this.cache.generateKey(query);
        const fullResults = await this.fetchFullResultSet(
          query,
          parsedQuery,
          language,
          results,
          totalResults
        );

        this.cache.set(cacheKey, { results: fullResults, statistics });
      }

      return response;
    } catch (err) {
      if (err instanceof SearchValidationError) {
        throw err;
      }

      // Re-throw MemoryValidationError as-is
      if (err instanceof MemoryValidationError) {
        throw err;
      }

      throw new SearchQueryError(
        `Failed to execute search: ${err instanceof Error ? err.message : String(err)}`,
        query.query,
        err instanceof Error ? err : undefined
      );
    }
  }

  /**
   * Produce the un-paginated result set that gets stored in the cache.
   *
   * A second database round-trip is skipped when its outcome is already
   * known: a total of zero yields an empty list, and a first page
   * (offset 0) that holds exactly `totalResults` rows is the full set.
   *
   * @param query - Original search query parameters
   * @param parsedQuery - Parsed ts_query string
   * @param language - Text search language
   * @param pageResults - Rows already fetched for the requested page
   * @param totalResults - Total number of matching rows
   * @returns Rows from offset 0, limited by buildSearchSQL's clamp
   */
  private async fetchFullResultSet(
    query: FullTextSearchQuery,
    parsedQuery: string,
    language: string,
    pageResults: FullTextSearchResult[],
    totalResults: number
  ): Promise<FullTextSearchResult[]> {
    if (totalResults === 0) {
      return [];
    }

    if ((query.offset ?? 0) === 0 && pageResults.length === totalResults) {
      // Copy so later mutation of the returned page cannot alter the cache entry.
      return pageResults.slice();
    }

    // Build once and reuse both the SQL text and its parameters.
    const full = this.buildSearchSQL(
      { ...query, maxResults: totalResults, offset: 0 },
      parsedQuery,
      language
    );

    return this.executeSearch(full.sql, full.params, query.query);
  }

  /**
   * Resolve the effective page size: the requested maxResults (or the
   * configured default), never exceeding maxAllowedResults.
   *
   * @param query - Search query parameters
   * @returns Page size to apply
   */
  private resolveMaxResults(query: FullTextSearchQuery): number {
    return Math.min(
      query.maxResults ?? this.config.defaultMaxResults,
      this.config.maxAllowedResults
    );
  }

  /**
   * Validate query parameters
   *
   * @param query - Query to validate
   * @throws SearchValidationError if query is invalid
   */
  private validateQuery(query: FullTextSearchQuery): void {
    // Query parser will validate the query string itself
    this.queryParser.validate(query.query);

    // Validate maxResults
    if (query.maxResults !== undefined) {
      if (query.maxResults < 1) {
        throw new SearchValidationError(
          "maxResults must be at least 1",
          "maxResults",
          query.maxResults
        );
      }

      if (query.maxResults > this.config.maxAllowedResults) {
        throw new SearchValidationError(
          `maxResults cannot exceed ${this.config.maxAllowedResults}`,
          "maxResults",
          query.maxResults
        );
      }

      // NaN passes both comparisons above, and a fractional value would be
      // handed to LIMIT; reject both here as validation failures.
      if (!Number.isInteger(query.maxResults)) {
        throw new SearchValidationError(
          "maxResults must be an integer",
          "maxResults",
          query.maxResults
        );
      }
    }

    // Validate offset
    if (query.offset !== undefined) {
      if (query.offset < 0) {
        throw new SearchValidationError("offset must be non-negative", "offset", query.offset);
      }

      // Same reasoning as maxResults: NaN and fractions must not reach OFFSET.
      if (!Number.isInteger(query.offset)) {
        throw new SearchValidationError("offset must be an integer", "offset", query.offset);
      }
    }

    // Validate strength and salience thresholds
    this.validateUnitInterval(query.minStrength, "minStrength");
    this.validateUnitInterval(query.minSalience, "minSalience");
  }

  /**
   * Ensure an optional threshold lies within [0, 1].
   *
   * @param value - Threshold to check; undefined is accepted
   * @param field - Field name used in the error message and error metadata
   * @throws SearchValidationError if value is NaN or outside [0, 1]
   */
  private validateUnitInterval(
    value: number | undefined,
    field: "minStrength" | "minSalience"
  ): void {
    if (value === undefined) {
      return;
    }

    // NaN fails neither `< 0` nor `> 1`, so it needs an explicit check.
    if (Number.isNaN(value) || value < 0 || value > 1) {
      throw new SearchValidationError(`${field} must be between 0 and 1`, field, value);
    }
  }

  /**
   * Append the optional userId / minStrength / minSalience filters to a
   * statement whose WHERE clause has already been started.
   *
   * Shared by the page query and the count query so both always apply the
   * same predicates. Inputs are not mutated.
   *
   * @param query - Search query parameters
   * @param baseSql - SQL text ending inside a WHERE clause
   * @param baseParams - Positional parameters already bound by baseSql
   * @returns SQL with filters appended and the extended parameter list
   */
  private applyFilters(
    query: FullTextSearchQuery,
    baseSql: string,
    baseParams: readonly (string | number)[]
  ): { sql: string; params: (string | number)[] } {
    let sql = baseSql;
    const params: (string | number)[] = [...baseParams];

    if (query.userId) {
      params.push(query.userId);
      sql += ` AND user_id = $${params.length}`;
    }

    if (query.minStrength !== undefined) {
      params.push(query.minStrength);
      sql += ` AND strength >= $${params.length}`;
    }

    if (query.minSalience !== undefined) {
      params.push(query.minSalience);
      sql += ` AND salience >= $${params.length}`;
    }

    return { sql, params };
  }

  /**
   * Build SQL query for full-text search
   *
   * @param query - Search query parameters
   * @param parsedQuery - Parsed ts_query string
   * @param language - Text search language
   * @returns SQL query and parameters
   */
  private buildSearchSQL(
    query: FullTextSearchQuery,
    parsedQuery: string,
    language: string
  ): { sql: string; params: (string | number)[] } {
    const rankingMode = query.rankingMode ?? this.config.defaultRankingMode;
    const maxResults = this.resolveMaxResults(query);
    const offset = query.offset ?? 0;

    // Use ts_rank or ts_rank_cd based on ranking mode
    const rankFunction = rankingMode === "rank_cd" ? "ts_rank_cd" : "ts_rank";

    const baseSql = `
      SELECT
        id as "memoryId",
        content,
        ts_headline($1, content, to_tsquery($1, $2), 'MaxWords=50, MinWords=25, MaxFragments=1') as headline,
        ${rankFunction}(search_vector, to_tsquery($1, $2)) as rank,
        created_at as "createdAt",
        salience,
        strength
      FROM memories
      WHERE search_vector @@ to_tsquery($1, $2)
    `;

    // Add filters
    const filtered = this.applyFilters(query, baseSql, [language, parsedQuery]);

    // Add ordering and pagination; LIMIT and OFFSET take the last two slots.
    const params = [...filtered.params, maxResults, offset];
    const sql = `${filtered.sql} ORDER BY rank DESC LIMIT $${params.length - 1} OFFSET $${params.length}`;

    return { sql, params };
  }

  /**
   * Execute search query and return results
   *
   * @param sql - SQL query string
   * @param params - Query parameters
   * @param originalQuery - Original user query for term extraction
   * @returns Array of search results
   */
  private async executeSearch(
    sql: string,
    params: (string | number)[],
    originalQuery?: string
  ): Promise<FullTextSearchResult[]> {
    const client = await this.db.getConnection();

    try {
      const result = await client.query(sql, params);

      // Extract matched terms from the original query (excludes NOT terms)
      // If originalQuery provided, use it; otherwise fall back to extractAllTerms from tsQuery
      const matchedTerms = originalQuery
        ? this.queryParser.extractTerms(originalQuery)
        : this.queryParser.extractAllTerms(params[1] as string);

      return result.rows.map((row) => ({
        memoryId: row.memoryId,
        content: row.content,
        headline: row.headline,
        rank: parseFloat(row.rank),
        matchedTerms,
        createdAt: row.createdAt,
        salience: row.salience,
        strength: row.strength,
      }));
    } finally {
      // Always hand the connection back, even when the query throws.
      this.db.releaseConnection(client);
    }
  }

  /**
   * Get total count of matching results
   *
   * @param query - Search query parameters
   * @param parsedQuery - Parsed ts_query string
   * @param language - Text search language
   * @returns Total number of matching results
   */
  private async getTotalCount(
    query: FullTextSearchQuery,
    parsedQuery: string,
    language: string
  ): Promise<number> {
    const client = await this.db.getConnection();

    try {
      const baseSql = `
        SELECT COUNT(*) as total
        FROM memories
        WHERE search_vector @@ to_tsquery($1, $2)
      `;

      // Add same filters as main query
      const { sql, params } = this.applyFilters(query, baseSql, [language, parsedQuery]);

      const result = await client.query(sql, params);
      // Explicit radix: the value is read as a decimal count.
      return Number.parseInt(result.rows[0].total, 10);
    } finally {
      this.db.releaseConnection(client);
    }
  }

  /**
   * Verify that GIN index is used for the search
   *
   * Only the bare full-text predicate is explained; the optional filters
   * from the query are not part of the EXPLAIN statement.
   *
   * @param _query - Search query parameters (unused, for future filtering)
   * @param parsedQuery - Parsed ts_query string
   * @param language - Text search language
   * @returns True if GIN index is used
   */
  private async verifyIndexUsage(
    _query: FullTextSearchQuery,
    parsedQuery: string,
    language: string
  ): Promise<boolean> {
    const client = await this.db.getConnection();

    try {
      const explainSQL = `
        EXPLAIN (FORMAT JSON)
        SELECT * FROM memories
        WHERE search_vector @@ to_tsquery($1, $2)
      `;

      const result = await client.query(explainSQL, [language, parsedQuery]);
      const plan = JSON.stringify(result.rows[0]);

      // Check if plan includes index scan on our GIN index
      return plan.includes("Index") && plan.includes("idx_memories_search_vector");
    } catch {
      // If EXPLAIN fails, assume index is not used
      return false;
    } finally {
      this.db.releaseConnection(client);
    }
  }

  /**
   * Get cache statistics
   *
   * @returns Cache performance statistics
   */
  getCacheStats(): ReturnType<ResultCache["getStats"]> {
    return this.cache.getStats();
  }

  /**
   * Clear search result cache
   */
  clearCache(): void {
    this.cache.clear();
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keyurgolani/ThoughtMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.