ProDisco

Overview Inspect Schema Related Servers Score Discussions

search-engine.ts•9.93 kB

/**
 * Orama search engine wrapper with typed search
 */

import {
  create,
  insert,
  insertMultiple,
  update,
  updateMultiple,
  search,
  remove,
  type Orama,
  type Results,
  type SearchParams,
} from '@orama/orama';
import { baseSchema, type BaseDocument, type OramaSchemaType } from '../schema/base-schema.js';
import { splitCamelCase } from '../extractor/ast-parser.js';

/**
 * Options for initializing the search engine
 */
export interface SearchEngineOptions {
  /** Custom schema (defaults to baseSchema) */
  schema?: OramaSchemaType;
  /** Tokenizer options */
  tokenizerOptions?: {
    stemming?: boolean;
    stemmerSkipProperties?: string[];
  };
}

/**
 * Search query options
 */
export interface SearchOptions {
  /** Full-text search term */
  query?: string;
  /** Filter by document type */
  documentType?: string;
  /** Filter by category */
  category?: string;
  /** Filter by library */
  library?: string;
  /** Exclusion filters */
  exclude?: {
    categories?: string[];
    libraries?: string[];
  };
  /** Maximum results (default: 10) */
  limit?: number;
  /** Pagination offset (default: 0) */
  offset?: number;
  /** Field boost weights */
  boost?: Record<string, number>;
}

/**
 * Search result
 */
export interface SearchResult<TDoc = BaseDocument> {
  /** Matched documents */
  results: TDoc[];
  /** Total matches before pagination */
  totalMatches: number;
  /** Facet counts */
  facets: {
    documentType: Record<string, number>;
    library: Record<string, number>;
    category: Record<string, number>;
  };
  /** Search execution time in ms */
  searchTime: number;
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
type AnyOrama = Orama<any>;

/** Minimum length every camelCase fragment must have for the split form to be added to the query. */
const MIN_SPLIT_PART_LENGTH = 3;

/** Query terms shorter than this are ignored by the name-based re-ranking. */
const MIN_RANKING_TERM_LENGTH = 2;

/**
 * Preprocess a search query by splitting camelCase terms.
 * This ensures queries like "readNamespacedPodLog" match indexed documents
 * that have "read Namespaced Pod Log" in their search tokens.
 *
 * Includes both the original term and the split version to handle cases like:
 * - "readNamespacedPodLog" → "readNamespacedPodLog read Namespaced Pod Log"
 * - "TeSt" → "TeSt" (no additional split since it's not useful camelCase)
 *
 * @param query - Raw user query; words are separated on runs of whitespace.
 * @returns The words of `query` joined by single spaces, each optionally
 *   followed by its camelCase-split form.
 */
function preprocessQuery(query: string): string {
  const tokens: string[] = [];

  for (const word of query.split(/\s+/)) {
    // Leading/trailing whitespace produces empty fragments; skip them.
    if (!word) continue;

    // Always include the original word
    tokens.push(word);

    // Only add the split version if it differs from the original and every
    // fragment is long enough to be a meaningful search token.
    const split = splitCamelCase(word);
    if (split === word) continue;

    const allPartsReasonable = split
      .split(' ')
      .every((part) => part.length >= MIN_SPLIT_PART_LENGTH);
    if (allPartsReasonable) {
      tokens.push(split);
    }
  }

  return tokens.join(' ');
}

/**
 * Search engine wrapping Orama.
 *
 * The underlying database is created lazily: every public operation
 * initializes the engine on first use, so calling `initialize()` explicitly is
 * optional.
 */
export class SearchEngine<TDoc extends BaseDocument = BaseDocument> {
  private db: AnyOrama | null = null;
  private options: SearchEngineOptions;
  /** In-flight initialization, shared by overlapping callers. */
  private initPromise: Promise<void> | null = null;

  /**
   * @param options - Schema and tokenizer overrides; defaults are applied in `initialize()`.
   */
  constructor(options: SearchEngineOptions = {}) {
    this.options = options;
  }

  /**
   * Initialize the search engine.
   *
   * Does nothing when the database already exists. Overlapping calls share a
   * single in-flight initialization, so only one Orama instance is created even
   * when several operations start before the first `create` has resolved.
   */
  async initialize(): Promise<void> {
    if (this.db) return;

    if (!this.initPromise) {
      // Clear the memo once settled so a failed attempt can be retried.
      this.initPromise = this.createDb().finally(() => {
        this.initPromise = null;
      });
    }

    await this.initPromise;
  }

  /**
   * Insert a single document (uses update if document already exists)
   *
   * @param doc - Document to index; its `id` is used for the update fallback.
   * @throws Any insertion error whose message does not contain "already exists".
   */
  async insert(doc: TDoc): Promise<void> {
    const db = await this.readyDb();

    try {
      await insert(db, doc);
    } catch (error) {
      // If document already exists, update it instead
      if (error instanceof Error && error.message.includes('already exists')) {
        await update(db, (doc as { id: string }).id, doc);
      } else {
        throw error;
      }
    }
  }

  /**
   * Insert multiple documents (uses update if documents already exist)
   *
   * @param docs - Documents to index; an empty array returns without
   *   initializing the engine.
   * @throws Any batch-insertion error whose message does not contain "already exists".
   */
  async insertBatch(docs: TDoc[]): Promise<void> {
    if (docs.length === 0) return;

    const db = await this.readyDb();

    try {
      await insertMultiple(db, docs);
    } catch (error) {
      // If some documents already exist, fall back to individual upserts
      if (error instanceof Error && error.message.includes('already exists')) {
        for (const doc of docs) {
          await this.insert(doc);
        }
      } else {
        throw error;
      }
    }
  }

  /**
   * Remove a document by ID
   *
   * @param id - Identifier of the document to remove.
   */
  async remove(id: string): Promise<void> {
    const db = await this.readyDb();
    await remove(db, id);
  }

  /**
   * Search the index.
   *
   * Positive filters (`documentType`, `category`, `library`) are passed to
   * Orama as a `where` clause. Exclusion filters are applied afterwards on the
   * fetched hits. Hits are re-ranked by: exact name match, then number of query
   * terms found in the name, then Orama's score.
   *
   * @param options - Query text, filters, paging and boost overrides.
   * @returns The requested page, the number of fetched hits remaining after
   *   exclusion filtering, Orama's facet values, and the elapsed time in ms.
   */
  async search(options: SearchOptions): Promise<SearchResult<TDoc>> {
    const db = await this.readyDb();

    const {
      query = '',
      documentType,
      category,
      library,
      exclude,
      limit = 10,
      offset = 0,
      boost,
    } = options;

    // Negative paging values would make `slice` count from the end of the array.
    const pageSize = Math.max(0, limit);
    const pageStart = Math.max(0, offset);

    const startTime = performance.now();

    // Preprocess query to split camelCase terms
    const processedQuery = preprocessQuery(query);

    // Build where clause for database-level filtering (more efficient than post-filtering)
    // For enum types in Orama, we need to use the 'eq' operator
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const where: Record<string, any> = {};
    if (documentType && documentType !== 'all') {
      where.documentType = { eq: documentType };
    }
    if (category) {
      where.category = { eq: category };
    }
    if (library) {
      where.library = { eq: library };
    }

    // Exclusions are applied after the search, so over-fetch. The window must
    // scale with the requested page end (offset + limit), otherwise deep pages
    // would come back empty even though matches exist.
    const fetchLimit = exclude
      ? Math.max((pageStart + pageSize) * 10, 500)
      : pageSize + pageStart + 50;

    // Build search params
    const searchParams: SearchParams<AnyOrama, TDoc> = {
      term: processedQuery,
      properties: ['name', 'description', 'searchTokens', 'properties'],
      boost: boost ?? {
        name: 3,
        searchTokens: 2,
        description: 1,
        properties: 0.5,
      },
      tolerance: 1, // Allow fuzzy matching
      limit: fetchLimit,
      facets: {
        documentType: {},
        library: {},
        category: {},
      },
      ...(Object.keys(where).length > 0 ? { where } : {}),
    };

    // Execute search
    const searchResult: Results<TDoc> = await search(db, searchParams);

    // Extract query terms for ranking (split by whitespace, filter short terms)
    const queryTerms = processedQuery
      .toLowerCase()
      .split(/\s+/)
      .filter((term) => term.length >= MIN_RANKING_TERM_LENGTH);

    // Trimmed so surrounding whitespace does not defeat the exact-name check.
    const queryLower = query.trim().toLowerCase();

    // Prefix match in either direction to handle plurals like "logs" → "log"
    const termMatches = (tokens: string[], term: string): boolean =>
      tokens.some((token) => token.startsWith(term) || term.startsWith(token));

    // Compute the ranking keys once per hit instead of once per comparison.
    const ranked = searchResult.hits.map((hit) => {
      const name = hit.document.name;
      // Must split BEFORE lowercasing since camelCase detection needs case info.
      // Empty tokens are dropped: `term.startsWith('')` is always true, so an
      // empty token would count as a match for every query term.
      const nameTokens = splitCamelCase(name)
        .toLowerCase()
        .split(/\s+/)
        .filter((token) => token.length > 0);

      return {
        document: hit.document,
        score: hit.score || 0,
        isExactMatch: name.toLowerCase() === queryLower,
        termCount: queryTerms.filter((term) => termMatches(nameTokens, term)).length,
      };
    });

    // Sort by relevance with custom ranking:
    // 1. Exact name match
    // 2. Name contains more query terms (higher = better)
    // 3. Orama's score
    ranked.sort((a, b) => {
      if (a.isExactMatch !== b.isExactMatch) {
        return a.isExactMatch ? -1 : 1;
      }
      if (a.termCount !== b.termCount) {
        return b.termCount - a.termCount;
      }
      return b.score - a.score;
    });

    // Apply exclusion filters (positive filters are handled by Orama's where clause)
    let filtered = ranked.map((entry) => entry.document);

    const excludedCategories = exclude?.categories;
    if (excludedCategories) {
      filtered = filtered.filter((doc) => !excludedCategories.includes(doc.category));
    }

    const excludedLibraries = exclude?.libraries;
    if (excludedLibraries) {
      filtered = filtered.filter((doc) => !excludedLibraries.includes(doc.library));
    }

    const totalMatches = filtered.length;

    // Apply pagination
    const paginated = filtered.slice(pageStart, pageStart + pageSize);

    const searchTime = performance.now() - startTime;

    return {
      results: paginated,
      totalMatches,
      facets: {
        documentType: (searchResult.facets?.documentType?.values || {}) as Record<string, number>,
        library: (searchResult.facets?.library?.values || {}) as Record<string, number>,
        category: (searchResult.facets?.category?.values || {}) as Record<string, number>,
      },
      searchTime,
    };
  }

  /**
   * Get the underlying Orama database
   *
   * @returns The database, or `null` if the engine has not been initialized.
   */
  getDb(): AnyOrama | null {
    return this.db;
  }

  /**
   * Check if the engine is initialized
   */
  isInitialized(): boolean {
    return this.db !== null;
  }

  /**
   * Shutdown the engine by dropping the reference to the database.
   * A later operation will lazily create a fresh, empty one.
   */
  async shutdown(): Promise<void> {
    this.db = null;
  }

  /**
   * Ensure the engine is initialized
   */
  private async ensureInitialized(): Promise<void> {
    if (!this.db) {
      await this.initialize();
    }
  }

  /**
   * Create the Orama database from the configured schema and tokenizer
   * options, applying defaults for anything not supplied.
   */
  private async createDb(): Promise<void> {
    const schema = this.options.schema ?? baseSchema;
    const tokenizerOptions = this.options.tokenizerOptions;

    this.db = await create({
      schema,
      components: {
        tokenizer: {
          stemming: tokenizerOptions?.stemming ?? true,
          stemmerSkipProperties: tokenizerOptions?.stemmerSkipProperties ?? [
            'name',
            'className',
            'library',
          ],
        },
      },
    });
  }

  /**
   * Initialize if needed and return the database, so callers get a non-null
   * handle without scattering non-null assertions.
   *
   * @throws Error if the database is still missing after initialization.
   */
  private async readyDb(): Promise<AnyOrama> {
    await this.ensureInitialized();
    if (!this.db) {
      throw new Error('SearchEngine is not initialized');
    }
    return this.db;
  }
}

Latest Blog Posts

What Is Context Bloat in MCP?
By Om-Shree-0709 on December 16, 2025.
mcp
Context Bloat
MCP Moves to the Linux Foundation: Neutral Stewardship for Agentic Infrastructure
By Om-Shree-0709 on December 15, 2025.
mcp
anthropic
Linux Foundation
Code Execution with MCP: Architecting Agentic Efficiency
By Om-Shree-0709 on December 14, 2025.
mcp
Token bloat

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/harche/ProDisco'

If you have feedback or need assistance with the MCP directory API, please join our Discord server