Skip to main content
Glama
search-utils.ts (11.4 kB)
/**
 * Search Utilities - Intelligent query processing for Minecraft modding documentation
 * Handles tokenization, synonyms, query expansion, and relevance scoring
 */

/**
 * Minecraft modding domain-specific synonyms and related terms.
 * Keys are canonical lowercase terms; values are the terms a query containing
 * the key (or any of the values) is expanded with.
 */
const MINECRAFT_SYNONYMS: Record<string, string[]> = {
  // Registration patterns
  register: [
    'registration',
    'registering',
    'create',
    'add',
    'define',
    'declare',
    'init',
    'initialize',
  ],
  item: ['items', 'itemstack', 'tool', 'weapon', 'armor'],
  block: ['blocks', 'blockstate', 'tile'],
  entity: ['entities', 'mob', 'creature', 'living'],

  // Common actions
  create: ['make', 'build', 'generate', 'new', 'add', 'register'],
  custom: ['custom', 'modded', 'new', 'own'],
  add: ['adding', 'insert', 'include', 'register', 'create'],

  // Technical terms
  mixin: ['mixins', 'injection', 'inject', 'hook', 'patch'],
  event: ['events', 'listener', 'handler', 'callback', 'subscribe'],
  network: ['networking', 'packet', 'packets', 'sync', 'synchronize', 's2c', 'c2s'],
  render: ['rendering', 'renderer', 'draw', 'display', 'model', 'texture'],
  data: ['datagen', 'data-generation', 'datapack', 'json', 'recipe', 'loot'],

  // Fabric-specific
  fabric: ['fabricmc', 'fabric-api', 'fapi'],
  entrypoint: ['entrypoints', 'initializer', 'mod-initializer', 'main', 'client', 'server'],
  registry: ['registries', 'registered', 'identifier', 'id'],

  // Block types
  blockentity: ['block-entity', 'tile-entity', 'tileentity', 'be'],
  container: ['inventory', 'chest', 'storage', 'gui', 'screen', 'menu'],

  // Features
  recipe: ['recipes', 'crafting', 'smelting', 'cooking'],
  loot: ['loottable', 'loot-table', 'drops', 'drop'],
  tag: ['tags', 'tagging', 'itemtag', 'blocktag'],
  sound: ['sounds', 'audio', 'music', 'sfx'],
  particle: ['particles', 'effect', 'effects', 'fx'],

  // Commands
  command: ['commands', 'cmd', 'brigadier', 'argument'],
  keybind: ['keybinding', 'keybinds', 'key', 'hotkey', 'shortcut', 'input'],

  // World
  world: ['level', 'dimension', 'worldgen', 'generation', 'biome'],
  structure: ['structures', 'building', 'feature', 'worldgen'],
};

/**
 * Common Minecraft class/method patterns to help with code search.
 * Keys use the same canonical terms as MINECRAFT_SYNONYMS.
 */
const CODE_PATTERNS: Record<string, string[]> = {
  register: ['Registry.register', 'REGISTRY', 'Registries', 'RegistryKey'],
  item: ['Item', 'ItemStack', 'Item.Settings', 'FabricItemSettings'],
  block: ['Block', 'BlockState', 'Block.Settings', 'FabricBlockSettings'],
  entity: ['Entity', 'LivingEntity', 'EntityType', 'FabricEntityTypeBuilder'],
  blockentity: ['BlockEntity', 'BlockEntityType', 'FabricBlockEntityTypeBuilder'],
  mixin: ['@Mixin', '@Inject', '@Redirect', '@ModifyVariable', 'CallbackInfo'],
  event: ['Event', 'Callback', 'register()', 'ServerLifecycleEvents', 'ClientLifecycleEvents'],
  network: ['PacketByteBuf', 'ServerPlayNetworking', 'ClientPlayNetworking', 'PayloadTypeRegistry'],
  recipe: ['Recipe', 'RecipeSerializer', 'RecipeType', 'Ingredient'],
  command: ['CommandRegistrationCallback', 'LiteralArgumentBuilder', 'RequiredArgumentBuilder'],
  keybind: ['KeyBinding', 'KeyBindingHelper', 'GLFW'],
  screen: ['Screen', 'HandledScreen', 'ScreenHandler', 'ContainerScreen'],
  render: ['Renderer', 'RenderLayer', 'VertexConsumer', 'MatrixStack', 'DrawContext'],
};

/**
 * Look up `key` in a lookup table, considering own properties only.
 *
 * Plain-object indexing would also resolve inherited members: the user token
 * "constructor" maps to `Object.prototype.constructor`, which is truthy but not
 * iterable and would crash the callers below.
 */
function ownEntry(table: Record<string, string[]>, key: string): string[] | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** Result of preprocessing a raw search query. */
export interface TokenizedQuery {
  /** The trimmed query exactly as the user typed it. */
  original: string;
  /** Lowercased alphanumeric tokens of at least two characters. */
  tokens: string[];
  /** `tokens` plus every synonym reachable through MINECRAFT_SYNONYMS. */
  expandedTokens: string[];
  /** OR-joined FTS5 MATCH expression (empty string when there are no tokens). */
  ftsQuery: string;
  /** SQL LIKE patterns for the non-FTS fallback search. */
  likePatterns: string[];
  /** Class/method names from CODE_PATTERNS related to the tokens. */
  codePatterns: string[];
}

/**
 * Tokenize and preprocess a search query.
 *
 * @param query - Raw user input.
 * @returns The tokens, their synonym expansion and the derived FTS5 / LIKE /
 *   code-pattern search inputs.
 */
export function tokenizeQuery(query: string): TokenizedQuery {
  const original = query.trim();

  // Split on whitespace and common separators, strip whatever non-alphanumeric
  // characters remain, and only THEN apply the length filter so that tokens
  // emptied or shortened by the strip (e.g. "##", "c++") are dropped.
  const tokens = original
    .toLowerCase()
    .split(/[\s\-_.,;:!?()[\]{}'"]+/)
    .map((t) => t.replace(/[^a-z0-9]/g, ''))
    .filter((t) => t.length > 1);

  // Expand tokens with synonyms (Set keeps first-seen order and removes duplicates).
  const expandedSet = new Set<string>();
  for (const token of tokens) {
    expandedSet.add(token);

    // Token is itself a canonical key: add its synonyms.
    const direct = ownEntry(MINECRAFT_SYNONYMS, token);
    if (direct) {
      for (const syn of direct) expandedSet.add(syn);
    }

    // Token is a synonym of some key: add that key and its sibling synonyms.
    for (const [key, synonyms] of Object.entries(MINECRAFT_SYNONYMS)) {
      if (synonyms.includes(token)) {
        expandedSet.add(key);
        for (const syn of synonyms) expandedSet.add(syn);
      }
    }
  }
  const expandedTokens = Array.from(expandedSet);

  return {
    original,
    tokens,
    expandedTokens,
    ftsQuery: buildFtsQuery(tokens, expandedTokens),
    likePatterns: buildLikePatterns(tokens),
    codePatterns: getCodePatterns(tokens),
  };
}

/**
 * Build an FTS5 MATCH expression that ORs the original phrase with every
 * original and expanded term.
 *
 * Every term is emitted as a double-quoted FTS5 string. Several synonyms
 * contain hyphens ("data-generation", "fabric-api", ...), which are not legal
 * in FTS5 barewords and would otherwise make the whole query fail to parse.
 *
 * @param tokens - Original query tokens.
 * @param expandedTokens - Tokens plus synonyms.
 * @returns The MATCH expression, or '' when there are no tokens.
 */
function buildFtsQuery(tokens: string[], expandedTokens: string[]): string {
  if (tokens.length === 0) return '';

  // FTS5 string literal: wrap in double quotes, doubling any embedded quote.
  const quote = (term: string): string => `"${term.replace(/"/g, '""')}"`;

  const uniqueTerms = Array.from(new Set([...tokens, ...expandedTokens]));

  // Drop very short terms and strip quote characters.
  const validTerms = uniqueTerms
    .filter((t) => t.length > 2)
    .map((t) => t.replace(/['"]/g, ''))
    .filter((t) => t.length > 0);

  if (validTerms.length === 0) {
    // Fallback: use the original tokens even though they are short.
    return tokens.map(quote).join(' OR ');
  }

  const parts: string[] = [];

  // The whole query as a phrase (only meaningful with more than one token).
  if (tokens.length > 1) {
    parts.push(quote(tokens.join(' ')));
  }

  for (const term of validTerms) {
    parts.push(quote(term));
  }

  return parts.join(' OR ');
}

/**
 * Build LIKE patterns for SQL fallback search.
 *
 * @param tokens - Original query tokens.
 * @returns One pattern matching all tokens in order, followed by one pattern
 *   per token longer than two characters.
 */
function buildLikePatterns(tokens: string[]): string[] {
  const patterns: string[] = [];

  // Full phrase pattern: tokens in order with anything in between.
  if (tokens.length > 0) {
    patterns.push(`%${tokens.join('%')}%`);
  }

  // Individual token patterns.
  for (const token of tokens) {
    if (token.length > 2) {
      patterns.push(`%${token}%`);
    }
  }

  return patterns;
}

/**
 * Get relevant code patterns based on query tokens.
 *
 * @param tokens - Original query tokens.
 * @returns De-duplicated CODE_PATTERNS entries for every token that is a
 *   pattern key or a synonym of one.
 */
function getCodePatterns(tokens: string[]): string[] {
  const patterns: string[] = [];

  for (const token of tokens) {
    const direct = ownEntry(CODE_PATTERNS, token);
    if (direct) {
      patterns.push(...direct);
    }

    // Check synonyms too: a token may be a synonym of a key that has patterns.
    for (const [key, synonyms] of Object.entries(MINECRAFT_SYNONYMS)) {
      if (!synonyms.includes(token)) continue;
      const viaSynonym = ownEntry(CODE_PATTERNS, key);
      if (viaSynonym) {
        patterns.push(...viaSynonym);
      }
    }
  }

  return Array.from(new Set(patterns));
}

/** A search result paired with its relevance score and the reasons for it. */
export interface ScoredResult<T> {
  item: T;
  score: number;
  matchReasons: string[];
}

/**
 * Calculate relevance score for a search result.
 *
 * Scoring is additive: exact-phrase hits in title/heading/caption, per-token
 * hits in each text field, code-pattern hits in `code`, synonym hits in
 * title/heading, and a small boost for results with more than 50 characters of code.
 *
 * @param item - The candidate result; missing fields are treated as empty.
 * @param query - The preprocessed query from `tokenizeQuery`.
 * @returns The total score and the distinct reasons that contributed to it.
 */
export function calculateRelevanceScore(
  item: {
    title?: string;
    content?: string;
    section_heading?: string | null;
    section_content?: string;
    code?: string;
    caption?: string | null;
    category?: string;
    url?: string;
  },
  query: TokenizedQuery
): { score: number; reasons: string[] } {
  let score = 0;
  const reasons: string[] = [];
  const addReason = (reason: string): void => {
    if (!reasons.includes(reason)) reasons.push(reason);
  };

  const titleLower = (item.title || '').toLowerCase();
  const headingLower = (item.section_heading || '').toLowerCase();
  const contentLower = (item.content || item.section_content || '').toLowerCase();
  const codeLower = (item.code || '').toLowerCase();
  const captionLower = (item.caption || '').toLowerCase();
  const categoryLower = (item.category || '').toLowerCase();
  const urlLower = (item.url || '').toLowerCase();

  // Exact phrase match (highest score). Skipped for an empty query, because
  // every string "includes" the empty string and all items would score +250.
  const originalLower = query.original.toLowerCase();
  if (originalLower.length > 0) {
    if (titleLower.includes(originalLower)) {
      score += 100;
      addReason('exact phrase in title');
    }
    if (headingLower.includes(originalLower)) {
      score += 80;
      addReason('exact phrase in heading');
    }
    if (captionLower.includes(originalLower)) {
      score += 70;
      addReason('exact phrase in caption');
    }
  }

  // Per-token weights, checked in this order for every token.
  const tokenFields: ReadonlyArray<readonly [string, number, string]> = [
    [titleLower, 20, 'token in title'],
    [headingLower, 18, 'token in heading'],
    [captionLower, 15, 'token in caption'],
    [urlLower, 12, 'token in URL'],
    [categoryLower, 10, 'token in category'],
    [contentLower, 5, 'token in content'],
  ];

  for (const token of query.tokens) {
    if (token.length < 2) continue;
    for (const [text, weight, reason] of tokenFields) {
      if (text.includes(token)) {
        score += weight;
        addReason(reason);
      }
    }
  }

  // Code pattern matches
  for (const pattern of query.codePatterns) {
    if (codeLower.includes(pattern.toLowerCase())) {
      score += 25;
      addReason('code pattern match');
    }
  }

  // Expanded token matches (lower weight); original tokens were scored above.
  for (const token of query.expandedTokens) {
    if (query.tokens.includes(token)) continue;
    if (titleLower.includes(token) || headingLower.includes(token)) {
      score += 8;
      addReason('synonym match');
    }
  }

  // Boost for having code
  if (item.code && item.code.length > 50) {
    score += 10;
    addReason('has substantial code');
  }

  return { score, reasons };
}

/**
 * Deduplicate and rank results.
 *
 * Results are sorted by score (descending) without mutating the input. Two
 * results are duplicates when the first 200 characters of their code are equal
 * after collapsing whitespace runs or, for results without code, when URL and
 * section heading are both equal. The highest-scored duplicate is kept.
 *
 * @param results - Scored results in any order.
 * @param limit - Maximum number of results to return; values <= 0 yield [].
 * @returns At most `limit` unique results, best first.
 */
export function deduplicateAndRank<
  T extends { code?: string; url?: string; section_heading?: string | null },
>(results: ScoredResult<T>[], limit: number): ScoredResult<T>[] {
  // Without this guard the first result would be pushed before the limit check runs.
  if (limit <= 0) return [];

  const sorted = [...results].sort((a, b) => b.score - a.score);

  const seen = new Set<string>();
  const deduplicated: ScoredResult<T>[] = [];

  for (const result of sorted) {
    const { code, url, section_heading: heading } = result.item;
    const key = code
      ? code.substring(0, 200).replace(/\s+/g, ' ')
      : `${url || ''}::${heading || ''}`;

    if (!seen.has(key)) {
      seen.add(key);
      deduplicated.push(result);
    }

    if (deduplicated.length >= limit) break;
  }

  return deduplicated;
}

/**
 * Normalize a score to the 0-100 range.
 *
 * @param score - Raw relevance score.
 * @param maxPossible - Score that maps to 100 (default 200).
 * @returns An integer in [0, 100]; 0 when `maxPossible` is not positive or the
 *   result is not a number.
 */
export function normalizeScore(score: number, maxPossible: number = 200): number {
  if (!(maxPossible > 0)) return 0;
  const percent = Math.round((score / maxPossible) * 100);
  if (Number.isNaN(percent)) return 0;
  return Math.max(0, Math.min(100, percent));
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OGMatrix/mcmodding-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.