/**
* Search Utilities - Intelligent query processing for Minecraft modding documentation
* Handles tokenization, synonyms, query expansion, and relevance scoring
*/
/**
 * Synonym table for the Minecraft modding domain.
 *
 * Each key is a canonical search term; its array lists related words that
 * query expansion treats as interchangeable with it. Key order is significant
 * to consumers that iterate the table, so keep new entries grouped but do not
 * reshuffle existing ones.
 */
const MINECRAFT_SYNONYMS: Record<string, string[]> = {
  // --- Registration ---
  register: ['registration', 'registering', 'create', 'add', 'define', 'declare', 'init', 'initialize'],
  item: ['items', 'itemstack', 'tool', 'weapon', 'armor'],
  block: ['blocks', 'blockstate', 'tile'],
  entity: ['entities', 'mob', 'creature', 'living'],

  // --- Everyday verbs and adjectives ---
  create: ['make', 'build', 'generate', 'new', 'add', 'register'],
  custom: ['custom', 'modded', 'new', 'own'],
  add: ['adding', 'insert', 'include', 'register', 'create'],

  // --- Technical vocabulary ---
  mixin: ['mixins', 'injection', 'inject', 'hook', 'patch'],
  event: ['events', 'listener', 'handler', 'callback', 'subscribe'],
  network: ['networking', 'packet', 'packets', 'sync', 'synchronize', 's2c', 'c2s'],
  render: ['rendering', 'renderer', 'draw', 'display', 'model', 'texture'],
  data: ['datagen', 'data-generation', 'datapack', 'json', 'recipe', 'loot'],

  // --- Fabric ---
  fabric: ['fabricmc', 'fabric-api', 'fapi'],
  entrypoint: ['entrypoints', 'initializer', 'mod-initializer', 'main', 'client', 'server'],
  registry: ['registries', 'registered', 'identifier', 'id'],

  // --- Block-related types ---
  blockentity: ['block-entity', 'tile-entity', 'tileentity', 'be'],
  container: ['inventory', 'chest', 'storage', 'gui', 'screen', 'menu'],

  // --- Gameplay features ---
  recipe: ['recipes', 'crafting', 'smelting', 'cooking'],
  loot: ['loottable', 'loot-table', 'drops', 'drop'],
  tag: ['tags', 'tagging', 'itemtag', 'blocktag'],
  sound: ['sounds', 'audio', 'music', 'sfx'],
  particle: ['particles', 'effect', 'effects', 'fx'],

  // --- Commands and input ---
  command: ['commands', 'cmd', 'brigadier', 'argument'],
  keybind: ['keybinding', 'keybinds', 'key', 'hotkey', 'shortcut', 'input'],

  // --- World ---
  world: ['level', 'dimension', 'worldgen', 'generation', 'biome'],
  structure: ['structures', 'building', 'feature', 'worldgen'],
};
/**
 * Class, method and annotation names associated with each search topic.
 *
 * Keyed by the same canonical terms used in the synonym table; the values are
 * identifiers that are looked for inside code snippets when scoring results.
 */
const CODE_PATTERNS: Record<string, string[]> = {
  // Registration and core game objects
  register: ['Registry.register', 'REGISTRY', 'Registries', 'RegistryKey'],
  item: ['Item', 'ItemStack', 'Item.Settings', 'FabricItemSettings'],
  block: ['Block', 'BlockState', 'Block.Settings', 'FabricBlockSettings'],
  entity: ['Entity', 'LivingEntity', 'EntityType', 'FabricEntityTypeBuilder'],
  blockentity: ['BlockEntity', 'BlockEntityType', 'FabricBlockEntityTypeBuilder'],

  // Hooks, events and networking
  mixin: ['@Mixin', '@Inject', '@Redirect', '@ModifyVariable', 'CallbackInfo'],
  event: ['Event', 'Callback', 'register()', 'ServerLifecycleEvents', 'ClientLifecycleEvents'],
  network: ['PacketByteBuf', 'ServerPlayNetworking', 'ClientPlayNetworking', 'PayloadTypeRegistry'],

  // Gameplay features and input
  recipe: ['Recipe', 'RecipeSerializer', 'RecipeType', 'Ingredient'],
  command: ['CommandRegistrationCallback', 'LiteralArgumentBuilder', 'RequiredArgumentBuilder'],
  keybind: ['KeyBinding', 'KeyBindingHelper', 'GLFW'],

  // Client-side UI and rendering
  screen: ['Screen', 'HandledScreen', 'ScreenHandler', 'ContainerScreen'],
  render: ['Renderer', 'RenderLayer', 'VertexConsumer', 'MatrixStack', 'DrawContext'],
};
/** Result of preprocessing a raw search query; this is what tokenizeQuery returns. */
export interface TokenizedQuery {
/** The query string with surrounding whitespace trimmed. */
original: string;
/** Lowercased terms split out of the query. */
tokens: string[];
/** The tokens together with their related terms from the synonym table. */
expandedTokens: string[];
/** Query string produced by buildFtsQuery for FTS5 full-text matching. */
ftsQuery: string;
/** SQL LIKE patterns produced by buildLikePatterns for fallback search. */
likePatterns: string[];
/** Class/method names from CODE_PATTERNS associated with the tokens. */
codePatterns: string[];
}
/**
 * Tokenize and preprocess a search query.
 *
 * The query is trimmed, lowercased and split on whitespace and common
 * punctuation. Each piece is reduced to `a-z0-9` and kept only if at least two
 * characters remain. The tokens are then expanded with domain synonyms and
 * turned into an FTS5 query, LIKE fallback patterns and code patterns.
 *
 * @param query - Raw user input.
 * @returns The trimmed query plus every derived search representation.
 */
export function tokenizeQuery(query: string): TokenizedQuery {
  const original = query.trim();

  // Strip non-alphanumerics BEFORE the length check so that pieces such as
  // "=>" (becomes "") or "c++" (becomes "c") cannot slip through as empty or
  // one-character tokens.
  const tokens = original
    .toLowerCase()
    .split(/[\s\-_.,;:!?()[\]{}'"]+/)
    .map((piece) => piece.replace(/[^a-z0-9]/g, ''))
    .filter((piece) => piece.length > 1);

  const expandedSet = new Set<string>();
  for (const token of tokens) {
    expandedSet.add(token);

    // Direct synonyms. Only own keys are consulted: a plain index lookup would
    // also resolve inherited members (e.g. the token "constructor"), which are
    // not arrays and would make the iteration below throw.
    const direct = Object.prototype.hasOwnProperty.call(MINECRAFT_SYNONYMS, token)
      ? MINECRAFT_SYNONYMS[token]
      : undefined;
    if (direct) {
      for (const synonym of direct) {
        expandedSet.add(synonym);
      }
    }

    // Reverse lookup: the token is itself listed as a synonym of another key,
    // so pull in that key and all of its siblings.
    for (const [key, synonyms] of Object.entries(MINECRAFT_SYNONYMS)) {
      if (!synonyms.includes(token)) continue;
      expandedSet.add(key);
      for (const synonym of synonyms) {
        expandedSet.add(synonym);
      }
    }
  }
  const expandedTokens = Array.from(expandedSet);

  return {
    original,
    tokens,
    expandedTokens,
    // FTS5 query with OR logic over the expanded terms
    ftsQuery: buildFtsQuery(tokens, expandedTokens),
    // LIKE patterns for the non-FTS fallback
    likePatterns: buildLikePatterns(tokens),
    // Code identifiers related to the tokens
    codePatterns: getCodePatterns(tokens),
  };
}
/**
 * Build an FTS5 MATCH expression from the query tokens and their expansions.
 *
 * Every term is emitted as a double-quoted FTS5 string and the terms are
 * joined with `OR`. Quoting matters because expanded terms may contain
 * characters such as `-` (e.g. `data-generation`) that are not legal in an
 * FTS5 bareword and would otherwise make the whole expression fail to parse.
 * For multi-token queries the full token sequence is added first as a phrase.
 *
 * @param tokens - Original query tokens, in query order.
 * @param expandedTokens - Tokens plus synonyms.
 * @returns The MATCH expression, or `''` when there is nothing to search for.
 */
function buildFtsQuery(tokens: string[], expandedTokens: string[]): string {
  // Quote characters are dropped rather than escaped; they carry no search value.
  const sanitize = (term: string): string => term.replace(/['"]/g, '');
  const quote = (term: string): string => `"${term}"`;

  const cleanTokens = tokens.map(sanitize).filter((t) => t.length > 0);
  if (cleanTokens.length === 0) return '';

  // Deduplicate after sanitizing, then drop very short terms.
  const searchTerms = Array.from(
    new Set([...cleanTokens, ...expandedTokens.map(sanitize)])
  ).filter((t) => t.length > 2);

  if (searchTerms.length === 0) {
    // Fallback: every term was short, so search the original tokens anyway.
    return Array.from(new Set(cleanTokens)).map(quote).join(' OR ');
  }

  const parts: string[] = [];
  // The whole query as a phrase is listed first for multi-word queries.
  if (cleanTokens.length > 1) {
    parts.push(quote(cleanTokens.join(' ')));
  }
  for (const term of searchTerms) {
    parts.push(quote(term));
  }
  return parts.join(' OR ');
}
/**
 * Produce SQL LIKE patterns for the non-FTS fallback search.
 *
 * The first pattern (present whenever there is at least one token) matches all
 * tokens in order with anything in between. It is followed by one
 * `%token%` pattern per token longer than two characters.
 *
 * @param tokens - Query tokens, in query order.
 * @returns The ordered-sequence pattern followed by the per-token patterns.
 */
function buildLikePatterns(tokens: string[]): string[] {
  const wrap = (text: string): string => `%${text}%`;

  const sequencePattern = tokens.length > 0 ? [wrap(tokens.join('%'))] : [];
  const perTokenPatterns = tokens.filter((t) => t.length > 2).map((t) => wrap(t));

  return [...sequencePattern, ...perTokenPatterns];
}
/**
 * Collect the code patterns relevant to the query tokens.
 *
 * A token contributes the patterns registered directly under it in
 * CODE_PATTERNS, plus the patterns of every synonym-table key that lists the
 * token as a synonym.
 *
 * @param tokens - Query tokens.
 * @returns Unique patterns in first-seen order.
 */
function getCodePatterns(tokens: string[]): string[] {
  // Own-key lookup only: indexing the table with a token such as "constructor"
  // would otherwise resolve an inherited Object.prototype member, which is not
  // an array and cannot be iterated.
  const patternsFor = (key: string): string[] | undefined =>
    Object.prototype.hasOwnProperty.call(CODE_PATTERNS, key) ? CODE_PATTERNS[key] : undefined;

  const collected = new Set<string>();
  const addAll = (patterns: string[] | undefined): void => {
    if (!patterns) return;
    for (const pattern of patterns) {
      collected.add(pattern);
    }
  };

  for (const token of tokens) {
    addAll(patternsFor(token));

    // The token may be a synonym of a key that has code patterns.
    for (const [key, synonyms] of Object.entries(MINECRAFT_SYNONYMS)) {
      if (synonyms.includes(token)) {
        addAll(patternsFor(key));
      }
    }
  }
  return Array.from(collected);
}
/** A search result paired with its relevance score. */
export interface ScoredResult<T> {
/** The underlying result record. */
item: T;
/** Relevance score; deduplicateAndRank orders results by this value, highest first. */
score: number;
/** Labels explaining the score — presumably the reasons from calculateRelevanceScore; confirm against the caller. */
matchReasons: string[];
}
/**
 * Calculate a relevance score for a search result.
 *
 * Points are awarded, in this order, for:
 * - the whole query appearing in the title (100), heading (80) or caption (70);
 * - each query token (2+ characters) appearing in the title (20), heading (18),
 *   caption (15), URL (12), category (10) or content (5);
 * - each code pattern appearing in the item's code (25);
 * - each synonym-only token appearing in the title or heading (8);
 * - the item having more than 50 characters of code (10).
 *
 * All comparisons are case-insensitive substring checks. A blank query phrase
 * and empty tokens/patterns are ignored, because an empty string is a
 * substring of everything and would award points to every item.
 *
 * @param item - The result record; missing or null fields count as empty.
 * @param query - The preprocessed query.
 * @returns The total score and the distinct reasons that contributed to it,
 *   in the order they were first awarded.
 */
export function calculateRelevanceScore(
  item: {
    title?: string;
    content?: string;
    section_heading?: string | null;
    section_content?: string;
    code?: string;
    caption?: string | null;
    category?: string;
    url?: string;
  },
  query: TokenizedQuery
): { score: number; reasons: string[] } {
  let score = 0;
  const reasons: string[] = [];
  // Adds points every time, but records each reason label only once.
  const award = (points: number, reason: string): void => {
    score += points;
    if (!reasons.includes(reason)) reasons.push(reason);
  };

  const title = (item.title || '').toLowerCase();
  const heading = (item.section_heading || '').toLowerCase();
  const content = (item.content || item.section_content || '').toLowerCase();
  const code = (item.code || '').toLowerCase();
  const caption = (item.caption || '').toLowerCase();
  const category = (item.category || '').toLowerCase();
  const url = (item.url || '').toLowerCase();

  // Exact phrase match (highest score). Skipped for a blank query.
  const phrase = query.original.toLowerCase();
  if (phrase.trim().length > 0) {
    if (title.includes(phrase)) award(100, 'exact phrase in title');
    if (heading.includes(phrase)) award(80, 'exact phrase in heading');
    if (caption.includes(phrase)) award(70, 'exact phrase in caption');
  }

  // Per-token matches, checked field by field from most to least valuable.
  const tokenFields: Array<[string, number, string]> = [
    [title, 20, 'token in title'],
    [heading, 18, 'token in heading'],
    [caption, 15, 'token in caption'],
    [url, 12, 'token in URL'],
    [category, 10, 'token in category'],
    [content, 5, 'token in content'],
  ];
  for (const token of query.tokens) {
    if (token.length < 2) continue;
    for (const [text, points, reason] of tokenFields) {
      if (text.includes(token)) award(points, reason);
    }
  }

  // Code pattern matches
  for (const pattern of query.codePatterns) {
    if (pattern.length > 0 && code.includes(pattern.toLowerCase())) {
      award(25, 'code pattern match');
    }
  }

  // Synonym-only matches (lower weight); original tokens were scored above.
  for (const token of query.expandedTokens) {
    if (token.length === 0 || query.tokens.includes(token)) continue;
    if (title.includes(token) || heading.includes(token)) {
      award(8, 'synonym match');
    }
  }

  // Boost results that carry a meaningful amount of code.
  if (item.code && item.code.length > 50) {
    award(10, 'has substantial code');
  }

  return { score, reasons };
}
/**
 * Rank results by score and drop duplicates.
 *
 * Results are ordered by descending score (the input array is not mutated).
 * Two results are duplicates when they share a key: the first 200 characters
 * of their code with whitespace runs collapsed, or — for results without
 * code — their URL and section heading. The highest-scoring result of each
 * group is kept.
 *
 * @param results - Scored results in any order.
 * @param limit - Maximum number of results to return; zero or a negative
 *   value yields an empty array.
 * @returns At most `limit` unique results, best first.
 */
export function deduplicateAndRank<
  T extends { code?: string; url?: string; section_heading?: string | null },
>(results: ScoredResult<T>[], limit: number): ScoredResult<T>[] {
  const ranked = [...results].sort((a, b) => b.score - a.score);

  const seenKeys = new Set<string>();
  const unique: ScoredResult<T>[] = [];

  for (const result of ranked) {
    // Check the limit before adding so that a limit of 0 returns nothing.
    if (unique.length >= limit) break;

    const { code, url, section_heading: heading } = result.item;
    const key = code
      ? code.substring(0, 200).replace(/\s+/g, ' ')
      : `${url || ''}::${heading || ''}`;

    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    unique.push(result);
  }

  return unique;
}
/**
 * Normalize a raw score to an integer in the 0–100 range.
 *
 * The score is expressed as a rounded percentage of `maxPossible` and clamped
 * at both ends, so negative scores give 0 and scores above `maxPossible` give
 * 100. An undefined ratio (for example `0 / 0`) gives 0.
 *
 * @param score - Raw relevance score.
 * @param maxPossible - Score that maps to 100. Defaults to 200.
 * @returns An integer between 0 and 100 inclusive.
 */
export function normalizeScore(score: number, maxPossible: number = 200): number {
  const percent = Math.round((score / maxPossible) * 100);
  if (Number.isNaN(percent)) return 0;
  return Math.min(100, Math.max(0, percent));
}