Skip to main content
Glama
query.ts5.95 kB
import { fuzzy } from "fast-fuzzy"; /** * Summary * → Tokenize keys and values * → generate shorter tokens using grams * → store tokens in an inverted index with their JSON paths Map<string, []<path, weight>> * → fuzzy-match query tokens * → score by similarity + weight + frequency * → group results by JSON branch * → return highest-scoring paths. * * 1) Tokenization * convert text to lowercase * split into alphanumeric words * also generate short overlapping substrings (“grams”) to allow partial / fuzzy matches * (e.g., "mcdonalds" → "mcdonalds", "mc", "mcd", "cdon", ...) * Each produced token is stored in an inverted index: * token → list of { path, weight } * path = the full JSON access path (brands.fast_food.items[2].name) * weight = importance (keys get higher weight than values) * * 2) Query Phase * The query text is tokenized the same way. * For each query token, the index returns all candidate (path, weight) entries. * A fuzzy similarity score is computed between the query token and the indexed token using fast-fuzzy. * Scores are accumulated per path, increasing when: * the same region matches multiple tokens * the token was originally in a key (higher weight) * the similarity is high * Return a fixed number of results * * 3) Result Grouping * Paths that belong to the same structural branch of the JSON (e.g., x.y.z[3].name, x.y.z[3].address) are grouped together. * Groups with more hits and deeper (more specific) paths get higher final scores. * The search finally returns the top scoring representative paths. * Final One-Line Summary (Precise) */ export interface SearchResult { path: string; score: number; } export type Posting = { path: string; weight: number }; export type InvertedIndex = Map<string, Posting[]>; function normalize(text: string): string { return text // Split lower→Upper: fooBar → foo Bar .replace(/([a-z])([A-Z])/g, "$1 $2") // Split acronym→word: HTMLFile → HTML File .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2") // Replace snake_case, kebab-case, dotted.name .replace(/[_\-.]+/g, " ") .toLowerCase(); } function tokenize(text: string): string[] { if (!text) return []; text = normalize(text); // Extract tokens const baseTokens = text.match(/[a-z0-9]+/g) ?? []; const out = new Set<string>(); for (const token of baseTokens) { out.add(token); // always keep base word // Generate 3–5 char n-grams for fuzzy matching for (let size = 3; size <= 5; size++) { if (token.length <= size) continue; for (let i = 0; i <= token.length - size; i++) { out.add(token.slice(i, i + size)); } } } return [...out]; } export function buildInvertedIndex(data: any): InvertedIndex { const index: InvertedIndex = new Map(); function add(token: string, path: string, weight = 1) { token = token.toLowerCase(); if (!index.has(token)) index.set(token, []); index.get(token)!.push({ path, weight }); } function walk(node: any, prefix: string) { if (typeof node === "string") { for (const t of tokenize(node)) add(t, prefix, 1); return; } if (Array.isArray(node)) { for (let i = 0; i < node.length; i++) { const next = prefix ? `${prefix}[${i}]` : `[${i}]`; walk(node[i], next); } return; } if (node && typeof node === "object") { for (const key in node) { const next = prefix ? `${prefix}.${key}` : key; for (const t of tokenize(key)) add(t, next, 2); walk(node[key], next); } } } walk(data, ""); return index; } export function queryIndex(index: InvertedIndex, query: string, limit: number) { const tokens = tokenize(query); const qLower = query.toLowerCase(); const scores = new Map<string, number>(); for (const token of tokens) { const postings = index.get(token); if (!postings) continue; const sim = fuzzy(token, qLower); for (const { path, weight } of postings) { scores.set(path, (scores.get(path) || 0) + weight * sim); } } return [...scores.entries()] .sort((a, b) => b[1] - a[1]) .slice(0, limit) .map(([path, score]) => ({ path, score })); } function groupKey(path: string): string { const parts = path.split("."); const res: string[] = []; for (const p of parts) { res.push(p); if (/\[\d+\]$/.test(p)) break; } return res.join("."); } function finalizeGrouping( results: { path: string; score: number }[], limit: number ): SearchResult[] { const groups = new Map< string, { representative: string; freq: number; depth: number; } >(); for (const r of results) { const key = groupKey(r.path); const depth = key.split(".").length; if (!groups.has(key)) { groups.set(key, { representative: r.path, freq: 0, depth, }); } const g = groups.get(key)!; g.freq++; } return [...groups.entries()] .map(([gKey, g]) => { const groupScore = g.freq * g.depth; const isSingle = g.freq === 1; return { path: isSingle ? g.representative : gKey, score: groupScore, } as SearchResult; }) .sort((a, b) => b.score - a.score) .slice(0, limit); } export function query(data: any, text: string, limit: number): SearchResult[] { const index = buildInvertedIndex(data); const raw = queryIndex(index, text, limit * 5); return finalizeGrouping(raw, limit); }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Adsdworld/mcp-json-navigator'

If you have feedback or need assistance with the MCP directory API, please join our Discord server