import { PorterStemmer, stopwords } from 'natural';
export class TextProcessor {
// Note: natural's WordTokenizer splits on hyphens, which would break compound
// words like 'cross-chain', so tokenization is a plain whitespace split in processText.
private stopWords = new Set(stopwords);
private normalizationRules = new Map([
// Common tech abbreviations
['txs', 'transactions'],
['tx', 'transaction'],
['auth', 'authentication'],
['config', 'configuration'],
['repo', 'repository'],
['db', 'database'],
['api', 'interface'],
['ui', 'interface'],
['ux', 'experience'],
// Cross-chain variations
['crosschain', 'cross-chain'],
['multichain', 'cross-chain'],
['xchain', 'cross-chain'],
['xm', 'cross-chain'],
['bridge', 'cross-chain'],
// Authentication variations
['signin', 'sign-in'],
['signup', 'sign-up'],
['login', 'sign-in'],
['logout', 'sign-out'],
// Common word variations
['javascript', 'js'],
['typescript', 'ts'],
['documentation', 'docs'],
['application', 'app'],
['development', 'dev'],
['production', 'prod'],
['environment', 'env'],
// 'config' already canonicalizes to 'configuration' above; a reverse entry here
// would make the two forms diverge instead of matching.
['implementation', 'impl'],
]);
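// Lookup is one-way and single-pass: a token matching a key is replaced by its
// value, everything else passes through, e.g. 'txs' → 'transactions',
// 'bridge' → 'cross-chain', 'wallet' → 'wallet'.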
private compoundWords = new Set([
'cross-chain',
'multi-chain',
'end-to-end',
'real-time',
'client-side',
'server-side',
'full-stack',
'open-source',
'peer-to-peer',
'point-to-point',
'state-of-the-art',
]);
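// These pass through stemToken unchanged; the hyphen check there already covers
// them, so this set mainly documents the supported compounds.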
processText(text: string): string[] {
if (!text) return [];
// Normalize text
const normalized = this.normalizeText(text);
// Tokenize on whitespace; normalizeText has already collapsed runs of spaces,
// and a plain split (unlike natural's WordTokenizer) keeps hyphenated compounds intact
const tokens = normalized.split(' ').filter(Boolean);
// Process tokens
const processed = tokens
.map(token => this.normalizeToken(token))
.filter(token => this.shouldKeepToken(token))
.map(token => this.stemToken(token));
return [...new Set(processed)]; // Remove duplicates
}
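// Example (illustrative; exact stems come from natural's PorterStemmer):
// processText('Signing cross-chain txs') → roughly ['sign', 'cross-chain', 'transact']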
processQuery(query: string): string[] {
// For queries, we want to expand rather than just normalize
const expanded = this.expandQuery(query);
return this.processText(expanded);
}
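// Example (illustrative): processQuery('transaction') expands the query to also
// include 'tx' and 'txs' (see expandQuery below) before normalizing and stemming.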
private normalizeText(text: string): string {
return text
.toLowerCase()
.replace(/[^\w\s-]/g, ' ') // Keep hyphens for compound words
.replace(/\s+/g, ' ')
.trim();
}
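// Example: normalizeText('Cross-chain TXs, fast!') → 'cross-chain txs fast'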
private normalizeToken(token: string): string {
const normalized = this.normalizationRules.get(token);
return normalized || token;
}
private shouldKeepToken(token: string): boolean {
if (token.length < 2) return false;
if (this.stopWords.has(token)) return false;
if (/^\d+$/.test(token)) return false; // Pure numbers
return true;
}
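// Examples: 'a' (too short), 'the' (stopword), and '2024' (pure number) are
// dropped; 'js' and 'cross-chain' are kept.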
private stemToken(token: string): string {
// Don't stem compound words or technical terms
if (this.compoundWords.has(token)) return token;
if (token.includes('-')) return token;
if (token.length < 4) return token; // Don't stem short words
return PorterStemmer.stem(token);
}
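// Example (stems assumed from natural's PorterStemmer): stemToken('indexing')
// → 'index', while stemToken('cross-chain') and stemToken('db') return as-is.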
private expandQuery(query: string): string {
const words = query.toLowerCase().split(/\s+/);
const expanded: string[] = [];
for (const word of words) {
expanded.push(word);
// Add normalized version
const normalized = this.normalizationRules.get(word);
if (normalized) {
expanded.push(normalized);
}
// Add reverse mappings (e.g., if query has "transaction", add "tx");
// the substring check also catches plurals like "transactions" → "txs".
// Very short words are excluded, since e.g. "in" would otherwise match
// "sign-in" and "interface" and pull in unrelated abbreviations.
for (const [abbrev, full] of this.normalizationRules) {
if (full === word || (word.length > 2 && full.includes(word))) {
expanded.push(abbrev);
}
}
}
return expanded.join(' ');
}
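// Example (illustrative): expandQuery('chain') keeps 'chain' and, via the
// substring check, adds every abbreviation whose canonical form contains it:
// 'crosschain', 'multichain', 'xchain', 'xm', 'bridge'.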
// Create searchable text for indexing
createSearchableText(title: string, content: string, section: string, subsection?: string): string {
const parts = [title, content, section];
if (subsection) parts.push(subsection);
const processed = parts
.map(part => this.processText(part).join(' '))
.filter(Boolean);
return processed.join(' ');
}
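// Note: deduplication happens per part inside processText, so a term shared by
// title and content appears more than once in the result, which can act as a
// crude relevance weight for downstream scorers.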
// Get search terms with different strategies
getSearchTerms(query: string): {
exact: string[];
expanded: string[];
stemmed: string[];
} {
const exact = query.toLowerCase().split(/\s+/).filter(Boolean);
const expanded = this.expandQuery(query).split(/\s+/).filter(Boolean);
const stemmed = this.processQuery(query);
return { exact, expanded, stemmed };
}
}
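// Usage sketch (illustrative; exact stems depend on natural's PorterStemmer):
//
//   const processor = new TextProcessor();
//   processor.processText('Configuring cross-chain transactions');
//   // → roughly ['configur', 'cross-chain', 'transact']
//   processor.getSearchTerms('tx config');
//   // exact: ['tx', 'config']; expanded adds 'transaction' and 'configuration';
//   // stemmed: roughly ['transact', 'configur']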