Skip to main content
Glama
index.ts (7.58 kB)
/**
 * Embeddings Module - Semantic Tool Search
 *
 * Provides semantic search over WP Navigator tools using dual-mode matching:
 * 1. TF-IDF (default): Fast keyword-based scoring, no model download
 * 2. Embeddings (optional): Higher accuracy via @xenova/transformers
 *
 * Tool vectors are pre-computed at build time and shipped with the package.
 *
 * @package WP_Navigator_MCP
 * @since 2.7.0
 */

import { cosineSimilarity } from './cosine.js';
import { buildIndex, score as tfidfScore, TFIDFIndex, extractKeywords } from './tf-idf.js';

/**
 * Tool embedding data structure
 */
export interface ToolEmbedding {
  /** Tool name (e.g., wpnav_list_posts) */
  name: string;
  /** Tool description */
  description: string;
  /** Tool category */
  category: string;
  /** Neural embedding vector (384-dim, optional) */
  vector?: number[];
  /** Pre-extracted keywords for TF-IDF */
  keywords: string[];
}

/**
 * Tool vectors file structure
 */
export interface ToolVectorsFile {
  /** Generation timestamp */
  generated: string;
  /** Embedding model used */
  model: string;
  /** Array of tool embeddings */
  tools: ToolEmbedding[];
}

/**
 * Search options
 */
export interface SearchOptions {
  /** Maximum results to return (default: 10) */
  limit?: number;
  /** Use neural embeddings for query (default: false) */
  useEmbeddings?: boolean;
  /** Minimum score threshold (default: 0.1) */
  minScore?: number;
}

/**
 * Search result
 */
export interface ToolSearchResult {
  /** Tool name */
  name: string;
  /** Tool description */
  description: string;
  /** Tool category */
  category: string;
  /** Relevance score (0-1) */
  score: number;
}

// Module state. `toolVectors === null` means "no load attempted yet"; an empty
// array means a load was attempted and produced nothing.
let toolVectors: ToolEmbedding[] | null = null;
let tfidfIndex: TFIDFIndex | null = null;
let isLoaded = false;

/**
 * Build the TF-IDF index for a set of tools.
 *
 * Each tool becomes one document: its id is the tool name and its text is the
 * name, description and keywords joined by single spaces. Shared by
 * `loadToolVectors` and `_setToolVectors` so both index tools the same way.
 *
 * @param tools Tools to index
 * @returns The index produced by `buildIndex`
 */
function buildToolIndex(tools: ToolEmbedding[]): TFIDFIndex {
  const documents = tools.map((tool) => ({
    id: tool.name,
    text: `${tool.name} ${tool.description} ${tool.keywords.join(' ')}`,
  }));
  return buildIndex(documents);
}

/**
 * Load tool vectors from the pre-generated JSON file
 *
 * Called lazily on first search. The result (including the empty fallback) is
 * cached in module state, so the JSON file is read at most once until
 * `_resetState` is called.
 *
 * @returns Array of tool embeddings; empty if the file could not be loaded or
 *          indexed
 */
export function loadToolVectors(): ToolEmbedding[] {
  if (toolVectors !== null) {
    return toolVectors;
  }

  try {
    // NOTE(review): the sibling imports use `.js` specifiers, which suggests
    // native ESM output. If so, a bare `require` is undefined at runtime and
    // this branch would always fall through to the catch below - confirm the
    // emitted module format (or that a bundler rewrites this call).
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    const data = require('./tool-vectors.json') as ToolVectorsFile;

    // Build the index before touching module state so that a malformed file
    // cannot leave `isLoaded` set while no index exists.
    const loadedTools = data.tools;
    const index = buildToolIndex(loadedTools);

    toolVectors = loadedTools;
    tfidfIndex = index;
    isLoaded = true;
    return toolVectors;
  } catch {
    // Fall back to an empty list if vectors are unavailable (e.g., during development)
    console.warn('Tool vectors not found. Run npm run build:embeddings to generate.');
    toolVectors = [];
    return toolVectors;
  }
}

/**
 * Check if tool vectors are loaded
 *
 * @returns true only when a load succeeded and produced at least one tool
 */
export function isVectorsLoaded(): boolean {
  return isLoaded && toolVectors !== null && toolVectors.length > 0;
}

/**
 * Search tools by natural language query
 *
 * Uses TF-IDF by default. Setting `useEmbeddings: true` routes through
 * `searchWithEmbeddings`, which currently falls back to TF-IDF as well.
 *
 * @param query Natural language search query
 * @param options Search options
 * @returns Matching tools with scores, in the order returned by the TF-IDF
 *          scorer; empty for a blank query, when no tools are loaded, or when
 *          `limit` is not a positive number
 */
export function searchTools(query: string, options: SearchOptions = {}): ToolSearchResult[] {
  const { limit = 10, useEmbeddings = false, minScore = 0.1 } = options;

  // Ensure vectors are loaded
  const tools = loadToolVectors();
  if (tools.length === 0) {
    return [];
  }

  // Validate query
  const trimmedQuery = query.trim();
  if (!trimmedQuery) {
    return [];
  }

  // Use embeddings if requested and available
  if (useEmbeddings) {
    return searchWithEmbeddings(trimmedQuery, tools, limit, minScore);
  }

  // Default: Use TF-IDF
  return searchWithTFIDF(trimmedQuery, limit, minScore);
}

/**
 * Search using TF-IDF scoring
 *
 * @param query Trimmed, non-empty query string
 * @param limit Maximum number of results
 * @param minScore Results scoring below this are dropped
 * @returns Up to `limit` results with scores clamped to at most 1
 */
function searchWithTFIDF(query: string, limit: number, minScore: number): ToolSearchResult[] {
  if (!tfidfIndex || !toolVectors) {
    return [];
  }

  // A non-positive (or NaN) limit means "no results". Without this guard the
  // loop below would push one result before its length check ran, and a
  // negative count would be handed to the scorer.
  if (!(limit > 0)) {
    return [];
  }

  // Ask for twice as many candidates as needed: some are discarded by minScore
  const results = tfidfScore(query, tfidfIndex, limit * 2);

  // Resolve scored document ids back to their tool records
  const toolMap = new Map(toolVectors.map((t) => [t.name, t]));
  const output: ToolSearchResult[] = [];

  for (const result of results) {
    if (result.score < minScore) {
      continue;
    }

    const tool = toolMap.get(result.id);
    if (tool) {
      output.push({
        name: tool.name,
        description: tool.description,
        category: tool.category,
        score: Math.min(result.score, 1), // Clamp to max 1
      });
    }

    if (output.length >= limit) {
      break;
    }
  }

  return output;
}

/**
 * Search using neural embeddings (requires runtime model)
 *
 * Query embedding is not implemented yet, so every path currently falls back
 * to TF-IDF; the only difference is a warning when no tool carries a vector.
 *
 * @param query Trimmed, non-empty query string
 * @param tools Loaded tool embeddings
 * @param limit Maximum number of results
 * @param minScore Results scoring below this are dropped
 * @returns TF-IDF search results
 */
function searchWithEmbeddings(
  query: string,
  tools: ToolEmbedding[],
  limit: number,
  minScore: number
): ToolSearchResult[] {
  // Check if tools have embeddings
  const hasEmbeddings = tools.some((t) => t.vector && t.vector.length > 0);

  if (!hasEmbeddings) {
    // Fall back to TF-IDF
    console.warn('Neural embeddings not available, falling back to TF-IDF');
    return searchWithTFIDF(query, limit, minScore);
  }

  // Try to get query embedding (async operation - not yet implemented)
  // For now, fall back to TF-IDF
  // TODO: Implement runtime query embedding in runtime-embeddings.ts
  return searchWithTFIDF(query, limit, minScore);
}

/**
 * Search tools by category
 *
 * The comparison is case-insensitive; every match is given a score of 1.0.
 *
 * @param category Tool category to filter by
 * @returns Array of tools in the category
 */
export function searchByCategory(category: string): ToolSearchResult[] {
  const tools = loadToolVectors();
  if (tools.length === 0) {
    return [];
  }

  const normalizedCategory = category.toLowerCase();

  return tools
    .filter((tool) => tool.category.toLowerCase() === normalizedCategory)
    .map((tool) => ({
      name: tool.name,
      description: tool.description,
      category: tool.category,
      score: 1.0, // Perfect match for category filter
    }));
}

/**
 * Get all available categories
 *
 * @returns Sorted array of unique category names
 */
export function getCategories(): string[] {
  const tools = loadToolVectors();
  const categories = new Set(tools.map((t) => t.category));
  return Array.from(categories).sort();
}

/**
 * Get tool count statistics
 *
 * @returns Total number of tools and a per-category count
 */
export function getStats(): { total: number; byCategory: Record<string, number> } {
  const tools = loadToolVectors();

  // Count in a Map rather than a plain object: looking up a category such as
  // "constructor" on `{}` would read an inherited member and corrupt the count.
  const counts = new Map<string, number>();
  for (const tool of tools) {
    counts.set(tool.category, (counts.get(tool.category) ?? 0) + 1);
  }

  return {
    total: tools.length,
    byCategory: Object.fromEntries(counts),
  };
}

/**
 * Reset module state (for testing)
 * @internal
 */
export function _resetState(): void {
  toolVectors = null;
  tfidfIndex = null;
  isLoaded = false;
}

/**
 * Set tool vectors directly (for testing)
 *
 * Replaces the cached tools and rebuilds the TF-IDF index from them.
 *
 * @param tools Tools to install as the loaded set
 * @internal
 */
export function _setToolVectors(tools: ToolEmbedding[]): void {
  toolVectors = tools;
  isLoaded = true;
  tfidfIndex = buildToolIndex(tools);
}

// Re-export utilities for use in build scripts
export { extractKeywords } from './tf-idf.js';
export { cosineSimilarity, normalize } from './cosine.js';

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/littlebearapps/wp-navigator-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.