Skip to main content
Glama
text-extraction.ts•2.58 kB
/**
 * Object property names inspected when extracting text, in the order their
 * values are appended to the output.
 */
const PRIORITY_KEYS: readonly string[] = [
  'title',
  'name',
  'description',
  'summary',
  'text',
  'message',
  'content',
  'value',
  'result',
  'pattern',
  'implementation',
  'query',
  'answer',
  'insight',
  'decision',
  'preference',
  'task',
  'action',
  'error',
  'status',
  'type',
];

/** A bare string (top-level value or array element) is kept only if shorter than this. */
const MAX_BARE_STRING_LENGTH = 100;

/** A string found under a priority key is kept only if shorter than this. */
const MAX_PROPERTY_STRING_LENGTH = 200;

/** Only this many leading array elements are inspected. */
const MAX_ARRAY_ITEMS = 3;

/**
 * Extract meaningful text from various content formats for embedding generation.
 *
 * The result is built from, in order: key terms extracted from `content`,
 * then `type`, then the tags joined by spaces. Empty parts are skipped.
 *
 * @param content - Arbitrary value; strings, arrays and plain objects contribute text.
 * @param tags - Optional tags, appended last so they do not dominate the text.
 * @param type - Optional type label, appended after the content text for context.
 * @returns A single-line string with whitespace runs collapsed and ends trimmed;
 *          the empty string when nothing contributes text.
 */
export function extractTextForEmbedding(content: unknown, tags?: string[], type?: string): string {
  const textParts: string[] = [];

  // Extract the most important text from content
  const extractedText = extractKeyTermsFromValue(content);
  if (extractedText) {
    textParts.push(extractedText);
  }

  // Add type for context
  if (type) {
    textParts.push(type);
  }

  // Tags can be included but shouldn't dominate;
  // they're better used for filtering than semantic search
  if (tags && tags.length > 0) {
    textParts.push(tags.join(' '));
  }

  return textParts
    .join(' ')
    .replace(/\s+/g, ' ') // Normalize whitespace
    .trim();
}

/**
 * Extract only key terms from content (titles, names, etc).
 *
 * - A string is kept when shorter than `MAX_BARE_STRING_LENGTH`.
 * - An array contributes its first `MAX_ARRAY_ITEMS` elements, each processed
 *   recursively with one less level of depth.
 * - Any other object contributes the values found under `PRIORITY_KEYS`:
 *   strings shorter than `MAX_PROPERTY_STRING_LENGTH` are kept as-is, nested
 *   objects/arrays are processed recursively with one less level of depth.
 * - Every other value (numbers, booleans, null, undefined, ...) contributes nothing.
 *
 * @param value - The value to inspect.
 * @param maxDepth - Remaining recursion budget; at 0 or below nothing is extracted.
 * @returns The collected text joined by spaces, with `{ } [ ] "` removed,
 *          whitespace collapsed and ends trimmed.
 */
function extractKeyTermsFromValue(value: unknown, maxDepth = 3): string {
  if (maxDepth <= 0) return '';

  const texts: string[] = [];

  if (typeof value === 'string') {
    // Only include if it's short and likely a key term
    if (value.length < MAX_BARE_STRING_LENGTH) {
      texts.push(value);
    }
  } else if (Array.isArray(value)) {
    // Process first few array items
    for (const item of value.slice(0, MAX_ARRAY_ITEMS)) {
      const text = extractKeyTermsFromValue(item, maxDepth - 1);
      if (text) texts.push(text);
    }
  } else if (value && typeof value === 'object') {
    const obj = value as Record<string, unknown>;
    for (const key of PRIORITY_KEYS) {
      // `in` (not an own-property check) is deliberate: inherited properties are read too.
      if (!(key in obj)) continue;

      const val = obj[key];
      if (typeof val === 'string') {
        // Over-long strings are dropped entirely rather than truncated.
        if (val.length < MAX_PROPERTY_STRING_LENGTH) {
          texts.push(val);
        }
      } else if (val && typeof val === 'object') {
        // Recursively extract from nested objects (and arrays)
        const nested = extractKeyTermsFromValue(val, maxDepth - 1);
        if (nested) texts.push(nested);
      }
    }
  }

  return texts
    .filter((t) => t && t.length > 0)
    .join(' ')
    .replace(/[{}[\]"]/g, '') // Remove JSON structure characters
    .replace(/\s+/g, ' ') // Normalize whitespace
    .trim();
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/scanadi/mcp-ai-memory'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.