Doclea MCP

Official

Overview Schema Related Servers Score Discussions

fallback.ts•7.49 KiB

/**
 * Fallback entity extraction using regex patterns
 *
 * Used when no LLM API key is available or when LLM extraction fails.
 * Provides basic entity detection through pattern matching.
 */

import type { EntityType, ExtractedEntity } from "../types";

/**
 * Common words to filter out from entity detection.
 *
 * Entries are capitalized because they are compared against the
 * capitalized phrases found by the generic proper-noun pass.
 */
const COMMON_WORDS = new Set([
  // Articles and pronouns
  "The",
  "This",
  "That",
  "These",
  "Those",
  "When",
  "Where",
  "What",
  "Which",
  "Who",
  "How",
  "Why",
  "Here",
  "There",
  "Some",
  "All",
  "Any",
  "Each",
  "Every",
  "Both",
  "Few",
  "More",
  "Most",
  "Other",
  "Such",
  // Common programming words that look like proper nouns
  "True",
  "False",
  "Null",
  "None",
  "Error",
  "Warning",
  "Info",
  "Debug",
  "Success",
  "Failed",
  "Todo",
  "Note",
  "Important",
  // Common verbs starting with capital
  "Create",
  "Update",
  "Delete",
  "Read",
  "Write",
  "Add",
  "Remove",
  "Get",
  "Set",
  "Check",
  "Test",
  "Run",
  "Start",
  "Stop",
  "Build",
  "Deploy",
  "Install",
  "Configure",
  "Setup",
  // Days and months
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
  "Sunday",
  "January",
  "February",
  "March",
  "April",
  "May",
  "June",
  "July",
  "August",
  "September",
  "October",
  "November",
  "December",
]);

/**
 * Technology patterns (case-insensitive matches).
 *
 * NOTE(review): because matching is case-insensitive, everyday words such as
 * "go", "rust", "swift" or "spring" are also reported as technologies.
 */
const TECHNOLOGY_PATTERNS: Array<{ pattern: RegExp; type: EntityType }> = [
  // Languages.
  // The pattern ends with `(?!\w)` rather than `\b`: a trailing `\b` can never
  // match after "+" or "#" when whitespace/punctuation follows, which made
  // "C++" and "C#" undetectable in normal prose.
  {
    pattern:
      /\b(JavaScript|TypeScript|Python|Rust|Go|Java|Ruby|PHP|Swift|Kotlin|C\+\+|C#)(?!\w)/gi,
    type: "TECHNOLOGY",
  },
  // Frameworks
  {
    pattern:
      /\b(React|Vue|Angular|Next\.js|Nuxt|Svelte|Express|FastAPI|Django|Flask|Spring|Rails)\b/gi,
    type: "TECHNOLOGY",
  },
  // Databases
  {
    pattern:
      /\b(PostgreSQL|MySQL|MongoDB|Redis|SQLite|Elasticsearch|DynamoDB|Cassandra|Firebase)\b/gi,
    type: "TECHNOLOGY",
  },
  // Cloud/Infrastructure
  {
    pattern:
      /\b(AWS|Azure|GCP|Kubernetes|Docker|Terraform|Jenkins|GitHub Actions|Vercel|Netlify)\b/gi,
    type: "TECHNOLOGY",
  },
  // Tools
  {
    pattern: /\b(npm|yarn|pnpm|bun|pip|cargo|maven|gradle|webpack|vite)\b/gi,
    type: "TECHNOLOGY",
  },
];

/**
 * Organization patterns
 */
const ORG_PATTERNS = [
  // Common suffixes (capture group 1 is the name without the suffix)
  /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:Inc|Corp|LLC|Ltd|Company|Technologies|Software|Labs|Studios)\b/g,
  // Well-known companies
  /\b(Google|Microsoft|Amazon|Apple|Meta|Facebook|Netflix|Uber|Airbnb|Stripe|OpenAI|Anthropic|GitHub|GitLab|Atlassian|Slack|Notion)\b/g,
];

/**
 * Whole-word verbs that suggest the nearby entity is a person.
 *
 * Matched with word boundaries so that "led" does not fire on
 * "called", "enabled", "failed", etc.
 */
const PERSON_INDICATOR_PATTERN =
  /\b(?:said|wrote|created|developed|founded|led|managed|designed)\b/;

/**
 * Substrings of an entity name that suggest it is a concept.
 */
const CONCEPT_INDICATORS = [
  "pattern",
  "principle",
  "approach",
  "method",
  "strategy",
  "architecture",
  "design",
];

/**
 * Number of characters inspected on each side of a mention when guessing
 * its entity type.
 */
const CONTEXT_WINDOW = 50;

/**
 * Extract entities using regex patterns (fallback method)
 *
 * Three passes run in order: known technologies (confidence 0.6), organizations
 * (confidence 0.5), then generic capitalized phrases (confidence 0.3). Entities
 * are de-duplicated case-insensitively across all passes; the first pass that
 * finds a name wins.
 *
 * @param content - Text content to extract entities from
 * @returns Array of extracted entities with low confidence scores
 */
export function extractEntitiesFallback(content: string): ExtractedEntity[] {
  const entities: ExtractedEntity[] = [];
  const seen = new Set<string>();

  // Runs one of the shared, global-flag patterns over the content and records
  // every previously unseen match.
  const collect = (
    pattern: RegExp,
    entityType: EntityType,
    confidence: number,
  ): void => {
    // The patterns are module-level and stateful (`g` flag); always start
    // scanning from the beginning of this content.
    pattern.lastIndex = 0;

    let found: RegExpExecArray | null;
    while ((found = pattern.exec(content)) !== null) {
      const mention = found[0];
      const canonicalName = normalizeName(found[1] || mention);
      const key = canonicalName.toLowerCase();

      if (!seen.has(key)) {
        seen.add(key);
        entities.push({
          canonicalName,
          entityType,
          confidence,
          mentionText: mention,
        });
      }

      // Also remember the full mention ("Acme Corp" for canonical "Acme") so
      // the generic capitalized-phrase pass does not report it a second time.
      seen.add(normalizeName(mention).toLowerCase());
    }
  };

  // Extract technologies (medium confidence for pattern matches)
  for (const { pattern, type } of TECHNOLOGY_PATTERNS) {
    collect(pattern, type, 0.6);
  }

  // Extract organizations
  for (const pattern of ORG_PATTERNS) {
    collect(pattern, "ORGANIZATION", 0.5);
  }

  // Extract capitalized phrases (potential proper nouns)
  const capitalizedPattern = /\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})\b/g;
  let match: RegExpExecArray | null;
  while ((match = capitalizedPattern.exec(content)) !== null) {
    const mention = match[0];
    // Normalize like the other passes so that a phrase wrapped across lines
    // ("Alice\n  Johnson") yields the same canonical name as "Alice Johnson".
    const name = normalizeName(mention);
    const key = name.toLowerCase();

    // Skip common words and already seen entities
    if (COMMON_WORDS.has(name) || seen.has(key)) {
      continue;
    }

    // Skip if it's at the start of a sentence (likely not a proper noun):
    // the character two positions back is sentence-ending punctuation.
    const beforeIndex = match.index - 2;
    if (beforeIndex >= 0) {
      const charBefore = content[beforeIndex];
      if (charBefore === "." || charBefore === "!" || charBefore === "?") {
        continue;
      }
    }

    // Skip very short names (likely false positives)
    if (name.length < 4) {
      continue;
    }

    seen.add(key);
    entities.push({
      canonicalName: name,
      entityType: guessEntityType(name, content, match.index),
      confidence: 0.3, // Low confidence for generic pattern
      mentionText: mention,
    });
  }

  return entities;
}

/**
 * Normalize entity name: trim, collapse whitespace runs to a single space,
 * and drop a leading "the".
 */
function normalizeName(name: string): string {
  return name
    .trim()
    .replace(/\s+/g, " ")
    .replace(/^the\s+/i, "");
}

/**
 * Guess entity type based on context
 *
 * @param name - Entity name being classified
 * @param context - Full text the entity was found in
 * @param mentionIndex - Offset of the mention inside `context`, when known.
 *   If omitted, the first case-insensitive occurrence of `name` is used.
 * @returns "PERSON" when a person-indicating verb appears as a whole word
 *   within the surrounding window, "CONCEPT" when the name itself contains a
 *   concept keyword, otherwise "OTHER"
 */
function guessEntityType(
  name: string,
  context: string,
  mentionIndex?: number,
): EntityType {
  const lowerName = name.toLowerCase();

  // Text surrounding the mention (the mention itself is included).
  let nearby = "";
  if (mentionIndex !== undefined) {
    nearby = context
      .slice(
        Math.max(0, mentionIndex - CONTEXT_WINDOW),
        mentionIndex + name.length + CONTEXT_WINDOW,
      )
      .toLowerCase();
  } else {
    const lowerContext = context.toLowerCase();
    const nameIndex = lowerContext.indexOf(lowerName);
    if (nameIndex !== -1) {
      nearby = lowerContext.slice(
        Math.max(0, nameIndex - CONTEXT_WINDOW),
        nameIndex + lowerName.length + CONTEXT_WINDOW,
      );
    }
  }

  // Check for person indicators
  if (PERSON_INDICATOR_PATTERN.test(nearby)) {
    return "PERSON";
  }

  // Check for concept indicators
  if (CONCEPT_INDICATORS.some((indicator) => lowerName.includes(indicator))) {
    return "CONCEPT";
  }

  // Default to OTHER
  return "OTHER";
}

/**
 * Extract simple relationships from fallback entities
 * (Very limited - just co-occurrence based)
 *
 * Every pair of entities whose `mentionText` appears in the same sentence
 * produces one "RELATED_TO" relationship. A pair that co-occurs in several
 * sentences is emitted once per sentence.
 *
 * @param entities - Entities previously extracted from `content`
 * @param content - Text the entities were extracted from
 * @returns Co-occurrence relationships with fixed low strength and confidence
 */
export function extractRelationshipsFallback(
  entities: ExtractedEntity[],
  content: string,
): Array<{
  sourceEntity: string;
  targetEntity: string;
  relationshipType: string;
  strength: number;
  confidence: number;
}> {
  const relationships: ReturnType<typeof extractRelationshipsFallback> = [];

  // Simple co-occurrence: if two entities appear in the same sentence, they
  // might be related. Only split on terminators followed by whitespace or the
  // end of the text, so mentions that contain a period (e.g. "Next.js") are
  // not cut in half.
  const sentences = content.split(/[.!?]+(?=\s|$)/);

  for (const sentence of sentences) {
    const sentenceEntities = entities.filter((e) =>
      sentence.includes(e.mentionText),
    );

    // Create relationships between entities in the same sentence
    sentenceEntities.forEach((source, i) => {
      for (const target of sentenceEntities.slice(i + 1)) {
        relationships.push({
          sourceEntity: source.canonicalName,
          targetEntity: target.canonicalName,
          relationshipType: "RELATED_TO",
          strength: 3, // Low strength for co-occurrence
          confidence: 0.3,
        });
      }
    });
  }

  return relationships;
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/docleaai/doclea-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

fallback.ts•7.49 KiB