Skip to main content
Glama

Analytical MCP Server

enhanced_fact_extraction.ts (12.5 kB)
/**
 * Enhanced Fact Extraction
 *
 * This module provides advanced fact extraction capabilities by combining
 * named entity recognition, coreference resolution, and relationship extraction.
 */

import { Logger } from './logger.js';
import { DataProcessingError } from './errors.js';
import { advancedNER, RecognizedEntity, EntityType } from './advanced_ner.js';
import { coreferenceResolver, CoreferenceChain } from './coreference_resolver.js';
import {
  relationshipExtractor,
  Relationship,
  RelationshipType,
  RelationshipSubtype
} from './relationship_extractor.js';
import * as mathjs from 'mathjs';

/**
 * Types of facts that can be extracted
 */
export enum FactType {
  ENTITY = 'ENTITY', // Named entity
  RELATIONSHIP = 'RELATIONSHIP', // Relationship between entities
  COREFERENCE = 'COREFERENCE', // Coreference between mentions
  ATTRIBUTE = 'ATTRIBUTE', // Entity attribute
  STATEMENT = 'STATEMENT', // General statement
  SENTIMENT = 'SENTIMENT', // Sentiment expression
  EVENT = 'EVENT' // Event description
}

/**
 * Extracted fact with confidence score
 */
export interface ExtractedFact {
  factId: string; // Unique identifier
  text: string; // Text representation of the fact
  type: FactType; // Type of fact
  confidence: number; // Confidence score (0-1)
  startIndex: number; // Start position in original text
  endIndex: number; // End position in original text

  // Type-specific properties
  entityType?: EntityType | undefined; // For ENTITY facts
  relationship?: Relationship | undefined; // For RELATIONSHIP facts
  coreference?: CoreferenceChain | undefined; // For COREFERENCE facts
  entities?: RecognizedEntity[] | undefined; // Related entities
  attributes?: Record<string, any> | undefined; // Additional attributes
}

/**
 * Result of fact extraction
 */
export interface FactExtractionResult {
  text: string; // Original text
  facts: ExtractedFact[]; // Extracted facts
  confidence: number; // Overall confidence
  entityCount: number; // Number of entities found
  relationshipCount: number; // Number of relationships found
  coreferenceCount: number; // Number of coreference chains found
}

/**
 * Parameters for fact extraction
 */
export interface FactExtractionParams {
  enableCoreference?: boolean; // Enable coreference resolution
  enableRelationships?: boolean; // Enable relationship extraction
  maxFacts?: number; // Maximum number of facts to return
  minConfidence?: number; // Minimum confidence threshold
  includeEvidence?: boolean; // Include evidence text for relationships
  prioritizeFactTypes?: FactType[]; // Fact types to prioritize
}

/**
 * Enhanced Fact Extractor Class
 * Implements comprehensive fact extraction by combining multiple techniques
 */
export class EnhancedFactExtractor {
  /**
   * Extract facts from text using multiple advanced techniques.
   *
   * Runs entity recognition, then (optionally) coreference resolution and
   * relationship extraction, converts every result that meets
   * `minConfidence` into an {@link ExtractedFact}, orders the facts by
   * `prioritizeFactTypes` and confidence, and truncates to `maxFacts`.
   *
   * The `entityCount`, `relationshipCount` and `coreferenceCount` fields of
   * the result are the raw counts reported by the underlying extractors,
   * i.e. they are taken before confidence filtering and truncation.
   *
   * @param text - Text to analyse.
   * @param params - Optional tuning parameters; see {@link FactExtractionParams}.
   * @returns The extracted facts together with summary counts and an overall
   *   confidence score.
   * @throws DataProcessingError (`ERR_1001`) wrapping any error raised during
   *   extraction.
   */
  async extractFacts(
    text: string,
    params: FactExtractionParams = {}
  ): Promise<FactExtractionResult> {
    const {
      enableCoreference = true,
      enableRelationships = true,
      maxFacts = 50,
      minConfidence = 0.5,
      includeEvidence = true,
      prioritizeFactTypes = [FactType.ENTITY, FactType.RELATIONSHIP, FactType.COREFERENCE]
    } = params;

    try {
      Logger.debug('Starting enhanced fact extraction', { textLength: text.length });

      // Initialize result containers
      const facts: ExtractedFact[] = [];
      let relationshipCount = 0;
      let coreferenceCount = 0;

      // 1. Extract entities and convert to facts
      const entities = await advancedNER.recognizeEntities(text);
      const entityCount = entities.length;

      Logger.debug('Entity extraction complete', { count: entityCount });

      // Convert entities to facts
      for (const entity of entities) {
        if (entity.confidence >= minConfidence) {
          facts.push({
            // facts.length grows with every push, so the suffix is unique
            // across all fact kinds within one call.
            factId: `entity_${facts.length}`,
            text: entity.text,
            type: FactType.ENTITY,
            confidence: entity.confidence,
            startIndex: entity.startIndex,
            endIndex: entity.endIndex,
            entityType: entity.type,
            entities: [entity]
          });
        }
      }

      // 2. Extract coreference chains if enabled
      if (enableCoreference) {
        const corefResult = await coreferenceResolver.resolveCoreferences(text);
        coreferenceCount = corefResult.chains.length;

        Logger.debug('Coreference resolution complete', {
          chainCount: coreferenceCount,
          confidence: corefResult.confidence
        });

        // Convert coreference chains to facts
        for (const chain of corefResult.chains) {
          // Only include chains with multiple mentions
          if (chain.mentions.length > 1 && chain.confidence >= minConfidence) {
            // Find entities whose span coincides exactly with any mention
            // in the chain (not only the representative mention).
            const relatedEntities = entities.filter(entity =>
              chain.mentions.some(
                mention =>
                  entity.startIndex === mention.startIndex &&
                  entity.endIndex === mention.endIndex
              )
            );

            facts.push({
              factId: `coref_${facts.length}`,
              text: chain.representativeMention.text,
              type: FactType.COREFERENCE,
              confidence: chain.confidence,
              startIndex: chain.representativeMention.startIndex,
              endIndex: chain.representativeMention.endIndex,
              entityType: chain.entityType as EntityType,
              coreference: chain,
              entities: relatedEntities
            });
          }
        }
      }

      // 3. Extract relationships if enabled
      if (enableRelationships) {
        const relationshipParams = {
          resolveCoref: enableCoreference,
          minConfidence,
          includeEvidence
        };

        const relationshipResult = await relationshipExtractor.extractRelationships(
          text,
          relationshipParams
        );

        relationshipCount = relationshipResult.relationships.length;

        Logger.debug('Relationship extraction complete', {
          count: relationshipCount,
          confidence: relationshipResult.confidence
        });

        // Convert relationships to facts
        for (const relationship of relationshipResult.relationships) {
          if (relationship.confidence >= minConfidence) {
            const relatedEntities = [
              relationship.sourceEntity,
              relationship.targetEntity
            ];

            facts.push({
              factId: `rel_${facts.length}`,
              // Fall back to a synthesized "source TYPE target" string when
              // the extractor supplied no evidence text.
              text:
                relationship.evidence ||
                `${relationship.sourceEntity.text} ${relationship.type} ${relationship.targetEntity.text}`,
              type: FactType.RELATIONSHIP,
              confidence: relationship.confidence,
              startIndex: relationship.startIndex,
              endIndex: relationship.endIndex,
              relationship,
              entities: relatedEntities
            });
          }
        }
      }

      // 4. Sort and filter facts
      const sortedFacts = this.sortFactsByPriority(facts, prioritizeFactTypes);
      const filteredFacts = sortedFacts
        .filter(fact => fact.confidence >= minConfidence)
        .slice(0, maxFacts);

      // 5. Calculate overall confidence
      const overallConfidence = this.calculateOverallConfidence(filteredFacts);

      Logger.debug('Fact extraction complete', {
        factCount: filteredFacts.length,
        confidence: overallConfidence
      });

      return {
        text,
        facts: filteredFacts,
        confidence: overallConfidence,
        entityCount,
        relationshipCount,
        coreferenceCount
      };
    } catch (error) {
      Logger.error('Enhanced fact extraction failed', error);
      throw new DataProcessingError(
        'ERR_1001',
        'Failed to extract facts',
        {
          originalText: text,
          error: error instanceof Error ? error.message : String(error)
        }
      );
    }
  }

  /**
   * Sort facts by priority and confidence.
   *
   * Facts whose type appears earlier in `priorityTypes` come first. Types
   * that are not listed at all are ranked after every listed type (a raw
   * `indexOf` result of -1 would otherwise put them in front). Facts with
   * equal rank are ordered by descending confidence.
   *
   * @param facts - Facts to order; the input array is not mutated.
   * @param priorityTypes - Fact types in descending priority order.
   * @returns A new, sorted array.
   */
  private sortFactsByPriority(facts: ExtractedFact[], priorityTypes: FactType[]): ExtractedFact[] {
    const rankOf = (type: FactType): number => {
      const index = priorityTypes.indexOf(type);
      // Unlisted types share the lowest priority.
      return index === -1 ? priorityTypes.length : index;
    };

    return [...facts].sort((a, b) => {
      // First sort by priority type
      const priorityA = rankOf(a.type);
      const priorityB = rankOf(b.type);

      if (priorityA !== priorityB) {
        // Lower index = higher priority
        return priorityA - priorityB;
      }

      // Then sort by confidence (higher first)
      return b.confidence - a.confidence;
    });
  }

  /**
   * Calculate overall confidence of fact extraction.
   *
   * Computes the type-weighted mean of the fact confidences and multiplies
   * it by a count factor of `1 + log10(n) / 10`, so a single fact keeps its
   * own confidence (factor 1) and additional facts add a small, diminishing
   * bonus. The result is capped at 1.
   *
   * @param facts - Facts contributing to the score.
   * @returns 0 for an empty list, otherwise a score no greater than 1.
   */
  private calculateOverallConfidence(facts: ExtractedFact[]): number {
    if (facts.length === 0) {
      return 0;
    }

    // Weight each fact type differently
    const typeWeights: Record<FactType, number> = {
      [FactType.ENTITY]: 1.0,
      [FactType.RELATIONSHIP]: 1.2,
      [FactType.COREFERENCE]: 0.9,
      [FactType.ATTRIBUTE]: 0.8,
      [FactType.STATEMENT]: 0.7,
      [FactType.SENTIMENT]: 0.6,
      [FactType.EVENT]: 1.1
    };

    let weightedSum = 0;
    let totalWeight = 0;

    for (const fact of facts) {
      const weight = typeWeights[fact.type] || 1.0;
      weightedSum += fact.confidence * weight;
      totalWeight += weight;
    }

    // Logarithmic scaling based on fact count to reward more facts
    // but with diminishing returns. The division by 10 must be applied to
    // the logarithm, not to its argument: log10(n / 10) is negative for
    // n < 10 and would zero out the score for a single fact.
    const countFactor = 1 + Math.log10(Math.max(1, facts.length)) / 10;

    // Final confidence score (capped at 1.0)
    return mathjs.min(1, (weightedSum / totalWeight) * countFactor);
  }

  /**
   * Extract statements as facts from text.
   * This can be used separately or as part of the main extraction process.
   *
   * The text is split on `.`, `!` and `?`; each trimmed piece of at least
   * `minLength` characters becomes a STATEMENT fact whose indices locate it
   * in the original text. Because the split is purely character based, it
   * also breaks on periods inside numbers and abbreviations.
   *
   * @param text - Text to split into statements.
   * @param minLength - Minimum trimmed length for a piece to be kept.
   * @returns The statement facts, or an empty array if an error occurs
   *   (the error is logged, not rethrown).
   */
  extractStatementFacts(text: string, minLength = 30): ExtractedFact[] {
    try {
      const facts: ExtractedFact[] = [];

      // Simple sentence splitting
      const sentences = text
        .split(/[.!?]/)
        .map(s => s.trim())
        .filter(s => s.length >= minLength);

      // Find sentence positions in original text; searching from the end of
      // the previous match keeps repeated sentences mapped to successive
      // occurrences.
      let currentPosition = 0;

      for (const sentence of sentences) {
        const sentenceIndex = text.indexOf(sentence, currentPosition);

        if (sentenceIndex !== -1) {
          // Compute confidence based on sentence length and complexity
          const confidence = this.computeSentenceConfidence(sentence);

          facts.push({
            factId: `stmt_${facts.length}`,
            text: sentence,
            type: FactType.STATEMENT,
            confidence,
            startIndex: sentenceIndex,
            endIndex: sentenceIndex + sentence.length
          });

          currentPosition = sentenceIndex + sentence.length;
        }
      }

      return facts;
    } catch (error) {
      Logger.error('Statement extraction failed', error);
      return [];
    }
  }

  /**
   * Compute confidence score for a sentence.
   *
   * Combines three heuristics with weights 0.3 / 0.3 / 0.4 and rounds the
   * result to two decimal places.
   *
   * @param sentence - Sentence text to score.
   * @returns The rounded heuristic score.
   */
  private computeSentenceConfidence(sentence: string): number {
    // Factors affecting confidence:

    // 1. Length (longer = higher confidence, up to a point)
    const lengthFactor = Math.min(1, sentence.length / 200);

    // 2. Complexity (more unique words = higher confidence)
    const words = sentence.split(/\s+/);
    const uniqueWords = new Set(words.map(w => w.toLowerCase()));
    const complexityFactor = Math.min(1, uniqueWords.size / words.length);

    // 3. Structure (sentences with verbs are more likely to be meaningful);
    //    only a fixed list of auxiliary and modal verbs is checked.
    const hasVerb = /\b(is|are|was|were|has|have|had|do|does|did|will|shall|should|would|can|could|may|might)\b/i.test(sentence);
    const structureFactor = hasVerb ? 0.8 : 0.4;

    // Combined confidence score
    return mathjs.round(
      (lengthFactor * 0.3) + (complexityFactor * 0.3) + (structureFactor * 0.4),
      2
    );
  }
}

// Singleton instance
export const enhancedFactExtractor = new EnhancedFactExtractor();

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/quanticsoul4772/analytical-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server