Analytical MCP Server

MIT License

Overview InspectNew Endpoints Schema Related Servers Reviews Score

mention_extraction_provider.ts•12.3 kB

/**
 * Mention Extraction Provider
 *
 * Handles extraction of all types of mentions from text:
 * - Proper mentions (named entities)
 * - Pronominal mentions (pronouns)
 * - Nominal mentions (common nouns)
 */

import { Logger } from './logger.js';
import { ValidationHelpers } from './validation_helpers.js';
import { advancedNER, RecognizedEntity, EntityType } from './advanced_ner.js';
import { nlpToolkit } from './nlp_toolkit.js';

/**
 * Represents a mention of an entity in text
 */
export interface Mention {
  /** Surface text of the mention, exactly as it appears in the source text. */
  text: string;
  type: MentionType;
  /** Offset of the first character of the mention in the source text. */
  startIndex: number;
  /** Offset one past the last character of the mention (exclusive). */
  endIndex: number;
  headWord: string;
  gender?: Gender;
  number?: NumberType;
  animacy?: Animacy;
}

/**
 * Types of mentions
 */
export enum MentionType {
  PROPER = 'PROPER', // Proper nouns: "John Smith", "Google"
  NOMINAL = 'NOMINAL', // Common nouns: "the man", "the company"
  PRONOMINAL = 'PRONOMINAL' // Pronouns: "he", "she", "it", "they"
}

/**
 * Gender attributes for mentions
 */
export enum Gender {
  MALE = 'MALE',
  FEMALE = 'FEMALE',
  NEUTRAL = 'NEUTRAL',
  UNKNOWN = 'UNKNOWN'
}

/**
 * Number attributes for mentions
 */
export enum NumberType {
  SINGULAR = 'SINGULAR',
  PLURAL = 'PLURAL',
  UNKNOWN = 'UNKNOWN'
}

/**
 * Animacy attributes for mentions
 */
export enum Animacy {
  ANIMATE = 'ANIMATE',
  INANIMATE = 'INANIMATE',
  UNKNOWN = 'UNKNOWN'
}

/**
 * The three agreement attributes attached to every mention produced here.
 */
type MentionAttributes = { gender: Gender; number: NumberType; animacy: Animacy };

/**
 * A single part-of-speech tagged token, as consumed by the nominal-mention logic.
 * Only the `word` and `tag` fields are read by this module.
 */
interface PosTag {
  word: string;
  tag: string;
}

/**
 * Mention Extraction Provider Class
 * Extracts mentions from text using multiple strategies
 */
export class MentionExtractionProvider {
  /** Lower-cased pronoun -> agreement attributes. Look up via `lookupPronoun` only. */
  private readonly PRONOUN_MAP: Record<string, MentionAttributes> = {
    // Personal pronouns
    'he': { gender: Gender.MALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'him': { gender: Gender.MALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'his': { gender: Gender.MALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'himself': { gender: Gender.MALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'she': { gender: Gender.FEMALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'her': { gender: Gender.FEMALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'hers': { gender: Gender.FEMALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'herself': { gender: Gender.FEMALE, number: NumberType.SINGULAR, animacy: Animacy.ANIMATE },
    'it': { gender: Gender.NEUTRAL, number: NumberType.SINGULAR, animacy: Animacy.INANIMATE },
    'its': { gender: Gender.NEUTRAL, number: NumberType.SINGULAR, animacy: Animacy.INANIMATE },
    'itself': { gender: Gender.NEUTRAL, number: NumberType.SINGULAR, animacy: Animacy.INANIMATE },
    'they': { gender: Gender.UNKNOWN, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN },
    'them': { gender: Gender.UNKNOWN, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN },
    'their': { gender: Gender.UNKNOWN, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN },
    'theirs': { gender: Gender.UNKNOWN, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN },
    'themselves': { gender: Gender.UNKNOWN, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN },

    // Demonstrative pronouns
    'this': { gender: Gender.NEUTRAL, number: NumberType.SINGULAR, animacy: Animacy.UNKNOWN },
    'that': { gender: Gender.NEUTRAL, number: NumberType.SINGULAR, animacy: Animacy.UNKNOWN },
    'these': { gender: Gender.NEUTRAL, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN },
    'those': { gender: Gender.NEUTRAL, number: NumberType.PLURAL, animacy: Animacy.UNKNOWN }
  };

  /**
   * Extract all potential mentions from text.
   *
   * Combines proper mentions (one per entity returned by `advancedNER.recognizeEntities`),
   * pronominal mentions and nominal mentions.
   *
   * @param text - Text to analyse; validated as a non-empty string first.
   * @returns All mentions, ordered by ascending `startIndex`.
   */
  async extractMentions(text: string): Promise<Mention[]> {
    // Early validation using ValidationHelpers
    ValidationHelpers.throwIfInvalid(ValidationHelpers.validateNonEmptyString(text));

    const mentions: Mention[] = [];

    // Extract entities first (for proper mentions)
    const entities = await advancedNER.recognizeEntities(text);

    // Convert entities to mentions
    for (const entity of entities) {
      mentions.push({
        text: entity.text,
        type: MentionType.PROPER,
        startIndex: entity.startIndex,
        endIndex: entity.endIndex,
        headWord: this.extractHeadWord(entity.text),
        ...this.inferAttributes(entity)
      });
    }

    // Extract pronominal mentions
    mentions.push(...this.extractPronominalMentions(text));

    // Extract nominal mentions
    mentions.push(...this.extractNominalMentions(text, entities));

    // Sort mentions by position in text (the array is local, so in-place sorting is safe)
    return mentions.sort((a, b) => a.startIndex - b.startIndex);
  }

  /**
   * Extract pronominal mentions (pronouns) from text.
   *
   * The text is scanned token by token with a regular expression, so every
   * reported offset is the real position of the token regardless of how much
   * or what kind of whitespace separates words. Tokens are runs of ASCII
   * letters; hyphen-joined runs ("he-man") are kept together as one token so
   * they are not mistaken for pronouns, while apostrophes split tokens so the
   * pronoun in "he's" or "they're" is still found. Attached punctuation is
   * never part of the mention text.
   *
   * @param text - Text to analyse; validated as a non-empty string first.
   * @returns One mention per pronoun occurrence, in order of appearance.
   */
  extractPronominalMentions(text: string): Mention[] {
    // Early validation using ValidationHelpers
    ValidationHelpers.throwIfInvalid(ValidationHelpers.validateNonEmptyString(text));

    const mentions: Mention[] = [];

    // Created per call: a global regex carries `lastIndex` state between exec() calls.
    const tokenPattern = /\b[A-Za-z]+(?:-[A-Za-z]+)*\b/g;

    let match: RegExpExecArray | null;
    while ((match = tokenPattern.exec(text)) !== null) {
      const word = match[0];
      const attributes = this.lookupPronoun(word);
      if (!attributes) continue;

      mentions.push({
        text: word,
        type: MentionType.PRONOMINAL,
        startIndex: match.index,
        endIndex: match.index + word.length,
        headWord: word, // Use the pronoun itself as the head word
        ...attributes
      });
    }

    return mentions;
  }

  /**
   * Extract nominal mentions from text using NLP toolkit.
   *
   * NOTE: POS tagging is not wired in yet (`posTags` is an empty placeholder),
   * so this currently always returns an empty array.
   *
   * @param text - Text to analyse; validated as a non-empty string first.
   * @param entities - Already recognised entities; phrases overlapping any of them are skipped.
   * @returns Nominal mentions found. Errors raised while scanning are logged and
   *          whatever was collected up to that point is returned.
   */
  extractNominalMentions(text: string, entities: RecognizedEntity[]): Mention[] {
    // Early validation using ValidationHelpers
    ValidationHelpers.throwIfInvalid(ValidationHelpers.validateNonEmptyString(text));

    const mentions: Mention[] = [];

    try {
      // Use POS tagging to find determiners followed by nouns
      // POS tagging would be done here with proper API
      const posTags: PosTag[] = []; // Placeholder for now

      // Determiners are visited left to right, so each phrase is searched for
      // after the previous one. Without this cursor a repeated phrase
      // ("the man ... the man") would always resolve to its first occurrence.
      let searchFrom = 0;

      for (let i = 0; i < posTags.length - 1; i++) {
        const current = posTags[i];

        // Look for determiners that indicate mentions
        if (!current || !this.isDeterminer(current.tag)) continue;

        const nounResult = this.findNounAfterDeterminer(posTags, i, text);
        if (!nounResult || !nounResult.nounPhrase) continue;
        const { nounIndex, nounPhrase } = nounResult;

        // Find the phrase in the original text
        const phraseIndex = text.indexOf(nounPhrase, searchFrom);
        if (phraseIndex === -1) continue;
        searchFrom = phraseIndex + nounPhrase.length;

        // Skip if this overlaps with an entity
        if (this.checkEntityOverlap(phraseIndex, nounPhrase, entities)) continue;

        const nounPosTag = posTags[nounIndex];
        if (!nounPosTag) continue;

        // Determine attributes based on the head noun
        const headWord = nounPosTag.word;
        mentions.push({
          text: nounPhrase,
          type: MentionType.NOMINAL,
          startIndex: phraseIndex,
          endIndex: phraseIndex + nounPhrase.length,
          headWord,
          ...this.inferAttributesFromNoun(headWord, nounPosTag.tag)
        });
      }
    } catch (error) {
      Logger.error('Error extracting nominal mentions', error);
    }

    return mentions;
  }

  /**
   * Look up the attributes of a pronoun, case-insensitively.
   *
   * Uses an own-property check: a bare `PRONOUN_MAP[word]` would also return
   * inherited members such as `constructor` or `toString`, which made those
   * ordinary words look like pronouns.
   */
  private lookupPronoun(word: string): MentionAttributes | undefined {
    const key = word.toLowerCase();
    return Object.prototype.hasOwnProperty.call(this.PRONOUN_MAP, key)
      ? this.PRONOUN_MAP[key]
      : undefined;
  }

  /**
   * Extract head word from a phrase
   */
  private extractHeadWord(phrase: string): string {
    ValidationHelpers.throwIfInvalid(ValidationHelpers.validateNonEmptyString(phrase));

    // Simple strategy: use the last word as head
    const words = phrase.trim().split(/\s+/);
    return words[words.length - 1] ?? phrase;
  }

  /**
   * Infer attributes from entity
   *
   * Only the entity's `type` is consulted; entities with no type, or a type
   * not listed below, get all-UNKNOWN attributes.
   */
  private inferAttributes(entity: RecognizedEntity): MentionAttributes {
    const defaults: MentionAttributes = {
      gender: Gender.UNKNOWN,
      number: NumberType.UNKNOWN,
      animacy: Animacy.UNKNOWN
    };

    if (!entity.type) return defaults;

    // Basic attribute inference based on entity type
    switch (entity.type) {
      case EntityType.PERSON:
        return {
          gender: Gender.UNKNOWN, // Would need additional analysis
          number: NumberType.SINGULAR,
          animacy: Animacy.ANIMATE
        };
      case EntityType.ORGANIZATION:
      case EntityType.LOCATION:
      case EntityType.FACILITY:
        return {
          gender: Gender.NEUTRAL,
          number: NumberType.SINGULAR,
          animacy: Animacy.INANIMATE
        };
      default:
        return defaults;
    }
  }

  /**
   * Infer gender, number, and animacy attributes from a noun and its POS tag.
   * Gender is always UNKNOWN here; number comes from the tag and animacy from the noun.
   */
  private inferAttributesFromNoun(noun: string, posTag: string): MentionAttributes {
    return {
      gender: Gender.UNKNOWN,
      number: this.inferNumber(noun, posTag),
      animacy: this.inferAnimacy(noun)
    };
  }

  /**
   * Infer number from noun and POS tag.
   * Only the tag is consulted; `noun` is currently unused.
   */
  private inferNumber(noun: string, posTag: string): NumberType {
    // Plural POS tags
    if (posTag === 'NNS' || posTag === 'NNPS') {
      return NumberType.PLURAL;
    }

    // Singular POS tags
    if (posTag === 'NN' || posTag === 'NNP') {
      return NumberType.SINGULAR;
    }

    return NumberType.UNKNOWN;
  }

  /**
   * Infer animacy from noun.
   *
   * Matching is by case-insensitive substring against two small word lists,
   * with the animate list checked first (so "policeman" matches "man").
   */
  private inferAnimacy(noun: string): Animacy {
    const lowerNoun = noun.toLowerCase();

    // Animate words
    const animateWords = ['person', 'people', 'man', 'woman', 'child', 'baby', 'boy', 'girl', 'doctor', 'teacher', 'student'];
    if (animateWords.some(word => lowerNoun.includes(word))) {
      return Animacy.ANIMATE;
    }

    // Inanimate words
    const inanimateWords = ['thing', 'object', 'item', 'building', 'car', 'book', 'table', 'computer'];
    if (inanimateWords.some(word => lowerNoun.includes(word))) {
      return Animacy.INANIMATE;
    }

    return Animacy.UNKNOWN;
  }

  /**
   * Check if POS tag is a determiner
   */
  private isDeterminer(tag: string): boolean {
    return tag === 'DT' || tag === 'PRP$' || tag === 'WDT';
  }

  /**
   * Find noun phrase after determiner.
   *
   * Scans forward from the determiner and stops at the first noun tag
   * (any tag starting with "NN"), at sentence-final punctuation, or at
   * another determiner.
   *
   * @param posTags - Tagged tokens of the text.
   * @param determinerIndex - Index in `posTags` of the determiner to start from.
   * @param text - Original text (currently unused).
   * @returns The noun's index and the space-joined words from the determiner
   *          through the noun, or `null` when no noun is reached.
   */
  private findNounAfterDeterminer(posTags: PosTag[], determinerIndex: number, text: string): {nounIndex: number, nounPhrase: string} | null {
    for (let i = determinerIndex + 1; i < posTags.length; i++) {
      const token = posTags[i];
      if (!token) break;

      // Stop at sentence boundaries or other determiners
      if (token.tag === '.' || token.tag === '!' || token.tag === '?' || this.isDeterminer(token.tag)) {
        break;
      }

      // Found a noun
      if (token.tag.startsWith('NN')) {
        // Build the noun phrase from determiner to noun
        const phrase = posTags
          .slice(determinerIndex, i + 1)
          .map(t => t.word)
          .join(' ');

        return { nounIndex: i, nounPhrase: phrase };
      }
    }

    return null;
  }

  /**
   * Check if a phrase overlaps with any entities.
   * Ranges are treated as half-open, so merely touching ranges do not overlap.
   */
  private checkEntityOverlap(phraseStart: number, phrase: string, entities: RecognizedEntity[]): boolean {
    const phraseEnd = phraseStart + phrase.length;

    return entities.some(
      entity => phraseEnd > entity.startIndex && phraseStart < entity.endIndex
    );
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/quanticsoul4772/analytical-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.