FedMCP - Federal Parliamentary Information

mentionParser.ts•18.2 KiB

/**
 * Mention Parser - Parse @mention syntax from text
 *
 * Supported mention formats:
 * - @bill:c-234 - Bill reference
 * - @bill:c-234:s2.1 - Bill section reference
 * - @bill:45-1/c-234:s2.1 - Bill section reference with session prefix
 * - @mp:pierre-poilievre - MP reference
 * - @committee:fina - Committee reference
 * - @committee:fina:45 - Committee meeting reference
 * - @vote:45-1:234 - Vote reference
 * - @debate:2025-12-09:14:30 - Debate timestamp reference
 * - @petition:e-4823 - Petition reference
 * - @username - User mention (forum posts only)
 *
 * Natural language patterns (opt-in via options):
 * - "Bill C-234" / "bill c-234" - Bill reference
 * - "Projet de loi C-234" - Bill reference (French)
 * - "Standing Committee on Finance" / "FINA" - Committee reference
 * - "Petition e-4823" / "e-petition 4823" - Petition reference
 * - "Standing Order 45(1)" / "S.O. 45" / "article 45 du Règlement" - Standing Order reference
 */

/**
 * Entity types that can be mentioned
 */
export type MentionType =
  | 'bill'
  | 'mp'
  | 'committee'
  | 'vote'
  | 'debate'
  | 'petition'
  | 'user'
  | 'standing-order';

/**
 * Parsed mention data
 */
export interface ParsedMention {
  /** Original matched text (including @ for explicit mentions) */
  raw: string;
  /** Entity type */
  type: MentionType;
  /** Primary identifier */
  id: string;
  /** Secondary identifier (e.g., section, meeting number) */
  subId?: string;
  /** Third-level identifier (e.g., subsection) */
  subSubId?: string;
  /** Start index in original text */
  startIndex: number;
  /** End index in original text (exclusive) */
  endIndex: number;
  /** Display text for natural language mentions (e.g., "Bill C-234" instead of "@bill:c-234") */
  displayText?: string;
}

/**
 * Options for parsing mentions
 */
export interface ParseMentionOptions {
  /** Enable natural language pattern detection (e.g., "Bill C-234") */
  naturalLanguage?: boolean;
  /** Locale for language-specific patterns (default: 'en') */
  locale?: 'en' | 'fr';
}

/**
 * Mention pattern configuration
 */
interface MentionPattern {
  type: MentionType;
  pattern: RegExp;
  /** Extract IDs from regex match groups */
  extract: (match: RegExpMatchArray) => {
    id: string;
    subId?: string;
    subSubId?: string;
  };
}

/**
 * Natural language pattern configuration
 */
interface NaturalLanguagePattern {
  type: MentionType;
  /** Pattern source - stored as a string because a fresh RegExp is built per search */
  patternSource: string;
  patternFlags: string;
  /** Locales this pattern applies to */
  locales: ('en' | 'fr')[];
  /** Extract IDs and display text from regex match groups */
  extract: (match: RegExpMatchArray) => {
    id: string;
    subId?: string;
    subSubId?: string;
    /** The display text for this mention (e.g., "Bill C-234") */
    displayText: string;
  };
}

/**
 * Mention patterns for each entity type
 *
 * All patterns carry the global flag, so every consumer must reset
 * `lastIndex` before using them.
 */
const MENTION_PATTERNS: MentionPattern[] = [
  // Bill with session and section: @bill:45-1/c-234:s2.1.a (new format)
  {
    type: 'bill',
    pattern: /@bill:(\d+-\d+)\/([cs]-?\d+)(?::([a-z0-9.-]+))?/gi,
    extract: (match) => ({
      id: match[2].toLowerCase(), // bill number (c-234)
      subId: match[1], // session (45-1)
      subSubId: match[3] || undefined, // section ref (s2.1)
    }),
  },
  // Bill with section (legacy format): @bill:c-234:s2.1.a
  {
    type: 'bill',
    pattern: /@bill:([cs]-?\d+)(?::([a-z0-9.-]+))?/gi,
    extract: (match) => ({
      id: match[1].toLowerCase(),
      subId: match[2] || undefined,
    }),
  },
  // MP: @mp:pierre-poilievre
  {
    type: 'mp',
    pattern: /@mp:([a-z][a-z0-9-]+)/gi,
    extract: (match) => ({
      id: match[1],
    }),
  },
  // Committee with meeting: @committee:fina:45
  {
    type: 'committee',
    pattern: /@committee:([a-z]{4})(?::(\d+))?/gi,
    extract: (match) => ({
      id: match[1].toUpperCase(),
      subId: match[2] || undefined,
    }),
  },
  // Vote: @vote:45-1:234
  {
    type: 'vote',
    pattern: /@vote:(\d+-\d+):(\d+)/gi,
    extract: (match) => ({
      id: match[1],
      subId: match[2],
    }),
  },
  // Debate with timestamp: @debate:2025-12-09:14:30
  {
    type: 'debate',
    pattern: /@debate:(\d{4}-\d{2}-\d{2})(?::(\d{2}[:-]\d{2}))?/gi,
    extract: (match) => ({
      id: match[1],
      // Normalise the "14-30" spelling of a timestamp to "14:30"
      subId: match[2]?.replace('-', ':') || undefined,
    }),
  },
  // Petition: @petition:e-4823
  {
    type: 'petition',
    pattern: /@petition:([ea]-?\d+)/gi,
    extract: (match) => ({
      id: match[1].toLowerCase(),
    }),
  },
  // User: @username (must come LAST to avoid matching @type:id patterns)
  // Matches usernames starting with a letter, containing letters, numbers, underscores, hyphens
  // Negative lookahead prevents matching partial words before colons (e.g., @bil from @bill:)
  {
    type: 'user',
    pattern: /@([a-z][a-z0-9_-]{2,29})(?![a-z0-9_-]*:)/gi,
    extract: (match) => ({
      id: match[1].toLowerCase(),
    }),
  },
];

/**
 * Natural language patterns for entity detection (opt-in)
 *
 * These patterns match natural language references to parliamentary entities
 * without requiring the explicit @mention syntax.
 */
const NATURAL_LANGUAGE_PATTERNS: NaturalLanguagePattern[] = [
  // English bills: "Bill C-234", "Bill S-12", "Government Bill C-234"
  // Also matches "bill C-234" (lowercase bill)
  {
    type: 'bill',
    patternSource:
      '\\b(?:Government\\s+|Private\\s+(?:Member\'?s?\\s+)?)?[Bb]ill\\s+([CS])-?(\\d+)\\b',
    patternFlags: 'gi',
    locales: ['en'],
    extract: (match) => ({
      id: `${match[1].toLowerCase()}-${match[2]}`,
      displayText: match[0],
    }),
  },
  // French bills: "projet de loi C-234", "Projet de loi S-12"
  {
    type: 'bill',
    patternSource: '\\b[Pp]rojet\\s+de\\s+loi\\s+([CS])-?(\\d+)\\b',
    patternFlags: 'gi',
    locales: ['fr'],
    extract: (match) => ({
      id: `${match[1].toLowerCase()}-${match[2]}`,
      displayText: match[0],
    }),
  },
  // Committee acronyms (high confidence, both locales)
  // Full list of committee codes
  // NOTE(review): ids here are lower-cased while explicit @committee: mentions
  // are upper-cased - confirm which form downstream consumers expect.
  {
    type: 'committee',
    patternSource:
      '\\b(FINA|ENVI|ETHI|HUMA|TRAN|NDDN|JUST|CHPC|SECU|AGRI|INAN|INDU|RNNR|SRSR|PROC|OGGO|FAAE|CIMM|HESA|FEWO|ACVA|LANG|FOPO|PACP|CIIT)\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: match[1].toLowerCase(),
      displayText: match[0],
    }),
  },
  // English committees: "Standing Committee on Finance"
  // Matches one capitalised word after "on", optionally followed by a second
  // capitalised word (with an optional "and" in between)
  {
    type: 'committee',
    patternSource:
      '\\b((?:Standing|Special|Legislative|Joint)\\s+Committee\\s+on\\s+[A-Z][a-zA-Z]+(?:\\s+(?:and\\s+)?[A-Z][a-z]+)?)\\b',
    patternFlags: 'g',
    locales: ['en'],
    extract: (match) => {
      const fullMatch = match[1];
      // Extract the committee subject (after "Committee on ") and slugify it
      const subjectMatch = fullMatch.match(/Committee\s+on\s+(.+)$/i);
      const subject = subjectMatch
        ? subjectMatch[1].toLowerCase().replace(/\s+/g, '-')
        : 'unknown';
      return {
        id: subject,
        displayText: fullMatch,
      };
    },
  },
  // E-petitions with space: "e-petition 4823"
  {
    type: 'petition',
    patternSource: '\\b[Ee]-?petition\\s+(\\d{4,})\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: `e-${match[1]}`,
      displayText: match[0],
    }),
  },
  // E-petitions direct: "e-4823", "Petition e-4823"
  {
    type: 'petition',
    patternSource: '\\b(?:[Pp]etition\\s+)?[Ee]-(\\d{4,})\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: `e-${match[1]}`,
      displayText: match[0],
    }),
  },
  // Paper petitions: "Petition No. 441-02345"
  {
    type: 'petition',
    patternSource: '\\b[Pp]etition\\s+(?:[Nn]o\\.?\\s*)?(\\d{3}-\\d{5})\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: match[1],
      displayText: match[0],
    }),
  },
  // Standing Orders (English): "Standing Order 45", "S.O. 45", "Standing Order 45(1)"
  // The word boundary sits directly after the order number: a boundary placed
  // after the closing parenthesis would fail whenever ")" is followed by
  // whitespace or punctuation, silently dropping the subsection.
  {
    type: 'standing-order',
    patternSource:
      '\\b(?:Standing\\s+Order|S\\.O\\.)\\s+(\\d+)\\b(?:\\((\\d+[a-z]*)\\))?',
    patternFlags: 'gi',
    locales: ['en'],
    extract: (match) => ({
      id: match[1],
      subId: match[2] || undefined,
      displayText: match[0],
    }),
  },
  // Standing Orders (French): "article 45 du Règlement", "l'article 45"
  {
    type: 'standing-order',
    patternSource: "\\b(?:l')?article\\s+(\\d+)(?:\\s+du\\s+[Rr]èglement)?\\b",
    patternFlags: 'gi',
    locales: ['fr'],
    extract: (match) => ({
      id: match[1],
      displayText: match[0],
    }),
  },
];

/**
 * Parse all mentions from text
 *
 * Explicit @mentions are always parsed. Natural language references are only
 * parsed when `options.naturalLanguage` is set, and are dropped when they
 * overlap a mention that was already collected.
 *
 * @param text - Text to parse
 * @param options - Optional configuration for parsing
 * @returns Array of parsed mentions, sorted by start position
 */
export function parseMentions(
  text: string,
  options?: ParseMentionOptions
): ParsedMention[] {
  const mentions: ParsedMention[] = [];
  const { naturalLanguage = false, locale = 'en' } = options ?? {};

  // Always parse explicit @mention syntax
  for (const { type, pattern, extract } of MENTION_PATTERNS) {
    // The patterns are shared module-level globals: reset before scanning
    pattern.lastIndex = 0;

    let match: RegExpExecArray | null;
    while ((match = pattern.exec(text)) !== null) {
      const { id, subId, subSubId } = extract(match);
      mentions.push({
        raw: match[0],
        type,
        id,
        subId,
        subSubId,
        startIndex: match.index,
        endIndex: match.index + match[0].length,
      });
    }
  }

  // Parse natural language patterns if enabled
  if (naturalLanguage) {
    for (const nlPattern of NATURAL_LANGUAGE_PATTERNS) {
      // Skip patterns not applicable to current locale
      if (!nlPattern.locales.includes(locale)) {
        continue;
      }

      // Create fresh regex instance for each search
      const pattern = new RegExp(nlPattern.patternSource, nlPattern.patternFlags);

      let match: RegExpExecArray | null;
      while ((match = pattern.exec(text)) !== null) {
        const { id, subId, subSubId, displayText } = nlPattern.extract(match);
        const startIndex = match.index;
        const endIndex = match.index + match[0].length;

        // Half-open interval overlap test against everything collected so far
        // (explicit mentions and earlier natural language matches take priority)
        const overlaps = mentions.some(
          (m) => startIndex < m.endIndex && endIndex > m.startIndex
        );

        if (!overlaps) {
          mentions.push({
            raw: match[0],
            type: nlPattern.type,
            id,
            subId,
            subSubId,
            startIndex,
            endIndex,
            displayText,
          });
        }
      }
    }
  }

  // Sort by position in text (Array.prototype.sort is stable, so mentions
  // with equal start keep their pattern order)
  mentions.sort((a, b) => a.startIndex - b.startIndex);

  // Remove duplicates: only the first mention for a given [start, end) span
  // is kept, regardless of its type
  const seenSpans = new Set<string>();
  const uniqueMentions: ParsedMention[] = [];
  for (const mention of mentions) {
    const spanKey = `${mention.startIndex}:${mention.endIndex}`;
    if (!seenSpans.has(spanKey)) {
      seenSpans.add(spanKey);
      uniqueMentions.push(mention);
    }
  }

  return uniqueMentions;
}

/**
 * Check if text contains any mentions
 *
 * @param text - Text to check
 * @param options - Optional configuration for parsing
 * @returns True if text contains at least one mention
 */
export function hasMentions(text: string, options?: ParseMentionOptions): boolean {
  const { naturalLanguage = false, locale = 'en' } = options ?? {};

  // Check explicit @mention patterns
  const hasExplicitMentions = MENTION_PATTERNS.some(({ pattern }) => {
    // Global regexes keep state between calls; always start from index 0
    pattern.lastIndex = 0;
    return pattern.test(text);
  });

  if (hasExplicitMentions) {
    return true;
  }

  // Check natural language patterns if enabled
  if (naturalLanguage) {
    return NATURAL_LANGUAGE_PATTERNS.some((nlPattern) => {
      if (!nlPattern.locales.includes(locale)) {
        return false;
      }
      const pattern = new RegExp(nlPattern.patternSource, nlPattern.patternFlags);
      return pattern.test(text);
    });
  }

  return false;
}

/**
 * Extract the mention being typed at cursor position
 *
 * The candidate is the text from the last "@" before the cursor up to the
 * cursor. It is rejected when it contains whitespace, unless the first space
 * character in it is also its final character.
 *
 * @param text - Full text
 * @param cursorPosition - Current cursor position
 * @returns Partial mention string and its start index if typing a mention, null otherwise
 */
export function getMentionAtCursor(
  text: string,
  cursorPosition: number
): { mention: string; startIndex: number } | null {
  // Look backwards from cursor for @
  const textBeforeCursor = text.slice(0, cursorPosition);
  const lastAtIndex = textBeforeCursor.lastIndexOf('@');

  if (lastAtIndex === -1) return null;

  // Check if there's a space between @ and cursor (would break the mention)
  const textBetween = textBeforeCursor.slice(lastAtIndex);
  if (/\s/.test(textBetween) && textBetween.indexOf(' ') < textBetween.length - 1) {
    return null;
  }

  return {
    mention: textBetween,
    startIndex: lastAtIndex,
  };
}

/**
 * Replace a mention in text
 *
 * @param text - Original text
 * @param startIndex - Start position to replace
 * @param endIndex - End position to replace (exclusive)
 * @param replacement - Replacement text
 * @returns Updated text
 */
export function replaceMention(
  text: string,
  startIndex: number,
  endIndex: number,
  replacement: string
): string {
  return text.slice(0, startIndex) + replacement + text.slice(endIndex);
}

/**
 * Generate mention string from components
 *
 * @param type - Entity type
 * @param id - Primary ID
 * @param subId - Secondary ID (optional)
 * @param subSubId - Third-level ID (optional); only emitted when subId is present,
 *   appended to it with a "." separator
 * @returns Formatted mention string
 */
export function formatMention(
  type: MentionType,
  id: string,
  subId?: string,
  subSubId?: string
): string {
  // User mentions use @username format (no colon)
  if (type === 'user') {
    return `@${id}`;
  }

  // Entity mentions use @type:id format
  let mention = `@${type}:${id}`;
  if (subId) {
    mention += `:${subId}`;
    if (subSubId) {
      mention += `.${subSubId}`;
    }
  }
  return mention;
}

/**
 * Validate mention format
 *
 * @param mention - Mention string to validate
 * @returns True if one of the explicit mention patterns matches the entire string
 */
export function isValidMention(mention: string): boolean {
  return MENTION_PATTERNS.some(({ pattern }) => {
    pattern.lastIndex = 0;
    const match = pattern.exec(mention);
    return match !== null && match[0] === mention;
  });
}

/**
 * Get the entity type from a mention string
 *
 * @param mention - Mention string (e.g., "@bill:c-234")
 * @returns Entity type or null if invalid
 */
export function getMentionType(mention: string): MentionType | null {
  // Check for entity mentions with colon syntax (@type:id)
  const entityMatch = mention.match(/@([a-z]+):/i);
  if (entityMatch) {
    const type = entityMatch[1].toLowerCase();
    const entityTypes: MentionType[] = [
      'bill',
      'mp',
      'committee',
      'vote',
      'debate',
      'petition',
    ];
    return entityTypes.includes(type as MentionType) ? (type as MentionType) : null;
  }

  // Check for user mentions (@username without colon)
  const userMatch = mention.match(/^@([a-z][a-z0-9_-]{2,29})$/i);
  if (userMatch) {
    return 'user';
  }

  return null;
}

/**
 * Extract plain text from mentions (for display)
 *
 * @param mention - ParsedMention object
 * @returns Human-readable label; types without a dedicated format fall back to the raw text
 */
export function getMentionLabel(mention: ParsedMention): string {
  switch (mention.type) {
    case 'bill':
      return mention.subId
        ? `Bill ${mention.id.toUpperCase()} ${mention.subId}`
        : `Bill ${mention.id.toUpperCase()}`;
    case 'mp':
      // Convert slug to name (e.g., "pierre-poilievre" -> "Pierre Poilievre")
      return mention.id
        .split('-')
        .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
        .join(' ');
    case 'committee':
      return mention.subId
        ? `${mention.id} Meeting #${mention.subId}`
        : mention.id;
    case 'vote':
      return `Vote #${mention.subId}`;
    case 'debate':
      return mention.subId
        ? `${mention.id} at ${mention.subId}`
        : mention.id;
    case 'petition':
      return `Petition ${mention.id.toUpperCase()}`;
    case 'user':
      return `@${mention.id}`;
    default:
      return mention.raw;
  }
}

/**
 * Extract the leading bill mention from text (if present at start)
 *
 * Leading whitespace is ignored when locating the mention, but the returned
 * indices refer to positions in the original, untrimmed text.
 *
 * @param text - Text to check
 * @returns The first bill mention if it starts the trimmed text, null otherwise
 */
export function extractLeadingBillMention(text: string): ParsedMention | null {
  const trimmed = text.trimStart();
  const mentions = parseMentions(trimmed);
  const firstMention = mentions[0];

  if (firstMention?.type === 'bill' && firstMention.startIndex === 0) {
    // Adjust indices based on trimmed whitespace
    const leadingWhitespace = text.length - trimmed.length;
    return {
      ...firstMention,
      startIndex: firstMention.startIndex + leadingWhitespace,
      endIndex: firstMention.endIndex + leadingWhitespace,
    };
  }

  return null;
}

/**
 * Format a bill mention (simple format without session prefix)
 *
 * @param _session - Parliamentary session (unused, kept for API compatibility)
 * @param billNumber - Bill number (e.g., "c-234")
 * @param sectionRef - Optional section reference (e.g., "s2.1")
 * @returns Formatted mention string like @bill:c-234 or @bill:c-234:s2.1
 */
export function formatBillMentionWithSession(
  _session: string,
  billNumber: string,
  sectionRef?: string | null
): string {
  let mention = `@bill:${billNumber.toLowerCase()}`;
  if (sectionRef) {
    mention += `:${sectionRef}`;
  }
  return mention;
}

/**
 * Check if a mention has the new session format
 *
 * @param mention - Parsed mention to check
 * @returns True if the mention is a bill whose subId looks like a session (e.g., "45-1")
 */
export function hasSessionFormat(mention: ParsedMention): boolean {
  return mention.type === 'bill' && !!mention.subId && /^\d+-\d+$/.test(mention.subId);
}

/**
 * Extract section reference from a bill mention (handles both formats)
 *
 * @param mention - Parsed bill mention
 * @returns Section reference or undefined
 */
export function getSectionFromBillMention(mention: ParsedMention): string | undefined {
  if (mention.type !== 'bill') return undefined;
  // Session format keeps the section in subSubId; legacy format keeps it in subId
  return hasSessionFormat(mention) ? mention.subSubId : mention.subId;
}

export default {
  parseMentions,
  hasMentions,
  getMentionAtCursor,
  replaceMention,
  formatMention,
  isValidMention,
  getMentionType,
  getMentionLabel,
  extractLeadingBillMention,
  formatBillMentionWithSession,
  hasSessionFormat,
  getSectionFromBillMention,
};

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

mentionParser.ts•18.2 KiB