/**
* Mention Parser - Parse @mention syntax from text
*
* Supported mention formats:
* - @bill:c-234 - Bill reference
* - @bill:c-234:s2.1 - Bill section reference
* - @bill:45-1/c-234:s2.1 - Bill reference with session prefix (section part optional)
* - @mp:pierre-poilievre - MP reference
* - @committee:fina - Committee reference
* - @committee:fina:45 - Committee meeting reference
* - @vote:45-1:234 - Vote reference
* - @debate:2025-12-09:14:30 - Debate timestamp reference
* - @petition:e-4823 - Petition reference
* - @username - User mention (forum posts only)
*
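* Natural language patterns (opt-in via the `naturalLanguage` option, filtered by `locale`):
* - "Bill C-234" / "bill c-234" - Bill reference
* - "Projet de loi C-234" - Bill reference (French)
* - "Standing Committee on Finance" / "FINA" - Committee reference
* - "Petition e-4823" / "e-petition 4823" - Petition reference
* - "Petition No. 441-02345" - Paper petition reference
* - "Standing Order 45" / "S.O. 45" - Standing Order reference
* - "article 45 du Règlement" - Standing Order reference (French)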
*/
/**
* Entity types that can be mentioned.
*
* Note: 'standing-order' has no explicit `@type:id` pattern in MENTION_PATTERNS;
* in this file it is only produced by the natural-language patterns.
*/
export type MentionType =
| 'bill'
| 'mp'
| 'committee'
| 'vote'
| 'debate'
| 'petition'
| 'user'
| 'standing-order';
/**
* Parsed mention data: one entry per match found by parseMentions
*/
export interface ParsedMention {
/** Original matched text (starts with "@" for explicit mentions; for natural language matches it is the matched phrase) */
raw: string;
/** Entity type */
type: MentionType;
/** Primary identifier */
id: string;
/** Secondary identifier (e.g., section, meeting number; for session-prefixed bill mentions this is the session, e.g. "45-1") */
subId?: string;
/** Third-level identifier (e.g., the section reference of a session-prefixed bill mention) */
subSubId?: string;
/** Start index in original text (inclusive) */
startIndex: number;
/** End index in original text (exclusive: start index plus length of the matched text) */
endIndex: number;
/** Display text for natural language mentions (e.g., "Bill C-234" instead of "@bill:c-234"); not set for explicit @mentions */
displayText?: string;
}
/**
* Options for parsing mentions (accepted by parseMentions and hasMentions)
*/
export interface ParseMentionOptions {
/** Enable natural language pattern detection (e.g., "Bill C-234"); default: false */
naturalLanguage?: boolean;
/** Locale for language-specific patterns (default: 'en'); only natural language patterns listing this locale are applied */
locale?: 'en' | 'fr';
}
/**
* Mention pattern configuration for explicit @mention syntax
*/
interface MentionPattern {
/** Entity type assigned to every match of this pattern */
type: MentionType;
/**
* Regex that matches the full mention text.
* The instances in MENTION_PATTERNS carry the global flag and are shared
* between calls, so users of this field reset `lastIndex` before matching.
*/
pattern: RegExp;
/** Extract IDs from regex match groups */
extract: (match: RegExpMatchArray) => {
id: string;
subId?: string;
subSubId?: string;
};
}
/**
* Natural language pattern configuration
*
* The regex is stored as source + flags (not as a RegExp instance) so that a
* fresh RegExp can be constructed for every search and no `lastIndex` state is
* shared between calls.
*/
interface NaturalLanguagePattern {
/** Entity type assigned to every match of this pattern */
type: MentionType;
/** Regex source string passed to the RegExp constructor */
patternSource: string;
/** Regex flags passed to the RegExp constructor; the exec() loop in parseMentions relies on the 'g' flag to advance past each match */
patternFlags: string;
/** Locales this pattern applies to */
locales: ('en' | 'fr')[];
/** Extract IDs and display text from regex match groups */
extract: (match: RegExpMatchArray) => {
id: string;
subId?: string;
subSubId?: string;
/** The display text for this mention (e.g., "Bill C-234") */
displayText: string;
};
}
/**
 * Explicit `@mention` patterns, one entry per supported syntax.
 *
 * Every regex is global and case-insensitive. The instances are shared across
 * calls, so any code that runs them must reset `lastIndex` first.
 */
const MENTION_PATTERNS: MentionPattern[] = [
  // Session-prefixed bill (new format), section optional: @bill:45-1/c-234:s2.1.a
  {
    type: 'bill',
    pattern: /@bill:(\d+-\d+)\/([cs]-?\d+)(?::([a-z0-9.-]+))?/gi,
    extract: (groups) => {
      const [, session, billNumber, section] = groups;
      return {
        id: billNumber.toLowerCase(), // e.g. "c-234"
        subId: session, // e.g. "45-1"
        subSubId: section || undefined, // e.g. "s2.1"
      };
    },
  },
  // Legacy bill format, section optional: @bill:c-234:s2.1.a
  {
    type: 'bill',
    pattern: /@bill:([cs]-?\d+)(?::([a-z0-9.-]+))?/gi,
    extract: (groups) => {
      const [, billNumber, section] = groups;
      return {
        id: billNumber.toLowerCase(),
        subId: section || undefined,
      };
    },
  },
  // Member of Parliament slug: @mp:pierre-poilievre
  {
    type: 'mp',
    pattern: /@mp:([a-z][a-z0-9-]+)/gi,
    extract: (groups) => {
      const [, slug] = groups;
      return { id: slug };
    },
  },
  // Committee (four-letter code), meeting number optional: @committee:fina:45
  {
    type: 'committee',
    pattern: /@committee:([a-z]{4})(?::(\d+))?/gi,
    extract: (groups) => {
      const [, code, meeting] = groups;
      return {
        id: code.toUpperCase(),
        subId: meeting || undefined,
      };
    },
  },
  // Vote within a session: @vote:45-1:234
  {
    type: 'vote',
    pattern: /@vote:(\d+-\d+):(\d+)/gi,
    extract: (groups) => {
      const [, session, voteNumber] = groups;
      return {
        id: session,
        subId: voteNumber,
      };
    },
  },
  // Debate date, time optional: @debate:2025-12-09:14:30
  // The time may be written with ":" or "-" and is normalised to "HH:MM".
  {
    type: 'debate',
    pattern: /@debate:(\d{4}-\d{2}-\d{2})(?::(\d{2}[:-]\d{2}))?/gi,
    extract: (groups) => {
      const [, date, time] = groups;
      return {
        id: date,
        subId: time?.replace('-', ':') || undefined,
      };
    },
  },
  // Petition: @petition:e-4823
  {
    type: 'petition',
    pattern: /@petition:([ea]-?\d+)/gi,
    extract: (groups) => {
      const [, petitionNumber] = groups;
      return { id: petitionNumber.toLowerCase() };
    },
  },
  // Plain user mention: @username. Kept LAST so the @type:id entries come first.
  // A username starts with a letter and is 3-30 characters of letters, digits,
  // underscores or hyphens. The negative lookahead rejects any prefix of a
  // word that is followed by a colon (e.g. "@bil" taken out of "@bill:").
  {
    type: 'user',
    pattern: /@([a-z][a-z0-9_-]{2,29})(?![a-z0-9_-]*:)/gi,
    extract: (groups) => {
      const [, username] = groups;
      return { id: username.toLowerCase() };
    },
  },
];
/**
 * Natural language patterns for entity detection (opt-in)
 *
 * These patterns match natural language references to parliamentary entities
 * without requiring the explicit @mention syntax. Each entry is stored as a
 * regex source string plus flags so that a fresh RegExp can be built per
 * search; `locales` restricts which entries run for a given locale.
 */
const NATURAL_LANGUAGE_PATTERNS: NaturalLanguagePattern[] = [
  // English bills: "Bill C-234", "Bill S-12", "Government Bill C-234",
  // "Private Member's Bill C-234". Case-insensitive, so "bill c-234" matches too.
  {
    type: 'bill',
    patternSource: '\\b(?:Government\\s+|Private\\s+(?:Member\'?s?\\s+)?)?[Bb]ill\\s+([CS])-?(\\d+)\\b',
    patternFlags: 'gi',
    locales: ['en'],
    extract: (match) => ({
      id: `${match[1].toLowerCase()}-${match[2]}`,
      displayText: match[0],
    }),
  },
  // French bills: "projet de loi C-234", "Projet de loi S-12"
  {
    type: 'bill',
    patternSource: '\\b[Pp]rojet\\s+de\\s+loi\\s+([CS])-?(\\d+)\\b',
    patternFlags: 'gi',
    locales: ['fr'],
    extract: (match) => ({
      id: `${match[1].toLowerCase()}-${match[2]}`,
      displayText: match[0],
    }),
  },
  // Committee acronyms (both locales). Case-sensitive: only the upper-case
  // codes listed here are recognised.
  // NOTE(review): the id is lower-cased here, whereas the explicit
  // @committee:xxxx pattern upper-cases it — confirm which form consumers expect.
  {
    type: 'committee',
    patternSource: '\\b(FINA|ENVI|ETHI|HUMA|TRAN|NDDN|JUST|CHPC|SECU|AGRI|INAN|INDU|RNNR|SRSR|PROC|OGGO|FAAE|CIMM|HESA|FEWO|ACVA|LANG|FOPO|PACP|CIIT)\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: match[1].toLowerCase(),
      displayText: match[0],
    }),
  },
  // English committees: "Standing Committee on Finance"
  // Matches one capitalised subject word after "on", optionally followed by a
  // second capitalised word (with an optional "and" in between).
  {
    type: 'committee',
    patternSource: '\\b((?:Standing|Special|Legislative|Joint)\\s+Committee\\s+on\\s+[A-Z][a-zA-Z]+(?:\\s+(?:and\\s+)?[A-Z][a-z]+)?)\\b',
    patternFlags: 'g',
    locales: ['en'],
    extract: (match) => {
      const fullMatch = match[1];
      // The id is a slug of the subject after "Committee on " (e.g. "finance")
      const subjectMatch = fullMatch.match(/Committee\s+on\s+(.+)$/i);
      const subject = subjectMatch ? subjectMatch[1].toLowerCase().replace(/\s+/g, '-') : 'unknown';
      return {
        id: subject,
        displayText: fullMatch,
      };
    },
  },
  // E-petitions with space: "e-petition 4823"
  {
    type: 'petition',
    patternSource: '\\b[Ee]-?petition\\s+(\\d{4,})\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: `e-${match[1]}`,
      displayText: match[0],
    }),
  },
  // E-petitions direct: "e-4823", "Petition e-4823"
  {
    type: 'petition',
    patternSource: '\\b(?:[Pp]etition\\s+)?[Ee]-(\\d{4,})\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: `e-${match[1]}`,
      displayText: match[0],
    }),
  },
  // Paper petitions: "Petition No. 441-02345"
  {
    type: 'petition',
    patternSource: '\\b[Pp]etition\\s+(?:[Nn]o\\.?\\s*)?(\\d{3}-\\d{5})\\b',
    patternFlags: 'g',
    locales: ['en', 'fr'],
    extract: (match) => ({
      id: match[1],
      displayText: match[0],
    }),
  },
  // Standing Orders (English): "Standing Order 45", "S.O. 45", "Standing Order 45(1)"
  // The word boundary sits directly after the order number. Placing it after
  // the optional "(n)" group would make that group unmatchable in normal text:
  // ")" followed by whitespace, punctuation or end of input is not a word
  // boundary, so the regex would backtrack and drop the subsection.
  {
    type: 'standing-order',
    patternSource: '\\b(?:Standing\\s+Order|S\\.O\\.)\\s+(\\d+)\\b(?:\\((\\d+[a-z]*)\\))?',
    patternFlags: 'gi',
    locales: ['en'],
    extract: (match) => ({
      id: match[1],
      subId: match[2] || undefined,
      displayText: match[0],
    }),
  },
  // Standing Orders (French): "article 45 du Règlement", "l'article 45"
  {
    type: 'standing-order',
    patternSource: "\\b(?:l')?article\\s+(\\d+)(?:\\s+du\\s+[Rr]èglement)?\\b",
    patternFlags: 'gi',
    locales: ['fr'],
    extract: (match) => ({
      id: match[1],
      displayText: match[0],
    }),
  },
];
/**
 * Find every mention in a piece of text.
 *
 * Explicit `@mention` syntax is always parsed. Natural language references are
 * added only when `options.naturalLanguage` is set, using the patterns that
 * list `options.locale` (default 'en'); a natural language match is dropped if
 * its span overlaps any mention already collected.
 *
 * @param text - Text to parse
 * @param options - Optional configuration for parsing
 * @returns Mentions ordered by start index, with at most one entry per
 *          identical (startIndex, endIndex) span
 */
export function parseMentions(
  text: string,
  options?: ParseMentionOptions
): ParsedMention[] {
  const { naturalLanguage = false, locale = 'en' } = options ?? {};
  const found: ParsedMention[] = [];

  // Pass 1: explicit @mention syntax
  for (const spec of MENTION_PATTERNS) {
    const re = spec.pattern;
    // The regex instance is shared and global, so start from the beginning
    re.lastIndex = 0;
    for (let hit = re.exec(text); hit !== null; hit = re.exec(text)) {
      const ids = spec.extract(hit);
      found.push({
        raw: hit[0],
        type: spec.type,
        id: ids.id,
        subId: ids.subId,
        subSubId: ids.subSubId,
        startIndex: hit.index,
        endIndex: hit.index + hit[0].length,
      });
    }
  }

  // Pass 2: natural language references (opt-in)
  if (naturalLanguage) {
    for (const spec of NATURAL_LANGUAGE_PATTERNS) {
      if (!spec.locales.includes(locale)) {
        continue; // not applicable to the requested locale
      }
      // A fresh instance per search keeps lastIndex state local
      const re = new RegExp(spec.patternSource, spec.patternFlags);
      for (let hit = re.exec(text); hit !== null; hit = re.exec(text)) {
        const ids = spec.extract(hit);
        const start = hit.index;
        const end = start + hit[0].length;
        // Earlier mentions (explicit ones first) win over any overlapping match
        const clashes = found.some(
          (existing) => start < existing.endIndex && end > existing.startIndex
        );
        if (clashes) {
          continue;
        }
        found.push({
          raw: hit[0],
          type: spec.type,
          id: ids.id,
          subId: ids.subId,
          subSubId: ids.subSubId,
          startIndex: start,
          endIndex: end,
          displayText: ids.displayText,
        });
      }
    }
  }

  // Order by position in the text
  found.sort((left, right) => left.startIndex - right.startIndex);

  // Keep only the first mention for each identical span
  const seenSpans = new Set<string>();
  return found.filter((mention) => {
    const span = `${mention.startIndex}:${mention.endIndex}`;
    if (seenSpans.has(span)) {
      return false;
    }
    seenSpans.add(span);
    return true;
  });
}
/**
 * Report whether text contains at least one mention.
 *
 * Explicit `@mention` patterns are always tested; natural language patterns
 * for the requested locale are tested only when `options.naturalLanguage` is set.
 *
 * @param text - Text to check
 * @param options - Optional configuration for parsing
 * @returns True if text contains at least one mention
 */
export function hasMentions(text: string, options?: ParseMentionOptions): boolean {
  const { naturalLanguage = false, locale = 'en' } = options ?? {};

  // Explicit @mention syntax
  for (const { pattern } of MENTION_PATTERNS) {
    // Shared global regex: always search from the start
    pattern.lastIndex = 0;
    if (pattern.test(text)) {
      return true;
    }
  }

  if (!naturalLanguage) {
    return false;
  }

  // Natural language references for the requested locale
  for (const spec of NATURAL_LANGUAGE_PATTERNS) {
    if (!spec.locales.includes(locale)) {
      continue;
    }
    if (new RegExp(spec.patternSource, spec.patternFlags).test(text)) {
      return true;
    }
  }
  return false;
}
/**
 * Extract the mention being typed at cursor position
 *
 * Looks backwards from the cursor for the nearest "@" and returns the text
 * from that "@" up to the cursor. The candidate is rejected when it contains
 * whitespace, with one exception: a single literal space as the very last
 * character is tolerated.
 *
 * @param text - Full text
 * @param cursorPosition - Current cursor position
 * @returns The partial mention (including "@") and the index of the "@",
 *          or null if the cursor is not inside a mention
 */
export function getMentionAtCursor(
  text: string,
  cursorPosition: number
): { mention: string; startIndex: number } | null {
  // Look backwards from cursor for @
  const textBeforeCursor = text.slice(0, cursorPosition);
  const lastAtIndex = textBeforeCursor.lastIndexOf('@');
  if (lastAtIndex === -1) return null;

  const candidate = textBeforeCursor.slice(lastAtIndex);

  // Any kind of whitespace (space, tab, newline, ...) breaks the mention.
  // Locate the FIRST whitespace of any kind; checking only for ' ' would let
  // a newline or tab slip through whenever the text happens to end in a space.
  const firstWhitespace = candidate.search(/\s/);
  if (firstWhitespace !== -1) {
    const isSingleTrailingSpace =
      firstWhitespace === candidate.length - 1 && candidate[firstWhitespace] === ' ';
    if (!isSingleTrailingSpace) {
      return null;
    }
  }

  return {
    mention: candidate,
    startIndex: lastAtIndex,
  };
}
/**
 * Splice a replacement string into text in place of the span
 * `[startIndex, endIndex)`.
 *
 * @param text - Original text
 * @param startIndex - Start position to replace
 * @param endIndex - End position to replace
 * @param replacement - Replacement text
 * @returns Updated text
 */
export function replaceMention(
  text: string,
  startIndex: number,
  endIndex: number,
  replacement: string
): string {
  const before = text.slice(0, startIndex);
  const after = text.slice(endIndex);
  return `${before}${replacement}${after}`;
}
/**
 * Build a mention string from its components.
 *
 * User mentions are written as `@id`. All other types are written as
 * `@type:id`, followed by `:subId` when a sub id is given, followed by
 * `.subSubId` when both a sub id and a sub-sub id are given.
 *
 * @param type - Entity type
 * @param id - Primary ID
 * @param subId - Secondary ID (optional)
 * @param subSubId - Third-level ID (optional; ignored unless subId is given)
 * @returns Formatted mention string
 */
export function formatMention(
  type: MentionType,
  id: string,
  subId?: string,
  subSubId?: string
): string {
  if (type === 'user') {
    // Users have no type prefix and no sub identifiers
    return `@${id}`;
  }

  const parts = [`@${type}:${id}`];
  if (subId) {
    parts.push(subSubId ? `:${subId}.${subSubId}` : `:${subId}`);
  }
  return parts.join('');
}
/**
 * Check whether a string is, in its entirety, a well-formed explicit mention.
 *
 * @param mention - Mention string to validate
 * @returns True if some explicit pattern's first match equals the whole string
 */
export function isValidMention(mention: string): boolean {
  for (const { pattern } of MENTION_PATTERNS) {
    // Shared global regex: always search from the start
    pattern.lastIndex = 0;
    const hit = pattern.exec(mention);
    if (hit?.[0] === mention) {
      return true;
    }
  }
  return false;
}
/**
 * Determine the entity type named by a mention string.
 *
 * A `@type:` prefix found anywhere in the string decides the result: it is the
 * (lower-cased) type when that is one of the colon-syntax entity types, and
 * null otherwise. Without such a prefix, the string is a user mention if it is
 * exactly `@username`.
 *
 * @param mention - Mention string (e.g., "@bill:c-234")
 * @returns Entity type or null if invalid
 */
export function getMentionType(mention: string): MentionType | null {
  // Entity mentions with colon syntax (@type:id)
  const prefixed = /@([a-z]+):/i.exec(mention);
  if (prefixed !== null) {
    const colonTypes = new Set<string>([
      'bill',
      'mp',
      'committee',
      'vote',
      'debate',
      'petition',
    ]);
    const candidate = prefixed[1].toLowerCase();
    if (colonTypes.has(candidate)) {
      return candidate as MentionType;
    }
    return null;
  }

  // User mentions (@username without colon)
  const isUser = /^@([a-z][a-z0-9_-]{2,29})$/i.test(mention);
  return isUser ? 'user' : null;
}
/**
 * Produce a human-readable label for a parsed mention.
 *
 * Types without a dedicated label format fall back to the raw matched text.
 *
 * @param mention - ParsedMention object
 * @returns Human-readable label
 */
export function getMentionLabel(mention: ParsedMention): string {
  const { type, id, subId } = mention;

  if (type === 'bill') {
    const billNumber = id.toUpperCase();
    return subId ? `Bill ${billNumber} ${subId}` : `Bill ${billNumber}`;
  }

  if (type === 'mp') {
    // Turn the slug into a name: "pierre-poilievre" -> "Pierre Poilievre"
    const words = id.split('-');
    const capitalised = words.map(
      (word) => `${word.charAt(0).toUpperCase()}${word.slice(1)}`
    );
    return capitalised.join(' ');
  }

  if (type === 'committee') {
    return subId ? `${id} Meeting #${subId}` : id;
  }

  if (type === 'vote') {
    return `Vote #${subId}`;
  }

  if (type === 'debate') {
    return subId ? `${id} at ${subId}` : id;
  }

  if (type === 'petition') {
    return `Petition ${id.toUpperCase()}`;
  }

  if (type === 'user') {
    return `@${id}`;
  }

  return mention.raw;
}
/**
 * Return the bill mention that opens the text, ignoring leading whitespace.
 *
 * Only explicit @mention syntax is considered (parseMentions is called
 * without options). The returned indices refer to the original, untrimmed text.
 *
 * @param text - Text to check
 * @returns The first mention if it is a bill mention located at the very start
 *          of the trimmed text, null otherwise
 */
export function extractLeadingBillMention(text: string): ParsedMention | null {
  const body = text.trimStart();
  // Number of whitespace characters removed from the front
  const offset = text.length - body.length;

  const [first] = parseMentions(body);
  if (!first || first.type !== 'bill' || first.startIndex !== 0) {
    return null;
  }

  // Shift the span back into the coordinates of the untrimmed text
  return {
    ...first,
    startIndex: first.startIndex + offset,
    endIndex: first.endIndex + offset,
  };
}
/**
 * Build a bill mention in the simple format, i.e. without a session prefix.
 *
 * @param _session - Parliamentary session (unused, kept for API compatibility)
 * @param billNumber - Bill number (e.g., "c-234"); lower-cased in the output
 * @param sectionRef - Optional section reference (e.g., "s2.1")
 * @returns Formatted mention string like @bill:c-234 or @bill:c-234:s2.1
 */
export function formatBillMentionWithSession(
  _session: string,
  billNumber: string,
  sectionRef?: string | null
): string {
  const base = `@bill:${billNumber.toLowerCase()}`;
  return sectionRef ? `${base}:${sectionRef}` : base;
}
/**
 * Tell whether a parsed mention is a bill mention whose sub id looks like a
 * session identifier (digits, hyphen, digits — e.g. "45-1").
 *
 * @param mention - Parsed mention to check
 * @returns True if mention uses new format with session in subId
 */
export function hasSessionFormat(mention: ParsedMention): boolean {
  if (mention.type !== 'bill') {
    return false;
  }
  const { subId } = mention;
  if (!subId) {
    return false;
  }
  return /^\d+-\d+$/.test(subId);
}
/**
 * Read the section reference out of a bill mention, whichever format it uses.
 *
 * In the session format the section is held in `subSubId`; otherwise it is
 * held in `subId`.
 *
 * @param mention - Parsed bill mention
 * @returns Section reference, or undefined if absent or not a bill mention
 */
export function getSectionFromBillMention(mention: ParsedMention): string | undefined {
  if (mention.type !== 'bill') {
    return undefined;
  }
  if (hasSessionFormat(mention)) {
    return mention.subSubId;
  }
  return mention.subId;
}
// Default export: bundles the module's public functions into a single object,
// mirroring the named exports declared above.
export default {
parseMentions,
hasMentions,
getMentionAtCursor,
replaceMention,
formatMention,
isValidMention,
getMentionType,
getMentionLabel,
extractLeadingBillMention,
formatBillMentionWithSession,
hasSessionFormat,
getSectionFromBillMention,
};