import * as chrono from 'chrono-node';
/**
 * Time-related fields recovered from a piece of free-form text.
 *
 * Every field except `confidence` is optional because the text may not
 * contain any recognisable time expression at all.
 */
export interface ParsedTime {
  /** Start of the event, taken from the first time expression found in the text. */
  startDate?: Date;
  /** End of the event: either stated explicitly or computed as start + duration. */
  endDate?: Date;
  /** Length of the event. */
  duration?: number; // in minutes
  /** Set to true when the matched expression carried no certain hour or minute. */
  isAllDay?: boolean;
  /** Timezone identifier supplied by the caller; it is stored as given. */
  timezone?: string;
  /** How much the parse result should be trusted; 0 means nothing was recognised. */
  confidence: number; // 0-1 scale
}
/**
 * Structured view of an event described in free-form text.
 *
 * Only `time` is mandatory; the other fields are filled on a best-effort
 * basis by the heuristics in `NaturalLanguageProcessor`.
 */
export interface ParsedEvent {
  /** Short label for the event. */
  title?: string;
  /** Text left over once the other recognised components have been removed. */
  description?: string;
  /** Where the event takes place. */
  location?: string;
  /** Names or e-mail addresses of the people mentioned as participants. */
  attendees?: string[];
  /** When the event takes place. */
  time: ParsedTime;
  /** Importance level of the event. */
  priority?: 'low' | 'medium' | 'high' | 'urgent';
  /** Whether the text describes a repeating event. */
  isRecurring?: boolean;
  /** Recurrence rule string such as `FREQ=WEEKLY`; present only for recurring events. */
  recurrencePattern?: string;
}
/**
 * Heuristic extraction of calendar-event fields (title, time, attendees,
 * location, priority, recurrence, description) from free-form English text.
 *
 * Date/time recognition is delegated to chrono-node; everything else is
 * regex based and therefore best-effort. All members are static.
 */
export class NaturalLanguageProcessor {
  /** Nouns that mark a sentence as describing an event; used to choose the title sentence. */
  private static readonly EVENT_KEYWORDS = [
    'meeting', 'call', 'appointment', 'lunch', 'dinner', 'coffee',
    'interview', 'conference', 'workshop', 'training', 'presentation',
    'review', 'standup', 'sync', 'catchup', 'demo', 'session'
  ];

  /**
   * One pattern per event keyword, capturing the whole sentence that contains it.
   * The `\b` before the keyword stops "recall" from counting as "call".
   */
  private static readonly EVENT_SENTENCE_PATTERNS = NaturalLanguageProcessor.EVENT_KEYWORDS.map(
    keyword => new RegExp(`\\b([^.!?]*\\b${keyword}[^.!?]*)`, 'i')
  );

  /** Relative-day and weekday words treated as time expressions. */
  private static readonly DAY_WORDS =
    'today|tomorrow|monday|tuesday|wednesday|thursday|friday|saturday|sunday';

  /**
   * "<n> hours", optionally followed by "<m> minutes" ("1h30m", "1 hour and 30 min").
   * The `(?![a-z])` lookaheads stop the one-letter units from matching the first
   * letter of an ordinary word ("3 members", "5 Monday").
   */
  private static readonly HOURS_PATTERN =
    /(?<![\d.])(\d+(?:\.\d+)?)\s*(?:hours?|hrs?|h)(?![a-z])(?:[\s,]*(?:and\s+)?(\d+)\s*(?:minutes?|mins?|m)(?![a-z]))?/i;

  /** "<n> minutes" on its own. */
  private static readonly MINUTES_PATTERN =
    /(?<![\d.])(\d+(?:\.\d+)?)\s*(?:minutes?|mins?|m)(?![a-z])/i;

  /** Spelled-out durations (matched case-insensitively) and their length in minutes. */
  private static readonly DURATION_PHRASES: ReadonlyArray<readonly [string, number]> = [
    ['half hour', 30],
    ['quarter hour', 15],
    ['two hours', 120],
    ['half day', 240],
    ['full day', 480],
    ['all day', 480]
  ];

  /**
   * Priority levels in precedence order. A bare "priority" counts as high
   * unless it is part of "low priority".
   */
  private static readonly PRIORITY_PATTERNS: ReadonlyArray<readonly ['urgent' | 'high' | 'low', RegExp]> = [
    ['urgent', /\b(?:urgent|asap|critical|emergency)/i],
    ['high', /\b(?:important|high\s+priority|crucial)|(?<!\blow\s+)\bpriority/i],
    ['low', /\b(?:low\s+priority|when possible|optional|if time permits)/i]
  ];

  /** Words that introduce a list of participants. */
  private static readonly ATTENDEE_TRIGGER = /\b(?:with|invite|include)\s+/gi;

  /** Where a participant list ends: sentence punctuation, a number, or a time/place/topic word. */
  private static readonly ATTENDEE_STOP =
    /[\n;!?]|\.(?:\s|$)|\s+@\s|\s+\d|\s+(?:at|on|in|from|for|until|to|about|regarding|with|invite|include|today|tomorrow|tonight|every|each|this|next|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i;

  /** Separators between individual participants. */
  private static readonly ATTENDEE_SEPARATOR = /\s*(?:,|&|\band\b)\s*/i;

  /**
   * Location patterns, tried in order. None of them is global: `String.match`
   * only exposes capture groups for non-global patterns.
   */
  private static readonly LOCATION_PATTERNS: readonly RegExp[] = [
    // "at/in/@ ... <venue word>"; the lookahead skips clock times such as "at 3pm".
    /(?:\b(?:at|in)|@)\s+(?!\d{1,2}(?::\d{2})?\s*(?:[ap]m)?(?:\s|$))([^,\n.!?]+(?:room|office|building|floor|conference|meeting|hall|center|cafe|restaurant))/i,
    // Explicit "location: ..." style labels.
    /(?:location|venue|place):\s*([^,\n.!?]+)/i,
    // Capitalised place names; the trailing lookahead prevents cutting a word in half.
    /(?:\b(?:at|in)|@)\s+([A-Z][^,\n.!?]{5,30})(?![A-Za-z0-9])/
  ];

  /** Time expression trailing a location candidate ("... tomorrow", "... at 3pm"). */
  private static readonly TRAILING_TIME =
    /\s+(?:(?:at|from|until|by)\s+\d|(?:(?:on|this|next|every|each)\s+)?(?:today|tomorrow|tonight|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b).*$/i;

  /** Recurrence phrases and the rule string each one maps to. */
  private static readonly RECURRENCE_RULES: ReadonlyArray<{ pattern: RegExp; rule: string }> = [
    { pattern: /\b(?:every|each)\s+(?:week|weekly)\b/i, rule: 'FREQ=WEEKLY' },
    { pattern: /\b(?:every|each)\s+(?:day|daily)\b/i, rule: 'FREQ=DAILY' },
    { pattern: /\b(?:every|each)\s+(?:month|monthly)\b/i, rule: 'FREQ=MONTHLY' },
    { pattern: /\b(?:every|each)\s+(?:year|yearly|annually)\b/i, rule: 'FREQ=YEARLY' },
    { pattern: /\b(?:every|each)\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/i, rule: 'FREQ=WEEKLY' }
  ];

  /** "at 3", "at 3pm", "from 10:30 am", "on 3rd" - a preposition followed by a number. */
  private static readonly TITLE_PREPOSITION_TIME =
    /\b(?:at|on|from|until|to)\s+\d+(?:(?::\d{2})?(?:\s*[ap]m)?|(?:st|nd|rd|th))\b/gi;

  /** Day words, together with a preposition that would otherwise be left dangling. */
  private static readonly TITLE_DAY_WORD = new RegExp(
    `\\b(?:(?:on|this|next|by)\\s+)?(?:${NaturalLanguageProcessor.DAY_WORDS})\\b`,
    'gi'
  );

  /** Bare clock times: "10:30", "10:30 am", "3pm". */
  private static readonly TITLE_CLOCK_TIME = /\b\d{1,2}(?::\d{2}(?:\s*[ap]m)?|\s*[ap]m)\b/gi;

  /**
   * Parse natural language text into structured event data.
   *
   * @param text Free-form description of the event.
   * @param userTimezone Timezone identifier stored on the result (default `'UTC'`).
   * @returns The extracted event; fields that could not be recognised are left undefined.
   */
  static parseEventText(text: string, userTimezone: string = 'UTC'): ParsedEvent {
    const recurrence = this.extractRecurrence(text);
    const parsed: ParsedEvent = {
      title: this.extractTitle(text),
      time: this.parseTimeExpression(text, userTimezone),
      attendees: this.extractAttendees(text),
      location: this.extractLocation(text),
      priority: this.extractPriority(text),
      isRecurring: recurrence.isRecurring,
      recurrencePattern: recurrence.pattern
    };
    // The description is whatever remains once the fields above are removed,
    // so it has to be computed last.
    parsed.description = this.extractDescription(text, parsed);
    return parsed;
  }

  /**
   * Parse the first time expression found in the text.
   *
   * @param text Text to scan.
   * @param userTimezone Timezone identifier stored on the result (default `'UTC'`).
   * @returns `{ confidence: 0 }` when nothing is recognised. Otherwise the start,
   *          an end (explicit, derived from a duration phrase, or start + 60
   *          minutes) and the duration in minutes when it is known.
   */
  static parseTimeExpression(text: string, userTimezone: string = 'UTC'): ParsedTime {
    // NOTE(review): chrono-node documents the timezone as part of the reference
    // argument, not the options object - confirm this option is honoured by the
    // installed version.
    const results = chrono.parse(text, new Date(), { timezone: userTimezone });
    const result = results[0];
    if (!result) {
      return { confidence: 0 };
    }

    const parsed: ParsedTime = {
      // NOTE(review): match indices are not expected to be negative, so this is
      // effectively always 0.8.
      confidence: result.index >= 0 ? 0.8 : 0.3
    };

    if (result.start) {
      parsed.startDate = result.start.date();
      // No certain hour and no certain minute: the text named a day, not a time.
      if (result.start.isCertain('hour') === false && result.start.isCertain('minute') === false) {
        parsed.isAllDay = true;
      }
    }

    if (result.end) {
      parsed.endDate = result.end.date();
      if (parsed.startDate) {
        // Record the explicit span so later adjustments can preserve it.
        const span = Math.round((parsed.endDate.getTime() - parsed.startDate.getTime()) / 60000);
        if (span > 0) {
          parsed.duration = span;
        }
      }
    } else if (parsed.startDate) {
      // No explicit end: use a duration phrase if there is one, else default to 1 hour.
      const stated = this.extractDuration(text);
      parsed.duration = stated > 0 ? stated : 60;
      parsed.endDate = new Date(parsed.startDate.getTime() + parsed.duration * 60000);
    }

    parsed.timezone = userTimezone;
    return parsed;
  }

  /**
   * Extract a duration from text.
   *
   * Understands numeric forms ("2 hours", "1.5 hrs", "45 min", "1h30m",
   * "1 hour and 30 minutes") and a few spelled-out phrases ("half hour",
   * "all day", ...).
   *
   * @param text Text to scan.
   * @returns Duration in whole minutes, or 0 when none is found.
   */
  static extractDuration(text: string): number {
    const hoursMatch = this.HOURS_PATTERN.exec(text);
    if (hoursMatch) {
      const hours = parseFloat(hoursMatch[1] ?? '0');
      const extraMinutes = parseInt(hoursMatch[2] ?? '0', 10);
      return Math.round(hours * 60 + extraMinutes);
    }

    const minutesMatch = this.MINUTES_PATTERN.exec(text);
    if (minutesMatch) {
      return Math.round(parseFloat(minutesMatch[1] ?? '0'));
    }

    const lowered = text.toLowerCase();
    for (const [phrase, minutes] of this.DURATION_PHRASES) {
      if (lowered.includes(phrase)) {
        return minutes;
      }
    }
    return 0;
  }

  /**
   * Extract an event title from text.
   *
   * Time expressions are removed first. The sentence containing the first
   * matching event keyword is returned, falling back to the first non-empty
   * sentence.
   *
   * @param text Text to scan.
   * @returns The title, or an empty string when nothing usable remains.
   */
  static extractTitle(text: string): string {
    const cleanText = text
      .replace(this.TITLE_PREPOSITION_TIME, '')
      .replace(this.TITLE_DAY_WORD, '')
      .replace(this.TITLE_CLOCK_TIME, '')
      // The removals above leave double spaces behind.
      .replace(/\s+/g, ' ');

    for (const sentencePattern of this.EVENT_SENTENCE_PATTERNS) {
      const sentence = sentencePattern.exec(cleanText)?.[1];
      if (sentence !== undefined) {
        return sentence.trim();
      }
    }

    const firstSentence = cleanText
      .split(/[.!?]/)
      .map(sentence => sentence.trim())
      .find(sentence => sentence.length > 0);
    return firstSentence ?? '';
  }

  /**
   * Extract attendees from text.
   *
   * A participant list starts after "with", "invite" or "include" and runs
   * until sentence punctuation, a number, or a time/place word. It is then
   * split on commas, "&" and "and".
   *
   * @param text Text to scan.
   * @returns Unique names / e-mail addresses in order of appearance.
   */
  static extractAttendees(text: string): string[] {
    const attendees = new Set<string>();

    for (const trigger of text.matchAll(this.ATTENDEE_TRIGGER)) {
      const rest = text.slice((trigger.index ?? 0) + trigger[0].length);
      const stopAt = rest.search(this.ATTENDEE_STOP);
      const listText = stopAt === -1 ? rest : rest.slice(0, stopAt);

      for (const rawName of listText.split(this.ATTENDEE_SEPARATOR)) {
        const name = rawName.replace(/^[\s"'(]+|[\s"').:]+$/g, '');
        if (name.length === 0 || name.length >= 50) {
          continue;
        }
        // E-mail addresses are always accepted; plain names are capped at
        // three words to keep stray phrases out.
        if (name.includes('@') || name.split(/\s+/).length <= 3) {
          attendees.add(name);
        }
      }
    }

    return Array.from(attendees);
  }

  /**
   * Extract a location from text.
   *
   * @param text Text to scan.
   * @returns The first location candidate with any trailing time expression
   *          removed, or `undefined` when none is found.
   */
  static extractLocation(text: string): string | undefined {
    for (const pattern of this.LOCATION_PATTERNS) {
      const candidate = pattern.exec(text)?.[1];
      if (candidate === undefined) {
        continue;
      }
      const location = candidate.replace(this.TRAILING_TIME, '').trim();
      if (location.length > 0) {
        return location;
      }
    }
    return undefined;
  }

  /**
   * Extract a priority level from text.
   *
   * @param text Text to scan.
   * @returns The first level whose keywords occur in the text (urgent, then
   *          high, then low); `'medium'` when none does.
   */
  static extractPriority(text: string): 'low' | 'medium' | 'high' | 'urgent' {
    for (const [level, pattern] of this.PRIORITY_PATTERNS) {
      if (pattern.test(text)) {
        return level;
      }
    }
    return 'medium'; // Default priority
  }

  /**
   * Extract recurrence information.
   *
   * @param text Text to scan.
   * @returns `isRecurring` plus the matching rule string, or
   *          `{ isRecurring: false }` when no "every/each ..." phrase is present.
   */
  static extractRecurrence(text: string): { isRecurring: boolean; pattern?: string } {
    const matched = this.RECURRENCE_RULES.find(({ pattern }) => pattern.test(text));
    return matched ? { isRecurring: true, pattern: matched.rule } : { isRecurring: false };
  }

  /**
   * Extract a description by removing already parsed components.
   *
   * @param text Original text.
   * @param parsed Fields already extracted from `text`; title, attendees and
   *               location are removed from the description.
   * @returns The remaining text, or `undefined` when 10 characters or fewer are left.
   */
  static extractDescription(text: string, parsed: ParsedEvent): string | undefined {
    let description = text;

    if (parsed.title) {
      description = description.replace(parsed.title, '');
    }

    // Attendees and locations are arbitrary user text ("C++ Lab (B)"), so they
    // must be escaped before being used as a pattern.
    for (const attendee of parsed.attendees ?? []) {
      description = description.replace(new RegExp(this.escapeRegExp(attendee), 'gi'), '');
    }
    if (parsed.location) {
      description = description.replace(new RegExp(this.escapeRegExp(parsed.location), 'gi'), '');
    }

    // Remove time expressions
    description = description.replace(/\b\d{1,2}:\d{2}(\s*[ap]m)?\b/gi, '');
    description = description.replace(/\b(today|tomorrow|monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/gi, '');

    // Clean up
    description = description
      .replace(/\s+/g, ' ')
      .replace(/^[,\s]+|[,\s]+$/g, '')
      .trim();

    return description.length > 10 ? description : undefined;
  }

  /**
   * Validate and improve parsed results. The event is modified in place.
   *
   * - A missing or very short title becomes `'Event'`.
   * - A start in the past is moved to tomorrow at the same time of day, and the
   *   end moves with it so the event keeps its length.
   * - Confidence is halved for starts more than a year ahead.
   * - An end that is not after the start is recomputed from the duration
   *   (default 60 minutes).
   *
   * @param parsed Event to adjust.
   * @returns The same `parsed` object.
   */
  static validateAndImprove(parsed: ParsedEvent): ParsedEvent {
    if (!parsed.title || parsed.title.length < 3) {
      parsed.title = 'Event';
    }

    const time = parsed.time;
    if (time.startDate) {
      const now = new Date();
      const oneYearFromNow = new Date(now.getFullYear() + 1, now.getMonth(), now.getDate());

      if (time.startDate < now) {
        // Assume the next occurrence was meant: tomorrow, same time of day.
        const original = time.startDate;
        const shifted = new Date(now);
        shifted.setDate(shifted.getDate() + 1);
        shifted.setHours(
          original.getHours(),
          original.getMinutes(),
          original.getSeconds(),
          original.getMilliseconds()
        );
        if (time.endDate && time.endDate > original) {
          // Move the end by the same amount so the event keeps its length.
          time.endDate = new Date(shifted.getTime() + (time.endDate.getTime() - original.getTime()));
        }
        time.startDate = shifted;
      }

      if (time.startDate > oneYearFromNow) {
        time.confidence *= 0.5; // Reduce confidence for far future dates
      }
    }

    // Ensure end time is after start time
    if (time.startDate && time.endDate && time.endDate <= time.startDate) {
      const duration = time.duration || 60;
      time.endDate = new Date(time.startDate.getTime() + duration * 60000);
    }

    return parsed;
  }

  /** Escape regex metacharacters so `value` can be matched literally. */
  private static escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}