intent-sanitizer.tsβ’5.24 kB
/**
* Intent sanitizer for removing PII from user intent strings
* Ensures privacy by masking sensitive information
*/
/**
* Patterns for detecting and removing PII
*/
const PII_PATTERNS = {
// Email addresses
email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/gi,
// URLs with domains
url: /https?:\/\/[^\s]+/gi,
// IP addresses
ip: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g,
// Phone numbers (various formats)
phone: /\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
// Credit card-like numbers (groups of 4 digits)
creditCard: /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/g,
// API keys and tokens (long alphanumeric strings)
apiKey: /\b[A-Za-z0-9_-]{32,}\b/g,
// UUIDs
uuid: /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
// File paths (Unix and Windows)
filePath: /(?:\/[\w.-]+)+\/?|(?:[A-Z]:\\(?:[\w.-]+\\)*[\w.-]+)/g,
// Potential passwords or secrets (common patterns)
secret: /\b(?:password|passwd|pwd|secret|token|key)[:=\s]+[^\s]+/gi,
};
/**
* Company/organization name patterns to anonymize
* These are common patterns that might appear in workflow intents
*/
const COMPANY_PATTERNS = {
// Company suffixes
companySuffix: /\b\w+(?:\s+(?:Inc|LLC|Corp|Corporation|Ltd|Limited|GmbH|AG)\.?)\b/gi,
// Common business terms that might indicate company names
businessContext: /\b(?:company|organization|client|customer)\s+(?:named?|called)\s+\w+/gi,
};
/**
* Sanitizes user intent by removing PII and sensitive information
*/
export class IntentSanitizer {
/**
* Sanitize user intent string
*/
sanitize(intent: string): string {
if (!intent) {
return intent;
}
let sanitized = intent;
// Remove email addresses
sanitized = sanitized.replace(PII_PATTERNS.email, '[EMAIL]');
// Remove URLs
sanitized = sanitized.replace(PII_PATTERNS.url, '[URL]');
// Remove IP addresses
sanitized = sanitized.replace(PII_PATTERNS.ip, '[IP_ADDRESS]');
// Remove phone numbers
sanitized = sanitized.replace(PII_PATTERNS.phone, '[PHONE]');
// Remove credit card numbers
sanitized = sanitized.replace(PII_PATTERNS.creditCard, '[CARD_NUMBER]');
// Remove API keys and long tokens
sanitized = sanitized.replace(PII_PATTERNS.apiKey, '[API_KEY]');
// Remove UUIDs
sanitized = sanitized.replace(PII_PATTERNS.uuid, '[UUID]');
// Remove file paths
sanitized = sanitized.replace(PII_PATTERNS.filePath, '[FILE_PATH]');
// Remove secrets/passwords
sanitized = sanitized.replace(PII_PATTERNS.secret, '[SECRET]');
// Anonymize company names
sanitized = sanitized.replace(COMPANY_PATTERNS.companySuffix, '[COMPANY]');
sanitized = sanitized.replace(COMPANY_PATTERNS.businessContext, '[COMPANY_CONTEXT]');
// Clean up multiple spaces
sanitized = sanitized.replace(/\s{2,}/g, ' ').trim();
return sanitized;
}
/**
* Check if intent contains potential PII
*/
containsPII(intent: string): boolean {
if (!intent) {
return false;
}
return Object.values(PII_PATTERNS).some((pattern) => pattern.test(intent));
}
/**
* Get list of PII types detected in the intent
*/
detectPIITypes(intent: string): string[] {
if (!intent) {
return [];
}
const detected: string[] = [];
if (PII_PATTERNS.email.test(intent)) detected.push('email');
if (PII_PATTERNS.url.test(intent)) detected.push('url');
if (PII_PATTERNS.ip.test(intent)) detected.push('ip_address');
if (PII_PATTERNS.phone.test(intent)) detected.push('phone');
if (PII_PATTERNS.creditCard.test(intent)) detected.push('credit_card');
if (PII_PATTERNS.apiKey.test(intent)) detected.push('api_key');
if (PII_PATTERNS.uuid.test(intent)) detected.push('uuid');
if (PII_PATTERNS.filePath.test(intent)) detected.push('file_path');
if (PII_PATTERNS.secret.test(intent)) detected.push('secret');
// Reset lastIndex for global regexes
Object.values(PII_PATTERNS).forEach((pattern) => {
pattern.lastIndex = 0;
});
return detected;
}
/**
* Truncate intent to maximum length while preserving meaning
*/
truncate(intent: string, maxLength: number = 1000): string {
if (!intent || intent.length <= maxLength) {
return intent;
}
// Try to truncate at sentence boundary
const truncated = intent.substring(0, maxLength);
const lastSentence = truncated.lastIndexOf('.');
const lastSpace = truncated.lastIndexOf(' ');
if (lastSentence > maxLength * 0.8) {
return truncated.substring(0, lastSentence + 1);
} else if (lastSpace > maxLength * 0.9) {
return truncated.substring(0, lastSpace) + '...';
}
return truncated + '...';
}
/**
* Validate intent is safe for telemetry
*/
isSafeForTelemetry(intent: string): boolean {
if (!intent) {
return true;
}
// Check length
if (intent.length > 5000) {
return false;
}
// Check for null bytes or control characters
if (/[\x00-\x08\x0B\x0C\x0E-\x1F]/.test(intent)) {
return false;
}
return true;
}
}
/**
* Singleton instance for easy access
*/
export const intentSanitizer = new IntentSanitizer();