/**
* PII (Personally Identifiable Information) sanitization
*
* Detects and removes sensitive data from logs and responses.
* Patterns cover common PII types to prevent accidental exposure.
*/
import { createLogger } from './logger.js';
// Module-level logger shared by the functions in this file. 'pii-sanitizer' is the
// name handed to createLogger — presumably used to tag this module's output; confirm in ./logger.js.
const logger = createLogger('pii-sanitizer');
/**
 * PII pattern definitions with replacement masks.
 *
 * Each entry pairs a global regular expression with the text that replaces
 * every match. Entries are applied in array order, so an earlier replacement
 * can change what a later pattern sees (for example `bearer_token` runs
 * before `auth_header`).
 *
 * Masks are handed to `String.prototype.replace`, where `$1` means "first
 * capturing group". A pattern whose mask uses `$1` must therefore capture the
 * part that is supposed to survive; with a non-capturing group the literal
 * text `$1` would be emitted instead.
 */
const PII_PATTERNS: Array<{ name: string; pattern: RegExp; mask: string }> = [
  // Email addresses
  {
    name: 'email',
    // The TLD class is letters only; a `|` inside a character class is a literal pipe.
    pattern: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    mask: '[EMAIL]',
  },
  // Phone numbers (various formats)
  {
    name: 'phone',
    pattern: /\b(?:\+?1[-.\s]?)?\(?[2-9]\d{2}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
    mask: '[PHONE]',
  },
  // Credit card numbers (with or without separators)
  {
    name: 'credit_card',
    pattern: /\b(?:\d{4}[-\s]?){3}\d{4}\b/g,
    mask: '[CREDIT_CARD]',
  },
  // Social Security Numbers
  {
    name: 'ssn',
    pattern: /\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b/g,
    mask: '[SSN]',
  },
  // API keys (common patterns)
  {
    name: 'api_key',
    pattern: /\b(?:sk|pk|api|key)[-_]?[a-zA-Z0-9]{20,}\b/gi,
    mask: '[API_KEY]',
  },
  // Bearer tokens (dot-separated, JWT-shaped values)
  {
    name: 'bearer_token',
    pattern: /Bearer\s+[A-Za-z0-9\-_=]+\.[A-Za-z0-9\-_=]+\.?[A-Za-z0-9\-_.+/=]*/gi,
    mask: 'Bearer [TOKEN]',
  },
  // Authorization headers
  {
    name: 'auth_header',
    // Group 1 captures the header name so the mask can keep it. The optional
    // scheme word is consumed together with the credential; otherwise only
    // the scheme would be masked and the credential after it would remain.
    pattern: /(authorization|x-api-key|x-auth-token):\s*(?:(?:bearer|basic|digest|token)[ \t]+)?\S+/gi,
    mask: '$1: [REDACTED]',
  },
  // Password fields in JSON/URLs
  {
    name: 'password_json',
    pattern: /"password"\s*:\s*"[^"]*"/gi,
    mask: '"password": "[REDACTED]"',
  },
  {
    name: 'password_url',
    pattern: /password=[^&\s]*/gi,
    mask: 'password=[REDACTED]',
  },
  // IP addresses (IPv4 only; there is no IPv6 pattern in this list)
  {
    name: 'ipv4',
    pattern: /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g,
    mask: '[IP_ADDRESS]',
  },
  // Session cookies
  {
    name: 'session_cookie',
    // Group 1 captures the cookie name so the mask can keep it.
    pattern: /(session|sess|sid|sessionid)=[A-Za-z0-9\-_]+/gi,
    mask: '$1=[SESSION]',
  },
  // AWS credentials
  {
    name: 'aws_key',
    pattern: /\b(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}\b/g,
    mask: '[AWS_KEY]',
  },
  {
    name: 'aws_secret',
    pattern: /\b[A-Za-z0-9/+=]{40}\b/g,
    mask: '[AWS_SECRET]',
  },
];
/**
 * Replace every PII match in a string with its mask.
 *
 * Runs the patterns in PII_PATTERNS one after another, each on the output of
 * the previous one. Emits one debug log per pattern that changed the text and
 * a final summary log when anything changed at all.
 *
 * @param input - Text to sanitize; falsy input (e.g. the empty string) is returned as-is.
 * @returns The text with all matched PII replaced by masks.
 */
export function sanitizePii(input: string): string {
  if (!input) {
    return input;
  }

  const changedTypes: string[] = [];
  const output = PII_PATTERNS.reduce((text, { name, pattern, mask }) => {
    const next = text.replace(pattern, mask);
    if (next !== text) {
      changedTypes.push(name);
      logger.debug('PII sanitized', { type: name });
    }
    return next;
  }, input);

  if (changedTypes.length > 0) {
    logger.debug('PII detected and sanitized');
  }
  return output;
}
/**
 * Sanitize an object recursively.
 *
 * Returns a fresh copy in which every string value (at any depth, inside
 * objects and arrays) has been passed through sanitizePii. Property names are
 * not inspected; only values are sanitized. The input is never mutated.
 *
 * - A reference back to an object that is currently being traversed is
 *   replaced by the string '[Circular]' instead of recursing forever.
 * - Date values are copied as Date instances (Object.entries would otherwise
 *   reduce them to an empty object).
 * - Other class instances are still copied as plain objects built from their
 *   own enumerable properties.
 *
 * @param obj - Object or array to sanitize. A non-object argument is returned unchanged.
 * @returns A sanitized deep copy of `obj`.
 */
export function sanitizeObject<T extends Record<string, unknown>>(obj: T): T {
  if (obj === null || typeof obj !== 'object') {
    return obj;
  }
  return sanitizeNode(obj, new Set<object>()) as T;
}

/**
 * Sanitize one value of the tree; `ancestors` holds the objects on the
 * current path from the root and is used only to detect cycles.
 */
function sanitizeNode(value: unknown, ancestors: Set<object>): unknown {
  if (typeof value === 'string') {
    return sanitizePii(value);
  }
  if (value === null || typeof value !== 'object') {
    return value;
  }
  if (value instanceof Date) {
    return new Date(value.getTime());
  }
  if (ancestors.has(value)) {
    return '[Circular]';
  }

  ancestors.add(value);
  try {
    if (Array.isArray(value)) {
      return value.map((item) => sanitizeNode(item, ancestors));
    }
    const result: Record<string, unknown> = {};
    for (const [key, child] of Object.entries(value)) {
      result[key] = sanitizeNode(child, ancestors);
    }
    return result;
  } finally {
    // Only ancestors count as a cycle; the same object may legitimately
    // appear again in a sibling branch.
    ancestors.delete(value);
  }
}
/**
 * Report whether a string matches at least one PII pattern.
 *
 * @param input - Text to inspect; falsy input yields `false`.
 * @returns `true` as soon as any pattern in PII_PATTERNS matches, otherwise `false`.
 */
export function containsPii(input: string): boolean {
  if (!input) {
    return false;
  }
  return PII_PATTERNS.some(({ pattern }) => {
    // The patterns are global, so test() resumes from lastIndex; rewind first.
    pattern.lastIndex = 0;
    return pattern.test(input);
  });
}
/**
 * List the names of all PII patterns that match a string.
 *
 * @param input - Text to inspect; falsy input yields an empty list.
 * @returns Pattern names in PII_PATTERNS order, one per matching pattern.
 */
export function detectPiiTypes(input: string): string[] {
  if (!input) {
    return [];
  }
  return PII_PATTERNS.filter(({ pattern }) => {
    // Global patterns keep state in lastIndex; rewind before each test().
    pattern.lastIndex = 0;
    return pattern.test(input);
  }).map(({ name }) => name);
}
/**
 * Sensitive field names that should always be masked
 */
const SENSITIVE_FIELDS = new Set([
  'password',
  'secret',
  'token',
  'apiKey',
  'api_key',
  'accessToken',
  'access_token',
  'refreshToken',
  'refresh_token',
  'authorization',
  'cookie',
  'session',
  'credential',
  'private_key',
  'privateKey',
]);

/**
 * Lower-cased copy of SENSITIVE_FIELDS used for case-insensitive lookups.
 * Derived once at module load so the camelCase entries above
 * (e.g. 'apiKey') also match 'ApiKey' or 'APIKEY'.
 */
const SENSITIVE_FIELDS_LOWER: ReadonlySet<string> = new Set(
  Array.from(SENSITIVE_FIELDS, (name) => name.toLowerCase())
);

/**
 * Check if a field name is considered sensitive.
 *
 * The comparison is case-insensitive and requires the whole name to match an
 * entry of SENSITIVE_FIELDS (no substring matching).
 *
 * @param fieldName - Property/field name to check.
 * @returns `true` when the name equals a sensitive field name, ignoring case.
 */
export function isSensitiveField(fieldName: string): boolean {
  return SENSITIVE_FIELDS_LOWER.has(fieldName.toLowerCase());
}
/**
 * Mask a sensitive value, keeping only the first and last few characters.
 *
 * - Values no longer than `2 * visibleChars` are replaced entirely by `*`
 *   (one per character).
 * - Otherwise the result is the first `visibleChars` characters, a run of `*`
 *   (capped at 8 so the original length is not revealed), and the last
 *   `visibleChars` characters.
 * - `visibleChars` is truncated to a whole number; zero, negative or NaN
 *   means "show nothing", in which case only the `*` run is returned.
 *
 * @param value - The secret to mask.
 * @param visibleChars - Number of characters to keep visible at each end (default 4).
 * @returns The masked representation of `value`.
 */
export function maskSensitiveValue(value: string, visibleChars: number = 4): string {
  // `x > 0` is false for NaN, zero and negatives, all of which must show nothing.
  const visible = visibleChars > 0 ? Math.floor(visibleChars) : 0;

  if (value.length <= visible * 2) {
    return '*'.repeat(value.length);
  }

  const masked = '*'.repeat(Math.min(value.length - visible * 2, 8));
  if (visible === 0) {
    // slice(-0) is slice(0) and would return the whole secret, so skip slicing.
    return masked;
  }

  const start = value.slice(0, visible);
  const end = value.slice(-visible);
  return `${start}${masked}${end}`;
}