Skip to main content
Glama

DollhouseMCP

by DollhouseMCP
contentValidator.ts • 25.9 kB
/**
 * Content Validator for DollhouseMCP
 *
 * Protects against prompt injection attacks in collection personas
 * by detecting and sanitizing malicious content patterns.
 *
 * Security: SEC-001 - Critical vulnerability protection
 */

import { SecurityError } from './errors.js';
import { SecurityMonitor } from './securityMonitor.js';
import { RegexValidator } from './regexValidator.js';
import { SECURITY_LIMITS } from './constants.js';
import { UnicodeValidator } from './validators/unicodeValidator.js';
import { SecurityTelemetry } from './telemetry/SecurityTelemetry.js';

// FIX: SonarCloud typescript:S4323 - Extract union type to type alias for maintainability
export type SecuritySeverity = 'low' | 'medium' | 'high' | 'critical';

/**
 * Outcome of a validation pass.
 *
 * `sanitizedContent` is the normalized text with every matched injection
 * pattern replaced by `[CONTENT_BLOCKED]`; `detectedPatterns` lists the
 * human-readable descriptions of everything that was flagged.
 */
export interface ContentValidationResult {
  isValid: boolean;
  sanitizedContent?: string;
  detectedPatterns?: string[];
  severity?: SecuritySeverity;
}

export interface ContentValidatorOptions {
  /**
   * Skip size limit checks - useful for memory content that can be large
   * @default false
   */
  skipSizeCheck?: boolean;

  /**
   * Custom max length override
   * @default SECURITY_LIMITS.MAX_CONTENT_LENGTH
   */
  maxLength?: number;
}

export class ContentValidator {
  /**
   * Pattern-based detection system for prompt injection attacks.
   *
   * This approach was chosen over AI-based detection because:
   * 1. Pattern matching cannot be socially engineered or confused
   * 2. Deterministic results ensure consistent security
   * 3. No additional API calls or latency
   * 4. Can't be bypassed by clever prompt engineering
   *
   * The patterns below represent known attack vectors from security research
   * and real-world exploit attempts against AI systems.
   */

  // Prompt injection patterns that could compromise AI assistants
  private static readonly INJECTION_PATTERNS: Array<{
    pattern: RegExp;
    severity: 'high' | 'critical';
    description: string;
  }> = [
    // System prompt override attempts
    { pattern: /\[SYSTEM:\s*.*?\]/gi, severity: 'critical', description: 'System prompt override' },
    { pattern: /\[ADMIN:\s*.*?\]/gi, severity: 'critical', description: 'Admin prompt override' },
    { pattern: /\[ASSISTANT:\s*.*?\]/gi, severity: 'critical', description: 'Assistant prompt override' },
    { pattern: /\[USER:\s*.*?\]/gi, severity: 'high', description: 'User prompt override' },

    // Instruction manipulation
    { pattern: /ignore\s+(all\s+)?previous\s+instructions/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /ignore\s+(all\s+)?prior\s+instructions/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /disregard\s+(all\s+)?previous\s+instructions/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /disregard\s+everything\s+above/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /forget\s+(all\s+)?previous\s+instructions/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /forget\s+your\s+training/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /override\s+your\s+programming/gi, severity: 'critical', description: 'Instruction override' },
    { pattern: /you\s+are\s+now\s+(admin|root|system|sudo)/gi, severity: 'critical', description: 'Role elevation attempt' },
    { pattern: /act\s+as\s+(admin|root|system|sudo)/gi, severity: 'critical', description: 'Role elevation attempt' },

    // Data exfiltration attempts
    { pattern: /export\s+all\s+(files|data|personas|tokens|credentials|api\s+keys)/gi, severity: 'critical', description: 'Data exfiltration' },
    { pattern: /send\s+all\s+(files|data|personas|tokens|credentials|api\s+keys)\s+to/gi, severity: 'critical', description: 'Data exfiltration' },
    { pattern: /list\s+all\s+(files|tokens|credentials|secrets|api\s+keys)/gi, severity: 'high', description: 'Information disclosure' },
    { pattern: /show\s+me\s+all\s+(tokens|credentials|secrets|api\s+keys)/gi, severity: 'high', description: 'Credential disclosure' },

    // Command execution patterns
    { pattern: /curl\s+[^\s]+\.(com|net|org|io|dev)/gi, severity: 'critical', description: 'External command execution' },
    { pattern: /wget\s+[^\s]+\.(com|net|org|io|dev)/gi, severity: 'critical', description: 'External command execution' },
    { pattern: /\$\([^)]+\)/g, severity: 'critical', description: 'Command substitution' },

    // SECURITY: Backtick command detection with ReDoS mitigation
    // FIX (PR #1313): Fixed ReDoS vulnerabilities by replacing .* with [^`]*
    // FIX (PR #1313 - SonarCloud): Added explicit bounds {0,200} to prevent backtracking
    // Multiple unbounded quantifiers in same pattern can still cause backtracking even with [^`]*
    // Bounded quantifiers prevent exponential time complexity while matching realistic commands
    { pattern: /`[^`]{0,200}(?:rm\s+-rf?\s+[/~]|sudo\s+rm|chmod\s+777|chown\s+root)[^`]{0,200}`/gi, severity: 'critical', description: 'Dangerous shell command in backticks' },
    { pattern: /`[^`]{0,200}(?:cat|ls)\s+\/etc\/[^`]{0,200}`/gi, severity: 'critical', description: 'Sensitive file access in backticks' },
    { pattern: /`[^`]{0,200}(?:bash|sh)\s+-c\s+['"][^`]{0,200}`/gi, severity: 'critical', description: 'Shell execution in backticks' },
    { pattern: /`[^`]{0,200}(?:passwd|shadow|nc\s+-l|netcat\s+-l|ssh\s+root@)[^`]{0,200}`/gi, severity: 'critical', description: 'Dangerous command in backticks' },
    { pattern: /`[^`]{0,200}(?:curl|wget)\s+[^`]{0,200}\|\s*(?:sh|bash)[^`]{0,200}`/gi, severity: 'critical', description: 'Pipe to shell in backticks' },
    { pattern: /`[^`]{0,200}(?:\/etc\/passwd|\/etc\/shadow|\.ssh\/id_|sudo\s+su)[^`]{0,200}`/gi, severity: 'critical', description: 'Sensitive file or privilege escalation in backticks' },
    { pattern: /`[^`]{0,200}(?:python|perl|ruby|php|node)\s+(?:-e|-c)\s+[^`]{0,200}(?:exec|eval|system|subprocess)[^`]{0,200}`/gi, severity: 'critical', description: 'Script interpreter with dangerous function in backticks' },
    { pattern: /eval\s*\(/gi, severity: 'critical', description: 'Code evaluation' },
    { pattern: /exec\s*\(/gi, severity: 'critical', description: 'Code execution' },
    { pattern: /os\.system\s*\(/gi, severity: 'critical', description: 'System command execution' },
    { pattern: /subprocess\.(call|run|Popen)/gi, severity: 'critical', description: 'Subprocess execution' },

    // Token/credential patterns
    { pattern: /GITHUB_TOKEN/gi, severity: 'high', description: 'Token reference' },
    { pattern: /ghp_[a-zA-Z0-9]{36}/g, severity: 'critical', description: 'GitHub token exposure' },
    { pattern: /gho_[a-zA-Z0-9]{36}/g, severity: 'critical', description: 'GitHub OAuth token exposure' },

    // Path traversal in content
    { pattern: /\.\.\/\.\.\/\.\.\//g, severity: 'high', description: 'Path traversal attempt' },
    { pattern: /\/etc\/passwd/gi, severity: 'high', description: 'Sensitive file access' },
    { pattern: /\/\.ssh\//gi, severity: 'high', description: 'SSH key access attempt' },
  ];

  // Malicious YAML patterns
  // SECURITY FIX #364: YAML bomb detection patterns
  // SECURITY FIX (PR #552 review): Simplified patterns to reduce ReDoS risk
  private static readonly YAML_BOMB_PATTERNS = [
    // Detects recursive anchor references that could cause exponential expansion
    // Example: &a [*a] or &bomb ["test", *bomb]
    /&(\w+)\s*\[[^\]]*\*\1[^\]]*\]/, // Direct recursion in array
    /&(\w+)\s*\{[^}]*\*\1[^}]*\}/, // Direct recursion in object
    /^\s*\w+:\s*&(\w+)\s*\n\s*\w+:\s*\*\1/m, // Multi-line value recursion (data: &ref / value: *ref)

    // Simplified pattern to detect deeply nested anchors (less ReDoS risk)
    // Looks for 3+ anchor definitions in close proximity
    /&\w+[^&]*&\w+[^&]*&\w+/, // 3+ anchors (simplified, less backtracking)

    // Detects excessive aliases in close proximity (potential amplification)
    // Example: [*a, *b, *c, *d, *e, *f, *g, *h, *i, *j]
    /\*\w+(?:[,\s]+\*\w+){9,}/, // 10+ aliases in sequence (non-capturing group)
  ];

  private static readonly MALICIOUS_YAML_PATTERNS = [
    // Language-specific deserialization attacks
    /!!python\/object/,
    /!!python\/module/,
    /!!python\/name/,
    /!!ruby\/object/,
    /!!ruby\/hash/,
    /!!ruby\/struct/,
    /!!ruby\/marshal/,
    /!!java/,
    /!!javax/,
    /!!com\.sun/,
    /!!perl\/hash/,
    /!!perl\/code/,
    /!!php\/object/,

    // Constructor/function injection
    /!!exec/,
    /!!eval/,
    /!!new/,
    /!!construct/,
    /!!apply/,
    /!!call/,
    /!!invoke/,

    // Code execution patterns - more specific to avoid false positives
    /subprocess\./,
    /os\.system/,
    /eval\s*\(/,
    /exec\s*\(/,
    /__import__\s*\(/,
    /require\s*\(/,
    /import\s+(?:os|sys|subprocess|eval|exec)/,
    /include\s+["'].*\.(?:php|sh|py|js|rb)["']/,

    // Command execution variants - more specific patterns
    /popen\s*\(/,
    /spawn\s*\(/,
    /system\s*\(/,
    /backtick\s*\(/,
    /shell_exec\s*\(/,
    /passthru\s*\(/,
    /proc_open\s*\(/,

    // Network operations - require suspicious context
    /socket\.connect/, // Detects socket connection attempts
    /urllib\.request/, // Python HTTP library usage
    /requests\.(?:get|post|put|delete)\s*\(/, // Detects HTTP requests with method calls
    /fetch\s*\(\s*["']https?:\/\//, // Detects fetch calls to external URLs
    /new\s+XMLHttpRequest/, // JavaScript AJAX object creation
    /\.(?:get|post|put|delete)\s*\(\s*["']https?:\/\//, // Method chaining with HTTP requests

    // File system operations - require suspicious context
    /(?:fs\.|file\.|)\s*open\s*\(\s*["'](?:\/etc\/|\/bin\/|\.\.\/)/, // File open with suspicious paths
    /file_get_contents\s*\(/, // PHP file reading function
    /file_put_contents\s*\(/, // PHP file writing function
    /fopen\s*\(\s*["'](?:\/etc\/|\/bin\/|\.\.\/)/, // File open with dangerous system paths
    /(?:fs\.)?\s*readFile\s*\(\s*["'](?:\/etc\/|\/bin\/|\.\.\/)/, // Node.js file read with path traversal
    /(?:fs\.)?\s*writeFile\s*\(\s*["'](?:\/(?:bin|etc|tmp)\/|\.\.\/)/, // Node.js file write to system dirs

    // Protocol handlers
    /file:\/\//,
    /data:\/\//,
    /expect:\/\//,
    /php:\/\//,
    /phar:\/\//,
    /zip:\/\//,
    /ssh2:\/\//,
    /ogg:\/\//,

    // YAML-specific dangerous features
    /&\w+\s*!!/, // Anchor with tag combination
    /\*\w+\s*!!/, // Alias with tag combination
    /!!merge/,
    /!!binary/,
    /!!timestamp/,

    // Unicode/encoding bypass attempts - prevent visual spoofing attacks
    /\\[uU]0*(?:22|27|60|3[cC])/, // Unicode escapes for ", ', ` and <
    /[\u202A-\u202E\u2066-\u2069]/, // Direction override chars (RLO, LRO, isolates)
    /[\u200B-\u200F\u2028-\u202F]/, // Zero-width spaces, line/paragraph separators
    /[\uFEFF\uFFFE\uFFFF]/, // BOM, non-characters for payload hiding
  ];

  /**
   * Handles Unicode validation and threat detection
   * REFACTOR: Extracted from validateAndSanitize() to reduce cognitive complexity
   * Returns normalized content and Unicode severity without aborting early
   *
   * @param content - Raw content to normalize
   * @param detectedPatterns - Accumulator; each Unicode issue is appended as `Unicode: <issue>`
   * @returns The normalized content and the severity reported by the Unicode
   *          validator (`'low'` when nothing was flagged)
   */
  private static handleUnicodeValidation(
    content: string,
    detectedPatterns: string[]
  ): {
    sanitized: string;
    highestSeverity: SecuritySeverity;
  } {
    const unicodeResult = UnicodeValidator.normalize(content);
    const sanitized = unicodeResult.normalizedContent;
    let highestSeverity: SecuritySeverity = 'low';

    if (!unicodeResult.isValid && unicodeResult.detectedIssues) {
      detectedPatterns.push(...unicodeResult.detectedIssues.map(issue => `Unicode: ${issue}`));

      if (unicodeResult.severity) {
        highestSeverity = unicodeResult.severity;
      }

      // Log high/critical Unicode attacks
      if (unicodeResult.severity === 'critical' || unicodeResult.severity === 'high') {
        SecurityMonitor.logSecurityEvent({
          type: 'CONTENT_INJECTION_ATTEMPT',
          severity: unicodeResult.severity.toUpperCase() as 'HIGH' | 'CRITICAL',
          source: 'content_validation',
          details: `Unicode attack detected: ${unicodeResult.detectedIssues.join(', ')}`,
        });

        SecurityTelemetry.recordBlockedAttack(
          'UNICODE_ATTACK',
          unicodeResult.detectedIssues.join(', '),
          unicodeResult.severity.toUpperCase() as 'HIGH' | 'CRITICAL',
          'unicode_validation',
          { issues: unicodeResult.detectedIssues }
        );
      }
    }

    return { sanitized, highestSeverity };
  }

  /**
   * Checks content for injection patterns and logs/sanitizes threats
   * REFACTOR: Extracted from validateAndSanitize() to reduce cognitive complexity
   *
   * @param originalContent - Original content to check patterns against
   * @param normalizedContent - Normalized content to apply replacements to
   * @param detectedPatterns - Array to accumulate detected pattern descriptions
   * @param currentSeverity - Current highest severity level
   * @returns The normalized content with matches replaced by `[CONTENT_BLOCKED]`
   *          and the highest severity seen so far
   */
  private static checkInjectionPatterns(
    originalContent: string,
    normalizedContent: string,
    detectedPatterns: string[],
    currentSeverity: SecuritySeverity
  ): {
    sanitized: string;
    highestSeverity: SecuritySeverity;
  } {
    let sanitized = normalizedContent;
    let highestSeverity = currentSeverity;

    for (const { pattern, severity, description } of this.INJECTION_PATTERNS) {
      // Check pattern on original content (before normalization) to catch encoded attacks
      // NOTE(review): detection runs only on the ORIGINAL text, so a payload that only
      // becomes matchable after Unicode normalization is not caught by this loop -
      // confirm UnicodeValidator flags such input on its own.
      // NOTE(review): maxLength is hard-coded to 50000; how RegexValidator treats longer
      // input (relevant when skipSizeCheck is set) is not visible here - verify.
      if (RegexValidator.validate(originalContent, pattern, {
        maxLength: 50000,
        rejectDangerousPatterns: false,
        logEvents: false
      })) {
        detectedPatterns.push(description);

        // Update highest severity ('critical' always wins; 'high' never downgrades 'critical')
        if (severity === 'critical' || (severity === 'high' && highestSeverity !== 'critical')) {
          highestSeverity = severity;
        }

        // Log security event
        SecurityMonitor.logSecurityEvent({
          type: 'CONTENT_INJECTION_ATTEMPT',
          severity: severity.toUpperCase() as 'HIGH' | 'CRITICAL',
          source: 'content_validation',
          details: `Detected pattern: ${description}`,
        });

        // Record in telemetry
        SecurityTelemetry.recordBlockedAttack(
          'CONTENT_INJECTION',
          description,
          severity.toUpperCase() as 'HIGH' | 'CRITICAL',
          'content_validation',
          { pattern: pattern.source }
        );

        // Apply replacement to normalized content
        sanitized = sanitized.replace(pattern, '[CONTENT_BLOCKED]');
      }
    }

    return { sanitized, highestSeverity };
  }

  /**
   * Validates and sanitizes persona content for security threats
   * FIX #1269: Added options to support large memory content
   * REFACTOR: Reduced cognitive complexity by extracting helper methods
   *
   * @param content - Text to validate
   * @param options - Size-check overrides (see {@link ContentValidatorOptions})
   * @returns Validation result carrying the sanitized text, every detected
   *          pattern description and the highest severity encountered
   * @throws SecurityError when the size check is enabled and `content` is
   *         longer than the effective maximum length
   */
  static validateAndSanitize(content: string, options: ContentValidatorOptions = {}): ContentValidationResult {
    // Length validation before pattern matching (unless explicitly skipped for memories)
    if (!options.skipSizeCheck) {
      // NOTE(review): `||` means an explicit maxLength of 0 falls back to the default
      // limit; kept as-is because callers may rely on it.
      const maxLength = options.maxLength || SECURITY_LIMITS.MAX_CONTENT_LENGTH;
      if (content.length > maxLength) {
        throw new SecurityError(
          `Content exceeds maximum length of ${maxLength} characters (${content.length} provided)`
        );
      }
    }

    const detectedPatterns: string[] = [];

    // Handle Unicode validation (normalizes content but doesn't abort)
    const unicodeCheck = this.handleUnicodeValidation(content, detectedPatterns);

    // Check for injection patterns on ORIGINAL content (to catch encoded attacks)
    // but apply replacements to NORMALIZED content (to preserve normalization)
    const injectionCheck = this.checkInjectionPatterns(
      content,
      unicodeCheck.sanitized,
      detectedPatterns,
      unicodeCheck.highestSeverity
    );

    // Use highest severity from either Unicode or injection checks
    const finalSeverity = injectionCheck.highestSeverity;

    // Abort if high/critical threats detected
    if (finalSeverity === 'high' || finalSeverity === 'critical') {
      return {
        isValid: false,
        sanitizedContent: injectionCheck.sanitized,
        detectedPatterns,
        severity: finalSeverity
      };
    }

    return {
      isValid: detectedPatterns.length === 0,
      sanitizedContent: injectionCheck.sanitized,
      detectedPatterns,
      severity: finalSeverity
    };
  }

  /**
   * Validates YAML frontmatter for malicious content
   * SECURITY FIX #364: Added YAML bomb detection to prevent denial of service
   *
   * Checks, in order: overall length, YAML bomb patterns, alias/anchor
   * amplification ratio, mutual anchor references, Unicode issues and finally
   * the malicious-pattern list. The first failing check logs a security event
   * and ends validation.
   *
   * @param yamlContent - Raw YAML text (without the `---` delimiters)
   * @returns `true` when no check fails, `false` otherwise
   */
  static validateYamlContent(yamlContent: string): boolean {
    // Length validation before pattern matching
    if (yamlContent.length > SECURITY_LIMITS.MAX_YAML_LENGTH) {
      SecurityMonitor.logSecurityEvent({
        type: 'YAML_INJECTION_ATTEMPT',
        severity: 'HIGH',
        source: 'yaml_validation',
        details: `YAML content exceeds maximum length: ${yamlContent.length} > ${SECURITY_LIMITS.MAX_YAML_LENGTH}`
      });
      return false;
    }

    // SECURITY FIX #364: Check for YAML bombs before other validation
    // SECURITY FIX (PR #552 review): Use RegexValidator for ReDoS protection
    for (const pattern of this.YAML_BOMB_PATTERNS) {
      // Use RegexValidator to safely check patterns with timeout protection
      // This prevents ReDoS attacks from maliciously crafted YAML
      const isMatch = RegexValidator.validate(yamlContent, pattern, {
        maxLength: SECURITY_LIMITS.MAX_YAML_LENGTH,
        rejectDangerousPatterns: false, // Our patterns are trusted
        logEvents: false // We handle logging ourselves
      });

      if (isMatch) {
        SecurityMonitor.logSecurityEvent({
          type: 'YAML_INJECTION_ATTEMPT',
          severity: 'CRITICAL',
          source: 'yaml_bomb_detection',
          details: `YAML bomb pattern detected: ${pattern.source}`,
          metadata: {
            patternType: 'YAML_BOMB',
            contentLength: yamlContent.length
          }
        });

        // Record in telemetry
        SecurityTelemetry.recordBlockedAttack(
          'YAML_BOMB',
          `YAML bomb pattern: ${pattern.source}`,
          'CRITICAL',
          'yaml_validation',
          { patternType: 'YAML_BOMB', contentLength: yamlContent.length }
        );

        return false;
      }
    }

    // SECURITY FIX #364: Count anchor/alias ratio for amplification detection
    // SECURITY FIX #1298: Use configurable threshold for easier tuning
    const anchorMatches = yamlContent.match(/&\w+/g) || [];
    const aliasMatches = yamlContent.match(/\*\w+/g) || [];
    // With no anchors at all the ratio is defined as 0 (avoids division by zero)
    const amplificationRatio = anchorMatches.length > 0
      ? aliasMatches.length / anchorMatches.length
      : 0;

    if (amplificationRatio > SECURITY_LIMITS.YAML_BOMB_AMPLIFICATION_THRESHOLD) {
      SecurityMonitor.logSecurityEvent({
        type: 'YAML_INJECTION_ATTEMPT',
        severity: 'HIGH',
        source: 'yaml_amplification_detection',
        details: `Excessive alias amplification detected: ${aliasMatches.length} aliases for ${anchorMatches.length} anchors (ratio: ${amplificationRatio.toFixed(2)})`,
        metadata: {
          anchors: anchorMatches.length,
          aliases: aliasMatches.length,
          ratio: amplificationRatio
        }
      });
      return false;
    }

    // SECURITY FIX #364: Detect circular reference chains
    // SECURITY FIX (PR #552 review): Optimized from O(n²) to O(n) using Set-based lookups
    const anchorRefs = new Map<string, Set<string>>();
    const lines = yamlContent.split('\n');

    // First pass: Build reference map efficiently
    for (let i = 0; i < lines.length; i++) {
      // Only the first anchor on a line is recorded; a later line defining the
      // same anchor name overwrites the earlier entry.
      const anchorMatch = lines[i].match(/&(\w+)/);
      if (anchorMatch) {
        const anchorName = anchorMatch[1];

        // Collect aliases from a 5-line window: the anchor's own line plus the next 4
        const contextEnd = Math.min(i + 5, lines.length);
        const references = new Set<string>();

        for (let j = i; j < contextEnd; j++) {
          const aliasMatches = lines[j].match(/\*(\w+)/g);
          if (aliasMatches) {
            aliasMatches.forEach(alias => {
              references.add(alias.substring(1)); // Remove * prefix
            });
          }
        }

        anchorRefs.set(anchorName, references);
      }
    }

    // Second pass: Check for circular references (constant-time Set lookups per reference)
    for (const [anchor1, refs1] of anchorRefs) {
      for (const refAnchor of refs1) {
        const refs2 = anchorRefs.get(refAnchor);
        // Check if the referenced anchor references back to the original
        if (refs2 && refs2.has(anchor1)) {
          SecurityMonitor.logSecurityEvent({
            type: 'YAML_INJECTION_ATTEMPT',
            severity: 'CRITICAL',
            source: 'yaml_bomb_detection',
            details: `Circular reference chain detected between anchors: &${anchor1} and &${refAnchor}`,
            metadata: {
              patternType: 'CIRCULAR_REFERENCE',
              anchors: [anchor1, refAnchor]
            }
          });
          return false;
        }
      }
    }

    // Unicode normalization preprocessing for YAML content
    const unicodeResult = UnicodeValidator.normalize(yamlContent);
    const normalizedYaml = unicodeResult.normalizedContent;

    if (!unicodeResult.isValid && unicodeResult.detectedIssues) {
      SecurityMonitor.logSecurityEvent({
        type: 'YAML_UNICODE_ATTACK',
        severity: (unicodeResult.severity?.toUpperCase() || 'MEDIUM') as 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL',
        source: 'yaml_validation',
        details: `Unicode attack detected in YAML: ${unicodeResult.detectedIssues.join(', ')}`
      });
      return false;
    }

    for (const pattern of this.MALICIOUS_YAML_PATTERNS) {
      // These are trusted internal patterns, so we disable ReDoS rejection
      // NOTE(review): maxLength here is 10000 while the bomb checks above use
      // SECURITY_LIMITS.MAX_YAML_LENGTH - confirm how RegexValidator handles YAML
      // longer than 10000 characters and whether the two limits should agree.
      if (RegexValidator.validate(normalizedYaml, pattern, {
        maxLength: 10000,
        rejectDangerousPatterns: false,
        logEvents: false // Don't log our own security patterns as dangerous
      })) {
        SecurityMonitor.logSecurityEvent({
          type: 'YAML_INJECTION_ATTEMPT',
          severity: 'CRITICAL',
          source: 'yaml_validation',
          details: `Malicious YAML pattern detected: ${pattern}`,
        });
        // Early exit on first match for performance
        return false;
      }
    }

    return true;
  }

  /**
   * Validates persona metadata fields
   *
   * Every top-level string field is length-checked and then run through
   * {@link ContentValidator.validateAndSanitize}; the standard persona fields
   * are checked first so their findings are reported first.
   *
   * @param metadata - Parsed metadata object; `null`/`undefined` is treated as
   *                   an empty object (nothing to validate)
   * @returns `isValid: false` with severity `'high'` when any field is flagged,
   *          otherwise a valid result with severity `'low'`
   */
  static validateMetadata(metadata: any): ContentValidationResult {
    const detectedPatterns: string[] = [];

    // Robustness: missing metadata has no fields to inspect. Previously this
    // crashed with a TypeError on the first property access.
    if (metadata === null || metadata === undefined) {
      return {
        isValid: true,
        detectedPatterns,
        severity: 'low'
      };
    }

    // Check all string fields in metadata
    // NOTE(review): only top-level string values are inspected; strings nested
    // inside arrays or objects are not validated here.
    const checkField = (fieldName: string, value: unknown): void => {
      if (typeof value === 'string') {
        // Check field length first
        if (value.length > SECURITY_LIMITS.MAX_METADATA_FIELD_LENGTH) {
          detectedPatterns.push(`${fieldName}: Field exceeds maximum length of ${SECURITY_LIMITS.MAX_METADATA_FIELD_LENGTH} characters`);
          return;
        }

        const result = this.validateAndSanitize(value);
        if (!result.isValid || result.detectedPatterns?.length) {
          detectedPatterns.push(`${fieldName}: ${result.detectedPatterns?.join(', ')}`);
        }
      }
    };

    // Validate standard persona fields
    checkField('name', metadata.name);
    checkField('description', metadata.description);
    checkField('category', metadata.category);
    checkField('author', metadata.author);

    // Check any custom fields
    for (const [key, value] of Object.entries(metadata)) {
      if (!['name', 'description', 'category', 'author'].includes(key)) {
        checkField(key, value);
      }
    }

    return {
      isValid: detectedPatterns.length === 0,
      detectedPatterns,
      severity: detectedPatterns.length > 0 ? 'high' : 'low'
    };
  }

  /**
   * Throws when a validation result carries a critical threat.
   *
   * @throws SecurityError listing the detected pattern descriptions
   */
  private static rejectCriticalContent(result: ContentValidationResult): void {
    if (!result.isValid && result.severity === 'critical') {
      // FIX: Include specific patterns that triggered the rejection for debugging
      const patterns = result.detectedPatterns?.join(', ') || 'unknown patterns';
      throw new SecurityError(`Critical security threat detected in persona content: ${patterns}`);
    }
  }

  /**
   * Sanitizes a complete persona file (frontmatter + content)
   *
   * @param content - Full persona file text, optionally starting with a
   *                  `---` delimited YAML frontmatter block
   * @returns The frontmatter (unchanged) followed by the sanitized body, or
   *          just the sanitized body when there is no frontmatter
   * @throws SecurityError when the frontmatter fails YAML validation or the
   *         body contains a critical threat
   */
  static sanitizePersonaContent(content: string): string {
    // Extract frontmatter. `\r?\n` accepts both LF and CRLF files; matching only
    // `\n` let CRLF personas skip YAML validation entirely because their
    // frontmatter was never recognised.
    const frontmatterMatch = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content);

    if (!frontmatterMatch) {
      // No frontmatter, just validate content
      const result = this.validateAndSanitize(content);
      this.rejectCriticalContent(result);
      // `??` rather than `||`: an empty sanitized string is a legitimate result
      // and must not fall back to the raw, unsanitized input.
      return result.sanitizedContent ?? content;
    }

    const frontmatterBlock = frontmatterMatch[0];
    const yamlContent = frontmatterMatch[1];
    const markdownContent = content.substring(frontmatterBlock.length);

    // Validate YAML
    if (!this.validateYamlContent(yamlContent)) {
      throw new SecurityError('Malicious YAML detected in persona frontmatter');
    }

    // Validate markdown content
    const contentResult = this.validateAndSanitize(markdownContent);
    this.rejectCriticalContent(contentResult);

    // Return sanitized content. The frontmatter is re-emitted exactly as matched
    // (for LF input this equals `---\n<yaml>\n---`), preserving line endings.
    return `${frontmatterBlock}${contentResult.sanitizedContent ?? markdownContent}`;
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DollhouseMCP/DollhouseMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.