Skip to main content
Glama

DollhouseMCP

by DollhouseMCP
unicodeValidator.test.tsβ€’16.2 kB
/** * Unicode Validator Tests * * Tests Unicode attack prevention including: * - Homograph attacks * - Direction override attacks * - Mixed script attacks * - Zero-width character injection * - Unicode normalization bypasses */ import { UnicodeValidator } from '../../../../src/security/validators/unicodeValidator.js'; describe('UnicodeValidator', () => { describe('normalize', () => { test('should pass through normal ASCII content unchanged', () => { const content = 'Hello World! This is normal ASCII text.'; const result = UnicodeValidator.normalize(content); expect(result.isValid).toBe(true); expect(result.normalizedContent).toBe(content); expect(result.detectedIssues).toBeUndefined(); expect(result.severity).toBeUndefined(); }); test('should normalize Unicode to NFC form', () => { // Γ© can be represented as single character (Γ©) or as e + combining accent (Γ©) const composed = 'cafΓ©'; // Single character Γ© const decomposed = 'cafe\u0301'; // e + combining acute accent const result1 = UnicodeValidator.normalize(composed); const result2 = UnicodeValidator.normalize(decomposed); expect(result1.normalizedContent).toBe(result2.normalizedContent); expect(result1.normalizedContent).toBe('cafΓ©'); }); }); describe('Direction Override Attack Prevention', () => { test('should detect and remove RLO (Right-to-Left Override) characters', () => { const maliciousContent = 'admin\u202Eeval\u202Dpassword'; // RLO chars const result = UnicodeValidator.normalize(maliciousContent); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('adminevalpassword'); expect(result.detectedIssues).toContain('Direction override characters detected'); expect(result.severity).toBe('high'); }); test('should detect and remove LRO (Left-to-Right Override) characters', () => { const maliciousContent = 'test\u202Dhidden\u202Ccontent'; // LRO chars const result = UnicodeValidator.normalize(maliciousContent); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('testhiddencontent'); expect(result.detectedIssues).toContain('Direction override characters detected'); expect(result.severity).toBe('high'); }); test('should detect bidirectional isolate characters', () => { const maliciousContent = 'safe\u2066dangerous\u2069content'; // FSI/PDI const result = UnicodeValidator.normalize(maliciousContent); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('safedangerouscontent'); expect(result.detectedIssues).toContain('Direction override characters detected'); }); }); describe('Zero-Width Character Attack Prevention', () => { test('should detect and remove zero-width spaces', () => { const maliciousContent = 'admin\u200Bpassword\u200Ceval'; // ZWSP, ZWNJ const result = UnicodeValidator.normalize(maliciousContent); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('adminpasswordeval'); expect(result.detectedIssues).toContain('Zero-width or non-printable characters detected'); expect(result.severity).toBe('medium'); }); test('should detect line and paragraph separators', () => { const maliciousContent = 'line1\u2028line2\u2029paragraph'; // LS, PS const result = UnicodeValidator.normalize(maliciousContent); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('line1line2paragraph'); expect(result.detectedIssues).toContain('Zero-width or non-printable characters detected'); }); test('should detect BOM and non-characters', () => { const maliciousContent = '\uFEFFtest\uFFFEcontent\uFFFF'; // BOM, non-chars const result = UnicodeValidator.normalize(maliciousContent); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('testcontent'); expect(result.detectedIssues).toContain('Zero-width or non-printable characters detected'); }); }); describe('Homograph Attack Prevention', () => { test('should normalize Cyrillic characters to Latin equivalents', () => { // Cyrillic 'Π°' looks like Latin 'a' but has different Unicode const cyrillicAttack = 'Π°dmin'; // First char is Cyrillic 'Π°' (U+0430) const result = UnicodeValidator.normalize(cyrillicAttack); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('admin'); // Normalized to Latin expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); expect(result.severity).toBe('high'); // Mixed script detected first, escalates to high }); test('should normalize Greek characters to Latin equivalents', () => { const greekAttack = 'Ξ±dmin'; // Greek alpha const result = UnicodeValidator.normalize(greekAttack); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('admin'); expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); }); test('should normalize Turkish dotless i', () => { const turkishAttack = 'admΔ±n'; // Turkish dotless i (Δ±) const result = UnicodeValidator.normalize(turkishAttack); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('admin'); expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); }); test('should normalize mathematical styled characters', () => { const mathAttack = 'π’‚π’…π’Žπ’Šπ’'; // Mathematical script characters const result = UnicodeValidator.normalize(mathAttack); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('admin'); expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); }); test('should normalize fullwidth characters', () => { const fullwidthAttack = 'ο½ο½„ο½ο½‰ο½Ž'; // Fullwidth Latin const result = UnicodeValidator.normalize(fullwidthAttack); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('admin'); expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); }); }); describe('Mixed Script Attack Detection', () => { test('should detect suspicious Latin + Cyrillic mixing', () => { const mixedScript = 'adminΡ€assword'; // Latin + Cyrillic 'Ρ€' const result = UnicodeValidator.normalize(mixedScript); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('adminpassword'); // Normalized expect(result.detectedIssues).toContain('Mixed script usage detected: LATIN, CYRILLIC'); expect(result.severity).toBe('high'); }); test('should detect Latin + Greek mixing', () => { const mixedScript = 'adminΟ€assword'; // Latin + Greek Ο€ const result = UnicodeValidator.normalize(mixedScript); expect(result.isValid).toBe(false); expect(result.detectedIssues).toContain('Mixed script usage detected: LATIN, GREEK'); expect(result.severity).toBe('high'); }); test('should allow single script usage', () => { const pureGreek = 'αβγδΡ'; // Pure Greek const result = UnicodeValidator.normalize(pureGreek); // Pure Greek should be valid (no mixing) - but confusables are detected expect(result.isValid).toBe(false); // Confusables detected, but not mixed scripts expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); expect(result.detectedIssues).not.toContain('Mixed script usage detected'); }); test('should detect excessive script mixing', () => { const multiScript = 'aΞ±Π°Ψ£Ψ§'; // Latin + Greek + Cyrillic + Arabic + Arabic const result = UnicodeValidator.normalize(multiScript); expect(result.isValid).toBe(false); // Script order may vary, so check if it contains the expected scripts expect(result.detectedIssues!.some(issue => issue.includes('Mixed script usage detected') && issue.includes('LATIN') && issue.includes('GREEK') && issue.includes('CYRILLIC') && issue.includes('ARABIC') )).toBe(true); expect(result.severity).toBe('high'); }); }); describe('Unicode Escape Attack Detection', () => { test('should detect excessive Unicode escapes', () => { // Simulate base64 encoded payload with many Unicode escapes const escapeAttack = '\\u0065\\u0076\\u0061\\u006c\\u0028\\u0022\\u006d\\u0061\\u006c\\u0069\\u0063\\u0069\\u006f\\u0075\\u0073\\u0022\\u0029'; const result = UnicodeValidator.normalize(escapeAttack); expect(result.isValid).toBe(false); expect(result.detectedIssues).toContain('Excessive Unicode escapes detected (17)'); expect(result.severity).toBe('high'); }); test('should allow normal Unicode escapes', () => { const normalEscape = 'Hello \\u0041 World'; // Just one Unicode A const result = UnicodeValidator.normalize(normalEscape); expect(result.isValid).toBe(true); }); }); describe('Suspicious Unicode Range Detection', () => { test('should detect Private Use Area characters', () => { const privateUse = 'test\uE000hidden\uF8FFcontent'; // PUA chars const result = UnicodeValidator.normalize(privateUse); expect(result.isValid).toBe(false); expect(result.detectedIssues).toContain('Suspicious Unicode range detected: Private Use Area'); expect(result.severity).toBe('medium'); }); test('should detect non-character codepoints', () => { const nonChar = 'test\uFDD0content\uFDEF'; // Non-characters const result = UnicodeValidator.normalize(nonChar); expect(result.isValid).toBe(false); expect(result.detectedIssues).toContain('Suspicious Unicode range detected: Non-characters'); expect(result.severity).toBe('medium'); }); }); describe('Combined Attack Scenarios', () => { test('should handle multiple attack vectors simultaneously', () => { // Combine direction override + homographs + zero-width const complexAttack = 'Π°dmin\u202E\u200Beval\u202DΟ€assword'; const result = UnicodeValidator.normalize(complexAttack); expect(result.isValid).toBe(false); expect(result.detectedIssues).toHaveLength(4); // Direction override, zero-width, mixed scripts, confusables expect(result.severity).toBe('high'); // Highest severity expect(result.normalizedContent).toBe('adminevalpassword'); }); test('should escalate severity correctly', () => { // Start with medium (confusables) then add high (direction override) const escalatedAttack = 'Π°dmin\u202Eeval'; const result = UnicodeValidator.normalize(escalatedAttack); expect(result.severity).toBe('high'); // Should escalate to high }); }); describe('Real-world Attack Simulations', () => { test('should prevent domain spoofing attack', () => { // Simulate phishing URL with Cyrillic characters const spoofedDomain = 'gΠΎogle.com'; // Cyrillic 'ΠΎ' instead of Latin 'o' const result = UnicodeValidator.normalize(spoofedDomain); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('google.com'); expect(result.detectedIssues).toContain('Confusable Unicode characters detected and normalized'); }); test('should prevent hidden command injection', () => { // Use direction override to hide malicious content const hiddenCommand = 'safe_command\u202E && rm -rf /\u202D_safe'; const result = UnicodeValidator.normalize(hiddenCommand); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('safe_command && rm -rf /_safe'); expect(result.detectedIssues).toContain('Direction override characters detected'); expect(result.severity).toBe('high'); }); test('should prevent invisible character payload injection', () => { // Hide payload using zero-width characters const invisiblePayload = 'normal\u200Beval("malicious")\u200Ctext'; const result = UnicodeValidator.normalize(invisiblePayload); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('normaleval("malicious")text'); expect(result.detectedIssues).toContain('Zero-width or non-printable characters detected'); }); }); describe('Utility Methods', () => { describe('containsDangerousUnicode', () => { test('should detect dangerous Unicode patterns quickly', () => { expect(UnicodeValidator.containsDangerousUnicode('safe text')).toBe(false); expect(UnicodeValidator.containsDangerousUnicode('text\u202Ewith RLO')).toBe(true); expect(UnicodeValidator.containsDangerousUnicode('text\u200Bwith ZWSP')).toBe(true); // Many Unicode escapes const manyEscapes = '\\u0065\\u0076\\u0061\\u006c\\u0028\\u0022\\u006d\\u0061\\u006c\\u0069\\u0063\\u0069\\u006f\\u0075\\u0073\\u0022\\u0029'; expect(UnicodeValidator.containsDangerousUnicode(manyEscapes)).toBe(true); }); }); describe('getSafePreview', () => { test('should create safe preview for logging', () => { const dangerous = 'test\u202Ewith\u200Bdangerous\u0001chars'; const preview = UnicodeValidator.getSafePreview(dangerous, 50); expect(preview).toBe('test[DIR]with[ZW]dangerous[NP]chars'); expect(preview.length).toBeLessThanOrEqual(50); }); test('should truncate long content', () => { const longText = 'a'.repeat(200); const preview = UnicodeValidator.getSafePreview(longText, 50); expect(preview.length).toBeLessThanOrEqual(53); // 50 + '...' expect(preview.endsWith('...')).toBe(true); }); }); }); describe('Error Handling', () => { test('should handle normalization errors gracefully', () => { // Mock normalize to throw an error const originalNormalize = String.prototype.normalize; String.prototype.normalize = () => { throw new Error('Normalization failed'); }; try { const result = UnicodeValidator.normalize('test content'); expect(result.isValid).toBe(false); expect(result.normalizedContent).toBe('test content'); // Fallback to original expect(result.detectedIssues).toContain('Unicode validation failed'); expect(result.severity).toBe('high'); } finally { // Restore original method String.prototype.normalize = originalNormalize; } }); }); describe('Performance', () => { test('should handle large content efficiently', () => { // Create 50KB of mixed content const largeContent = 'a'.repeat(25000) + 'Π±'.repeat(25000); // Latin + Cyrillic const startTime = process.hrtime.bigint(); const result = UnicodeValidator.normalize(largeContent); const endTime = process.hrtime.bigint(); const durationMs = Number(endTime - startTime) / 1_000_000; expect(result.isValid).toBe(false); // Mixed scripts expect(durationMs).toBeLessThan(100); // Should complete in under 100ms }); test('should be fast for normal content', () => { const normalContent = 'This is a normal English sentence with some numbers 123 and symbols !@#$%'; const startTime = process.hrtime.bigint(); const result = UnicodeValidator.normalize(normalContent); const endTime = process.hrtime.bigint(); const durationMs = Number(endTime - startTime) / 1_000_000; expect(result.isValid).toBe(true); expect(durationMs).toBeLessThan(10); // Should be very fast for normal content }); }); });

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DollhouseMCP/DollhouseMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server