Git MCP Server

Overview Schema Related Servers Score Discussions

sanitization.ts•23.6 KiB

/**
 * @fileoverview Provides a comprehensive `Sanitization` class for various input cleaning and validation tasks.
 * This module includes utilities for sanitizing HTML, strings, URLs, file paths, JSON, numbers,
 * and for redacting sensitive information from data intended for logging.
 * The path sanitization utilities are only available in a Node.js environment.
 * @module src/utils/security/sanitization
 */
import sanitizeHtml from 'sanitize-html';
import validator from 'validator';

import { JsonRpcErrorCode, McpError } from '@/types-global/errors.js';
import { logger, requestContextService } from '@/utils/index.js';

const isServerless =
  typeof process === 'undefined' || process.env.IS_SERVERLESS === 'true';

// Dynamically import 'path' only in non-serverless environments.
// NOTE: the import resolves asynchronously, so `pathModule` stays `undefined`
// until the promise settles; `sanitizePath` guards on it and throws until then.
let pathModule: typeof import('path') | undefined;
if (!isServerless) {
  import('path')
    .then((mod) => {
      pathModule = mod.default;
    })
    .catch(() => {
      // This might happen in some bundlers, but we have the guard.
    });
}

/**
 * Reports whether `candidate` is `parentDir` itself or lives beneath it.
 *
 * Uses `path.relative` instead of a string-prefix comparison so that a parent
 * which already ends in a separator (e.g. the filesystem root) is handled
 * correctly, and so that platform path semantics are applied by the path module.
 *
 * @param pathApi - The loaded `path` module.
 * @param parentDir - Absolute, resolved directory that must contain the candidate.
 * @param candidate - Absolute, resolved path to test.
 * @returns True when the candidate does not escape `parentDir`.
 */
const isPathInside = (
  pathApi: typeof import('path'),
  parentDir: string,
  candidate: string,
): boolean => {
  const relative = pathApi.relative(parentDir, candidate);
  if (relative === '') return true;
  // An absolute result means there is no relative route (e.g. another Windows drive).
  if (pathApi.isAbsolute(relative)) return false;
  // Only a leading `..` *segment* escapes; a name such as `..foo` is a regular entry.
  return relative !== '..' && !relative.startsWith(`..${pathApi.sep}`);
};

/**
 * Lowercases a key and strips everything except `a-z` and `0-9`,
 * so `client_secret`, `clientSecret` and `Client-Secret` all compare equal.
 */
const normalizeKey = (str: string): string =>
  str.toLowerCase().replace(/[^a-z0-9]/g, '');

/**
 * Pre-computed lookup structures used while redacting a single payload.
 */
interface SensitiveKeyMatcher {
  /** Sensitive names after `normalizeKey` (empty results dropped). */
  normalized: Set<string>;
  /** Sensitive names merely lowercased, for single-word comparison. */
  words: Set<string>;
  /** Length of the longest entry in `normalized`; bounds the word-run search. */
  maxLength: number;
}

/**
 * Defines options for path sanitization to control how file paths are processed and validated.
 */
export interface PathSanitizeOptions {
  /** If provided, restricts sanitized paths to be relative to this directory. */
  rootDir?: string;
  /** If true, normalizes Windows backslashes to POSIX forward slashes. */
  toPosix?: boolean;
  /** If true, absolute paths are permitted (subject to `rootDir`). Default: false. */
  allowAbsolute?: boolean;
}

/**
 * Contains information about a path sanitization operation.
 */
export interface SanitizedPathInfo {
  /** The final sanitized and normalized path string. */
  sanitizedPath: string;
  /** The original path string before any processing. */
  originalInput: string;
  /** True if the input path was absolute after initial normalization. */
  wasAbsolute: boolean;
  /** True if an absolute path was converted to relative due to `allowAbsolute: false`. */
  convertedToRelative: boolean;
  /** The effective options used for sanitization, including defaults. */
  optionsUsed: PathSanitizeOptions;
}

/**
 * Defines options for context-specific string sanitization.
 */
export interface SanitizeStringOptions {
  /** The context in which the string will be used. 'javascript' is disallowed. */
  context?: 'text' | 'html' | 'attribute' | 'url' | 'javascript';
  /** Custom allowed HTML tags if `context` is 'html'. */
  allowedTags?: string[];
  /** Custom allowed HTML attributes if `context` is 'html'. */
  allowedAttributes?: Record<string, string[]>;
}

/**
 * Configuration options for HTML sanitization, mirroring `sanitize-html` library options.
 */
export interface HtmlSanitizeConfig {
  /** An array of allowed HTML tag names. */
  allowedTags?: string[];
  /** Specifies allowed attributes, either globally or per tag. */
  allowedAttributes?: sanitizeHtml.IOptions['allowedAttributes'];
  /** If true, HTML comments are preserved. */
  preserveComments?: boolean;
  /** Custom functions to transform tags during sanitization. */
  transformTags?: sanitizeHtml.IOptions['transformTags'];
}

/**
 * A singleton class providing various methods for input sanitization.
 * Aims to protect against common vulnerabilities like XSS and path traversal.
 */
export class Sanitization {
  /** @private */
  private static instance: Sanitization;

  /**
   * Default list of field names considered sensitive for log redaction.
   * Case-insensitive matching is applied.
   * @private
   */
  private sensitiveFields: string[] = [
    'password',
    'token',
    'secret',
    'apiKey',
    'credential',
    'jwt',
    'ssn',
    'cvv',
    'authorization',
    'cookie',
    'clientsecret',
    'client_secret',
    'private_key',
    'privatekey',
  ];

  /**
   * Default configuration for HTML sanitization.
   * @private
   */
  private defaultHtmlSanitizeConfig: HtmlSanitizeConfig = {
    allowedTags: [
      // === Structure & Sectioning ===
      'div',
      'span',
      'p',
      'br',
      'hr',
      'header',
      'footer',
      'nav',
      'article',
      'section',
      'aside',
      // === Headings & Text Content ===
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'strong',
      'em',
      'b',
      'i',
      'strike',
      'blockquote',
      // === Code ===
      'code',
      'pre',
      // === Lists ===
      'ul',
      'ol',
      'li',
      // === Tables ===
      'table',
      'thead',
      'tbody',
      'tr',
      'th',
      'td',
      // === Media & Links ===
      'a',
      'img',
      'figure',
      'figcaption',
    ],
    allowedAttributes: {
      a: ['href', 'name', 'target', 'rel', 'title'],
      img: ['src', 'alt', 'title', 'width', 'height', 'loading'],
      // Allow data attributes, class, id, and style on all tags
      '*': ['class', 'id', 'style', 'data-*'],
      // Table-specific attributes
      th: ['scope'],
      td: ['colspan', 'rowspan'],
    },
    preserveComments: true,
  };

  /** @private */
  private constructor() {}

  /**
   * Retrieves the singleton instance of the `Sanitization` class.
   * @returns The singleton `Sanitization` instance.
   */
  public static getInstance(): Sanitization {
    if (!Sanitization.instance) {
      Sanitization.instance = new Sanitization();
    }
    return Sanitization.instance;
  }

  /**
   * Extends the list of sensitive field names for log sanitization.
   * New names are lowercased and merged with the existing list; duplicates are dropped.
   * Existing entries are never removed.
   * @param fields - An array of field names to add to the sensitive list.
   */
  public setSensitiveFields(fields: string[]): void {
    const merged = new Set([
      ...this.sensitiveFields,
      ...fields.map((f) => f.toLowerCase()),
    ]);
    this.sensitiveFields = [...merged];

    const logContext = requestContextService.createRequestContext({
      operation: 'Sanitization.setSensitiveFields',
      additionalContext: {
        newSensitiveFieldCount: this.sensitiveFields.length,
      },
    });
    logger.debug(
      'Updated sensitive fields list for log sanitization',
      logContext,
    );
  }

  /**
   * Gets a copy of the current list of sensitive field names.
   * @returns An array of sensitive field names.
   */
  public getSensitiveFields(): string[] {
    return [...this.sensitiveFields];
  }

  /**
   * Gets a pino-compliant copy of the current list of sensitive field names
   * (hyphens and underscores removed from each name).
   * @returns A pino-compliant array of sensitive field names.
   */
  public getSensitivePinoFields(): string[] {
    return this.sensitiveFields.map((field) => field.replace(/[-_]/g, ''));
  }

  /**
   * Sanitizes an HTML string by removing potentially malicious tags and attributes.
   * Any option missing from `config` falls back to the class defaults.
   * @param input - The HTML string to sanitize.
   * @param config - Optional custom configuration for `sanitize-html`.
   * @returns The sanitized HTML string. Returns an empty string if input is falsy.
   */
  public sanitizeHtml(input: string, config?: HtmlSanitizeConfig): string {
    if (!input) return '';

    const defaults = this.defaultHtmlSanitizeConfig;
    const preserveComments =
      config?.preserveComments ?? defaults.preserveComments;

    const options: sanitizeHtml.IOptions = {
      allowedTags: config?.allowedTags ?? defaults.allowedTags,
      allowedAttributes:
        config?.allowedAttributes ?? defaults.allowedAttributes,
      transformTags: config?.transformTags, // Can be undefined
    };

    if (preserveComments) {
      // Ensure allowedTags is an array before spreading
      const baseTags = Array.isArray(options.allowedTags)
        ? options.allowedTags
        : [];
      options.allowedTags = [...baseTags, '!--'];
    }

    return sanitizeHtml(input, options);
  }

  /**
   * Sanitizes a string based on its intended context (e.g., HTML, URL, text).
   * **Important:** `context: 'javascript'` is disallowed due to security risks.
   *
   * For `context: 'url'` the input is trimmed and then validated as an http/https URL;
   * an invalid URL is logged as a warning and an empty string is returned (no exception).
   *
   * @param input - The string to sanitize.
   * @param options - Options specifying the sanitization context.
   * @returns The sanitized string. Returns an empty string if input is falsy
   *   or if URL validation fails.
   * @throws {McpError} If `options.context` is 'javascript'.
   */
  public sanitizeString(
    input: string,
    options: SanitizeStringOptions = {},
  ): string {
    if (!input) return '';

    const context = options.context ?? 'text';

    switch (context) {
      case 'html': {
        const config: HtmlSanitizeConfig = {};
        if (options.allowedTags) {
          config.allowedTags = options.allowedTags;
        }
        if (options.allowedAttributes) {
          config.allowedAttributes = this.convertAttributesFormat(
            options.allowedAttributes,
          );
        }
        return this.sanitizeHtml(input, config);
      }
      case 'attribute':
        return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
      case 'url': {
        // Trim BEFORE validating (as `sanitizeUrl` does): validating the raw
        // input would reject padded URLs and make the trim unreachable.
        const trimmedUrl = validator.trim(input);
        if (
          !validator.isURL(trimmedUrl, {
            protocols: ['http', 'https'],
            require_protocol: true,
            require_host: true,
          })
        ) {
          logger.warning(
            'Potentially invalid URL detected during string sanitization (context: url)',
            requestContextService.createRequestContext({
              operation: 'Sanitization.sanitizeString.urlWarning',
              additionalContext: { invalidUrlAttempt: input },
            }),
          );
          return '';
        }
        return trimmedUrl;
      }
      case 'javascript':
        logger.error(
          'Attempted JavaScript sanitization via sanitizeString, which is disallowed.',
          requestContextService.createRequestContext({
            operation: 'Sanitization.sanitizeString.jsAttempt',
            additionalContext: { inputSnippet: input.substring(0, 50) },
          }),
        );
        throw new McpError(
          JsonRpcErrorCode.ValidationError,
          'JavaScript sanitization is not supported through sanitizeString due to security risks.',
        );
      case 'text':
      default:
        return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
    }
  }

  /**
   * Converts attribute format for `sanitizeHtml`.
   * Currently an identity mapping: the input shape is passed through unchanged.
   * @param attrs - Attributes in `{ tagName: ['attr1'] }` format.
   * @returns Attributes in `sanitize-html` expected format.
   * @private
   */
  private convertAttributesFormat(
    attrs: Record<string, string[]>,
  ): sanitizeHtml.IOptions['allowedAttributes'] {
    return attrs;
  }

  /**
   * Sanitizes a URL string by validating its format and protocol.
   * @param input - The URL string to sanitize.
   * @param allowedProtocols - Array of allowed URL protocols. Default: `['http', 'https']`.
   * @returns The sanitized and trimmed URL string.
   * @throws {McpError} If the URL is invalid or uses a disallowed protocol.
   */
  public sanitizeUrl(
    input: string,
    allowedProtocols: string[] = ['http', 'https'],
  ): string {
    try {
      const trimmedInput = input.trim();
      const looksValid = validator.isURL(trimmedInput, {
        protocols: allowedProtocols,
        require_protocol: true,
        require_host: true,
      });
      if (!looksValid) {
        throw new Error('Invalid URL format or protocol not in allowed list.');
      }

      // Script-capable pseudo-protocols are rejected even if a caller lists them as allowed.
      const lowercasedInput = trimmedInput.toLowerCase();
      const bannedPrefixes = ['javascript:', 'data:', 'vbscript:'];
      if (bannedPrefixes.some((prefix) => lowercasedInput.startsWith(prefix))) {
        throw new Error(
          'Disallowed pseudo-protocol (javascript:, data:, or vbscript:) in URL.',
        );
      }
      return trimmedInput;
    } catch (error) {
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        error instanceof Error
          ? error.message
          : 'Invalid or unsafe URL provided.',
        { input },
      );
    }
  }

  /**
   * Sanitizes a file path to prevent path traversal and normalize format.
   * This method is only available in a Node.js environment, and only once the
   * lazily imported `path` module has finished loading.
   *
   * Behavior:
   * - With `rootDir`: the input is resolved against the root, must stay inside it,
   *   and is returned relative to it (`'.'` for the root itself).
   * - Without `rootDir`: absolute paths are returned normalized when `allowAbsolute`
   *   is true and rejected otherwise; relative paths must not escape the current
   *   working directory.
   * - With `toPosix`: backslashes in the returned path are replaced by forward slashes.
   *
   * @param input - The file path string to sanitize.
   * @param options - Options to control sanitization behavior.
   * @returns An object with the sanitized path and sanitization metadata.
   * @throws {McpError} If the path is invalid, unsafe, or method is called in a non-Node.js environment.
   */
  public sanitizePath(
    input: string,
    options: PathSanitizeOptions = {},
  ): SanitizedPathInfo {
    if (isServerless || !pathModule) {
      throw new McpError(
        JsonRpcErrorCode.InternalError,
        'File-based path sanitization is not supported in this environment.',
      );
    }
    const path = pathModule;

    const originalInput = input;
    const resolvedRootDir = options.rootDir
      ? path.resolve(options.rootDir)
      : undefined;

    const effectiveOptions: PathSanitizeOptions = {
      toPosix: options.toPosix ?? false,
      allowAbsolute: options.allowAbsolute ?? false,
      ...(resolvedRootDir && { rootDir: resolvedRootDir }),
    };

    let wasAbsoluteInitially = false;

    try {
      if (!input || typeof input !== 'string')
        throw new Error('Invalid path input: must be a non-empty string.');
      if (input.includes('\0'))
        throw new Error('Path contains null byte, which is disallowed.');

      let normalized = path.normalize(input);
      wasAbsoluteInitially = path.isAbsolute(normalized);

      if (effectiveOptions.toPosix) {
        normalized = normalized.replace(/\\/g, '/');
      }

      let finalSanitizedPath: string;

      if (resolvedRootDir) {
        const fullPath = path.resolve(resolvedRootDir, normalized);
        if (!isPathInside(path, resolvedRootDir, fullPath)) {
          throw new Error(
            'Path traversal detected: attempts to escape the defined root directory.',
          );
        }
        // Containment was verified above, so the relative form can never be absolute.
        const relativeToRoot = path.relative(resolvedRootDir, fullPath);
        finalSanitizedPath = relativeToRoot === '' ? '.' : relativeToRoot;
      } else if (path.isAbsolute(normalized)) {
        if (!effectiveOptions.allowAbsolute) {
          throw new Error('Absolute paths are disallowed by current options.');
        }
        finalSanitizedPath = normalized;
      } else {
        const resolvedAgainstCwd = path.resolve(normalized);
        const currentWorkingDir = path.resolve('.');
        if (!isPathInside(path, currentWorkingDir, resolvedAgainstCwd)) {
          throw new Error(
            'Relative path traversal detected (escapes current working directory context).',
          );
        }
        finalSanitizedPath = normalized;
      }

      // `path.relative` emits platform separators, so re-apply the POSIX
      // conversion to the value that is actually returned.
      if (effectiveOptions.toPosix) {
        finalSanitizedPath = finalSanitizedPath.replace(/\\/g, '/');
      }

      return {
        sanitizedPath: finalSanitizedPath,
        originalInput,
        wasAbsolute: wasAbsoluteInitially,
        convertedToRelative:
          wasAbsoluteInitially &&
          !path.isAbsolute(finalSanitizedPath) &&
          !effectiveOptions.allowAbsolute,
        optionsUsed: effectiveOptions,
      };
    } catch (error) {
      logger.warning(
        'Path sanitization error',
        requestContextService.createRequestContext({
          operation: 'Sanitization.sanitizePath.error',
          additionalContext: {
            originalPathInput: originalInput,
            pathOptionsUsed: effectiveOptions,
            errorMessage:
              error instanceof Error ? error.message : String(error),
          },
        }),
      );
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        error instanceof Error
          ? error.message
          : 'Invalid or unsafe path provided.',
        { input: originalInput },
      );
    }
  }

  /**
   * Computes the UTF-8 byte length of a string across runtimes.
   * Prefers `Buffer.byteLength`, then `TextEncoder`; as a last resort returns the
   * UTF-16 code-unit count (`s.length`), which under-counts multi-byte text.
   * @param s - The string to measure.
   * @returns The size of `s` in bytes (or code units in the last-resort case).
   * @private
   */
  private computeByteLength(s: string): number {
    if (
      typeof Buffer !== 'undefined' &&
      typeof Buffer.byteLength === 'function'
    ) {
      return Buffer.byteLength(s, 'utf8');
    }
    if (typeof TextEncoder !== 'undefined') {
      return new TextEncoder().encode(s).length;
    }
    return s.length;
  }

  /**
   * Sanitizes a JSON string by parsing it to validate its format.
   * Optionally checks if the JSON string exceeds a maximum allowed size.
   * The parsed value is cast to `T` without structural validation.
   * @template T The expected type of the parsed JSON object. Defaults to `unknown`.
   * @param input - The JSON string to sanitize/validate.
   * @param maxSize - Optional maximum allowed size of the JSON string in bytes.
   * @returns The parsed JavaScript object.
   * @throws {McpError} If input is not a string, too large, or invalid JSON.
   */
  public sanitizeJson<T = unknown>(input: string, maxSize?: number): T {
    try {
      if (typeof input !== 'string')
        throw new Error('Invalid input: expected a JSON string.');

      if (maxSize !== undefined) {
        const actualSize = this.computeByteLength(input);
        if (actualSize > maxSize) {
          throw new McpError(
            JsonRpcErrorCode.ValidationError,
            `JSON string exceeds maximum allowed size of ${maxSize} bytes.`,
            { actualSize, maxSize },
          );
        }
      }

      return JSON.parse(input) as T;
    } catch (error) {
      if (error instanceof McpError) throw error;

      // Callers may violate the declared type at runtime (e.g. pass null);
      // the preview must not throw and mask the validation error.
      const rawInput: unknown = input;
      let inputPreview: string;
      if (typeof rawInput === 'string') {
        inputPreview =
          rawInput.length > 100
            ? `${rawInput.substring(0, 100)}...`
            : rawInput;
      } else {
        inputPreview = `[non-string input: ${
          rawInput === null ? 'null' : typeof rawInput
        }]`;
      }

      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        error instanceof Error ? error.message : 'Invalid JSON format.',
        { inputPreview },
      );
    }
  }

  /**
   * Validates and sanitizes a numeric input, converting strings to numbers.
   * Clamps the number to `min`/`max` if provided.
   * @param input - The number or string to validate and sanitize.
   * @param min - Minimum allowed value (inclusive).
   * @param max - Maximum allowed value (inclusive).
   * @returns The sanitized (and potentially clamped) number.
   * @throws {McpError} If input is not a valid number, NaN, or Infinity.
   */
  public sanitizeNumber(
    input: number | string,
    min?: number,
    max?: number,
  ): number {
    let value: number;

    if (typeof input === 'string') {
      const trimmedInput = input.trim();
      if (trimmedInput === '' || !validator.isNumeric(trimmedInput)) {
        throw new McpError(
          JsonRpcErrorCode.ValidationError,
          'Invalid number format: input is empty or not numeric.',
          { input },
        );
      }
      value = parseFloat(trimmedInput);
    } else if (typeof input === 'number') {
      value = input;
    } else {
      // Reachable only when a caller bypasses the static types.
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        'Invalid input type: expected number or string.',
        { input: String(input) },
      );
    }

    if (!Number.isFinite(value)) {
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        'Invalid number value (NaN or Infinity).',
        { input },
      );
    }

    const originalValueForLog = value;
    if (min !== undefined && value < min) {
      value = min;
    }
    if (max !== undefined && value > max) {
      value = max;
    }

    if (value !== originalValueForLog) {
      logger.debug(
        'Number clamped to range.',
        requestContextService.createRequestContext({
          operation: 'Sanitization.sanitizeNumber.clamped',
          additionalContext: {
            originalInput: String(input),
            parsedValue: originalValueForLog,
            minValue: min,
            maxValue: max,
            clampedValue: value,
          },
        }),
      );
    }
    return value;
  }

  /**
   * Sanitizes input for logging by redacting sensitive fields.
   * Creates a deep clone and replaces the value of every sensitive key with "[REDACTED]".
   * A key is sensitive when, compared case-insensitively against `this.sensitiveFields`:
   * - the whole key matches once punctuation is ignored (`Client-Secret`), or
   * - one of its words matches (`authToken` -> `token`), or
   * - a run of consecutive words matches (`userApiKey` -> `apikey`).
   *
   * It uses `structuredClone` if available for a high-fidelity deep clone.
   * If `structuredClone` is not available (e.g., in older Node.js environments),
   * it falls back to `JSON.parse(JSON.stringify(input))`. This fallback has limitations:
   * - `Date` objects are converted to ISO date strings.
   * - `undefined` values within objects are removed.
   * - `Map`, `Set`, `RegExp` objects are converted to empty objects (`{}`).
   * - Functions are removed.
   * - `BigInt` values will throw an error during `JSON.stringify` unless a `toJSON` method is provided.
   * - Circular references will cause `JSON.stringify` to throw an error.
   *
   * @param input - The input data to sanitize for logging.
   * @returns A sanitized (deep cloned) version of the input, safe for logging.
   *   Returns original input if not object/array, or "[Log Sanitization Failed]" on error.
   */
  public sanitizeForLogging(input: unknown): unknown {
    try {
      if (!input || typeof input !== 'object') return input;

      const clonedInput: unknown =
        typeof globalThis.structuredClone === 'function'
          ? globalThis.structuredClone(input)
          : JSON.parse(JSON.stringify(input));
      this.redactSensitiveFields(clonedInput);
      return clonedInput;
    } catch (error) {
      logger.error(
        'Error during log sanitization, returning placeholder.',
        requestContextService.createRequestContext({
          operation: 'Sanitization.sanitizeForLogging.error',
          additionalContext: {
            errorMessage:
              error instanceof Error ? error.message : String(error),
          },
        }),
      );
      return '[Log Sanitization Failed]';
    }
  }

  /**
   * Builds the lookup sets used to classify keys, from the current sensitive list.
   * @returns Matcher reflecting `this.sensitiveFields` at call time.
   * @private
   */
  private buildSensitiveKeyMatcher(): SensitiveKeyMatcher {
    const normalized = new Set(
      this.sensitiveFields.map((f) => normalizeKey(f)).filter(Boolean),
    );
    const words = new Set(
      this.sensitiveFields.map((f) => f.toLowerCase()).filter(Boolean),
    );
    let maxLength = 0;
    for (const name of normalized) {
      maxLength = Math.max(maxLength, name.length);
    }
    return { normalized, words, maxLength };
  }

  /**
   * Decides whether an object key names sensitive data.
   * @param key - The property name to classify.
   * @param matcher - Pre-computed sensitive-name lookup sets.
   * @returns True if the key should be redacted.
   * @private
   */
  private isSensitiveKey(key: string, matcher: SensitiveKeyMatcher): boolean {
    if (matcher.normalized.has(normalizeKey(key))) return true;

    // Split into words for token-based matching (camelCase, snake_case, kebab-case)
    const keyWords = key
      .replace(/([A-Z])/g, ' $1')
      .toLowerCase()
      .split(/[\s_-]+/)
      .filter(Boolean);

    if (keyWords.some((w) => matcher.words.has(w))) return true;

    // Multi-word sensitive names (apiKey, private_key, ...) may be embedded in a
    // longer key such as `userApiKey` or `x-api-key`: test runs of consecutive words.
    for (let start = 0; start < keyWords.length; start++) {
      let joined = '';
      for (let end = start; end < keyWords.length; end++) {
        const word = keyWords[end];
        if (word === undefined) break;
        joined += normalizeKey(word);
        // No sensitive name is longer than maxLength, so extending further is futile.
        if (joined.length > matcher.maxLength) break;
        if (joined !== '' && matcher.normalized.has(joined)) return true;
      }
    }
    return false;
  }

  /**
   * Recursively redacts sensitive fields in an object or array in place.
   * @param obj - The object or array to redact.
   * @param matcher - Lookup sets, built once per top-level call and reused while recursing.
   * @param visited - Objects already processed; prevents unbounded recursion on
   *   circular structures (which `structuredClone` preserves).
   * @private
   */
  private redactSensitiveFields(
    obj: unknown,
    matcher: SensitiveKeyMatcher = this.buildSensitiveKeyMatcher(),
    visited: WeakSet<object> = new WeakSet<object>(),
  ): void {
    if (!obj || typeof obj !== 'object') return;
    if (visited.has(obj)) return;
    visited.add(obj);

    if (Array.isArray(obj)) {
      for (const item of obj) {
        this.redactSensitiveFields(item, matcher, visited);
      }
      return;
    }

    const record = obj as Record<string, unknown>;
    // Own enumerable string keys only, i.e. the same set as `for...in` + `hasOwnProperty`.
    for (const key of Object.keys(record)) {
      if (this.isSensitiveKey(key, matcher)) {
        record[key] = '[REDACTED]';
        continue;
      }
      const value = record[key];
      if (value && typeof value === 'object') {
        this.redactSensitiveFields(value, matcher, visited);
      }
    }
  }
}

/**
 * Singleton instance of the `Sanitization` class.
 * Use this for all input sanitization tasks.
 */
export const sanitization = Sanitization.getInstance();

/**
 * Convenience function calling `sanitization.sanitizeForLogging`.
 * @param input - The input data to sanitize.
 * @returns A sanitized version of the input, safe for logging.
 */
export const sanitizeInputForLogging = (input: unknown): unknown =>
  sanitization.sanitizeForLogging(input);

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cyanheads/git-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

sanitization.ts•23.6 KiB