Skip to main content
Glama

Git MCP Server

sanitization.ts (24.1 kB)
/**
 * @fileoverview Provides a comprehensive `Sanitization` class for various input cleaning and validation tasks.
 * This module includes utilities for sanitizing HTML, strings, URLs, file paths, JSON, numbers,
 * and for redacting sensitive information from data intended for logging.
 * The path sanitization utilities are only available in a Node.js environment.
 * @module src/utils/security/sanitization
 */
import sanitizeHtml from 'sanitize-html';
import validator from 'validator';

import { JsonRpcErrorCode, McpError } from '@/types-global/errors.js';
import { logger, requestContextService } from '@/utils/index.js';

const isServerless =
  typeof process === 'undefined' || process.env.IS_SERVERLESS === 'true';

// Dynamically import 'path' only in non-serverless environments.
// NOTE: the import settles asynchronously, so `pathModule` stays undefined (and
// `sanitizePath` throws) until the promise below has resolved.
let pathModule: typeof import('path') | undefined;
if (!isServerless) {
  import('path')
    .then((mod) => {
      pathModule = mod.default;
    })
    .catch(() => {
      // This might happen in some bundlers, but we have the guard.
    });
}

/**
 * Defines options for path sanitization to control how file paths are processed and validated.
 */
export interface PathSanitizeOptions {
  /** If provided, restricts sanitized paths to be relative to this directory. */
  rootDir?: string;
  /** If true, normalizes Windows backslashes to POSIX forward slashes. */
  toPosix?: boolean;
  /** If true, absolute paths are permitted (subject to `rootDir`). Default: false. */
  allowAbsolute?: boolean;
}

/**
 * Contains information about a path sanitization operation.
 */
export interface SanitizedPathInfo {
  /** The final sanitized and normalized path string. */
  sanitizedPath: string;
  /** The original path string before any processing. */
  originalInput: string;
  /** True if the input path was absolute after initial normalization. */
  wasAbsolute: boolean;
  /** True if an absolute path was converted to relative due to `allowAbsolute: false`. */
  convertedToRelative: boolean;
  /** The effective options used for sanitization, including defaults. */
  optionsUsed: PathSanitizeOptions;
}

/**
 * Defines options for context-specific string sanitization.
 */
export interface SanitizeStringOptions {
  /** The context in which the string will be used. 'javascript' is disallowed. */
  context?: 'text' | 'html' | 'attribute' | 'url' | 'javascript';
  /** Custom allowed HTML tags if `context` is 'html'. */
  allowedTags?: string[];
  /** Custom allowed HTML attributes if `context` is 'html'. */
  allowedAttributes?: Record<string, string[]>;
}

/**
 * Configuration options for HTML sanitization, mirroring `sanitize-html` library options.
 */
export interface HtmlSanitizeConfig {
  /** An array of allowed HTML tag names. */
  allowedTags?: string[];
  /** Specifies allowed attributes, either globally or per tag. */
  allowedAttributes?: sanitizeHtml.IOptions['allowedAttributes'];
  /** If true, HTML comments are preserved. */
  preserveComments?: boolean;
  /** Custom functions to transform tags during sanitization. */
  transformTags?: sanitizeHtml.IOptions['transformTags'];
}

/**
 * A singleton class providing various methods for input sanitization.
 * Aims to protect against common vulnerabilities like XSS and path traversal.
 */
export class Sanitization {
  /** @private */
  private static instance: Sanitization;

  /**
   * Default list of field names considered sensitive for log redaction.
   * Case-insensitive matching is applied.
   * @private
   */
  private sensitiveFields: string[] = [
    'password',
    'token',
    'secret',
    'apiKey',
    'credential',
    'jwt',
    'ssn',
    'cvv',
    'authorization',
    'cookie',
    'clientsecret',
    'client_secret',
    'private_key',
    'privatekey',
  ];

  /**
   * Default configuration for HTML sanitization.
   * @private
   */
  private defaultHtmlSanitizeConfig: HtmlSanitizeConfig = {
    allowedTags: [
      // === Structure & Sectioning ===
      'div',
      'span',
      'p',
      'br',
      'hr',
      'header',
      'footer',
      'nav',
      'article',
      'section',
      'aside',
      // === Headings & Text Content ===
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'strong',
      'em',
      'b',
      'i',
      'strike',
      'blockquote',
      // === Code ===
      'code',
      'pre',
      // === Lists ===
      'ul',
      'ol',
      'li',
      // === Tables ===
      'table',
      'thead',
      'tbody',
      'tr',
      'th',
      'td',
      // === Media & Links ===
      'a',
      'img',
      'figure',
      'figcaption',
    ],
    allowedAttributes: {
      a: ['href', 'name', 'target', 'rel', 'title'],
      img: ['src', 'alt', 'title', 'width', 'height', 'loading'],
      // Allow data attributes, class, id, and style on all tags
      '*': ['class', 'id', 'style', 'data-*'],
      // Table-specific attributes
      th: ['scope'],
      td: ['colspan', 'rowspan'],
    },
    preserveComments: true,
  };

  /** @private */
  private constructor() {}

  /**
   * Retrieves the singleton instance of the `Sanitization` class.
   * @returns The singleton `Sanitization` instance.
   */
  public static getInstance(): Sanitization {
    if (!Sanitization.instance) {
      Sanitization.instance = new Sanitization();
    }
    return Sanitization.instance;
  }

  /**
   * Computes the UTF-8 byte length of a string across runtimes.
   * Uses `Buffer` when present, then `TextEncoder`, and finally falls back to
   * the UTF-16 code-unit count (`s.length`) when neither is available.
   * @param s - The string to measure.
   * @returns The measured length.
   * @private
   */
  private static utf8ByteLength(s: string): number {
    if (
      typeof Buffer !== 'undefined' &&
      typeof Buffer.byteLength === 'function'
    ) {
      return Buffer.byteLength(s, 'utf8');
    }
    if (typeof TextEncoder !== 'undefined') {
      return new TextEncoder().encode(s).length;
    }
    return s.length;
  }

  /**
   * Sets or extends the list of sensitive field names for log sanitization.
   * New names are lower-cased and merged with the existing list (duplicates removed).
   * @param fields - An array of field names to add to the sensitive list.
   */
  public setSensitiveFields(fields: string[]): void {
    this.sensitiveFields = [
      ...new Set([
        ...this.sensitiveFields,
        ...fields.map((f) => f.toLowerCase()),
      ]),
    ];
    const logContext = requestContextService.createRequestContext({
      operation: 'Sanitization.setSensitiveFields',
      additionalContext: {
        newSensitiveFieldCount: this.sensitiveFields.length,
      },
    });
    logger.debug(
      'Updated sensitive fields list for log sanitization',
      logContext,
    );
  }

  /**
   * Gets a copy of the current list of sensitive field names.
   * @returns An array of sensitive field names.
   */
  public getSensitiveFields(): string[] {
    return [...this.sensitiveFields];
  }

  /**
   * Gets a pino-compliant copy of the current list of sensitive field names.
   * Hyphens and underscores are stripped from each name; names that become
   * identical after stripping (e.g. `client_secret` / `clientsecret`) are
   * returned only once.
   * @returns A pino-compliant array of sensitive field names.
   */
  public getSensitivePinoFields(): string[] {
    return [
      ...new Set(
        this.sensitiveFields.map((field) => field.replace(/[-_]/g, '')),
      ),
    ];
  }

  /**
   * Sanitizes an HTML string by removing potentially malicious tags and attributes.
   * @param input - The HTML string to sanitize.
   * @param config - Optional custom configuration for `sanitize-html`.
   * @returns The sanitized HTML string. Returns an empty string if input is falsy.
   */
  public sanitizeHtml(input: string, config?: HtmlSanitizeConfig): string {
    if (!input) return '';

    const effectiveConfig = {
      allowedTags:
        config?.allowedTags ?? this.defaultHtmlSanitizeConfig.allowedTags,
      allowedAttributes:
        config?.allowedAttributes ??
        this.defaultHtmlSanitizeConfig.allowedAttributes,
      transformTags: config?.transformTags, // Can be undefined
      preserveComments:
        config?.preserveComments ??
        this.defaultHtmlSanitizeConfig.preserveComments,
    };

    const options: sanitizeHtml.IOptions = {
      allowedTags: effectiveConfig.allowedTags,
      allowedAttributes: effectiveConfig.allowedAttributes,
      transformTags: effectiveConfig.transformTags,
    };

    if (effectiveConfig.preserveComments) {
      // Ensure allowedTags is an array before spreading.
      // NOTE(review): this relies on `sanitize-html` honoring '!--' as a tag
      // name to keep comments — confirm against the library version in use.
      const baseTags = Array.isArray(options.allowedTags)
        ? options.allowedTags
        : [];
      options.allowedTags = [...baseTags, '!--'];
    }

    return sanitizeHtml(input, options);
  }

  /**
   * Sanitizes a string based on its intended context (e.g., HTML, URL, text).
   * **Important:** `context: 'javascript'` is disallowed due to security risks.
   *
   * For `context: 'url'` the input is trimmed, then validated as an http/https
   * URL; an invalid URL is logged as a warning and yields an empty string.
   *
   * @param input - The string to sanitize.
   * @param options - Options specifying the sanitization context.
   * @returns The sanitized string. Returns an empty string if input is falsy
   *   or (for `context: 'url'`) is not a valid URL.
   * @throws {McpError} If `options.context` is 'javascript'.
   */
  public sanitizeString(
    input: string,
    options: SanitizeStringOptions = {},
  ): string {
    if (!input) return '';

    const context = options.context ?? 'text';

    switch (context) {
      case 'html': {
        const config: HtmlSanitizeConfig = {};
        if (options.allowedTags) {
          config.allowedTags = options.allowedTags;
        }
        if (options.allowedAttributes) {
          config.allowedAttributes = this.convertAttributesFormat(
            options.allowedAttributes,
          );
        }
        return this.sanitizeHtml(input, config);
      }
      case 'url': {
        // Trim BEFORE validating: the validator rejects any whitespace, so
        // trimming afterwards could never rescue a padded-but-valid URL.
        const trimmedUrl = validator.trim(input);
        if (
          !validator.isURL(trimmedUrl, {
            protocols: ['http', 'https'],
            require_protocol: true,
            require_host: true,
          })
        ) {
          logger.warning(
            'Potentially invalid URL detected during string sanitization (context: url)',
            requestContextService.createRequestContext({
              operation: 'Sanitization.sanitizeString.urlWarning',
              additionalContext: { invalidUrlAttempt: input },
            }),
          );
          return '';
        }
        return trimmedUrl;
      }
      case 'javascript':
        logger.error(
          'Attempted JavaScript sanitization via sanitizeString, which is disallowed.',
          requestContextService.createRequestContext({
            operation: 'Sanitization.sanitizeString.jsAttempt',
            additionalContext: { inputSnippet: input.substring(0, 50) },
          }),
        );
        throw new McpError(
          JsonRpcErrorCode.ValidationError,
          'JavaScript sanitization is not supported through sanitizeString due to security risks.',
        );
      case 'attribute':
      case 'text':
      default:
        // Both plain-text contexts strip every tag and attribute.
        return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
    }
  }

  /**
   * Converts attribute format for `sanitizeHtml`.
   * Currently an identity mapping: the input shape is passed through unchanged.
   * @param attrs - Attributes in `{ tagName: ['attr1'] }` format.
   * @returns Attributes in `sanitize-html` expected format.
   * @private
   */
  private convertAttributesFormat(
    attrs: Record<string, string[]>,
  ): sanitizeHtml.IOptions['allowedAttributes'] {
    return attrs;
  }

  /**
   * Sanitizes a URL string by validating its format and protocol.
   * @param input - The URL string to sanitize.
   * @param allowedProtocols - Array of allowed URL protocols. Default: `['http', 'https']`.
   * @returns The sanitized and trimmed URL string.
   * @throws {McpError} If the URL is invalid or uses a disallowed protocol.
   */
  public sanitizeUrl(
    input: string,
    allowedProtocols: string[] = ['http', 'https'],
  ): string {
    try {
      const trimmedInput = input.trim();
      if (
        !validator.isURL(trimmedInput, {
          protocols: allowedProtocols,
          require_protocol: true,
          require_host: true,
        })
      ) {
        throw new Error('Invalid URL format or protocol not in allowed list.');
      }

      // Rejected even when a caller lists them in `allowedProtocols`.
      const lowercasedInput = trimmedInput.toLowerCase();
      if (
        lowercasedInput.startsWith('javascript:') ||
        lowercasedInput.startsWith('data:') ||
        lowercasedInput.startsWith('vbscript:')
      ) {
        throw new Error(
          'Disallowed pseudo-protocol (javascript:, data:, or vbscript:) in URL.',
        );
      }
      return trimmedInput;
    } catch (error) {
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        error instanceof Error
          ? error.message
          : 'Invalid or unsafe URL provided.',
        { input },
      );
    }
  }

  /**
   * Sanitizes a file path to prevent path traversal and normalize format.
   * This method is only available in a Node.js environment.
   *
   * With `rootDir`, the input is resolved against the root, must stay inside
   * it, and is returned relative to it (`'.'` for the root itself). Without
   * `rootDir`, absolute paths are returned as-is when `allowAbsolute` is set
   * (rejected otherwise) and relative paths must not escape the current
   * working directory.
   *
   * @param input - The file path string to sanitize.
   * @param options - Options to control sanitization behavior.
   * @returns An object with the sanitized path and sanitization metadata.
   * @throws {McpError} If the path is invalid, unsafe, or method is called in a non-Node.js environment.
   */
  public sanitizePath(
    input: string,
    options: PathSanitizeOptions = {},
  ): SanitizedPathInfo {
    if (isServerless || !pathModule) {
      throw new McpError(
        JsonRpcErrorCode.InternalError,
        'File-based path sanitization is not supported in this environment.',
      );
    }
    const path = pathModule;

    const originalInput = input;
    const resolvedRootDir = options.rootDir
      ? path.resolve(options.rootDir)
      : undefined;

    const effectiveOptions: PathSanitizeOptions = {
      toPosix: options.toPosix ?? false,
      allowAbsolute: options.allowAbsolute ?? false,
      ...(resolvedRootDir && { rootDir: resolvedRootDir }),
    };

    // Converts backslashes to forward slashes only when `toPosix` is enabled.
    const applyPosix = (p: string): string =>
      effectiveOptions.toPosix ? p.replace(/\\/g, '/') : p;

    // True when `target` lies outside `base`. Comparing via `path.relative`
    // (instead of a `base + sep` prefix test) stays correct when `base` is a
    // filesystem root, whose resolved form already ends with a separator.
    const escapesBase = (base: string, target: string): boolean => {
      const rel = path.relative(base, target);
      return (
        rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)
      );
    };

    let wasAbsoluteInitially = false;

    try {
      if (!input || typeof input !== 'string')
        throw new Error('Invalid path input: must be a non-empty string.');
      if (input.includes('\0'))
        throw new Error('Path contains null byte, which is disallowed.');

      // Unify separators before normalizing so `..` segments written with
      // backslashes are collapsed too, then again afterwards because
      // `path.normalize` may re-introduce the platform separator.
      const normalized = applyPosix(path.normalize(applyPosix(input)));
      wasAbsoluteInitially = path.isAbsolute(normalized);

      let finalSanitizedPath: string;

      if (resolvedRootDir) {
        const fullPath = path.resolve(resolvedRootDir, normalized);
        if (escapesBase(resolvedRootDir, fullPath)) {
          throw new Error(
            'Path traversal detected: attempts to escape the defined root directory.',
          );
        }
        const relativeToRoot = path.relative(resolvedRootDir, fullPath);
        // `path.relative` emits platform separators; honor `toPosix` here too.
        finalSanitizedPath =
          relativeToRoot === '' ? '.' : applyPosix(relativeToRoot);
      } else if (path.isAbsolute(normalized)) {
        if (!effectiveOptions.allowAbsolute) {
          throw new Error('Absolute paths are disallowed by current options.');
        }
        finalSanitizedPath = normalized;
      } else {
        if (escapesBase(path.resolve('.'), path.resolve(normalized))) {
          throw new Error(
            'Relative path traversal detected (escapes current working directory context).',
          );
        }
        finalSanitizedPath = normalized;
      }

      return {
        sanitizedPath: finalSanitizedPath,
        originalInput,
        wasAbsolute: wasAbsoluteInitially,
        convertedToRelative:
          wasAbsoluteInitially &&
          !path.isAbsolute(finalSanitizedPath) &&
          !effectiveOptions.allowAbsolute,
        optionsUsed: effectiveOptions,
      };
    } catch (error) {
      logger.warning(
        'Path sanitization error',
        requestContextService.createRequestContext({
          operation: 'Sanitization.sanitizePath.error',
          additionalContext: {
            originalPathInput: originalInput,
            pathOptionsUsed: effectiveOptions,
            errorMessage:
              error instanceof Error ? error.message : String(error),
          },
        }),
      );
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        error instanceof Error
          ? error.message
          : 'Invalid or unsafe path provided.',
        { input: originalInput },
      );
    }
  }

  /**
   * Sanitizes a JSON string by parsing it to validate its format.
   * Optionally checks if the JSON string exceeds a maximum allowed size.
   * @template T The expected type of the parsed JSON object. Defaults to `unknown`.
   * @param input - The JSON string to sanitize/validate.
   * @param maxSize - Optional maximum allowed size of the JSON string in bytes.
   * @returns The parsed JavaScript object.
   * @throws {McpError} If input is not a string, too large, or invalid JSON.
   */
  public sanitizeJson<T = unknown>(input: string, maxSize?: number): T {
    try {
      if (typeof input !== 'string')
        throw new Error('Invalid input: expected a JSON string.');

      if (maxSize !== undefined) {
        const actualSize = Sanitization.utf8ByteLength(input);
        if (actualSize > maxSize) {
          throw new McpError(
            JsonRpcErrorCode.ValidationError,
            `JSON string exceeds maximum allowed size of ${maxSize} bytes.`,
            { actualSize, maxSize },
          );
        }
      }

      return JSON.parse(input) as T;
    } catch (error) {
      if (error instanceof McpError) throw error;

      // `input` may not be a string at runtime (that is one of the failures
      // reported here), so never touch `.length` without checking first —
      // otherwise a null/undefined input escapes as a raw TypeError.
      const rawInput: unknown = input;
      let inputPreview: string;
      if (typeof rawInput === 'string') {
        inputPreview =
          rawInput.length > 100
            ? `${rawInput.substring(0, 100)}...`
            : rawInput;
      } else {
        inputPreview = `[non-string input: ${
          rawInput === null ? 'null' : typeof rawInput
        }]`;
      }

      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        error instanceof Error ? error.message : 'Invalid JSON format.',
        { inputPreview },
      );
    }
  }

  /**
   * Validates and sanitizes a numeric input, converting strings to numbers.
   * Clamps the number to `min`/`max` if provided.
   * @param input - The number or string to validate and sanitize.
   * @param min - Minimum allowed value (inclusive).
   * @param max - Maximum allowed value (inclusive).
   * @returns The sanitized (and potentially clamped) number.
   * @throws {McpError} If input is not a valid number, NaN, or Infinity.
   */
  public sanitizeNumber(
    input: number | string,
    min?: number,
    max?: number,
  ): number {
    let value: number;

    if (typeof input === 'string') {
      const trimmedInput = input.trim();
      if (trimmedInput === '' || !validator.isNumeric(trimmedInput)) {
        throw new McpError(
          JsonRpcErrorCode.ValidationError,
          'Invalid number format: input is empty or not numeric.',
          { input },
        );
      }
      value = parseFloat(trimmedInput);
    } else if (typeof input === 'number') {
      value = input;
    } else {
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        'Invalid input type: expected number or string.',
        { input: String(input) },
      );
    }

    // Rejects NaN as well as +/-Infinity.
    if (!Number.isFinite(value)) {
      throw new McpError(
        JsonRpcErrorCode.ValidationError,
        'Invalid number value (NaN or Infinity).',
        { input },
      );
    }

    let clamped = false;
    const originalValueForLog = value;
    if (min !== undefined && value < min) {
      value = min;
      clamped = true;
    }
    if (max !== undefined && value > max) {
      value = max;
      clamped = true;
    }

    if (clamped) {
      logger.debug(
        'Number clamped to range.',
        requestContextService.createRequestContext({
          operation: 'Sanitization.sanitizeNumber.clamped',
          additionalContext: {
            originalInput: String(input),
            parsedValue: originalValueForLog,
            minValue: min,
            maxValue: max,
            clampedValue: value,
          },
        }),
      );
    }
    return value;
  }

  /**
   * Sanitizes input for logging by redacting sensitive fields.
   * Creates a deep clone and replaces the value of every own property whose
   * key matches `this.sensitiveFields` with "[REDACTED]". Matching is
   * case-insensitive and token-based (see `createSensitiveKeyMatcher`), not a
   * raw substring search.
   *
   * It uses `structuredClone` if available for a high-fidelity deep clone.
   * If `structuredClone` is not available (e.g., in older Node.js environments),
   * it falls back to `JSON.parse(JSON.stringify(input))`. This fallback has limitations:
   * - `Date` objects are converted to ISO date strings.
   * - `undefined` values within objects are removed.
   * - `Map`, `Set`, `RegExp` objects are converted to empty objects (`{}`).
   * - Functions are removed.
   * - `BigInt` values will throw an error during `JSON.stringify` unless a `toJSON` method is provided.
   * - Circular references will cause `JSON.stringify` to throw an error.
   *
   * @param input - The input data to sanitize for logging.
   * @returns A sanitized (deep cloned) version of the input, safe for logging.
   *   Returns original input if not object/array, or "[Log Sanitization Failed]" on error.
   */
  public sanitizeForLogging(input: unknown): unknown {
    try {
      if (!input || typeof input !== 'object') return input;

      const clonedInput: unknown =
        typeof globalThis.structuredClone === 'function'
          ? globalThis.structuredClone(input)
          : JSON.parse(JSON.stringify(input));
      this.redactSensitiveFields(clonedInput);
      return clonedInput;
    } catch (error) {
      logger.error(
        'Error during log sanitization, returning placeholder.',
        requestContextService.createRequestContext({
          operation: 'Sanitization.sanitizeForLogging.error',
          additionalContext: {
            errorMessage:
              error instanceof Error ? error.message : String(error),
          },
        }),
      );
      return '[Log Sanitization Failed]';
    }
  }

  /**
   * Builds a predicate that decides whether an object key is sensitive, based
   * on a snapshot of `this.sensitiveFields`.
   *
   * A key is sensitive when any of these hold:
   * 1. the whole key, lower-cased with non-alphanumerics removed, equals a
   *    sensitive term normalized the same way (`API-Key` vs `apiKey`);
   * 2. one of its words (split on camelCase, whitespace, `_`, `-`) equals a
   *    lower-cased sensitive term (`userToken` vs `token`);
   * 3. a run of adjacent words, concatenated, equals a normalized sensitive
   *    term (`user_api_key` or `userAPIKey` vs `apiKey`).
   *
   * @returns A function mapping a key to `true` when it must be redacted.
   * @private
   */
  private createSensitiveKeyMatcher(): (key: string) => boolean {
    const normalize = (str: string): string =>
      str.toLowerCase().replace(/[^a-z0-9]/g, '');

    const normalizedTerms = new Set(
      this.sensitiveFields.map((f) => normalize(f)).filter(Boolean),
    );
    const wordTerms = new Set(
      this.sensitiveFields.map((f) => f.toLowerCase()).filter(Boolean),
    );

    // Upper bound used to stop growing a word run that can no longer match.
    let longestTermLength = 0;
    for (const term of normalizedTerms) {
      longestTermLength = Math.max(longestTermLength, term.length);
    }

    return (key: string): boolean => {
      if (normalizedTerms.has(normalize(key))) return true;

      // Split into words for token-based matching (camelCase, snake_case, kebab-case)
      const keyWords = key
        .replace(/([A-Z])/g, ' $1')
        .toLowerCase()
        .split(/[\s_-]+/)
        .filter(Boolean);
      if (keyWords.some((w) => wordTerms.has(w))) return true;

      // Multi-word terms embedded in a longer key: try every contiguous run.
      const tokens = keyWords.map((w) => normalize(w)).filter(Boolean);
      for (let start = 0; start < tokens.length; start++) {
        let joined = '';
        for (const token of tokens.slice(start)) {
          joined += token;
          if (joined.length > longestTermLength) break;
          if (normalizedTerms.has(joined)) return true;
        }
      }
      return false;
    };
  }

  /**
   * Recursively redacts sensitive fields in an object or array in place.
   * Only own enumerable string keys are inspected, so entries held inside
   * `Map`/`Set` instances are not visited.
   * @param obj - The object or array to redact.
   * @param isSensitiveKey - Key predicate; built once per top-level call and
   *   shared by the recursion so the lookup sets are not rebuilt per object.
   * @param visited - Objects already processed; prevents unbounded recursion
   *   on cyclic graphs (which `structuredClone` preserves).
   * @private
   */
  private redactSensitiveFields(
    obj: unknown,
    isSensitiveKey: (key: string) => boolean = this.createSensitiveKeyMatcher(),
    visited: WeakSet<object> = new WeakSet<object>(),
  ): void {
    if (!obj || typeof obj !== 'object') return;
    if (visited.has(obj)) return;
    visited.add(obj);

    if (Array.isArray(obj)) {
      for (const item of obj) {
        this.redactSensitiveFields(item, isSensitiveKey, visited);
      }
      return;
    }

    const record = obj as Record<string, unknown>;
    for (const key of Object.keys(record)) {
      if (isSensitiveKey(key)) {
        record[key] = '[REDACTED]';
      } else {
        // Non-object values are ignored by the guard at the top.
        this.redactSensitiveFields(record[key], isSensitiveKey, visited);
      }
    }
  }
}

/**
 * Singleton instance of the `Sanitization` class.
 * Use this for all input sanitization tasks.
 */
export const sanitization = Sanitization.getInstance();

/**
 * Convenience function calling `sanitization.sanitizeForLogging`.
 * @param input - The input data to sanitize.
 * @returns A sanitized version of the input, safe for logging.
 */
export const sanitizeInputForLogging = (input: unknown): unknown =>
  sanitization.sanitizeForLogging(input);

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cyanheads/git-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.