/**
* Input validation and sanitization utilities
*
* Provides security-focused validation for:
* - String length limits (prevent resource exhaustion)
* - Filename sanitization (prevent path traversal)
* - Path validation (prevent directory escapes)
* - Regex safety (prevent ReDoS attacks)
*/
import path from 'path';
// ============================================================================
// Maximum Length Constants
// ============================================================================
/**
 * Per-field upper bounds on input length, keyed by field kind.
 *
 * Declared `as const` so every entry keeps its literal numeric type.
 * NOTE(review): presumably these are passed as the `maxLength` argument of
 * the string validators in this module — the call sites are not visible here.
 */
export const MAX_LENGTHS = {
  summary: 10_000,
  query: 1_000,
  path: 4_096,
  topic: 500,
  tag: 100,
  projectName: 200,
  code: 100_000,
  description: 5_000,
  command: 2_000,
} as const;
// ============================================================================
// String Validation
// ============================================================================
/**
 * Truncate a string to at most `maxLength` UTF-16 code units.
 *
 * - `undefined` (or a runtime `null`) yields `undefined`.
 * - A negative `maxLength` is treated as 0, so the result is an empty string
 *   rather than the input with characters trimmed off its end.
 * - A fractional `maxLength` is rounded down.
 * - A `NaN` `maxLength` leaves the string unchanged.
 * - The cut never lands between the two halves of a surrogate pair; in that
 *   case the result is one code unit shorter than `maxLength`.
 *
 * @param str - The string to truncate.
 * @param maxLength - Maximum number of UTF-16 code units to keep.
 * @returns The (possibly shortened) string, or `undefined` if no string was given.
 */
export function truncateString(
  str: string | undefined,
  maxLength: number
): string | undefined {
  // `== null` also catches a runtime `null` that slipped past the static type.
  if (str == null) {
    return undefined;
  }

  // slice(0, -n) would count from the end of the string, so clamp at zero.
  const limit = Math.max(0, maxLength);
  if (!(str.length > limit)) {
    // Also taken when `limit` is NaN or Infinity: nothing to cut.
    return str;
  }

  let end = Math.floor(limit);
  if (end > 0) {
    const before = str.charCodeAt(end - 1);
    const after = str.charCodeAt(end);
    const beforeIsHighSurrogate = before >= 0xd800 && before <= 0xdbff;
    const afterIsLowSurrogate = after >= 0xdc00 && after <= 0xdfff;
    // Back off one unit instead of leaving a lone high surrogate behind.
    if (beforeIsHighSurrogate && afterIsLowSurrogate) {
      end -= 1;
    }
  }
  return str.slice(0, end);
}
/**
 * Enforce presence (optionally) and a maximum length on a string field.
 *
 * @param str - The value to validate.
 * @param fieldName - Name used in the error message.
 * @param maxLength - Length limit handed to `truncateString`.
 * @param required - When true, a missing, empty or whitespace-only value is rejected.
 * @returns The result of `truncateString(str, maxLength)`.
 * @throws Error if `required` is true and the value has no non-whitespace content.
 */
export function validateString(
  str: string | undefined,
  fieldName: string,
  maxLength: number,
  required = false
): string | undefined {
  if (required) {
    const hasContent = str ? str.trim().length > 0 : false;
    if (!hasContent) {
      throw new Error(`${fieldName} is required and cannot be empty`);
    }
  }

  const truncated = truncateString(str, maxLength);
  return truncated;
}
/**
 * Bound both the number of entries and the length of each entry in a string list.
 *
 * Only the first `maxItems` entries are considered. Each one is passed through
 * `truncateString`, and entries that come back undefined or empty are dropped.
 *
 * @param arr - The list to validate; a falsy value is returned unchanged.
 * @param itemMaxLength - Length limit applied to every entry.
 * @param maxItems - Maximum number of leading entries to examine (default 100).
 * @returns A new array of non-empty, truncated strings, or the falsy input itself.
 */
export function validateStringArray(
  arr: string[] | undefined,
  itemMaxLength: number,
  maxItems = 100
): string[] | undefined {
  if (!arr) {
    return arr;
  }

  const kept: string[] = [];
  for (const raw of arr.slice(0, maxItems)) {
    const shortened = truncateString(raw, itemMaxLength);
    if (shortened !== undefined && shortened.length > 0) {
      kept.push(shortened);
    }
  }
  return kept;
}
// ============================================================================
// Filename Sanitization
// ============================================================================
// Characters that are dangerous in filenames across platforms:
// < > : " / \ | ? * and the ASCII control characters 0x00-0x1f.
const DANGEROUS_FILENAME_CHARS = /[<>:"/\\|?*\x00-\x1f]/g;
/**
 * Sanitize a filename so it is a single, harmless path component.
 *
 * Steps, in order:
 * 1. Replace `< > : " / \ | ? *` and control characters with `_`.
 * 2. Collapse whitespace runs to one space and trim both ends.
 * 3. Replace a leading run of dots with `_` (no hidden files, no `.` / `..`).
 * 4. Cut to 255 UTF-16 code units.
 * 5. Strip trailing dots and spaces (problematic on Windows).
 *
 * Whitespace is normalised BEFORE the dot checks so padding cannot hide a
 * leading or trailing dot (e.g. `" .. "`), and the trailing strip runs AFTER
 * the length cut so the cut itself cannot expose a new trailing dot or space.
 *
 * @param filename - Untrusted filename.
 * @returns The sanitized name, or `'unnamed'` if nothing usable remains.
 */
export function sanitizeFilename(filename: string): string {
  if (!filename) {
    return 'unnamed';
  }

  const cleaned = filename
    .replace(DANGEROUS_FILENAME_CHARS, '_') // Replace dangerous chars with underscore
    .replace(/\s+/g, ' ') // Normalize whitespace
    .trim()
    .replace(/^\.+/, '_') // Don't start with dots (hidden files, . and ..)
    .slice(0, 255) // Max filename length on most filesystems
    .replace(/[. ]+$/, ''); // Don't end with dots or spaces (Windows issue)

  // Fallback if the string became empty
  return cleaned || 'unnamed';
}
/**
 * Sanitize one path component (a single directory name or filename).
 *
 * Runs the component through `sanitizeFilename`, then replaces a result of
 * exactly `..` with `_` so it cannot be used for parent-directory traversal.
 *
 * @param component - Untrusted path component.
 * @returns The sanitized component.
 */
export function sanitizePathComponent(component: string): string {
  const cleaned = sanitizeFilename(component);
  if (cleaned === '..') {
    return '_';
  }
  return cleaned;
}
// ============================================================================
// Path Validation
// ============================================================================
/**
 * Validate that a target path is within a base directory.
 * Prevents path traversal attacks (e.g., ../../../etc/passwd).
 *
 * `targetPath` is resolved against `basePath`, then the path from the base to
 * the target is computed with `path.relative`. The target is accepted when
 * that relative path does not climb out of the base. A prefix comparison of
 * `base + path.sep` would wrongly reject every child when the base is the
 * filesystem root, which is why the relative form is used.
 *
 * The check is purely lexical: symbolic links are not resolved.
 *
 * @param targetPath - Path to check; relative paths are resolved against `basePath`.
 * @param basePath - Directory the target must stay inside.
 * @returns The absolute, normalized target path.
 * @throws Error if path traversal is detected
 */
export function validatePathWithinBase(
  targetPath: string,
  basePath: string
): string {
  const resolvedTarget = path.resolve(basePath, targetPath);
  const resolvedBase = path.resolve(basePath);
  const fromBase = path.relative(resolvedBase, resolvedTarget);

  const escapesBase =
    fromBase === '..' ||
    // Match a whole `..` segment so a name like `..foo` is still allowed.
    fromBase.startsWith('..' + path.sep) ||
    // On Windows a target on another drive yields an absolute "relative" path.
    path.isAbsolute(fromBase);

  if (escapesBase) {
    throw new Error('Path traversal attempt detected');
  }
  return resolvedTarget;
}
/**
 * Safely join path segments onto a base directory, ensuring the result stays
 * within that directory.
 *
 * Every segment is passed through `sanitizePathComponent`. The sanitized
 * segments are then joined on their own and handed to `validatePathWithinBase`,
 * which resolves them against `basePath`. The base is deliberately NOT included
 * in the join: the validator resolves its target relative to `basePath`, so a
 * relative base would otherwise be applied twice (`<cwd>/base/base/...`).
 *
 * @param basePath - Directory the result must stay inside.
 * @param segments - Untrusted path components to append.
 * @returns The absolute path of the joined target.
 * @throws Error if resulting path would escape base
 */
export function safePathJoin(basePath: string, ...segments: string[]): string {
  // Sanitize each segment
  const sanitizedSegments = segments.map((segment) =>
    sanitizePathComponent(segment)
  );
  // path.join() with no arguments yields '.', which resolves to the base itself.
  const relativeTarget = path.join(...sanitizedSegments);
  return validatePathWithinBase(relativeTarget, basePath);
}
// ============================================================================
// Regex Safety
// ============================================================================
/**
 * Backslash-escape regex metacharacters so the text matches literally.
 *
 * Escapes: . * + ? ^ $ { } ( ) | [ ] \
 * Use this when building a RegExp from user input.
 *
 * @param str - Text to escape.
 * @returns The text with each metacharacter prefixed by a backslash.
 */
export function escapeRegex(str: string): string {
  return str.replace(
    /[.*+?^${}()|[\]\\]/g,
    (metaChar) => `\\${metaChar}`
  );
}
// Regex fragments associated with catastrophic backtracking (ReDoS).
// They are plain strings compared by substring match, not compiled regexes.
const DANGEROUS_REGEX_PATTERN_STRINGS = [
  '(.+)\\1{5,}', // Excessive repetition of capturing groups
  '(\\w+)+$', // Nested quantifiers at end
  '([a-zA-Z]+)*$', // Nested quantifiers with character classes
];
/**
 * Build a RegExp that matches user input as literal text.
 *
 * The input is rejected (null) when it is longer than 100 characters or when
 * it contains one of the `DANGEROUS_REGEX_PATTERN_STRINGS` fragments verbatim.
 * Otherwise it is escaped with `escapeRegex` and compiled with `flags`.
 *
 * @param pattern - User-supplied text to match.
 * @param flags - RegExp flags (default `'i'`).
 * @returns The compiled RegExp, or null if the input was rejected or the
 *          RegExp constructor threw (for example on invalid flags).
 */
export function createSafeRegex(
  pattern: string,
  flags = 'i'
): RegExp | null {
  // Length is checked first; the deny-list is scanned on the raw, unescaped input.
  if (
    pattern.length > 100 ||
    DANGEROUS_REGEX_PATTERN_STRINGS.some((fragment) => pattern.includes(fragment))
  ) {
    return null;
  }

  let compiled: RegExp | null;
  try {
    // Escaping makes the whole pattern literal text
    compiled = new RegExp(escapeRegex(pattern), flags);
  } catch {
    compiled = null;
  }
  return compiled;
}
/**
 * Build a global, case-insensitive RegExp that matches `word` literally.
 *
 * @param word - Text to search for; it is escaped with `escapeRegex`.
 * @returns The compiled RegExp (flags `gi`), or null when `word` is empty,
 *          longer than 100 characters, or compilation throws.
 */
export function createWordMatchRegex(word: string): RegExp | null {
  if (!word) {
    return null;
  }
  if (word.length > 100) {
    return null;
  }

  let matcher: RegExp | null;
  try {
    matcher = new RegExp(escapeRegex(word), 'gi');
  } catch {
    matcher = null;
  }
  return matcher;
}
// ============================================================================
// Input Validators for Tool Arguments
// ============================================================================
/**
 * Validate an ISO date string (YYYY-MM-DD), optionally followed by a time part
 * of the form `THH:MM:SS`, with optional `.mmm` milliseconds and optional `Z`.
 *
 * Three checks are applied:
 * 1. The string matches the expected shape.
 * 2. The year/month/day triple names a real calendar day. This is verified
 *    explicitly because some engines roll an out-of-range day such as
 *    `2023-02-30` over into the next month instead of rejecting it.
 * 3. `new Date()` can parse the whole string, which covers the time part.
 *
 * @param dateStr - Candidate date string.
 * @returns true only if all three checks pass.
 */
export function isValidISODate(dateStr: string | undefined): boolean {
  if (!dateStr) return false;

  // Same accepted shape as before, with the date fields captured.
  const isoDateRegex =
    /^(\d{4})-(\d{2})-(\d{2})(T\d{2}:\d{2}:\d{2}(\.\d{3})?Z?)?$/;
  const match = isoDateRegex.exec(dateStr);
  if (match === null) return false;

  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);

  // Round-trip through a UTC date: any overflow changes one of the fields.
  // setUTCFullYear is used because Date.UTC remaps years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealCalendarDay =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealCalendarDay) return false;

  const date = new Date(dateStr);
  return !Number.isNaN(date.getTime());
}
/**
 * Type guard: true only for a number that is an integer greater than zero.
 *
 * @param value - Any value.
 * @returns Whether `value` is a positive integer; NaN, Infinity, zero,
 *          negatives, fractions and non-numbers all yield false.
 */
export function isPositiveInteger(value: unknown): value is number {
  if (typeof value !== 'number') {
    return false;
  }
  return value > 0 && Number.isInteger(value);
}
/**
 * Clamp a number to the inclusive range [min, max].
 *
 * A missing value (`undefined`, or a runtime `null`) and `NaN` both yield
 * `defaultValue`. NaN is handled explicitly because `Math.min`/`Math.max`
 * propagate it, which would let an unclamped NaN escape to the caller.
 * `defaultValue` is returned as given; it is not itself clamped.
 *
 * @param value - The number to clamp.
 * @param min - Lower bound.
 * @param max - Upper bound.
 * @param defaultValue - Returned when `value` is missing or NaN.
 * @returns `value` limited to [min, max], or `defaultValue`.
 */
export function clampNumber(
  value: number | undefined,
  min: number,
  max: number,
  defaultValue: number
): number {
  // `== null` matches both undefined and a runtime null.
  if (value == null || Number.isNaN(value)) {
    return defaultValue;
  }
  return Math.max(min, Math.min(max, value));
}