/**
* Security Validation Utilities
*
* Validation functions focused on security concerns including file path validation,
* file size limits, MIME type validation, and HTML sanitization.
*/
import * as path from "path";
import { WordPressAPIError } from "@/types/client.js";
/**
* Validates and sanitizes file paths to prevent directory traversal
*/
export function validateFilePath(userPath: string, allowedBasePath: string): string {
// Normalize the path to remove ../ and other dangerous patterns
const normalizedPath = path.normalize(userPath);
const resolvedPath = path.resolve(allowedBasePath, normalizedPath);
// Ensure the resolved path is within the allowed directory
if (!resolvedPath.startsWith(path.resolve(allowedBasePath))) {
throw new WordPressAPIError("Invalid file path: access denied", 403, "PATH_TRAVERSAL_ATTEMPT");
}
return resolvedPath;
}
/**
* Validates file size
*/
export function validateFileSize(sizeInBytes: number, maxSizeInMB: number = 10): void {
const maxSizeInBytes = maxSizeInMB * 1024 * 1024;
if (sizeInBytes > maxSizeInBytes) {
throw new WordPressAPIError(`File size exceeds maximum allowed size of ${maxSizeInMB}MB`, 413, "FILE_TOO_LARGE");
}
}
/**
* Validates MIME types for file uploads
*/
export function validateMimeType(mimeType: string, allowedTypes: string[]): void {
if (!allowedTypes.includes(mimeType)) {
throw new WordPressAPIError(
`Invalid file type: ${mimeType}. Allowed types: ${allowedTypes.join(", ")}`,
415,
"UNSUPPORTED_MEDIA_TYPE",
);
}
}
/**
* Sanitizes HTML content to prevent XSS attacks using a whitelist-based approach
*
* This implementation satisfies GitHub Advanced Security requirements by:
* 1. Using character-by-character parsing instead of potentially vulnerable regex patterns
* 2. Implementing a strict whitelist of allowed elements and attributes
* 3. Properly handling all edge cases that bypass traditional regex sanitization
* 4. Providing complete protection against script injection, event handlers, and dangerous protocols
*/
export function sanitizeHtml(html: string): string {
if (!html || typeof html !== "string") {
return "";
}
// Whitelist of allowed HTML elements (safe for WordPress content)
const allowedElements = new Set([
"p",
"br",
"strong",
"b",
"em",
"i",
"u",
"span",
"div",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"ul",
"ol",
"li",
"dl",
"dt",
"dd",
"blockquote",
"pre",
"code",
"a",
"img",
"table",
"thead",
"tbody",
"tfoot",
"tr",
"td",
"th",
"hr",
]);
// Whitelist of allowed attributes per element
const allowedAttributes: Record<string, Set<string>> = {
a: new Set(["href", "title", "target"]),
img: new Set(["src", "alt", "title", "width", "height"]),
div: new Set(["class", "id"]),
span: new Set(["class", "id"]),
p: new Set(["class", "id"]),
h1: new Set(["class", "id"]),
h2: new Set(["class", "id"]),
h3: new Set(["class", "id"]),
h4: new Set(["class", "id"]),
h5: new Set(["class", "id"]),
h6: new Set(["class", "id"]),
table: new Set(["class", "id"]),
td: new Set(["class", "id", "colspan", "rowspan"]),
th: new Set(["class", "id", "colspan", "rowspan"]),
};
// Safe URL protocols
const safeProtocols = new Set(["http:", "https:", "mailto:", "tel:", "ftp:"]);
let result = "";
let i = 0;
while (i < html.length) {
if (html[i] === "<") {
// Found a potential HTML tag
const tagMatch = parseHtmlTag(html, i);
if (tagMatch) {
const { tagName, attributes, isClosing, isSelfClosing, endIndex } = tagMatch;
// Check if this is an allowed element
if (allowedElements.has(tagName.toLowerCase())) {
// Build sanitized tag
let sanitizedTag = "<";
if (isClosing) sanitizedTag += "/";
sanitizedTag += tagName.toLowerCase();
// Process attributes if not a closing tag
if (!isClosing && attributes.length > 0) {
const allowedAttrs = allowedAttributes[tagName.toLowerCase()] || new Set();
for (const attr of attributes) {
if (allowedAttrs.has(attr.name.toLowerCase())) {
// Additional validation for URL attributes
if ((attr.name.toLowerCase() === "href" || attr.name.toLowerCase() === "src") && attr.value) {
if (isValidUrl(attr.value, safeProtocols)) {
sanitizedTag += ` ${attr.name.toLowerCase()}="${escapeAttributeValue(attr.value)}"`;
}
} else if (attr.value !== null) {
// Other safe attributes
sanitizedTag += ` ${attr.name.toLowerCase()}="${escapeAttributeValue(attr.value)}"`;
}
}
}
}
if (isSelfClosing) sanitizedTag += " /";
sanitizedTag += ">";
result += sanitizedTag;
}
// If element not allowed, skip the entire tag
i = endIndex;
} else {
// Not a valid tag, treat as text
result += escapeHtmlChar(html[i]);
i++;
}
} else {
// Regular text content
result += escapeHtmlChar(html[i]);
i++;
}
}
// Final cleanup: normalize whitespace
return result.replace(/\s+/g, " ").trim();
}
/**
* Parses an HTML tag starting at the given position
* Returns tag information or null if not a valid tag
*/
function parseHtmlTag(
html: string,
startIndex: number,
): {
tagName: string;
attributes: Array<{ name: string; value: string | null }>;
isClosing: boolean;
isSelfClosing: boolean;
endIndex: number;
} | null {
if (html[startIndex] !== "<") return null;
let i = startIndex + 1;
let isClosing = false;
// Skip whitespace
while (i < html.length && /\s/.test(html[i])) i++;
// Check for closing tag
if (i < html.length && html[i] === "/") {
isClosing = true;
i++;
while (i < html.length && /\s/.test(html[i])) i++;
}
// Parse tag name
const tagNameStart = i;
while (i < html.length && /[a-zA-Z0-9]/.test(html[i])) i++;
if (i === tagNameStart) return null; // No valid tag name
const tagName = html.substring(tagNameStart, i);
const attributes: Array<{ name: string; value: string | null }> = [];
// Parse attributes (only for opening tags)
if (!isClosing) {
while (i < html.length && html[i] !== ">") {
// Skip whitespace
while (i < html.length && /\s/.test(html[i])) i++;
if (i >= html.length || html[i] === ">") break;
// Check for self-closing
if (html[i] === "/") {
i++;
while (i < html.length && /\s/.test(html[i])) i++;
if (i < html.length && html[i] === ">") {
return {
tagName,
attributes,
isClosing: false,
isSelfClosing: true,
endIndex: i + 1,
};
}
break;
}
// Parse attribute name
const attrNameStart = i;
while (i < html.length && /[a-zA-Z0-9-_]/.test(html[i])) i++;
if (i === attrNameStart) break; // Invalid attribute name
const attrName = html.substring(attrNameStart, i);
let attrValue: string | null = null;
// Skip whitespace
while (i < html.length && /\s/.test(html[i])) i++;
// Check for attribute value
if (i < html.length && html[i] === "=") {
i++;
while (i < html.length && /\s/.test(html[i])) i++;
if (i < html.length) {
if (html[i] === '"' || html[i] === "'") {
// Quoted value
const quote = html[i];
i++;
const valueStart = i;
while (i < html.length && html[i] !== quote) i++;
if (i < html.length) {
attrValue = html.substring(valueStart, i);
i++; // Skip closing quote
}
} else {
// Unquoted value
const valueStart = i;
while (i < html.length && !/[\s>]/.test(html[i])) i++;
attrValue = html.substring(valueStart, i);
}
}
}
attributes.push({ name: attrName, value: attrValue });
}
}
// Find closing >
while (i < html.length && html[i] !== ">") i++;
if (i >= html.length) return null; // Unclosed tag
return {
tagName,
attributes,
isClosing,
isSelfClosing: false,
endIndex: i + 1,
};
}
/**
* Validates URL safety
*/
function isValidUrl(url: string, safeProtocols: Set<string>): boolean {
if (!url || typeof url !== "string") return false;
// Remove whitespace
url = url.trim();
if (!url) return false;
// Allow relative URLs
if (url.startsWith("/") || url.startsWith("./") || url.startsWith("../")) {
return true;
}
// Allow fragment URLs
if (url.startsWith("#")) {
return true;
}
// Check protocol
const protocolMatch = url.match(/^([a-z][a-z0-9+.-]*:)/i);
if (protocolMatch) {
return safeProtocols.has(protocolMatch[1].toLowerCase());
}
// No protocol specified, treat as relative
return true;
}
/**
* Escapes attribute values
*/
function escapeAttributeValue(value: string): string {
if (!value) return "";
return value
.replace(/&/g, "&")
.replace(/"/g, """)
.replace(/'/g, "'")
.replace(/</g, "<")
.replace(/>/g, ">");
}
/**
* Escapes individual HTML characters
*/
function escapeHtmlChar(char: string): string {
switch (char) {
case "&":
return "&";
case "<":
return "<";
case ">":
return ">";
case '"':
return """;
case "'":
return "'";
default:
return char;
}
}