Skip to main content
Glama

DollhouseMCP

by DollhouseMCP
ElementFormatter.ts • 24.3 kB
/**
 * Element Formatter/Cleaner Tool
 *
 * Fixes common issues with malformed DollhouseMCP elements:
 * - Escaped newlines (\n instead of actual line breaks)
 * - Malformed metadata (embedded in content instead of top-level)
 * - Broken YAML structure
 * - Makes elements human-readable and editable
 *
 * FIXES IMPLEMENTED (Issue #1190):
 * 1. CRITICAL: Unescapes newline characters for readability
 * 2. HIGH: Extracts embedded metadata to proper YAML structure
 * 3. ENHANCEMENT: Formats YAML for consistency and readability
 */

import { promises as fs } from 'node:fs';
import type { Stats } from 'node:fs';
import * as path from 'node:path';
import * as yaml from 'js-yaml';
import { logger } from '../utils/logger.js';
import { ElementType } from '../portfolio/types.js';
import { SecureYamlParser } from '../security/secureYamlParser.js';
import { SecurityMonitor } from '../security/securityMonitor.js';

// Security: Maximum file size for processing (10MB)
const MAX_FILE_SIZE = 10 * 1024 * 1024;

// Directory name (lower-case) -> element type. A Map is used instead of a plain
// object so that path segments such as "constructor" or "tostring" cannot resolve
// to inherited Object.prototype members.
const ELEMENT_TYPE_BY_DIRECTORY: ReadonlyMap<string, ElementType> = new Map<string, ElementType>([
  ['personas', ElementType.PERSONA],
  ['skills', ElementType.SKILL],
  ['templates', ElementType.TEMPLATE],
  ['agents', ElementType.AGENT],
  ['memories', ElementType.MEMORY],
  ['ensembles', ElementType.ENSEMBLE]
]);

// Character following a backslash -> the character the two-character escape stands for.
const ESCAPE_REPLACEMENTS: Readonly<Record<string, string>> = {
  n: '\n',
  r: '\r',
  t: '\t',
  '\\': '\\'
};

// Note: Direct auditLog removed - using SecurityMonitor directly for all audit logging

export interface ElementFormatterOptions {
  /** Whether to create backup files before formatting */
  backup?: boolean;
  /** Whether to fix files in place or create new files */
  inPlace?: boolean;
  /** Whether to validate YAML after formatting */
  validate?: boolean;
  /** Custom output directory for formatted files */
  outputDir?: string;
  /** Maximum file size to process (bytes) */
  maxFileSize?: number;
}

export interface FormatterResult {
  success: boolean;
  filePath: string;
  issues: string[];
  fixed: string[];
  error?: string;
  backupPath?: string;
}

export class ElementFormatter {
  private readonly options: Required<ElementFormatterOptions>;

  /**
   * @param options Formatter settings. Defaults: `backup` true, `inPlace` false,
   *   `validate` true, `outputDir` '' (unset), `maxFileSize` 10MB.
   */
  constructor(options: ElementFormatterOptions = {}) {
    this.options = {
      backup: options.backup ?? true,
      inPlace: options.inPlace ?? false,
      validate: options.validate ?? true,
      outputDir: options.outputDir ?? '',
      maxFileSize: options.maxFileSize ?? MAX_FILE_SIZE
    };
  }

  // Note: validateYamlContent removed - SecureYamlParser handles all validation internally

  /**
   * Normalize Unicode in user input
   *
   * FIX: MEDIUM PRIORITY - Normalizes Unicode to prevent homograph attacks
   */
  private normalizeUnicode(input: string): string {
    // Use NFC (Canonical Decomposition, followed by Canonical Composition)
    return input.normalize('NFC');
  }

  /**
   * Format a single element file.
   *
   * Pipeline: size check -> read -> format -> optional validation -> optional
   * backup -> write. This method does not throw: any error raised along the way
   * is recorded on the returned result (`error` / `issues`) with `success: false`.
   *
   * @param filePath Path of the element file to format.
   * @returns A result describing the issues found and the fixes applied.
   */
  async formatFile(filePath: string): Promise<FormatterResult> {
    // FIX: MEDIUM - Normalize Unicode in file path
    filePath = this.normalizeUnicode(filePath);

    const result: FormatterResult = {
      success: false,
      filePath,
      issues: [],
      fixed: []
    };

    try {
      // Check file size
      const stats = await this.validateFileSize(filePath, result);
      if (stats === null) return result;

      // Read and normalize content
      const content = await this.readAndNormalizeFile(filePath, stats);

      // Format content
      const formatted = await this.formatContent(filePath, content, result);

      // Validate if needed
      if (!await this.validateFormattedContent(formatted, filePath, result)) {
        return result;
      }

      // Create backup if requested
      await this.createBackupIfNeeded(filePath, result);

      // Write formatted content
      await this.writeFormattedFile(filePath, formatted, result);

      result.success = true;
      result.fixed.push(`Formatted file written to ${this.getOutputPath(filePath)}`);
    } catch (error) {
      this.handleFormatError(error, result, filePath);
    }

    return result;
  }

  /**
   * Check the file against the configured size limit.
   *
   * @returns The file's stats, or `null` (with `result.error` / `result.issues`
   *   populated) when the file is larger than `options.maxFileSize`.
   */
  private async validateFileSize(filePath: string, result: FormatterResult): Promise<Stats | null> {
    const stats = await fs.stat(filePath);

    if (stats.size > this.options.maxFileSize) {
      result.error = `File size (${stats.size} bytes) exceeds maximum allowed (${this.options.maxFileSize} bytes)`;
      result.issues.push('File too large for processing');
      return null;
    }

    return stats;
  }

  /**
   * Read the file as UTF-8, NFC-normalize its content and log the access.
   *
   * @param stats Stats of the file; only `size` is used, for the log message.
   */
  private async readAndNormalizeFile(filePath: string, stats: Stats): Promise<string> {
    let content = await fs.readFile(filePath, 'utf-8');
    content = this.normalizeUnicode(content);

    SecurityMonitor.logSecurityEvent({
      type: 'FILE_COPIED',
      severity: 'LOW',
      source: 'ElementFormatter',
      details: `Processing file: ${filePath} (${stats.size} bytes)`
    });

    return content;
  }

  /**
   * Format content based on element type: memories are treated as plain YAML,
   * everything else as markdown with YAML frontmatter.
   */
  private async formatContent(filePath: string, content: string, result: FormatterResult): Promise<string> {
    const elementType = this.detectElementType(filePath);

    if (elementType === ElementType.MEMORY) {
      return await this.formatMemory(content, filePath, result);
    } else {
      return await this.formatStandardElement(content, result);
    }
  }

  /**
   * Validate formatted content if validation is enabled.
   *
   * @returns `true` when validation is disabled or parsing succeeds; `false`
   *   (after recording the issue and logging a security event) when parsing throws.
   */
  private async validateFormattedContent(formatted: string, filePath: string, result: FormatterResult): Promise<boolean> {
    if (!this.options.validate) return true;

    try {
      const elementType = this.detectElementType(filePath);
      // Memory output is bare YAML, so it is wrapped in --- delimiters before parsing
      const yamlToValidate = elementType === ElementType.MEMORY
        ? `---\n${formatted}\n---\n`
        : formatted;

      // FIX (Issue #1211): Local files are pre-trusted (same as MemoryManager PR #1207)
      SecureYamlParser.parse(yamlToValidate, {
        validateContent: false,
        validateFields: false
      });
      result.fixed.push('YAML validation passed');
      return true;
    } catch (error) {
      result.issues.push(`YAML validation failed: ${error}`);
      result.success = false;

      SecurityMonitor.logSecurityEvent({
        type: 'YAML_PARSING_WARNING',
        severity: 'MEDIUM',
        source: 'ElementFormatter',
        details: `YAML validation failed for ${filePath}`,
        additionalData: { error: error instanceof Error ? error.message : String(error) }
      });

      return false;
    }
  }

  /**
   * Copy the original file to `<filePath>.backup` when the `backup` option is on.
   * The backup path is recorded on `result.backupPath`.
   */
  private async createBackupIfNeeded(filePath: string, result: FormatterResult): Promise<void> {
    if (!this.options.backup) return;

    const backupPath = filePath + '.backup';
    await fs.copyFile(filePath, backupPath);
    result.backupPath = backupPath;
    result.fixed.push(`Created backup at ${backupPath}`);

    SecurityMonitor.logSecurityEvent({
      type: 'FILE_COPIED',
      severity: 'LOW',
      source: 'ElementFormatter',
      details: `Backup created: ${backupPath}`,
      additionalData: { originalFile: filePath, backupFile: backupPath }
    });
  }

  /**
   * Write formatted content to the path chosen by `getOutputPath` and log it.
   */
  private async writeFormattedFile(filePath: string, formatted: string, result: FormatterResult): Promise<void> {
    const outputPath = this.getOutputPath(filePath);
    await fs.writeFile(outputPath, formatted, 'utf-8');

    SecurityMonitor.logSecurityEvent({
      type: 'FILE_COPIED',
      severity: 'LOW',
      source: 'ElementFormatter',
      details: `File formatted successfully: ${outputPath}`,
      additionalData: {
        inputPath: filePath,
        outputPath,
        backup: result.backupPath || 'none'
      }
    });
  }

  /**
   * Record a failure on the result and log it. Well-known failure causes
   * (missing file, permissions, path traversal) also get a friendly issue entry.
   */
  private handleFormatError(error: unknown, result: FormatterResult, filePath: string): void {
    if (error instanceof Error) {
      result.error = error.message;

      if (error.message.includes('ENOENT')) {
        result.issues.push('File not found');
      } else if (error.message.includes('EACCES')) {
        result.issues.push('Permission denied');
      } else if (error.message.includes('Path traversal')) {
        result.issues.push('Security: Path traversal attempt blocked');
      }
    } else {
      result.error = String(error);
    }

    logger.error('Failed to format file', {
      filePath,
      error: result.error,
      errorType: error instanceof Error ? error.constructor.name : typeof error
    });
  }

  /**
   * Format multiple files
   *
   * FIX: Added parallel processing with concurrency limit for better performance
   *
   * @param filePaths Files to format.
   * @param concurrencyLimit Maximum number of files formatted at the same time.
   *   Values below 1 (or NaN) are treated as 1 and fractions are rounded down;
   *   previously a limit of 0 or less never advanced the loop.
   * @returns One result per input path, in input order.
   */
  async formatFiles(filePaths: string[], concurrencyLimit = 5): Promise<FormatterResult[]> {
    const batchSize = Number.isNaN(concurrencyLimit) || concurrencyLimit < 1
      ? 1
      : Math.floor(concurrencyLimit);
    const results: FormatterResult[] = [];

    // Process files in batches for controlled parallelism
    for (let i = 0; i < filePaths.length; i += batchSize) {
      const batch = filePaths.slice(i, i + batchSize);
      const batchResults = await Promise.all(
        batch.map(filePath => this.formatFile(filePath))
      );
      results.push(...batchResults);
    }

    return results;
  }

  /**
   * Format all elements of a specific type found under
   * `<portfolioDir>/<elementType>`. Failures are logged, not thrown.
   */
  async formatElementType(elementType: ElementType, portfolioDir: string): Promise<FormatterResult[]> {
    const results: FormatterResult[] = [];
    const elementDir = path.join(portfolioDir, elementType);

    try {
      if (elementType === ElementType.MEMORY) {
        // Handle memory-specific structure (date folders)
        results.push(...await this.formatMemoryDirectory(elementDir));
      } else {
        // Handle standard element structure (.md files in root)
        results.push(...await this.formatStandardDirectory(elementDir));
      }
    } catch (error) {
      logger.error(`Failed to format element type: ${elementType}`, { error });
    }

    return results;
  }

  /**
   * Format memory elements: `.yaml` files in the directory root plus `.yaml`
   * files inside `YYYY-MM-DD` date folders. Failures are logged, not thrown.
   */
  private async formatMemoryDirectory(memoryDir: string): Promise<FormatterResult[]> {
    const results: FormatterResult[] = [];

    try {
      const entries = await fs.readdir(memoryDir, { withFileTypes: true });

      // Process root .yaml files
      for (const entry of entries) {
        if (!entry.isDirectory() && entry.name.endsWith('.yaml')) {
          const filePath = path.join(memoryDir, entry.name);
          results.push(await this.formatFile(filePath));
        }
      }

      // Process date folders
      // Use RegExp.test() directly as per SonarCloud S6594
      const datePattern = /^\d{4}-\d{2}-\d{2}$/;
      const dateFolders = entries.filter(e =>
        e.isDirectory() && datePattern.test(e.name)
      );

      for (const folder of dateFolders) {
        const folderPath = path.join(memoryDir, folder.name);
        const files = await fs.readdir(folderPath);

        for (const file of files.filter(f => f.endsWith('.yaml'))) {
          const filePath = path.join(folderPath, file);
          results.push(await this.formatFile(filePath));
        }
      }
    } catch (error) {
      logger.error('Failed to format memory directory', { memoryDir, error });
    }

    return results;
  }

  /**
   * Format standard elements (`.md` files directly inside `elementDir`).
   * Failures are logged, not thrown.
   */
  private async formatStandardDirectory(elementDir: string): Promise<FormatterResult[]> {
    const results: FormatterResult[] = [];

    try {
      const files = await fs.readdir(elementDir);
      const mdFiles = files.filter(f => f.endsWith('.md'));

      for (const file of mdFiles) {
        const filePath = path.join(elementDir, file);
        results.push(await this.formatFile(filePath));
      }
    } catch (error) {
      logger.error('Failed to format standard directory', { elementDir, error });
    }

    return results;
  }

  /**
   * Format a memory YAML file.
   *
   * @returns The re-dumped YAML, or the original content (with an issue
   *   recorded) when it cannot be parsed.
   */
  private async formatMemory(content: string, filePath: string, result: FormatterResult): Promise<string> {
    try {
      const data = await this.parseMemoryContent(content);

      // Process entries if they exist
      if (data.entries && Array.isArray(data.entries)) {
        await this.processMemoryEntries(data.entries, data, result);
      }

      // Ensure proper structure
      this.ensureMemoryStructure(data, filePath, result);

      // Format as clean YAML
      return this.formatAsYaml(data);
    } catch (error) {
      result.issues.push(`Failed to parse YAML: ${error}`);
      return content; // Return original content if we can't parse it
    }
  }

  /**
   * Parse memory content using SecureYamlParser. The bare YAML is wrapped in
   * `---` delimiters before it is handed to the parser.
   */
  private async parseMemoryContent(content: string): Promise<any> {
    const wrappedContent = `---\n${content}\n---\n`;
    // FIX (Issue #1211): Local files are pre-trusted (same as MemoryManager PR #1207)
    const parsed = SecureYamlParser.parse(wrappedContent, {
      validateContent: false,
      validateFields: false
    });
    return parsed.data;
  }

  /**
   * Process memory entries to fix issues. Entries whose `content` is not a
   * string are left untouched.
   */
  private async processMemoryEntries(entries: any[], data: any, result: FormatterResult): Promise<void> {
    for (const entry of entries) {
      if (typeof entry.content !== 'string') continue;

      // Normalize Unicode
      entry.content = this.normalizeUnicode(entry.content);

      // Handle embedded metadata
      if (this.hasEmbeddedMetadata(entry.content)) {
        this.handleEmbeddedMetadata(entry, data, result);
      } else {
        this.unescapeEntryContent(entry, result);
      }
    }
  }

  /**
   * Check if content has embedded metadata, i.e. contains a `---` marker
   * followed by either a real newline or the two-character sequence `\n`.
   */
  private hasEmbeddedMetadata(content: string): boolean {
    // Check for both actual newlines and escaped newlines
    // Using String.raw to properly handle escape sequences (SonarCloud compliance)
    const actualNewline = '---\n';
    const escapedNewline = String.raw`---\n`; // This represents the literal string "---\n"
    return content.includes(actualNewline) || content.includes(escapedNewline);
  }

  /**
   * Handle embedded metadata extraction: unescape the entry content, move any
   * embedded metadata block to the top level of `data`, and keep the remaining
   * text as the entry content.
   */
  private handleEmbeddedMetadata(entry: any, data: any, result: FormatterResult): void {
    result.issues.push('Found embedded metadata in content');

    // First unescape the content to make it parseable
    const unescapedContent = this.unescapeNewlines(entry.content);

    // Then try to extract metadata from the unescaped content
    const extracted = this.extractEmbeddedMetadata(unescapedContent);

    if (extracted.metadata) {
      // Merge extracted metadata to top level
      Object.assign(data, extracted.metadata);
      // Update entry with clean content
      entry.content = extracted.content;
      result.fixed.push('Extracted embedded metadata to top level', 'Unescaped newlines in content');
    } else {
      // Just unescape if no metadata found
      entry.content = unescapedContent;
      result.fixed.push('Unescaped newlines in content');
    }
  }

  /**
   * Unescape entry content, reporting a fix only when something changed.
   */
  private unescapeEntryContent(entry: any, result: FormatterResult): void {
    const original = entry.content;
    entry.content = this.unescapeNewlines(entry.content);
    if (original !== entry.content) {
      result.fixed.push('Unescaped newlines in content');
    }
  }

  /**
   * Ensure memory has proper structure
   * FIX (Issue #1211): Derive name from filename instead of auto-generated entry ID
   */
  private ensureMemoryStructure(data: any, filePath: string, result: FormatterResult): void {
    if (!data.name) {
      // Derive name from filename, removing extension
      const filename = path.basename(filePath, path.extname(filePath));
      data.name = filename;
      result.fixed.push(`Added name field from filename: ${filename}`);
    }
  }

  /**
   * Format data as clean YAML.
   *
   * The previous `styles: { '!!str': fn }` option was removed: js-yaml's `styles`
   * option maps a tag to a style *name*, and plain strings are not routed through
   * that map, so the callback was never invoked. js-yaml picks the scalar style
   * for strings itself.
   */
  private formatAsYaml(data: any): string {
    return yaml.dump(data, {
      lineWidth: 120,
      noRefs: true,
      sortKeys: false,
      quotingType: '"',
      forceQuotes: false
    });
  }

  /**
   * Format a standard element (markdown with frontmatter).
   *
   * @returns The rebuilt document, or the original content (with an issue
   *   recorded) when no frontmatter is found or it cannot be parsed.
   */
  private async formatStandardElement(content: string, result: FormatterResult): Promise<string> {
    try {
      // Split frontmatter and content using RegExp.exec() as per SonarCloud S6594.
      // \r?\n lets files with Windows (CRLF) line endings match as well.
      const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/;
      const match = frontmatterRegex.exec(content);

      if (!match) {
        result.issues.push('No frontmatter found');
        return content;
      }

      const [, frontmatterStr, body] = match;

      // FIX: HIGH - Use SecureYamlParser for frontmatter
      // FIX (Issue #1211): Local files are pre-trusted (same as MemoryManager PR #1207)
      const tempDoc = `---\n${frontmatterStr}\n---\n`;
      const parsed = SecureYamlParser.parse(tempDoc, {
        validateContent: false,
        validateFields: false
      });
      const frontmatter = parsed.data as any;

      // Clean frontmatter; only report a fix when the text actually changed
      if (frontmatter.content && typeof frontmatter.content === 'string') {
        const cleanedContent = this.unescapeNewlines(frontmatter.content);
        if (cleanedContent !== frontmatter.content) {
          frontmatter.content = cleanedContent;
          result.fixed.push('Unescaped newlines in frontmatter content');
        }
      }

      // Clean body
      const cleanBody = this.unescapeNewlines(body);
      if (body !== cleanBody) {
        result.fixed.push('Unescaped newlines in body');
      }

      // Reconstruct with clean YAML
      const cleanFrontmatter = yaml.dump(frontmatter, {
        lineWidth: 120,
        noRefs: true,
        sortKeys: false
      });

      return `---\n${cleanFrontmatter}---\n${cleanBody}`;
    } catch (error) {
      result.issues.push(`Failed to parse element: ${error}`);
      return content;
    }
  }

  /**
   * Extract embedded metadata from content string
   *
   * FIX: Security hotspot - Replaced regex vulnerable to catastrophic backtracking
   * with a linear-time string parsing approach to prevent ReDoS attacks
   *
   * All offsets are computed and applied on the same trimmed string. Previously
   * the main branch located the markers in the trimmed text but sliced the
   * untrimmed text, so leading whitespace shifted both extracted sections.
   *
   * @param content Already-unescaped entry content.
   * @returns The parsed metadata and the text following the closing marker, or
   *   `{ metadata: null, content }` (content untouched) when no metadata block
   *   is found or it fails to parse.
   */
  private extractEmbeddedMetadata(content: string): { metadata: any; content: string } {
    // Use indexOf for linear-time parsing instead of regex to prevent ReDoS
    const startMarker = '---';
    const endMarker = '\n---\n';
    const altEndMarker = '---\n';

    // Try to find the start marker anywhere in the content
    const trimmed = content.trim();
    const startIdx = trimmed.indexOf(startMarker);
    if (startIdx === -1) {
      return { metadata: null, content };
    }

    // Find the starting position after first marker
    const startPos = startIdx + startMarker.length;

    // Look for the end marker, falling back to the alternative marker for edge cases
    let endPos = trimmed.indexOf(endMarker, startPos);
    let endMarkerLength = endMarker.length;
    if (endPos === -1) {
      endPos = trimmed.indexOf(altEndMarker, startPos + 1);
      endMarkerLength = altEndMarker.length;
      if (endPos === -1) {
        return { metadata: null, content };
      }
    }

    // Extract metadata and content sections
    const metadataStr = trimmed.slice(startPos, endPos).trim();
    const cleanContent = trimmed.slice(endPos + endMarkerLength).trim();

    try {
      // FIX: HIGH - Use SecureYamlParser for metadata extraction
      // FIX (Issue #1211): Local files are pre-trusted (same as MemoryManager PR #1207)
      const tempDoc = `---\n${metadataStr}\n---\n`;
      const parsed = SecureYamlParser.parse(tempDoc, {
        validateContent: false,
        validateFields: false
      });
      return { metadata: parsed.data, content: cleanContent };
    } catch {
      // If YAML parsing fails, return as-is
      return { metadata: null, content };
    }
  }

  /**
   * Unescape `\n`, `\r`, `\t` and `\\` sequences.
   *
   * Done in a single left-to-right pass so that each backslash is consumed
   * exactly once: an escaped backslash followed by `n` yields a backslash and
   * the letter `n`, not a backslash and a line break (which is what chained
   * replaceAll passes produced).
   */
  private unescapeNewlines(text: string): string {
    return text.replace(
      /\\([nrt\\])/g,
      (sequence: string, code: string) => ESCAPE_REPLACEMENTS[code] ?? sequence
    );
  }

  /**
   * Detect element type from file path.
   *
   * The first path segment (case-insensitive) that names a known element
   * directory wins; otherwise `.yaml` / `.yml` files are treated as memories
   * and everything else as a persona.
   */
  private detectElementType(filePath: string): ElementType {
    // Normalize path separators for cross-platform compatibility
    const normalizedPath = filePath.replaceAll('\\', '/');

    // Check each non-empty segment against the explicit directory map
    for (const segment of normalizedPath.split('/')) {
      if (segment.length === 0) continue;
      const elementType = ELEMENT_TYPE_BY_DIRECTORY.get(segment.toLowerCase());
      if (elementType !== undefined) {
        return elementType;
      }
    }

    // Fallback: Use file extension as a hint
    // .yaml files are typically memories, .md files are standard elements
    if (filePath.endsWith('.yaml') || filePath.endsWith('.yml')) {
      return ElementType.MEMORY;
    }

    // Default to PERSONA for .md files or unknown types
    return ElementType.PERSONA;
  }

  /**
   * Get output path for formatted file
   *
   * FIX: Added path traversal protection to prevent directory escape attacks
   *
   * @returns `filePath` itself when `inPlace` is set; otherwise
   *   `<outputDir>/<basename>` when `outputDir` is set; otherwise
   *   `<dir>/<base>.formatted<ext>` next to the input.
   * @throws Error when the resolved output path would fall outside `outputDir`.
   */
  private getOutputPath(filePath: string): string {
    if (this.options.inPlace) {
      return filePath;
    }

    if (this.options.outputDir) {
      // Security: Validate output directory to prevent path traversal
      // FIX: MEDIUM - Normalize Unicode in filename
      const filename = this.normalizeUnicode(path.basename(filePath));
      const safePath = path.resolve(this.options.outputDir, filename);
      const expectedDir = path.resolve(this.options.outputDir);

      // Ensure the resolved path is within the expected directory. A relative
      // path is compared instead of a raw string prefix so that a sibling such
      // as "<outputDir>-other" is not mistaken for a location inside outputDir.
      const relative = path.relative(expectedDir, safePath);
      const escapesOutputDir =
        relative === '..' ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);

      if (escapesOutputDir) {
        // FIX: LOW - Use SecurityMonitor for audit logging
        SecurityMonitor.logSecurityEvent({
          type: 'PATH_TRAVERSAL_ATTEMPT',
          severity: 'HIGH',
          source: 'ElementFormatter',
          details: `Path traversal blocked: ${filename}`,
          additionalData: {
            attemptedPath: filename,
            expectedDir,
            resolvedPath: safePath
          }
        });
        throw new Error(`Path traversal attempt detected: ${filename}`);
      }

      return safePath;
    }

    // Default: add .formatted before extension
    const dir = path.dirname(filePath);
    const ext = path.extname(filePath);
    const base = path.basename(filePath, ext);
    return path.join(dir, `${base}.formatted${ext}`);
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DollhouseMCP/DollhouseMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.