Validate PDF Metadata
validate_metadata: Validate PDF metadata conformance to PDF/A and PDF/UA standards, checking required fields like title, dates, and tagged status.
Instructions
Validate PDF metadata conformance against best practices and specification requirements.
Args:
file_path (string): Absolute path to a local PDF file
response_format ('markdown' | 'json'): Output format (default: 'markdown')
Returns: Validation results including: total checks, pass/fail counts, detailed issues with severity, metadata field presence summary, and an overall summary.
Checks performed:
Title presence (required for PDF/UA, PDF/A)
Author presence
Creation date format validation
Modification date presence
Producer identification
PDF version detection
Tagged flag status
Subject and Keywords presence
Encryption and accessibility impact
Examples:
Verify PDF metadata completeness for PDF/A archival
Check metadata requirements for PDF/UA compliance
Audit document metadata for publishing standards
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| file_path | Yes | Absolute path to a local PDF file (e.g., "/path/to/document.pdf") | |
| response_format | No | Output format: "markdown" for human-readable, "json" for structured data | markdown |
Implementation Reference
- Core metadata validation logic: performs 10 checks (title, author, creation date, modification date, producer, PDF version, tagged flag, subject, keywords, encryption) and returns a MetadataValidation result with summary.
/**
 * Runs the ten metadata checks (META-001 to META-010) against a PDF and reports the outcome.
 *
 * Metadata is loaded with `getMetadata(filePath)`. Each check records exactly one entry in
 * `issues`: `info` when it passes, `warning` or `error` otherwise. Only a missing title is
 * reported as an `error`; every other finding is a `warning`.
 *
 * Rejections from `getMetadata` are not caught here and propagate to the caller.
 *
 * @param filePath - Path handed to `getMetadata`.
 * @returns Counters, the per-check issue list, presence flags for the metadata fields,
 *          and a one-line summary.
 */
export async function validateMetadata(filePath: string): Promise<MetadataValidation> {
  const issues: ValidationIssue[] = [];
  let checksRun = 0;
  let passCount = 0;
  let errorTally = 0;
  let warningTally = 0;

  // Every check calls this exactly once, so the counters and the issue list stay in step.
  const record = (issue: ValidationIssue): void => {
    checksRun += 1;
    switch (issue.severity) {
      case 'error':
        errorTally += 1;
        break;
      case 'warning':
        warningTally += 1;
        break;
      default:
        passCount += 1;
    }
    issues.push(issue);
  };

  const meta = await getMetadata(filePath);

  // Check 1: Title (the only check that can produce an error)
  if (meta.title) {
    record({ severity: 'info', code: 'META-001', message: `Title: "${meta.title}"` });
  } else {
    record({
      severity: 'error',
      code: 'META-001',
      message: 'Title is missing',
      details:
        'A document title is required for PDF/UA and PDF/A compliance. It is used by assistive technology and search engines.',
    });
  }

  // Check 2: Author
  if (meta.author) {
    record({ severity: 'info', code: 'META-002', message: `Author: "${meta.author}"` });
  } else {
    record({
      severity: 'warning',
      code: 'META-002',
      message: 'Author is missing',
      details: 'Author metadata is recommended for document identification.',
    });
  }

  // Check 3: CreationDate (presence first, then format)
  if (!meta.creationDate) {
    record({
      severity: 'warning',
      code: 'META-003',
      message: 'Creation date is missing',
      details: 'Creation date metadata is recommended.',
    });
  } else if (isValidPdfDate(meta.creationDate)) {
    record({ severity: 'info', code: 'META-003', message: `Creation date: ${meta.creationDate}` });
  } else {
    record({
      severity: 'warning',
      code: 'META-003',
      message: `Creation date format may be non-standard: ${meta.creationDate}`,
      details: "PDF date format should follow D:YYYYMMDDHHmmSSOHH'mm'.",
    });
  }

  // Check 4: ModificationDate
  if (meta.modificationDate) {
    record({
      severity: 'info',
      code: 'META-004',
      message: `Modification date: ${meta.modificationDate}`,
    });
  } else {
    record({
      severity: 'warning',
      code: 'META-004',
      message: 'Modification date is missing',
      details: 'Modification date metadata is recommended for document tracking.',
    });
  }

  // Check 5: Producer
  if (meta.producer) {
    record({ severity: 'info', code: 'META-005', message: `Producer: "${meta.producer}"` });
  } else {
    record({
      severity: 'warning',
      code: 'META-005',
      message: 'Producer is missing',
      details:
        'Producer identifies the application that created the PDF. Useful for debugging rendering issues.',
    });
  }

  // Check 6: PDF version (a detected version always passes; pre-1.4 only changes the wording)
  if (meta.pdfVersion) {
    const isPre14 = Number.parseFloat(meta.pdfVersion) < 1.4;
    record({
      severity: 'info',
      code: 'META-006',
      message: isPre14
        ? `PDF version: ${meta.pdfVersion} (pre-1.4; limited feature support)`
        : `PDF version: ${meta.pdfVersion}`,
    });
  } else {
    record({
      severity: 'warning',
      code: 'META-006',
      message: 'PDF version not detected',
      details: 'The PDF version could not be determined from the file header.',
    });
  }

  // Check 7: Tagged flag
  if (meta.isTagged) {
    record({ severity: 'info', code: 'META-007', message: 'Document is tagged' });
  } else {
    record({
      severity: 'warning',
      code: 'META-007',
      message: 'Document is not tagged',
      details: 'Tagged PDF is required for PDF/UA compliance and recommended for accessibility.',
    });
  }

  // Check 8: Subject
  if (meta.subject) {
    record({ severity: 'info', code: 'META-008', message: `Subject: "${meta.subject}"` });
  } else {
    record({
      severity: 'warning',
      code: 'META-008',
      message: 'Subject is missing',
      details: 'Subject metadata helps describe the document purpose.',
    });
  }

  // Check 9: Keywords
  if (meta.keywords) {
    record({ severity: 'info', code: 'META-009', message: `Keywords: "${meta.keywords}"` });
  } else {
    record({
      severity: 'warning',
      code: 'META-009',
      message: 'Keywords are missing',
      details: 'Keywords metadata aids document discoverability.',
    });
  }

  // Check 10: Encryption and accessibility (an unencrypted document is the passing case)
  if (!meta.isEncrypted) {
    record({ severity: 'info', code: 'META-010', message: 'Document is not encrypted' });
  } else {
    record({
      severity: 'warning',
      code: 'META-010',
      message: 'Document is encrypted',
      details:
        'Encryption may restrict assistive technology access. Ensure accessibility permissions are enabled.',
    });
  }

  // The summary wording is driven by the severities actually present in the issue list.
  let errorCount = 0;
  let warnCount = 0;
  for (const { severity } of issues) {
    if (severity === 'error') {
      errorCount += 1;
    } else if (severity === 'warning') {
      warnCount += 1;
    }
  }

  let summary: string;
  if (errorCount !== 0) {
    summary = `${passCount}/${checksRun} checks passed, ${errorCount} error(s), ${warnCount} warning(s).`;
  } else if (warnCount !== 0) {
    summary = `${passCount}/${checksRun} checks passed with ${warnCount} warning(s).`;
  } else {
    summary = `All ${checksRun} metadata checks passed.`;
  }

  return {
    totalChecks: checksRun,
    passed: passCount,
    failed: errorTally,
    warnings: warningTally,
    issues,
    metadata: {
      hasTitle: !!meta.title,
      hasAuthor: !!meta.author,
      hasSubject: !!meta.subject,
      hasKeywords: !!meta.keywords,
      hasCreator: !!meta.creator,
      hasProducer: !!meta.producer,
      hasCreationDate: !!meta.creationDate,
      hasModificationDate: !!meta.modificationDate,
      pdfVersion: meta.pdfVersion,
      isTagged: meta.isTagged,
    },
    summary,
  };
}
// - Tool handler callback: calls validateMetadata service, formats output as JSON or markdown, and handles errors.
async (params: ValidateMetadataInput) => { try { const result = await validateMetadata(params.file_path); const raw = params.response_format === ResponseFormat.JSON ? JSON.stringify(result, null, 2) : formatMetadataValidationMarkdown(result); const { text } = truncateIfNeeded(raw); return { content: [{ type: 'text' as const, text }] }; } catch (error) { const err = handleStructuredError(error); return { content: [{ type: 'text' as const, text: JSON.stringify(err, null, 2) }], isError: true, }; } }, ); }
- src/schemas/tier3.ts:17-22 (schema) Zod schema defining input parameters: file_path (string) and response_format (optional, default 'markdown').
/**
 * Input schema for the `validate_metadata` tool.
 *
 * The object has exactly two keys, `file_path` and `response_format`, whose rules come
 * from `FilePathSchema` and `ResponseFormatSchema` (both defined elsewhere).
 * `.strict()` is applied so that Zod rejects input carrying any other key.
 */
export const ValidateMetadataSchema = z
  .object({
    file_path: FilePathSchema,
    response_format: ResponseFormatSchema,
  })
  .strict();
- src/types.ts:269-289 (schema)TypeScript interface MetadataValidation for the tool's output shape.
/**
 * Output of the `validate_metadata` tool: counters, the per-check issue list,
 * presence flags for the metadata fields, and a one-line summary.
 */
export interface MetadataValidation {
  /** Number of checks that were run. */
  totalChecks: number;
  /** Number of checks that passed. */
  passed: number;
  /** Number of checks that failed. */
  failed: number;
  /** Number of checks that produced a warning. */
  warnings: number;
  /** Findings reported by the checks. */
  issues: ValidationIssue[];
  /** Presence flags and basic facts about the document's metadata. */
  metadata: {
    hasTitle: boolean;
    hasAuthor: boolean;
    hasSubject: boolean;
    hasKeywords: boolean;
    hasCreator: boolean;
    hasProducer: boolean;
    hasCreationDate: boolean;
    hasModificationDate: boolean;
    /** PDF version string, or `null` when none is available. */
    pdfVersion: string | null;
    /** Whether the document is a tagged PDF. */
    isTagged: boolean;
  };
  /** Human-readable summary of the validation run. */
  summary: string;
}
- src/tools/tier3/validate-metadata.ts:12-69 (registration)Registration function registerValidateMetadata that calls server.registerTool with name 'validate_metadata', schema, annotations, and handler.
/**
 * Registers the `validate_metadata` tool on the given MCP server.
 *
 * The tool is registered with a title, a description, `ValidateMetadataSchema` as its
 * input schema, and annotations marking it read-only, non-destructive, idempotent and
 * closed-world. Its handler runs `validateMetadata` on `params.file_path`, renders the
 * result as JSON or markdown depending on `params.response_format`, and passes the text
 * through `truncateIfNeeded`. Any thrown error is converted with `handleStructuredError`
 * and returned as a JSON text payload with `isError: true` instead of being rethrown.
 *
 * @param server - MCP server to register the tool on.
 */
export function registerValidateMetadata(server: McpServer): void {
  // NOTE(review): the description is kept on a single line exactly as it appears in this
  // copy of the source; the upstream file may have line breaks here - confirm.
  const toolDescription = `Validate PDF metadata conformance against best practices and specification requirements. Args: - file_path (string): Absolute path to a local PDF file - response_format ('markdown' | 'json'): Output format (default: 'markdown') Returns: Validation results including: total checks, pass/fail counts, detailed issues with severity, metadata field presence summary, and an overall summary. Checks performed: - Title presence (required for PDF/UA, PDF/A) - Author presence - Creation date format validation - Modification date presence - Producer identification - PDF version detection - Tagged flag status - Subject and Keywords presence - Encryption and accessibility impact Examples: - Verify PDF metadata completeness for PDF/A archival - Check metadata requirements for PDF/UA compliance - Audit document metadata for publishing standards`;

  const toolConfig = {
    title: 'Validate PDF Metadata',
    description: toolDescription,
    inputSchema: ValidateMetadataSchema,
    annotations: {
      readOnlyHint: true,
      destructiveHint: false,
      idempotentHint: true,
      openWorldHint: false,
    },
  };

  // Failures are reported in-band (isError: true) rather than thrown.
  const runValidation = async (params: ValidateMetadataInput) => {
    try {
      const validation = await validateMetadata(params.file_path);
      const wantsJson = params.response_format === ResponseFormat.JSON;
      const rendered = wantsJson
        ? JSON.stringify(validation, null, 2)
        : formatMetadataValidationMarkdown(validation);
      const truncated = truncateIfNeeded(rendered);
      return { content: [{ type: 'text' as const, text: truncated.text }] };
    } catch (error) {
      const structured = handleStructuredError(error);
      const payload = JSON.stringify(structured, null, 2);
      return {
        content: [{ type: 'text' as const, text: payload }],
        isError: true,
      };
    }
  };

  server.registerTool('validate_metadata', toolConfig, runValidation);
}