de en es ja ko ru zh

Git MCP Server

by cyanheads

TypeScript

Apache 2.0

2,311

136

Overview InspectNew Endpoints Schema Related Servers Reviews Score

Need Help? View Source Code Report Issue

pdfParser.ts•27.6 kB

/**
 * @fileoverview Provides a utility class for creating, modifying, and parsing PDF documents.
 * Wraps the 'pdf-lib' npm library with structured error handling and logging.
 * @module src/utils/parsing/pdfParser
 */
import {
  PDFDocument,
  PDFFont,
  PDFImage,
  PDFPage,
  StandardFonts,
  degrees,
  rgb,
  type RGB,
} from 'pdf-lib';

import { JsonRpcErrorCode, McpError } from '@/types-global/errors.js';
import {
  type RequestContext,
  logger,
  requestContextService,
} from '@/utils/index.js';

/**
 * Options for adding a new page to a PDF document.
 */
export interface AddPageOptions {
  /**
   * Width of the page in points (1/72 inch). Defaults to US Letter width (612 points).
   */
  width?: number;
  /**
   * Height of the page in points (1/72 inch). Defaults to US Letter height (792 points).
   */
  height?: number;
}

/**
 * Options for drawing text on a PDF page.
 */
export interface DrawTextOptions {
  /**
   * The text string to draw.
   */
  text: string;
  /**
   * X-coordinate (in points) of the text baseline start.
   */
  x: number;
  /**
   * Y-coordinate (in points) of the text baseline.
   */
  y: number;
  /**
   * Font size in points. Defaults to 12.
   */
  size?: number;
  /**
   * Font to use. Must be embedded first via embedFont().
   * Defaults to Helvetica.
   */
  font?: PDFFont;
  /**
   * Text color as an RGB object. Defaults to black.
   */
  color?: RGB;
  /**
   * Rotation angle in degrees. Defaults to 0.
   */
  rotate?: number;
  /**
   * Maximum width for text wrapping (in points). If specified, text will wrap.
   */
  maxWidth?: number;
  /**
   * Line height multiplier for wrapped text. Defaults to 1.2.
   */
  lineHeight?: number;
}

/**
 * Options for embedding an image into a PDF document.
 */
export interface EmbedImageOptions {
  /**
   * Image data as Uint8Array or ArrayBuffer.
   */
  imageBytes: Uint8Array | ArrayBuffer;
  /**
   * Image format: 'png' or 'jpg'.
   */
  format: 'png' | 'jpg';
}

/**
 * Options for drawing an embedded image on a page.
 */
export interface DrawImageOptions {
  /**
   * The embedded PDF image.
   */
  image: PDFImage;
  /**
   * X-coordinate (in points) of the image's bottom-left corner.
   * PDF page coordinates originate at the bottom-left of the page.
   */
  x: number;
  /**
   * Y-coordinate (in points) of the image's bottom-left corner.
   * PDF page coordinates originate at the bottom-left of the page.
   */
  y: number;
  /**
   * Width of the image in points. Defaults to original width.
   */
  width?: number;
  /**
   * Height of the image in points. Defaults to original height.
   */
  height?: number;
  /**
   * Rotation angle in degrees. Defaults to 0.
   */
  rotate?: number;
  /**
   * Opacity (0-1). Defaults to 1 (fully opaque).
   */
  opacity?: number;
}

/**
 * Page range specification for splitting PDFs.
 */
export interface PageRange {
  /**
   * Starting page index (0-based).
   */
  start: number;
  /**
   * Ending page index (0-based, inclusive).
   */
  end: number;
}

/**
 * Metadata extracted from a PDF document.
 */
export interface PdfMetadata {
  /**
   * Document title.
   */
  title?: string;
  /**
   * Document author.
   */
  author?: string;
  /**
   * Document subject.
   */
  subject?: string;
  /**
   * Keywords associated with the document.
   */
  keywords?: string;
  /**
   * Application that created the document.
   */
  creator?: string;
  /**
   * Application that produced the PDF.
   */
  producer?: string;
  /**
   * Creation date (ISO 8601 string).
   */
  creationDate?: string;
  /**
   * Modification date (ISO 8601 string).
   */
  modificationDate?: string;
  /**
   * Total number of pages.
   */
  pageCount: number;
}

/**
 * Options for setting PDF metadata.
 */
export interface SetMetadataOptions {
  /**
   * Document title.
   */
  title?: string;
  /**
   * Document author.
   */
  author?: string;
  /**
   * Document subject.
   */
  subject?: string;
  /**
   * Keywords associated with the document.
   */
  keywords?: string;
  /**
   * Application that created the document.
   */
  creator?: string;
  /**
   * Application that produced the PDF.
   */
  producer?: string;
}

/**
 * Options for filling PDF form fields.
 */
export interface FillFormOptions {
  /**
   * Map of field names to their values.
   */
  fields: Record<string, string | boolean | number>;
  /**
   * Whether to flatten the form after filling (make it non-editable).
   * Defaults to false.
   */
  flatten?: boolean;
}

/**
 * Everything needed to log a failed operation and convert it into an McpError.
 */
interface FailureReport {
  /** Error code for the resulting McpError. */
  code: ConstructorParameters<typeof McpError>[0];
  /** Message written to the error log. */
  logMessage: string;
  /** Prefix of the McpError message; the cause's message is appended after ': '. */
  errorPrefix: string;
  /** The value caught by the `catch` clause (not necessarily an Error). */
  cause: unknown;
  /** Context used for logging (always defined). */
  logContext: RequestContext;
  /** Caller-supplied context, spread into the McpError data when present. */
  context: RequestContext | undefined;
  /** Extra key/value pairs to include in the log entry. */
  logExtras?: Record<string, unknown>;
}

/**
 * Returns the caller's context, or creates a fresh one tagged with `operation`.
 */
function resolveLogContext(
  context: RequestContext | undefined,
  operation: string,
): RequestContext {
  return context ?? requestContextService.createRequestContext({ operation });
}

/**
 * Logs a failure and builds the McpError to throw for it.
 *
 * The caught value is narrowed rather than asserted, so non-Error throws
 * (strings, `null`, ...) still produce a meaningful message instead of
 * `undefined` or a secondary crash inside the handler. An McpError raised
 * deliberately inside the guarded code is logged and returned unchanged so
 * its original error code is preserved.
 */
function toMcpError(report: FailureReport): McpError {
  const {
    code,
    logMessage,
    errorPrefix,
    cause,
    logContext,
    context,
    logExtras = {},
  } = report;

  const message = cause instanceof Error ? cause.message : String(cause);

  logger.error(logMessage, {
    ...logContext,
    ...logExtras,
    errorDetails: message,
  });

  if (cause instanceof McpError) {
    return cause;
  }

  return new McpError(code, `${errorPrefix}: ${message}`, {
    ...context,
    rawError: cause instanceof Error ? cause.stack : String(cause),
  });
}

/**
 * Writes `value` into a form field using whichever setter the field exposes.
 *
 * Fields are duck-typed: strings and numbers go through `setText` (text
 * fields) or, failing that, `select` (dropdowns, option lists, radio groups);
 * booleans go through `check` / `uncheck` (checkboxes).
 *
 * @returns `true` when a matching setter was found, `false` when the field
 *   does not support the supplied value type.
 */
function applyFieldValue(
  field: object,
  value: string | boolean | number,
): boolean {
  if (typeof value === 'boolean') {
    if ('check' in field || 'uncheck' in field) {
      const checkboxField = field as {
        check?: () => void;
        uncheck?: () => void;
      };
      if (value) {
        checkboxField.check?.();
      } else {
        checkboxField.uncheck?.();
      }
      return true;
    }
    return false;
  }

  const textValue = String(value);
  if ('setText' in field) {
    (field as { setText: (text: string) => void }).setText(textValue);
    return true;
  }
  if ('select' in field) {
    (field as { select: (option: string) => void }).select(textValue);
    return true;
  }
  return false;
}

/**
 * Utility class for creating, modifying, and parsing PDF documents.
 * Wraps the 'pdf-lib' library with structured error handling and logging.
 */
export class PdfParser {
  /**
   * Creates a new blank PDF document.
   *
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns A new PDFDocument instance.
   * @throws {McpError} If document creation fails.
   * @example
   * ```typescript
   * const doc = await pdfParser.createDocument();
   * const page = pdfParser.addPage(doc);
   * ```
   */
  async createDocument(context?: RequestContext): Promise<PDFDocument> {
    const logContext = resolveLogContext(context, 'PdfParser.createDocument');

    try {
      logger.debug('Creating new PDF document.', logContext);
      return await PDFDocument.create();
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to create PDF document.',
        errorPrefix: 'Failed to create PDF document',
        cause: e,
        logContext,
        context,
      });
    }
  }

  /**
   * Loads an existing PDF document from bytes.
   *
   * @param pdfBytes - The PDF file as Uint8Array or ArrayBuffer.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns A PDFDocument instance.
   * @throws {McpError} If document loading fails.
   * @example
   * ```typescript
   * const existingPdfBytes = await fs.readFile('input.pdf');
   * const doc = await pdfParser.loadDocument(existingPdfBytes);
   * ```
   */
  async loadDocument(
    pdfBytes: Uint8Array | ArrayBuffer,
    context?: RequestContext,
  ): Promise<PDFDocument> {
    const logContext = resolveLogContext(context, 'PdfParser.loadDocument');

    try {
      logger.debug('Loading PDF document from bytes.', {
        ...logContext,
        // Both Uint8Array and ArrayBuffer expose byteLength.
        byteLength: pdfBytes.byteLength,
      });
      return await PDFDocument.load(pdfBytes);
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.ValidationError,
        logMessage: 'Failed to load PDF document.',
        errorPrefix: 'Failed to load PDF document',
        cause: e,
        logContext,
        context,
      });
    }
  }

  /**
   * Adds a new page to a PDF document.
   *
   * @param doc - The PDFDocument to add a page to.
   * @param options - Optional page dimensions.
   * @returns The newly created PDFPage.
   * @example
   * ```typescript
   * const page = pdfParser.addPage(doc, { width: 600, height: 400 });
   * ```
   */
  addPage(doc: PDFDocument, options?: AddPageOptions): PDFPage {
    const width = options?.width ?? 612; // US Letter width
    const height = options?.height ?? 792; // US Letter height
    return doc.addPage([width, height]);
  }

  /**
   * Embeds a standard font into a PDF document.
   *
   * @param doc - The PDFDocument to embed the font into.
   * @param fontName - The standard font name. Defaults to 'Helvetica'.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns The embedded PDFFont.
   * @throws {McpError} If font embedding fails.
   * @example
   * ```typescript
   * const font = await pdfParser.embedFont(doc, 'TimesRoman');
   * ```
   */
  async embedFont(
    doc: PDFDocument,
    fontName: keyof typeof StandardFonts = 'Helvetica',
    context?: RequestContext,
  ): Promise<PDFFont> {
    const logContext = resolveLogContext(context, 'PdfParser.embedFont');

    try {
      logger.debug('Embedding standard font.', {
        ...logContext,
        fontName,
      });
      return await doc.embedFont(StandardFonts[fontName]);
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to embed font.',
        errorPrefix: `Failed to embed font '${fontName}'`,
        cause: e,
        logContext,
        context,
        logExtras: { fontName },
      });
    }
  }

  /**
   * Embeds an image (PNG or JPG) into a PDF document.
   *
   * @param doc - The PDFDocument to embed the image into.
   * @param options - Image data and format.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns The embedded PDFImage.
   * @throws {McpError} If image embedding fails.
   * @example
   * ```typescript
   * const imageBytes = await fs.readFile('logo.png');
   * const image = await pdfParser.embedImage(doc, {
   *   imageBytes,
   *   format: 'png'
   * });
   * ```
   */
  async embedImage(
    doc: PDFDocument,
    options: EmbedImageOptions,
    context?: RequestContext,
  ): Promise<PDFImage> {
    const logContext = resolveLogContext(context, 'PdfParser.embedImage');

    try {
      logger.debug('Embedding image into PDF.', {
        ...logContext,
        format: options.format,
      });
      return options.format === 'png'
        ? await doc.embedPng(options.imageBytes)
        : await doc.embedJpg(options.imageBytes);
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to embed image.',
        errorPrefix: `Failed to embed ${options.format} image`,
        cause: e,
        logContext,
        context,
        logExtras: { format: options.format },
      });
    }
  }

  /**
   * Draws text on a PDF page with optional formatting.
   *
   * Without `maxWidth` the text is handed to pdf-lib as-is. With `maxWidth`
   * the text is split into paragraphs on line breaks, each paragraph is
   * greedily word-wrapped to fit the width, and every resulting line is drawn
   * `size * lineHeight` points below the previous one. A single word wider
   * than `maxWidth` is placed on its own line rather than broken.
   *
   * @param page - The PDFPage to draw text on.
   * @param options - Text content, position, and styling options.
   * @example
   * ```typescript
   * const font = await pdfParser.embedFont(doc, 'Helvetica');
   * pdfParser.drawText(page, {
   *   text: 'Hello, World!',
   *   x: 50,
   *   y: 700,
   *   size: 30,
   *   font,
   *   color: rgb(0, 0.53, 0.71)
   * });
   * ```
   */
  drawText(page: PDFPage, options: DrawTextOptions): void {
    const {
      text,
      x,
      y,
      size = 12,
      font,
      color = rgb(0, 0, 0),
      rotate = 0,
      maxWidth,
      lineHeight = 1.2,
    } = options;

    // Draws one line at the given baseline; optional keys are only set when
    // provided so pdf-lib applies its own defaults otherwise.
    const drawLine = (line: string, baselineY: number): void => {
      const drawOptions: {
        x: number;
        y: number;
        size: number;
        font?: PDFFont;
        color: RGB;
        rotate?: ReturnType<typeof degrees>;
      } = { x, y: baselineY, size, color };
      if (font) drawOptions.font = font;
      if (rotate) drawOptions.rotate = degrees(rotate);
      page.drawText(line, drawOptions);
    };

    if (!maxWidth) {
      // Simple, unwrapped text.
      drawLine(text, y);
      return;
    }

    // NOTE(review): getForm() appears to create an AcroForm on documents that
    // lack one; it is used here only to obtain a font for measuring - confirm
    // this side effect is acceptable.
    const measuringFont = font ?? page.doc.getForm().getDefaultFont();

    const lines: string[] = [];
    // Split on explicit line breaks first: measuring a string that contains a
    // newline fails for standard fonts, which cannot encode that character.
    for (const paragraph of text.split(/\r\n|\r|\n/)) {
      let currentLine = '';
      for (const word of paragraph.split(' ')) {
        const candidate = currentLine ? `${currentLine} ${word}` : word;
        if (measuringFont.widthOfTextAtSize(candidate, size) <= maxWidth) {
          currentLine = candidate;
        } else {
          if (currentLine) {
            lines.push(currentLine);
          }
          currentLine = word;
        }
      }
      // An empty entry stands for a blank line (e.g. between paragraphs).
      lines.push(currentLine);
    }

    let currentY = y;
    for (const line of lines) {
      // Blank lines only advance the baseline; nothing is drawn for them.
      if (line) {
        drawLine(line, currentY);
      }
      currentY -= size * lineHeight;
    }
  }

  /**
   * Draws an embedded image on a PDF page.
   *
   * @param page - The PDFPage to draw the image on.
   * @param options - Image, position, and sizing options.
   * @example
   * ```typescript
   * const image = await pdfParser.embedImage(doc, { imageBytes, format: 'png' });
   * pdfParser.drawImage(page, {
   *   image,
   *   x: 100,
   *   y: 500,
   *   width: 200,
   *   height: 150
   * });
   * ```
   */
  drawImage(page: PDFPage, options: DrawImageOptions): void {
    const {
      image,
      x,
      y,
      width = image.width,
      height = image.height,
      rotate = 0,
      opacity = 1,
    } = options;

    const drawOptions: {
      x: number;
      y: number;
      width: number;
      height: number;
      rotate?: ReturnType<typeof degrees>;
      opacity: number;
    } = { x, y, width, height, opacity };
    if (rotate) drawOptions.rotate = degrees(rotate);

    page.drawImage(image, drawOptions);
  }

  /**
   * Merges multiple PDF documents into a single document.
   *
   * Pages are appended in input order. Missing (null/undefined) entries in
   * the input array are skipped.
   *
   * @param pdfBytesArray - Array of PDF documents as Uint8Array or ArrayBuffer.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns A new merged PDFDocument.
   * @throws {McpError} If merging fails.
   * @example
   * ```typescript
   * const pdf1 = await fs.readFile('doc1.pdf');
   * const pdf2 = await fs.readFile('doc2.pdf');
   * const merged = await pdfParser.mergePdfs([pdf1, pdf2]);
   * ```
   */
  async mergePdfs(
    pdfBytesArray: (Uint8Array | ArrayBuffer)[],
    context?: RequestContext,
  ): Promise<PDFDocument> {
    const logContext = resolveLogContext(context, 'PdfParser.mergePdfs');

    try {
      logger.debug('Merging PDF documents.', {
        ...logContext,
        documentCount: pdfBytesArray.length,
      });

      const mergedPdf = await PDFDocument.create();

      // Sequential on purpose: pages must be appended in input order.
      for (const pdfBytes of pdfBytesArray) {
        if (!pdfBytes) continue;
        const sourceDoc = await PDFDocument.load(pdfBytes);
        const copiedPages = await mergedPdf.copyPages(
          sourceDoc,
          sourceDoc.getPageIndices(),
        );
        for (const page of copiedPages) {
          mergedPdf.addPage(page);
        }
      }

      logger.debug('Successfully merged PDF documents.', {
        ...logContext,
        mergedPageCount: mergedPdf.getPageCount(),
      });

      return mergedPdf;
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to merge PDF documents.',
        errorPrefix: 'Failed to merge PDFs',
        cause: e,
        logContext,
        context,
      });
    }
  }

  /**
   * Splits a PDF document into multiple documents based on page ranges.
   *
   * Every range is validated against the source document before any pages
   * are copied: indices must be integers with
   * `0 <= start <= end < pageCount`.
   *
   * @param pdfBytes - The source PDF as Uint8Array or ArrayBuffer.
   * @param ranges - Array of page ranges to extract.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns Array of new PDFDocuments, one per range.
   * @throws {McpError} With a validation error code if a range is invalid,
   *   or an internal error code if splitting fails for any other reason.
   * @example
   * ```typescript
   * const pdfBytes = await fs.readFile('document.pdf');
   * const [part1, part2] = await pdfParser.splitPdf(pdfBytes, [
   *   { start: 0, end: 4 },
   *   { start: 5, end: 9 }
   * ]);
   * ```
   */
  async splitPdf(
    pdfBytes: Uint8Array | ArrayBuffer,
    ranges: PageRange[],
    context?: RequestContext,
  ): Promise<PDFDocument[]> {
    const logContext = resolveLogContext(context, 'PdfParser.splitPdf');

    try {
      logger.debug('Splitting PDF document.', {
        ...logContext,
        rangeCount: ranges.length,
      });

      const sourcePdf = await PDFDocument.load(pdfBytes);
      const pageCount = sourcePdf.getPageCount();

      // Reject bad ranges up front with a clear message instead of letting
      // them surface as an opaque failure while copying pages.
      for (const { start, end } of ranges) {
        const isValid =
          Number.isInteger(start) &&
          Number.isInteger(end) &&
          start >= 0 &&
          start <= end &&
          end < pageCount;
        if (!isValid) {
          throw new McpError(
            JsonRpcErrorCode.ValidationError,
            `Invalid page range ${start}-${end}: expected integers with 0 <= start <= end < ${pageCount}.`,
            { ...context, start, end, pageCount },
          );
        }
      }

      const results: PDFDocument[] = [];

      for (const { start, end } of ranges) {
        const newPdf = await PDFDocument.create();
        // Inclusive list of indices: start, start + 1, ..., end.
        const pageIndices = Array.from(
          { length: end - start + 1 },
          (_, offset) => start + offset,
        );
        const copiedPages = await newPdf.copyPages(sourcePdf, pageIndices);
        for (const page of copiedPages) {
          newPdf.addPage(page);
        }
        results.push(newPdf);
      }

      logger.debug('Successfully split PDF document.', {
        ...logContext,
        resultCount: results.length,
      });

      return results;
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to split PDF document.',
        errorPrefix: 'Failed to split PDF',
        cause: e,
        logContext,
        context,
      });
    }
  }

  /**
   * Fills form fields in a PDF document.
   *
   * Filling is best-effort per field: a field that cannot be found, rejects
   * its value, or does not support the supplied value type is logged as a
   * warning and skipped; the remaining fields are still processed. Strings
   * and numbers are written to text fields, or selected as an option on
   * fields that expose `select` (dropdowns, option lists, radio groups).
   * Booleans check or uncheck checkboxes.
   *
   * @param doc - The PDFDocument containing the form.
   * @param options - Field values and flatten option.
   * @param context - Optional RequestContext for logging and error correlation.
   * @throws {McpError} If form filling fails.
   * @example
   * ```typescript
   * pdfParser.fillForm(doc, {
   *   fields: {
   *     'Name': 'John Doe',
   *     'Age': 30,
   *     'Subscribe': true
   *   },
   *   flatten: true
   * });
   * ```
   */
  fillForm(
    doc: PDFDocument,
    options: FillFormOptions,
    context?: RequestContext,
  ): void {
    const logContext = resolveLogContext(context, 'PdfParser.fillForm');

    try {
      logger.debug('Filling PDF form fields.', {
        ...logContext,
        fieldCount: Object.keys(options.fields).length,
        flatten: options.flatten ?? false,
      });

      const form = doc.getForm();

      for (const [fieldName, value] of Object.entries(options.fields)) {
        try {
          const field = form.getField(fieldName);
          if (!applyFieldValue(field, value)) {
            // Previously this case was skipped without any trace.
            logger.warning(
              'Form field does not support the supplied value type.',
              {
                ...logContext,
                fieldName,
                valueType: typeof value,
              },
            );
          }
        } catch (fieldError: unknown) {
          logger.warning('Failed to fill form field.', {
            ...logContext,
            fieldName,
            fieldError:
              fieldError instanceof Error
                ? fieldError.message
                : String(fieldError),
          });
        }
      }

      if (options.flatten) {
        form.flatten();
      }

      logger.debug('Successfully filled PDF form.', logContext);
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to fill PDF form.',
        errorPrefix: 'Failed to fill PDF form',
        cause: e,
        logContext,
        context,
      });
    }
  }

  /**
   * Extracts metadata from a PDF document.
   *
   * Only values present in the document are included in the result; dates
   * are converted to ISO 8601 strings. `pageCount` is always set.
   *
   * @param doc - The PDFDocument to extract metadata from.
   * @returns PDF metadata object.
   * @example
   * ```typescript
   * const metadata = pdfParser.extractMetadata(doc);
   * console.log(metadata.title, metadata.author);
   * ```
   */
  extractMetadata(doc: PDFDocument): PdfMetadata {
    const title = doc.getTitle();
    const author = doc.getAuthor();
    const subject = doc.getSubject();
    const keywords = doc.getKeywords();
    const creator = doc.getCreator();
    const producer = doc.getProducer();
    const creationDate = doc.getCreationDate();
    const modificationDate = doc.getModificationDate();

    const metadata: PdfMetadata = {
      pageCount: doc.getPageCount(),
    };

    // Assign conditionally so absent values are omitted rather than set to
    // an explicit `undefined`.
    if (title !== undefined) metadata.title = title;
    if (author !== undefined) metadata.author = author;
    if (subject !== undefined) metadata.subject = subject;
    if (keywords !== undefined) metadata.keywords = keywords;
    if (creator !== undefined) metadata.creator = creator;
    if (producer !== undefined) metadata.producer = producer;
    if (creationDate !== undefined) {
      metadata.creationDate = creationDate.toISOString();
    }
    if (modificationDate !== undefined) {
      metadata.modificationDate = modificationDate.toISOString();
    }

    return metadata;
  }

  /**
   * Sets metadata for a PDF document.
   *
   * Only properties that are provided (not `undefined`) are written, so an
   * empty string can be used to clear an existing value.
   *
   * @param doc - The PDFDocument to set metadata on.
   * @param metadata - Metadata values to set.
   * @example
   * ```typescript
   * pdfParser.setMetadata(doc, {
   *   title: 'My Document',
   *   author: 'John Doe',
   *   subject: 'Important Document'
   * });
   * ```
   */
  setMetadata(doc: PDFDocument, metadata: SetMetadataOptions): void {
    if (metadata.title !== undefined) doc.setTitle(metadata.title);
    if (metadata.author !== undefined) doc.setAuthor(metadata.author);
    if (metadata.subject !== undefined) doc.setSubject(metadata.subject);
    // pdf-lib expects a keyword list; the single string is passed as one entry.
    if (metadata.keywords !== undefined) doc.setKeywords([metadata.keywords]);
    if (metadata.creator !== undefined) doc.setCreator(metadata.creator);
    if (metadata.producer !== undefined) doc.setProducer(metadata.producer);
  }

  /**
   * Extracts text content from all pages of a PDF document.
   * Note: pdf-lib has limited text extraction capabilities.
   * For robust text extraction, consider using pdf-parse or pdfjs-dist.
   *
   * This is currently a placeholder: it returns one fixed marker string per
   * page and logs a warning.
   *
   * @param doc - The PDFDocument to extract text from.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns Array of text strings, one per page.
   * @throws {McpError} If text extraction fails.
   * @example
   * ```typescript
   * const textPages = pdfParser.extractText(doc);
   * console.log(textPages[0]); // Text from first page
   * ```
   */
  extractText(doc: PDFDocument, context?: RequestContext): string[] {
    const logContext = resolveLogContext(context, 'PdfParser.extractText');

    try {
      logger.debug('Extracting text from PDF.', {
        ...logContext,
        pageCount: doc.getPageCount(),
      });

      // pdf-lib doesn't expose text extraction APIs directly; real extraction
      // would require parsing the content streams. For production use,
      // integrate pdf-parse or pdfjs-dist.
      const textPages = doc
        .getPages()
        .map(() => '[Text extraction not implemented - use pdf-parse]');

      logger.warning(
        'Text extraction is not fully implemented in pdf-lib. Consider using pdf-parse or pdfjs-dist for robust text extraction.',
        logContext,
      );

      return textPages;
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to extract text from PDF.',
        errorPrefix: 'Failed to extract text',
        cause: e,
        logContext,
        context,
      });
    }
  }

  /**
   * Serializes a PDF document to bytes (Uint8Array) for saving to disk or transmission.
   *
   * @param doc - The PDFDocument to serialize.
   * @param context - Optional RequestContext for logging and error correlation.
   * @returns The PDF as Uint8Array.
   * @throws {McpError} If serialization fails.
   * @example
   * ```typescript
   * const pdfBytes = await pdfParser.saveDocument(doc);
   * await fs.writeFile('output.pdf', pdfBytes);
   * ```
   */
  async saveDocument(
    doc: PDFDocument,
    context?: RequestContext,
  ): Promise<Uint8Array> {
    const logContext = resolveLogContext(context, 'PdfParser.saveDocument');

    try {
      logger.debug('Serializing PDF document to bytes.', logContext);
      const bytes = await doc.save();
      logger.debug('Successfully serialized PDF document.', {
        ...logContext,
        byteLength: bytes.length,
      });
      return bytes;
    } catch (e: unknown) {
      throw toMcpError({
        code: JsonRpcErrorCode.InternalError,
        logMessage: 'Failed to serialize PDF document.',
        errorPrefix: 'Failed to save PDF document',
        cause: e,
        logContext,
        context,
      });
    }
  }
}

/**
 * Singleton instance of the PdfParser.
 * Use this instance for all PDF operations with support for creating, modifying,
 * and parsing PDF documents using pdf-lib.
 *
 * @example
 * ```typescript
 * import { pdfParser, rgb } from '@/utils';
 *
 * // Create a new PDF
 * const doc = await pdfParser.createDocument();
 * const page = pdfParser.addPage(doc);
 * const font = await pdfParser.embedFont(doc, 'Helvetica');
 *
 * pdfParser.drawText(page, {
 *   text: 'Hello, World!',
 *   x: 50,
 *   y: 750,
 *   size: 30,
 *   font,
 *   color: rgb(0, 0.53, 0.71)
 * });
 *
 * const pdfBytes = await pdfParser.saveDocument(doc);
 * await fs.writeFile('output.pdf', pdfBytes);
 * ```
 */
export const pdfParser = new PdfParser();

/**
 * Re-export commonly used pdf-lib utilities for convenience.
 */
export { PDFDocument, StandardFonts, degrees, rgb };

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cyanheads/git-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.