Nutrient Document Engine MCP Server

extractTables.ts•6.51 KiB

import { DocumentEngineClient } from '../../api/Client.js'; import { z } from 'zod'; import { DocumentFingerprintSchema } from '../schemas/DocumentFingerprintSchema.js'; import { MCPToolOutput } from '../../mcpTools.js'; import { getDocumentInfo } from '../../api/DocumentLayerAbstraction.js'; /** * Schema for extract_tables tool */ // Create a Zod schema for PageRange const PageRangeSchema = z.object({ start: z.number().optional(), end: z.number().optional(), }); export const ExtractTablesSchema = { document_fingerprint: DocumentFingerprintSchema, page_range: PageRangeSchema.optional().describe( 'Range of pages to include with start and end indices (0-based)' ), }; export const ExtractTablesInputSchema = z.object(ExtractTablesSchema); export type ExtractTablesInput = z.infer<typeof ExtractTablesInputSchema>; /** * Extract tables from a document using the build-document API with JSONContentOutput */ export async function extractTables( client: DocumentEngineClient, params: ExtractTablesInput ): Promise<MCPToolOutput> { try { // Validate input const validatedParams = ExtractTablesInputSchema.parse(params); const { document_fingerprint, page_range } = validatedParams; // Get document info to get total page count and title const docInfo = await getDocumentInfo(client, document_fingerprint); const pageCount = docInfo.pageCount || 0; const title = docInfo.title || 'Untitled Document'; // Prepare document part with optional page range const documentPart: { document: { id: string; layer?: string }; pages?: { start?: number; end?: number }; } = { document: { id: document_fingerprint.document_id, layer: document_fingerprint.layer, }, }; // Add page range if provided if (page_range) { documentPart.pages = page_range; } // Build request with JSONContentOutput for tables const response = await client['build-document']( {}, { parts: [documentPart], output: { type: 'json-content', plainText: false, structuredText: false, keyValuePairs: false, tables: true, }, } ); // Extract the tables from the response // Ensure pages exist in the response const pages = response.data.pages || []; const tables = pages.flatMap((page, pageIndex) => { return (page.tables || []).map(table => ({ ...table, pageIndex, })); }); // Build the markdown content let markdown = `# Table Extraction\n\n`; markdown += `📄 **Document:** ${title} \n`; markdown += `📄 **Document ID:** ${document_fingerprint.document_id} \n`; if (document_fingerprint.layer) { markdown += `🔀 **Layer:** ${document_fingerprint.layer} \n`; } markdown += `📑 **Total Pages:** ${pageCount} \n`; // Add page range information if provided if (page_range) { let pageRangeText = 'Pages '; if (page_range.start !== undefined) { pageRangeText += `from ${page_range.start}`; } if (page_range.end !== undefined) { pageRangeText += `${page_range.start !== undefined ? ' to ' : ''}${page_range.end}`; } markdown += `📖 **Pages Processed:** ${pageRangeText} \n`; } else { markdown += `📖 **Pages Processed:** All pages \n`; } markdown += `📋 **Tables Found:** ${tables.length} \n\n`; markdown += `---\n\n`; if (tables.length === 0) { markdown += `## No Tables Found\n\n`; markdown += `No tables were detected in the document. This could be because:\n\n`; markdown += `- The document doesn't contain any tables\n`; markdown += `- The tables are represented as images and not as structured data\n`; markdown += `- The table detection algorithm couldn't recognize the tables\n\n`; } else { markdown += `## Extracted Tables\n\n`; tables.forEach((table, tableIndex) => { const pageNumber = table.pageIndex + 1; // Convert to 1-based for display markdown += `### Table ${tableIndex + 1} (Page ${pageNumber})\n\n`; // Create markdown table if (table.cells && table.cells.length > 0) { // Group cells by row const rowsMap = new Map(); table.cells.forEach(cell => { if (!rowsMap.has(cell.rowIndex)) { rowsMap.set(cell.rowIndex, []); } rowsMap.get(cell.rowIndex).push(cell); }); // Sort rows by index const rows = Array.from(rowsMap.entries()) .sort(([rowIndexA], [rowIndexB]) => rowIndexA - rowIndexB) .map(([_, cells]) => cells); // Find the maximum column index to determine table width const maxColIndex = Math.max(...table.cells.map(cell => cell.columnIndex)); // Create header row markdown += '| '; for (let i = 0; i <= maxColIndex; i++) { markdown += `Column ${i + 1} | `; } markdown += '\n'; // Create separator row markdown += '| '; for (let i = 0; i <= maxColIndex; i++) { markdown += '--- | '; } markdown += '\n'; // Create data rows rows.forEach(cells => { markdown += '| '; // Sort cells by column index const sortedCells = [...cells].sort((a, b) => a.columnIndex - b.columnIndex); // Fill in empty cells let currentColIndex = 0; sortedCells.forEach(cell => { // Fill in any missing columns before this cell while (currentColIndex < cell.columnIndex) { markdown += ' | '; currentColIndex++; } // Add this cell's content markdown += `${cell.text || ''} | `; currentColIndex++; }); // Fill in any remaining columns while (currentColIndex <= maxColIndex) { markdown += ' | '; currentColIndex++; } markdown += '\n'; }); markdown += '\n'; } else { markdown += `*Table structure detected but no cells were found*\n\n`; } }); } return { markdown }; } catch (error) { // Provide a more user-friendly error message return { markdown: `# Error Extracting Tables\n\nAn error occurred while trying to extract tables: ${error instanceof Error ? error.message : String(error)}\n\nPlease check your connection and try again.`, }; } }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PSPDFKit/nutrient-document-engine-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

extractTables.ts•6.51 KiB