Skip to main content
Glama
PSPDFKit

Nutrient Document Engine MCP Server

by PSPDFKit
extractFormData.ts10.2 kB
import { DocumentEngineClient } from '../../api/Client.js'; import { FormFieldValue, FormFieldWithWidgets } from '../../api/DocumentEngineSchema.js'; import { z } from 'zod'; import { MCPToolOutput } from '../../mcpTools.js'; import { getDocumentFormFields, getDocumentFormFieldValues, getDocumentInfo, } from '../../api/DocumentLayerAbstraction.js'; import { DocumentFingerprintSchema } from '../schemas/DocumentFingerprintSchema.js'; import { formatBBox } from '../../utils/Common.js'; /** * Schema for extract_form_data tool */ export const ExtractFormDataSchema = { document_fingerprint: DocumentFingerprintSchema, field_names: z .array(z.string()) .optional() .describe('List of specific form field names to extract'), include_empty_fields: z .boolean() .default(true) .describe('Whether to include fields with empty values'), }; const ExtractFormDataInputSchema = z.object(ExtractFormDataSchema); type ExtractFormDataInput = z.infer<typeof ExtractFormDataInputSchema>; /** * Extract structured data from form fields in a document */ export async function extractFormData( client: DocumentEngineClient, params: ExtractFormDataInput ): Promise<MCPToolOutput> { try { // Validate input const validatedParams = ExtractFormDataInputSchema.parse(params); const { document_fingerprint, field_names, include_empty_fields = true } = validatedParams; const docInfo = await getDocumentInfo(client, document_fingerprint); const title = docInfo.title || 'Untitled Document'; const formFieldsResponse = await getDocumentFormFields(client, document_fingerprint); // Check if form fields data exists if (!formFieldsResponse.data.data) { throw new Error( `No form fields data returned for document ID: ${document_fingerprint.document_id}, Layer: ${document_fingerprint.layer}` ); } let formFields = formFieldsResponse.data.data; // Filter form fields if field_names is provided if (field_names && field_names.length > 0) { formFields = formFields.filter(field => { // Check if field.content exists and has a name property return field.content && field.content.name && field_names.includes(field.content.name); }); } // Get form field values (this is layer-aware) const formFieldValuesResponse = await getDocumentFormFieldValues(client, document_fingerprint); // Check if form field values data exists if (!formFieldValuesResponse.data.data) { throw new Error( `No form field values data returned for document ID: ${document_fingerprint.document_id}, Layer: ${document_fingerprint.layer}` ); } const formFieldValues = formFieldValuesResponse.data.data.formFieldValues || []; // Create a map of field names to their values for easy lookup const fieldValueMap = new Map<string, FormFieldValue>(); formFieldValues.forEach((fieldValue: FormFieldValue) => { if (fieldValue && fieldValue.name) { fieldValueMap.set(fieldValue.name, fieldValue); } }); // Filter out empty fields if include_empty_fields is false if (!include_empty_fields) { formFields = formFields.filter(field => { // Check if field.content exists and has a name property if (!field.content || !field.content.name) return false; // Get the field value from the map const fieldValue = fieldValueMap.get(field.content.name); // Check if the field has a value that's not null, undefined, or empty string if (!fieldValue) return false; if (Array.isArray(fieldValue.value)) { return fieldValue.value.length > 0 && fieldValue.value.some(v => v !== null && v !== ''); } return ( fieldValue.value !== null && fieldValue.value !== undefined && fieldValue.value !== '' ); }); } // Group form fields by type const fieldsByType: Record<string, FormFieldWithWidgets[]> = {}; formFields.forEach(field => { // Check if field.content exists and has a type property if (!field.content || !field.content.type) return; const fieldType = field.content.type; if (!fieldsByType[fieldType]) { fieldsByType[fieldType] = []; } fieldsByType[fieldType].push(field); }); // Build the markdown content let markdown = `# Form Data Extraction\n\n`; markdown += `📄 **Document:** ${title}\n`; markdown += `📄 **Document ID:** ${document_fingerprint.document_id} \n`; if (document_fingerprint.layer) { markdown += `🔖 **Layer:** ${document_fingerprint.layer} \n`; } markdown += `📝 **Total Form Fields:** ${formFields.length} \n\n`; if (formFields.length === 0) { markdown += `No form fields found in this document${field_names ? ' matching the specified field names' : ''}.`; return { markdown }; } markdown += `---\n\n`; // Add form fields by type for (const [type, fields] of Object.entries(fieldsByType)) { markdown += `## ${formatFieldType(type)} Fields (${fields.length})\n\n`; // Create a table for this field type markdown += `| Field Name | Value | Page | Required |\n`; markdown += `|------------|-------|------|----------|\n`; // Process fields in parallel to get page indices for (const field of fields) { // Check if field.content exists if (!field.content) continue; // Check if field.widgetAnnotations exists const widgetAnnotations = field.widgetAnnotations || []; let pageNumber = 'N/A'; if ( widgetAnnotations.length > 0 && widgetAnnotations[0]?.content?.pageIndex !== undefined ) { pageNumber = `${widgetAnnotations[0].content.pageIndex + 1}`; } // Check if field is required const required = field.content.flags && Array.isArray(field.content.flags) && field.content.flags.includes('required') ? '✓' : ''; // Get the field value from the map const fieldValue = field.content.name ? fieldValueMap.get(field.content.name) : undefined; const value = fieldValue && field.content.type ? formatFieldValue(fieldValue.value) : 'Empty'; markdown += `| ${field.content.name || 'Unnamed Field'} | ${value} | ${pageNumber} | ${required} |\n`; } markdown += `\n`; } // Add detailed information section markdown += `## Detailed Information\n\n`; markdown += `For each form field, the following details are available:\n\n`; await Promise.all( formFields.map(async field => { // Check if field.content exists if (!field.content) { markdown += `### Unnamed Field\n\n`; markdown += `- **Type:** Unknown\n`; markdown += `- **Value:** Empty\n`; markdown += `- **Page:** N/A\n`; return; } // Get the field value from the map const fieldValue = field.content.name ? fieldValueMap.get(field.content.name) : undefined; markdown += `### ${field.content.name || 'Unnamed Field'}\n\n`; markdown += `- **Type:** ${field.content.type ? formatFieldType(field.content.type) : 'Unknown'}\n`; // Use the field value from the map if available const value = fieldValue && field.content.type ? formatFieldValue(fieldValue.value) : 'Empty'; markdown += `- **Value:** ${value}\n`; // Check if field.widgetAnnotations exists const widgetAnnotations = field.widgetAnnotations || []; let pageIndex; if ( widgetAnnotations.length > 0 && widgetAnnotations[0]?.content?.pageIndex !== undefined ) { pageIndex = widgetAnnotations[0].content.pageIndex; } markdown += `- **Page:** ${pageIndex !== undefined ? pageIndex + 1 : 'N/A'}\n`; // Check if field is required if ( field.content.flags && Array.isArray(field.content.flags) && field.content.flags.includes('required') ) { markdown += `- **Required:** Yes\n`; } if (widgetAnnotations.length > 0 && widgetAnnotations[0]?.content) { const widget = widgetAnnotations[0].content; if (widget.bbox && widget.bbox.length === 4 && widget.pageIndex !== undefined) { markdown += `- **Location:** Page ${widget.pageIndex + 1}, coordinates ${formatBBox(widget.bbox)}\n`; } } // Add options if available for choice fields if ('options' in field && field.options) { const options = field.options as string[] | unknown; markdown += `- **Options:** ${Array.isArray(options) ? options.join(', ') : JSON.stringify(options)}\n`; } markdown += `\n`; }) ); return { markdown }; } catch (error) { // Provide a more user-friendly error message return { markdown: `# Error Extracting Form Data\n\nAn error occurred while trying to extract form data: ${error instanceof Error ? error.message : String(error)}\n\nPlease check your connection and try again.`, }; } } /** * Format field type for display */ function formatFieldType(type: string): string { // Remove prefix if present and capitalize const cleanType = type.replace(/^pspdfkit\//, ''); return cleanType.charAt(0).toUpperCase() + cleanType.slice(1); } /** * Format field value for display */ // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Form field values can be any type function formatFieldValue(value: any): string { if (value === null || value === undefined || value === '') { return 'Empty'; } if (typeof value === 'boolean') { return value ? '✓' : '✗'; } if (Array.isArray(value)) { return value.join(', '); } if (typeof value === 'object') { return JSON.stringify(value); } // For text values, truncate if too long if (typeof value === 'string' && value.length > 50) { return `${value.substring(0, 47)}...`; } return String(value); }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PSPDFKit/nutrient-document-engine-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server