Skip to main content
Glama
PSPDFKit

Nutrient Document Engine MCP Server

by PSPDFKit
extractTables.test.ts11.7 kB
import { beforeEach, describe, expect, it, vi } from 'vitest'; import { extractTables } from '../../../src/tools/extraction/extractTables.js'; import { createMockClient, MockedDocumentEngineClient } from '../../utils/mockTypes.js'; // Mock the logger to prevent console output during tests vi.mock('../../../src/utils/Logger.js', () => ({ logger: { error: vi.fn(), info: vi.fn(), debug: vi.fn(), warn: vi.fn(), }, })); describe('extract_tables', () => { let mockClient: MockedDocumentEngineClient; beforeEach(() => { mockClient = createMockClient(); }); describe('successful scenarios', () => { it('should extract tables from all pages when no page range is provided', async () => { // Mock document info response mockClient['fetch-document-info'].mockResolvedValue({ data: { data: { title: 'Test Document', pageCount: 2, }, }, }); // Mock build-document response with tables const mockBuildResponse = { data: { pages: [ { tables: [ { cells: [ { rowIndex: 0, columnIndex: 0, text: 'Header 1' }, { rowIndex: 0, columnIndex: 1, text: 'Header 2' }, { rowIndex: 1, columnIndex: 0, text: 'Data 1' }, { rowIndex: 1, columnIndex: 1, text: 'Data 2' }, ], }, ], }, { tables: [ { cells: [ { rowIndex: 0, columnIndex: 0, text: 'Table 2 Header' }, { rowIndex: 1, columnIndex: 0, text: 'Table 2 Data' }, ], }, ], }, ], }, }; // Set up the mock responses mockClient['build-document'].mockResolvedValue(mockBuildResponse); // Call the function const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123' }, }); expect(mockClient['build-document']).toHaveBeenCalledWith( {}, { parts: [ { document: { id: 'doc_123', }, }, ], output: { type: 'json-content', plainText: false, structuredText: false, keyValuePairs: false, tables: true, }, } ); // Verify the result contains the expected markdown expect(result.markdown).toContain('# Table Extraction'); expect(result.markdown).toContain('**Document:** Test Document'); expect(result.markdown).toContain('**Total Pages:** 2'); expect(result.markdown).toContain('**Pages Processed:** All pages'); expect(result.markdown).toContain('**Tables Found:** 2'); expect(result.markdown).toContain('### Table 1 (Page 1)'); expect(result.markdown).toContain('### Table 2 (Page 2)'); expect(result.markdown).toContain('Header 1'); expect(result.markdown).toContain('Header 2'); expect(result.markdown).toContain('Data 1'); expect(result.markdown).toContain('Data 2'); expect(result.markdown).toContain('Table 2 Header'); expect(result.markdown).toContain('Table 2 Data'); }); it('should extract tables from specified page range', async () => { // Mock document info response mockClient['fetch-document-info'].mockResolvedValue({ data: { data: { title: 'Test Document', pageCount: 3, }, }, }); // Mock build-document response with tables const mockBuildResponse = { data: { pages: [ { tables: [ { cells: [ { rowIndex: 0, columnIndex: 0, text: 'Page 2 Table Header' }, { rowIndex: 1, columnIndex: 0, text: 'Page 2 Table Data' }, ], }, ], }, ], }, }; // Set up the mock responses mockClient['build-document'].mockResolvedValue(mockBuildResponse); // Call the function with page_range const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123' }, page_range: { start: 2 }, }); // Verify the client was called with the correct parameters expect(mockClient['build-document']).toHaveBeenCalledWith( {}, { parts: [ { document: { id: 'doc_123', }, pages: { start: 2 }, }, ], output: { type: 'json-content', plainText: false, structuredText: false, keyValuePairs: false, tables: true, }, } ); // Verify the result contains the expected markdown expect(result.markdown).toContain('**Pages Processed:** Pages from 2'); expect(result.markdown).toContain('**Tables Found:** 1'); expect(result.markdown).toContain('### Table 1 (Page 1)'); expect(result.markdown).toContain('Page 2 Table Header'); expect(result.markdown).toContain('Page 2 Table Data'); }); it('should handle no tables found', async () => { // Mock document info response mockClient['fetch-document-info'].mockResolvedValue({ data: { data: { title: 'Test Document', pageCount: 1, }, }, }); // Mock build-document response with no tables const mockBuildResponse = { data: { pages: [ { tables: [], }, ], }, }; // Set up the mock responses mockClient['build-document'].mockResolvedValue(mockBuildResponse); // Call the function const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123' }, }); // Verify the result contains the expected markdown expect(result.markdown).toContain('**Tables Found:** 0'); expect(result.markdown).toContain('## No Tables Found'); expect(result.markdown).toContain('No tables were detected in the document.'); expect(result.markdown).toContain("- The document doesn't contain any tables"); }); it('should handle table structure with no cells', async () => { // Mock document info response mockClient['fetch-document-info'].mockResolvedValue({ data: { data: { title: 'Test Document', pageCount: 1, }, }, }); // Mock build-document response with table structure but no cells const mockBuildResponse = { data: { pages: [ { tables: [ { cells: [], }, ], }, ], }, }; // Set up the mock responses mockClient['build-document'].mockResolvedValue(mockBuildResponse); // Call the function const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123' }, }); // Verify the result contains the expected markdown expect(result.markdown).toContain('**Tables Found:** 1'); expect(result.markdown).toContain('### Table 1 (Page 1)'); expect(result.markdown).toContain('*Table structure detected but no cells were found*'); }); it('should work with layers', async () => { // Mock layer info response mockClient['fetch-document-layer-info'].mockResolvedValue({ data: { data: { title: 'Test Document', layer: 'review-layer', documentId: 'doc_123', createdAt: '2023-01-01T12:00:00Z', updatedAt: '2023-01-02T12:00:00Z', }, }, }); // Mock build-document response with tables const mockBuildResponse = { data: { pages: [ { tables: [ { cells: [ { rowIndex: 0, columnIndex: 0, text: 'Layer Header 1' }, { rowIndex: 0, columnIndex: 1, text: 'Layer Header 2' }, { rowIndex: 1, columnIndex: 0, text: 'Layer Data 1' }, { rowIndex: 1, columnIndex: 1, text: 'Layer Data 2' }, ], }, ], }, ], }, }; // Set up the mock responses mockClient['build-document'].mockResolvedValue(mockBuildResponse); // Call the function with layer const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123', layer: 'review-layer', }, }); // Verify the build-document was called with the correct parameters including layer expect(mockClient['build-document']).toHaveBeenCalledWith( {}, { parts: [ { document: { id: 'doc_123', layer: 'review-layer', }, }, ], output: { type: 'json-content', plainText: false, structuredText: false, keyValuePairs: false, tables: true, }, } ); // Verify the result contains layer information expect(result.markdown).toContain('# Table Extraction'); expect(result.markdown).toContain('**Document:** Test Document'); expect(result.markdown).toContain('**Document ID:** doc_123'); expect(result.markdown).toContain('**Layer:** review-layer'); expect(result.markdown).toContain('**Tables Found:** 1'); expect(result.markdown).toContain('Layer Header 1'); expect(result.markdown).toContain('Layer Data 1'); }); }); describe('error scenarios', () => { it('should handle API errors gracefully', async () => { // Mock fetch-document-info to throw an error const errorMessage = 'API Error: Document not found'; mockClient['fetch-document-info'].mockRejectedValue(new Error(errorMessage)); // Call the function const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123' }, }); // Verify the result contains the error message expect(result.markdown).toContain('# Error Extracting Tables'); expect(result.markdown).toContain( `An error occurred while trying to extract tables: ${errorMessage}` ); }); it('should handle build-document API errors', async () => { // Mock document info response mockClient['fetch-document-info'].mockResolvedValue({ data: { data: { title: 'Test Document', pageCount: 1, }, }, }); // Mock the build-document to throw an error const errorMessage = 'API Error: Table extraction failed'; mockClient['build-document'].mockRejectedValue(new Error(errorMessage)); // Call the function const result = await extractTables(mockClient, { document_fingerprint: { document_id: 'doc_123' }, }); // Verify the result contains the error message expect(result.markdown).toContain('# Error Extracting Tables'); expect(result.markdown).toContain( `An error occurred while trying to extract tables: ${errorMessage}` ); }); }); });

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PSPDFKit/nutrient-document-engine-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server