PDF Reader MCP Server

Overview Schema Related Servers Score Discussions

readPdf.test.ts

readPdf.test.ts•43.8 KiB

/**
 * Integration tests for the `readPdf` tool handler.
 *
 * `pdfjs-dist` and `node:fs/promises` are replaced with mocks, and the handler
 * is imported dynamically in `beforeAll` so that the import happens after the
 * mocks below are defined. Each test drives the handler through a thin wrapper
 * that validates input with the Zod schema and converts tool-level error
 * results into thrown `PdfError`s.
 */
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import { ErrorCode, PdfError } from '../../src/utils/errors.js';
import * as pathUtils from '../../src/utils/pathUtils.js'; // Import the module itself for spying
import { resolvePath } from '../../src/utils/pathUtils.js';

// Define a type for the expected structure after JSON.parse
interface ExpectedResultType {
  results: { source: string; success: boolean; data?: object; error?: string }[];
}

// --- Mocking pdfjs-dist ---
const mockGetMetadata = vi.fn();
const mockGetPage = vi.fn();
const mockGetDocument = vi.fn();
const mockReadFile = vi.fn();

vi.mock('pdfjs-dist/legacy/build/pdf.mjs', () => ({
  getDocument: mockGetDocument,
  OPS: {
    paintImageXObject: 89,
    paintXObject: 92,
  },
}));

vi.mock('node:fs/promises', () => ({
  default: {
    readFile: mockReadFile,
  },
  readFile: mockReadFile,
}));

// Dynamically import the handler *once* after mocks are defined
/**
 * One content part returned by the wrapped handler.
 *
 * Text parts carry `text`; image parts produced by the wrapper below carry
 * `data` and `mimeType` instead, so those fields are declared as optional to
 * match what the image tests actually read.
 */
interface HandlerResultContent {
  type: string;
  text: string;
  data?: unknown;
  mimeType?: unknown;
}

let handler: (args: unknown) => Promise<{ content: HandlerResultContent[] }>;
let readPdfSchema: z.ZodType<unknown>;

beforeAll(async () => {
  // Import the readPdf tool - the new SDK uses a builder pattern
  const { readPdf } = await import('../../src/handlers/readPdf.js');
  const { readPdfArgsSchema } = await import('../../src/schemas/readPdf.js');
  readPdfSchema = readPdfArgsSchema;

  // The tool is created with .handler() which returns a function
  // We need to wrap it to match the expected interface
  handler = async (args: unknown) => {
    // Validate input with Zod first (as the server would do)
    let parsedArgs: unknown;
    try {
      parsedArgs = readPdfSchema.parse(args);
    } catch (error: unknown) {
      if (error instanceof z.ZodError) {
        // Each issue is rendered as "<dotted.path> (<message>)"; the regex
        // assertions in the validation tests rely on this exact shape.
        throw new PdfError(
          ErrorCode.InvalidParams,
          `Invalid arguments: ${error.issues.map((e: z.ZodIssue) => `${e.path.join('.')} (${e.message})`).join(', ')}`
        );
      }
      throw error;
    }

    const result = await readPdf.handler({ input: parsedArgs, ctx: {} as unknown });

    // Handle toolError case - it returns { content: [...], isError: true }
    if (result && typeof result === 'object' && 'isError' in result && result.isError) {
      throw new PdfError(ErrorCode.InvalidRequest, (result as { content: { text: string }[] }).content[0].text);
    }

    // Convert array result to expected format
    if (Array.isArray(result)) {
      return {
        content: result.map((item) => {
          if ('text' in item) return { type: 'text', text: item.text };
          if ('data' in item) return { type: 'image', data: item.data, mimeType: item.mimeType };
          return item;
        }),
      };
    }

    return result as { content: HandlerResultContent[] };
  };
});

// Renamed describe block as it now only tests the handler
describe('handleReadPdfFunc Integration Tests', () => {
  beforeEach(() => {
    vi.resetAllMocks();
    // Reset mocks for pathUtils if we spy on it
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation((p) => p); // Simple mock for resolvePath

    mockReadFile.mockResolvedValue(Buffer.from('mock pdf content'));

    // Default document: three pages, shared metadata/page mocks.
    const mockDocumentAPI = {
      numPages: 3,
      getMetadata: mockGetMetadata,
      getPage: mockGetPage,
    };
    const mockLoadingTaskAPI = { promise: Promise.resolve(mockDocumentAPI) };
    mockGetDocument.mockReturnValue(mockLoadingTaskAPI);
    mockGetMetadata.mockResolvedValue({
      info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
      metadata: {
        _metadataMap: new Map([['dc:format', 'application/pdf']]),
        get(key: string) {
          return this._metadataMap.get(key);
        },
        has(key: string) {
          return this._metadataMap.has(key);
        },
        getAll() {
          return Object.fromEntries(this._metadataMap);
        },
      },
    });

    // Removed unnecessary async and eslint-disable comment
    mockGetPage.mockImplementation((pageNum: number) => {
      if (pageNum > 0 && pageNum <= mockDocumentAPI.numPages) {
        return {
          getTextContent: vi.fn().mockResolvedValueOnce({
            items: [
              {
                str: `Mock page text ${String(pageNum)}`,
                transform: [1, 0, 0, 1, 0, 100 + pageNum * 10],
              },
            ],
          }),
          getOperatorList: vi.fn().mockResolvedValue({
            fnArray: [],
            argsArray: [],
          }),
          objs: {
            get: vi.fn(),
          },
        };
      }
      throw new Error(`Mock getPage error: Invalid page number ${String(pageNum)}`);
    });
  });

  // Removed unit tests for parsePageRanges

  // --- Integration Tests for handleReadPdfFunc ---

  it('should successfully read full text, metadata, and page count for a local file', async () => {
    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_metadata: true,
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            full_text: 'Mock page text 1\n\nMock page text 2\n\nMock page text 3',
          },
        },
      ],
    };

    expect(mockReadFile).toHaveBeenCalledWith(resolvePath('test.pdf'));
    expect(mockGetDocument).toHaveBeenCalledWith(new Uint8Array(Buffer.from('mock pdf content')));
    expect(mockGetMetadata).toHaveBeenCalled();
    expect(mockGetPage).toHaveBeenCalledTimes(3);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(result.content[0].type).toBe('text');
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should successfully read specific pages for a local file', async () => {
    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 3] }],
      include_metadata: false,
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            num_pages: 3,
            page_texts: [
              { page: 1, text: 'Mock page text 1' },
              { page: 3, text: 'Mock page text 3' },
            ],
          },
        },
      ],
    };

    expect(mockGetPage).toHaveBeenCalledTimes(2);
    expect(mockGetPage).toHaveBeenCalledWith(1);
    expect(mockGetPage).toHaveBeenCalledWith(3);
    expect(mockReadFile).toHaveBeenCalledWith(resolvePath('test.pdf'));
    expect(mockGetDocument).toHaveBeenCalledWith(new Uint8Array(Buffer.from('mock pdf content')));
    expect(mockGetMetadata).not.toHaveBeenCalled();

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(result.content[0].type).toBe('text');
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should successfully read specific pages using string range', async () => {
    const args = {
      sources: [{ path: 'test.pdf', pages: '1,3-3' }],
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [
              { page: 1, text: 'Mock page text 1' },
              { page: 3, text: 'Mock page text 3' },
            ],
          },
        },
      ],
    };

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should successfully read metadata only for a URL', async () => {
    const testUrl = 'http://example.com/test.pdf';
    const args = {
      sources: [{ url: testUrl }],
      include_full_text: false,
      include_metadata: true,
      include_page_count: false,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: testUrl,
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
          },
        },
      ],
    };

    expect(mockReadFile).not.toHaveBeenCalled();
    expect(mockGetDocument).toHaveBeenCalledWith({ url: testUrl });
    expect(mockGetMetadata).toHaveBeenCalled();
    expect(mockGetPage).not.toHaveBeenCalled();

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(result.content[0].type).toBe('text');
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should handle multiple sources with different options', async () => {
    const urlSource = 'http://example.com/another.pdf';
    const args = {
      sources: [{ path: 'local.pdf', pages: [1] }, { url: urlSource }],
      include_full_text: true,
      include_metadata: true,
      include_page_count: true,
    };

    // Setup mocks for the second source (URL)
    const secondMockGetPage = vi.fn().mockImplementation((pageNum: number) => {
      // Removed unnecessary async
      if (pageNum === 1)
        return {
          getTextContent: vi.fn().mockResolvedValue({
            items: [{ str: 'URL Mock page text 1', transform: [1, 0, 0, 1, 0, 200] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      if (pageNum === 2)
        return {
          getTextContent: vi.fn().mockResolvedValue({
            items: [{ str: 'URL Mock page text 2', transform: [1, 0, 0, 1, 0, 210] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      throw new Error(`Mock getPage error: Invalid page number ${String(pageNum)}`);
    });
    const secondMockGetMetadata = vi.fn().mockResolvedValue({
      // Separate metadata mock if needed
      info: { Title: 'URL PDF' },
      metadata: { getAll: () => ({ 'dc:creator': 'URL Author' }) },
    });
    const secondMockDocumentAPI = {
      numPages: 2,
      getMetadata: secondMockGetMetadata, // Use separate metadata mock
      getPage: secondMockGetPage,
    };
    const secondLoadingTaskAPI = { promise: Promise.resolve(secondMockDocumentAPI) };

    // Reset getDocument mock before setting implementation
    mockGetDocument.mockReset();
    // Mock getDocument based on input source
    mockGetDocument.mockImplementation((source: Buffer | { url: string }) => {
      // Check if source is not a Buffer and has the matching url property
      if (typeof source === 'object' && !Buffer.isBuffer(source) && source.url === urlSource) {
        return secondLoadingTaskAPI;
      }
      // Default mock for path-based source (local.pdf)
      const defaultMockDocumentAPI = {
        numPages: 3,
        getMetadata: mockGetMetadata, // Use original metadata mock
        getPage: mockGetPage, // Use original page mock
      };
      return { promise: Promise.resolve(defaultMockDocumentAPI) };
    });

    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'local.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [{ page: 1, text: 'Mock page text 1' }],
          },
        },
        {
          source: urlSource,
          success: true,
          data: {
            // Use the metadata returned by secondMockGetMetadata
            info: { Title: 'URL PDF' },
            metadata: { 'dc:creator': 'URL Author' },
            num_pages: 2,
            full_text: 'URL Mock page text 1\n\nURL Mock page text 2',
          },
        },
      ],
    };

    expect(mockReadFile).toHaveBeenCalledOnce();
    expect(mockReadFile).toHaveBeenCalledWith(resolvePath('local.pdf'));
    expect(mockGetDocument).toHaveBeenCalledTimes(2);
    expect(mockGetDocument).toHaveBeenCalledWith(new Uint8Array(Buffer.from('mock pdf content')));
    expect(mockGetDocument).toHaveBeenCalledWith({ url: urlSource });
    expect(mockGetPage).toHaveBeenCalledTimes(1); // Should be called once for local.pdf page 1
    expect(secondMockGetPage).toHaveBeenCalledTimes(2);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  // --- Error Handling Tests ---

  it('should throw error if local file not found', async () => {
    const error = new Error('Mock ENOENT') as NodeJS.ErrnoException;
    error.code = 'ENOENT';
    mockReadFile.mockRejectedValue(error);

    const args = { sources: [{ path: 'nonexistent.pdf' }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow("File not found at 'nonexistent.pdf'");
  });

  it('should throw error if pdfjs fails to load document', async () => {
    const loadError = new Error('Mock PDF loading failed');
    const failingLoadingTask = { promise: Promise.reject(loadError) };
    mockGetDocument.mockReturnValue(failingLoadingTask);

    const args = { sources: [{ path: 'bad.pdf' }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Mock PDF loading failed');
  });

  it('should throw PdfError for invalid input arguments (Zod error)', async () => {
    const args = { sources: [{ path: 'test.pdf' }], include_full_text: 'yes' };
    await expect(handler(args)).rejects.toThrow(PdfError);
    // Zod 4 format: "Invalid input: expected boolean, received string"
    await expect(handler(args)).rejects.toThrow(/include_full_text.*boolean.*string/i);
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Test case for the initial Zod parse failure
  it('should throw PdfError if top-level argument parsing fails', async () => {
    const invalidArgs = { invalid_prop: true }; // Completely wrong structure
    await expect(handler(invalidArgs)).rejects.toThrow(PdfError);
    // Zod 4 format: "Invalid input: expected array, received undefined"
    await expect(handler(invalidArgs)).rejects.toThrow(/sources.*array/i);
    await expect(handler(invalidArgs)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Updated test: Expect Zod validation to throw PdfError directly
  it('should throw PdfError for invalid page specification string (Zod)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: '1,abc,3' }] };
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Parentheses are escaped so they match the literal "(message)" wrapper
      /Invalid arguments: sources.0.pages \(Page string must contain only numbers, commas, and hyphens.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Updated test: Expect Zod validation to throw PdfError directly
  it('should throw PdfError for invalid page specification array (non-positive - Zod)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: [1, 0, 3] }] };
    await expect(handler(args)).rejects.toThrow(PdfError);
    // Zod 4 format: "Too small: expected number to be >=1"
    await expect(handler(args)).rejects.toThrow(/sources\.0\.pages\.1.*>=1/i);
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Test case for resolvePath failure within the catch block
  it('should throw error if resolvePath fails', async () => {
    const resolveError = new Error('Mock resolvePath failed');
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation(() => {
      throw resolveError;
    });

    const args = { sources: [{ path: 'some/path' }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Mock resolvePath failed');
  });

  // Test case for the final catch block with a generic error
  it('should throw error when generic errors during processing', async () => {
    const genericError = new Error('Something unexpected happened');
    mockReadFile.mockRejectedValue(genericError); // Simulate error after path resolution

    const args = { sources: [{ path: 'generic/error/path' }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Something unexpected happened');
  });

  // Test case for the final catch block with a non-Error object
  it('should throw error with non-Error exceptions during processing', async () => {
    const nonError = { message: 'Just an object', code: 'UNEXPECTED' };
    mockReadFile.mockRejectedValue(nonError); // Simulate error after path resolution

    const args = { sources: [{ path: 'non/error/path' }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('non/error/path');
  });

  it('should include warnings for requested pages exceeding total pages', async () => {
    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 4, 5] }],
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [{ page: 1, text: 'Mock page text 1' }],
            warnings: ['Requested page numbers 4, 5 exceed total pages (3).'],
          },
        },
      ],
    };

    expect(mockGetPage).toHaveBeenCalledTimes(1);
    expect(mockGetPage).toHaveBeenCalledWith(1);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should handle errors during page processing gracefully when specific pages are requested', async () => {
    // Removed unnecessary async and eslint-disable comment
    mockGetPage.mockImplementation((pageNum: number) => {
      if (pageNum === 1)
        return {
          getTextContent: vi.fn().mockResolvedValueOnce({
            items: [{ str: `Mock page text 1`, transform: [1, 0, 0, 1, 0, 100] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      if (pageNum === 2) throw new Error('Failed to get page 2');
      if (pageNum === 3)
        return {
          getTextContent: vi.fn().mockResolvedValueOnce({
            items: [{ str: `Mock page text 3`, transform: [1, 0, 0, 1, 0, 120] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      throw new Error(`Mock getPage error: Invalid page number ${String(pageNum)}`);
    });

    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 2, 3] }],
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [
              { page: 1, text: 'Mock page text 1' },
              { page: 2, text: 'Error processing page: Failed to get page 2' },
              { page: 3, text: 'Mock page text 3' },
            ],
          },
        },
      ],
    };

    expect(mockGetPage).toHaveBeenCalledTimes(3);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  // --- Additional Coverage Tests ---

  it('should throw error if pdfjs fails to load document from URL', async () => {
    const testUrl = 'http://example.com/bad-url.pdf';
    const loadError = new Error('Mock URL PDF loading failed');
    const failingLoadingTask = { promise: Promise.reject(loadError) };

    // Ensure getDocument is mocked specifically for this URL
    mockGetDocument.mockReset();
    mockGetDocument.mockImplementation((source: unknown) => {
      if (
        typeof source === 'object' &&
        source !== null &&
        Object.hasOwn(source, 'url') &&
        typeof (source as { url?: unknown }).url === 'string' &&
        (source as { url: string }).url === testUrl
      ) {
        return failingLoadingTask;
      }
      const mockDocumentAPI = { numPages: 1, getMetadata: vi.fn(), getPage: vi.fn() };
      return { promise: Promise.resolve(mockDocumentAPI) };
    });

    const args = { sources: [{ url: testUrl }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Mock URL PDF loading failed');
  });

  it('should not include page count when include_page_count is false', async () => {
    const args = {
      sources: [{ path: 'test.pdf' }],
      include_page_count: false, // Explicitly false
      include_metadata: false, // Keep it simple
      include_full_text: false,
    };
    const result = await handler(args);

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      const parsedResult = JSON.parse(result.content[0].text) as ExpectedResultType;
      expect(parsedResult.results[0]).toBeDefined();
      if (parsedResult.results[0]?.data) {
        expect(parsedResult.results[0].success).toBe(true);
        expect(parsedResult.results[0].data).not.toHaveProperty('num_pages');
        expect(parsedResult.results[0].data).not.toHaveProperty('metadata');
        expect(parsedResult.results[0].data).not.toHaveProperty('info');
      }
    } else {
      expect.fail('result.content[0] was undefined');
    }
    expect(mockGetMetadata).not.toHaveBeenCalled(); // Because include_metadata is false
  });

  // NOTE: despite the title, resolvePath is mocked to succeed here (it returns
  // the path unchanged); only readFile rejects with ENOENT.
  it('should handle ENOENT error where resolvePath also fails in catch block', async () => {
    const enoentError = new Error('Mock ENOENT') as NodeJS.ErrnoException;
    enoentError.code = 'ENOENT';
    const targetPath = 'enoent/and/resolve/fails.pdf';

    // Mock resolvePath to return path as-is
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation((p) => p);
    mockReadFile.mockRejectedValue(enoentError);

    const args = { sources: [{ path: targetPath }] };
    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(`File not found at '${targetPath}'`);

    // Ensure readFile was called with the path that resolvePath returned
    expect(mockReadFile).toHaveBeenCalledWith(targetPath);
  });

  // --- Additional Error Coverage Tests ---

  it('should throw error for invalid page range string (e.g., 5-3)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: '1,5-3,7' }] };
    // When page parsing fails, it should throw
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(/Invalid page range values: 5-3/);
  });

  it('should throw PdfError for invalid page number string (e.g., 1,a,3)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: '1,a,3' }] };
    // Zod catches this first due to refine
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Parentheses are escaped so they match the literal "(message)" wrapper
      /Invalid arguments: sources.0.pages \(Page string must contain only numbers, commas, and hyphens.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Test Zod refinement for path/url exclusivity
  it('should throw PdfError if source has both path and url', async () => {
    const args = { sources: [{ path: 'test.pdf', url: 'http://example.com' }] };
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Parentheses are escaped so they match the literal "(message)" wrapper
      /Invalid arguments: sources.0 \(Each source must have either 'path' or 'url', but not both.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  it('should throw PdfError if source has neither path nor url', async () => {
    const args = { sources: [{ pages: [1] }] }; // Missing path and url
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Parentheses are escaped so they match the literal "(message)" wrapper
      /Invalid arguments: sources.0 \(Each source must have either 'path' or 'url', but not both.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  it.skip('should handle non-Error exceptions during processing', async () => {
    // TODO: Fix this test - spy from previous test is persisting in Bun's test runner
    // Reset all mocks to ensure clean state
    vi.clearAllMocks();
    vi.spyOn(pathUtils, 'resolvePath')
      .mockClear()
      .mockImplementation((p) => p);

    // Reset mock functions
    mockReadFile.mockResolvedValue(Buffer.from('mock pdf content'));

    // Mock to throw non-Error at processSingleSource level
    // We need to throw something that's not Error or PdfError
    mockGetDocument.mockReset();
    mockGetDocument.mockImplementation(() => {
      throw { custom: 'object error' }; // Non-Error, non-PdfError
    });

    const args = { sources: [{ path: 'test.pdf' }] };
    const result = await handler(args);

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      const parsedResult = JSON.parse(result.content[0].text) as ExpectedResultType;
      expect(parsedResult.results[0]).toBeDefined();
      if (parsedResult.results[0]) {
        expect(parsedResult.results[0].success).toBe(false);
        expect(parsedResult.results[0].error).toContain('Unknown error');
        expect(parsedResult.results[0].error).toContain('custom');
      }
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it.skip('should extract images when include_images is true with full text', async () => {
    // TODO: Fix this test - Bun test runner handles image content differently
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89], // OPS.paintImageXObject value
        argsArray: [['img1', [1, 0, 0, 1, 0, 50]]],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback: (data: unknown) => void) => {
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Should have content parts: summary text + images
    expect(result.content.length).toBeGreaterThanOrEqual(2);

    // First part should be summary
    expect(result.content[0].type).toBe('text');
    expect(result.content[0].text).toBeDefined();

    // Check JSON format includes image_info
    const parsed = JSON.parse(result.content[0].text as string);
    expect(parsed.results[0].data.image_info).toBeDefined();

    // Should have image parts
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBeGreaterThan(0);
    expect(imageParts[0].data).toBeDefined();
    expect(imageParts[0].mimeType).toBeDefined();
  });

  it.skip('should extract images with page_texts preserving order', async () => {
    // TODO: Fix this test - Bun test runner handles image content differently
    const mockImageData = {
      width: 50,
      height: 50,
      data: new Uint8Array([128, 128, 128]),
      kind: 1,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'Page text', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89],
        argsArray: [['img1', [1, 0, 0, 1, 0, 50]]],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback: (data: unknown) => void) => {
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 2,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 2] }],
      include_images: true,
    };
    const result = await handler(args);

    // Should have: summary + (page1_images + page2_images)
    expect(result.content.length).toBeGreaterThan(1);

    // Check image parts exist
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(2); // One image per page
  });

  it('should handle image extraction timeout when callback never fires', async () => {
    // Reset resolvePath mock to not interfere
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation((p) => p);

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89], // OPS.paintImageXObject
        argsArray: [['hanging_img']],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, _callback?: (data: unknown) => void) => {
          // Return undefined for sync call, never call callback for async
          return undefined;
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };

    // Should complete despite hanging callback (timeout after 10 seconds)
    const result = await handler(args);
    expect(result.content.length).toBeGreaterThanOrEqual(1);
    expect(result.content[0].type).toBe('text');

    // Image parts should be empty or missing since extraction timed out
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(0);
  }, 15000); // Set test timeout to 15 seconds (10s timeout + buffer)

  it('should extract different image formats (grayscale, rgb, rgba)', async () => {
    const mockGrayscaleImage = {
      width: 50,
      height: 50,
      data: new Uint8Array([128]),
      kind: 1, // grayscale
    };
    const mockRGBImage = {
      width: 100,
      height: 100,
      data: new Uint8Array([255, 0, 0]),
      kind: 2, // RGB
    };
    const mockRGBAImage = {
      width: 75,
      height: 75,
      data: new Uint8Array([0, 255, 0, 255]),
      kind: 3, // RGBA
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89, 89, 89], // Three images
        argsArray: [['img1'], ['img2'], ['img3']],
      }),
      objs: {
        get: vi.fn().mockImplementation((name: string, callback: (data: unknown) => void) => {
          if (name === 'img1') callback(mockGrayscaleImage);
          else if (name === 'img2') callback(mockRGBImage);
          else if (name === 'img3') callback(mockRGBAImage);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Check JSON includes image info
    const parsed = JSON.parse(result.content[0].text as string);
    expect(parsed.results[0].data.image_info).toHaveLength(3);
    expect(parsed.results[0].data.image_info[0].format).toBe('grayscale');
    expect(parsed.results[0].data.image_info[1].format).toBe('rgb');
    expect(parsed.results[0].data.image_info[2].format).toBe('rgba');

    // Check image parts with correct MIME types (all images are now PNG)
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(3);
    // All images should be PNG now
    expect(imageParts[0].mimeType).toBe('image/png');
    expect(imageParts[1].mimeType).toBe('image/png');
    expect(imageParts[2].mimeType).toBe('image/png');
  });

  it('should skip images with missing or invalid data', async () => {
    const mockValidImage = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89, 89, 89, 89], // Four images
        argsArray: [['valid_img'], ['no_data'], ['no_width'], ['invalid']],
      }),
      objs: {
        get: vi.fn().mockImplementation((name: string, callback: (data: unknown) => void) => {
          if (name === 'valid_img') {
            callback(mockValidImage);
          } else if (name === 'no_data') {
            callback({ width: 100, height: 50, kind: 2 }); // Missing data
          } else if (name === 'no_width') {
            callback({ data: new Uint8Array([0]), height: 50, kind: 2 }); // Missing width
          } else if (name === 'invalid') {
            callback(null); // Invalid data
          }
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Only valid image should be extracted
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(1);

    const parsed = JSON.parse(result.content[0].text as string);
    expect(parsed.results[0].data.image_info).toHaveLength(1);
  });

  it('should preserve Y-coordinate ordering for mixed text and images', async () => {
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({
        items: [
          { str: 'Top text', transform: [1, 0, 0, 1, 0, 200] }, // Y=200 (top)
          { str: 'Bottom text', transform: [1, 0, 0, 1, 0, 50] }, // Y=50 (bottom)
        ],
      }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89], // One image
        argsArray: [['img1', [1, 0, 0, 1, 0, 150]]], // Y=150 (middle) - transform in args
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback: (data: unknown) => void) => {
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Content order should be: summary, top_text, image, bottom_text
    // (sorted by Y-coordinate descending = top to bottom)
    expect(result.content.length).toBe(4);
    expect(result.content[0].type).toBe('text'); // Summary JSON
    expect(result.content[1].type).toBe('text'); // Top text (Y=200)
    expect(result.content[1].text).toBe('Top text');
    expect(result.content[2].type).toBe('image'); // Image (Y=150)
    expect(result.content[3].type).toBe('text'); // Bottom text (Y=50)
    expect(result.content[3].text).toBe('Bottom text');
  });

  it('should extract images from commonObjs with g_ prefix', async () => {
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89],
        argsArray: [['g_image1']], // Image with g_ prefix
      }),
      objs: {
        get: vi.fn().mockReturnValue(undefined), // Not in objs
      },
      commonObjs: {
        get: vi.fn().mockReturnValue(mockImageData), // Found in commonObjs
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Should have extracted the image from commonObjs
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(1);
    expect(mockPage.commonObjs.get).toHaveBeenCalledWith('g_image1');
  });

  it('should use sync objs.get when image is already loaded', async () => {
    const
mockImageData = { width: 100, height: 50, data: new Uint8Array([255, 0, 0]), kind: 2, }; const mockPage = { getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }), getOperatorList: vi.fn().mockResolvedValue({ fnArray: [89], argsArray: [['img1']], }), objs: { get: vi.fn().mockImplementation((_name: string, callback?: (data: unknown) => void) => { // Sync call - return immediately if (!callback) { return mockImageData; } // Should not reach async callback callback(mockImageData); }), }, }; mockGetDocument.mockReset(); mockGetDocument.mockReturnValue({ promise: Promise.resolve({ numPages: 1, getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }), getPage: vi.fn().mockResolvedValue(mockPage), }), }); const args = { sources: [{ path: 'test.pdf' }], include_full_text: true, include_images: true, }; const result = await handler(args); // Should have extracted the image synchronously const imageParts = result.content.filter((c) => c.type === 'image'); expect(imageParts.length).toBe(1); // Verify sync call was made (without callback parameter) expect(mockPage.objs.get).toHaveBeenCalled(); }); it('should fallback to async when sync get returns undefined', async () => { const mockImageData = { width: 100, height: 50, data: new Uint8Array([255, 0, 0]), kind: 2, }; const mockPage = { getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }), getOperatorList: vi.fn().mockResolvedValue({ fnArray: [89], argsArray: [['img1']], }), objs: { get: vi.fn().mockImplementation((_name: string, callback?: (data: unknown) => void) => { // Sync call returns undefined if (!callback) { return undefined; } // Async callback provides the data callback(mockImageData); }), }, }; mockGetDocument.mockReset(); mockGetDocument.mockReturnValue({ promise: Promise.resolve({ numPages: 1, getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }), getPage: vi.fn().mockResolvedValue(mockPage), 
}), }); const args = { sources: [{ path: 'test.pdf' }], include_full_text: true, include_images: true, }; const result = await handler(args); // Should have extracted the image via async callback const imageParts = result.content.filter((c) => c.type === 'image'); expect(imageParts.length).toBe(1); }); it('should handle Error (not PdfError) during processing', async () => { // Mock getDocument to throw a regular Error (not PdfError) mockGetDocument.mockReturnValue({ promise: Promise.reject(new Error('Regular error message')), }); const args = { sources: [{ path: 'error.pdf' }] }; // When all sources fail, handler now throws toolError await expect(handler(args)).rejects.toThrow(PdfError); await expect(handler(args)).rejects.toThrow('Regular error message'); }); }); // End top-level describe

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SylphxAI/pdf-reader-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

readPdf.test.ts•43.8 KiB