// Page chrome captured along with the file (not part of the test source): "Skip to main content", "Glama".
// File: readPdf.test.ts (44.8 kB)
/**
 * Integration tests for the `readPdf` tool handler.
 *
 * `pdfjs-dist` and `node:fs/promises` are replaced with mocks so that no real
 * PDF parsing or file-system access happens. The handler module is imported
 * dynamically inside `beforeAll`, i.e. after the mocks have been registered.
 */
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import { ErrorCode, PdfError } from '../../src/utils/errors.js';
import * as pathUtils from '../../src/utils/pathUtils.js'; // Import the module itself for spying
import { resolvePath } from '../../src/utils/pathUtils.js';

// Define a type for the expected structure after JSON.parse
interface ExpectedResultType {
  results: { source: string; success: boolean; data?: object; error?: string }[];
}

// --- Mocking pdfjs-dist ---
const mockGetMetadata = vi.fn();
const mockGetPage = vi.fn();
const mockGetDocument = vi.fn();
const mockReadFile = vi.fn();

vi.mock('pdfjs-dist/legacy/build/pdf.mjs', () => ({
  getDocument: mockGetDocument,
  OPS: {
    paintImageXObject: 89,
    paintXObject: 92,
  },
}));

vi.mock('node:fs/promises', () => ({
  default: {
    readFile: mockReadFile,
  },
  readFile: mockReadFile,
}));

// Dynamically import the handler *once* after mocks are defined
// Define a more specific type for the handler's return value content
interface HandlerResultContent {
  type: string;
  text: string;
  // Image parts produced by the wrapper below carry these two fields
  // (and, at runtime, no `text`); the image tests read them directly.
  data?: string;
  mimeType?: string;
}

let handler: (args: unknown) => Promise<{ content: HandlerResultContent[] }>;
let readPdfSchema: z.ZodType<unknown>;

beforeAll(async () => {
  // Import the readPdf tool - the new SDK uses a builder pattern
  const { readPdf } = await import('../../src/handlers/readPdf.js');
  const { readPdfArgsSchema } = await import('../../src/schemas/readPdf.js');
  readPdfSchema = readPdfArgsSchema;

  // The tool is created with .handler() which returns a function
  // We need to wrap it to match the expected interface:
  //  - schema failures are rethrown as PdfError(InvalidParams)
  //  - `isError` results are rethrown as PdfError(InvalidRequest)
  //  - array results are normalised to `{ content: [...] }`
  handler = async (args: unknown) => {
    // Validate input with Zod first (as the server would do)
    let parsedArgs: unknown;
    try {
      parsedArgs = readPdfSchema.parse(args);
    } catch (error: unknown) {
      if (error instanceof z.ZodError) {
        throw new PdfError(
          ErrorCode.InvalidParams,
          `Invalid arguments: ${error.issues.map((e: z.ZodIssue) => `${e.path.join('.')} (${e.message})`).join(', ')}`
        );
      }
      throw error;
    }

    const result = await readPdf.handler({ input: parsedArgs, ctx: {} as unknown });

    // Handle toolError case - it returns { content: [...], isError: true }
    if (result && typeof result === 'object' && 'isError' in result && result.isError) {
      throw new PdfError(ErrorCode.InvalidRequest, (result as { content: { text: string }[] }).content[0].text);
    }

    // Convert array result to expected format
    if (Array.isArray(result)) {
      return {
        content: result.map((item) => {
          if ('text' in item) return { type: 'text', text: item.text };
          if ('data' in item) return { type: 'image', data: item.data, mimeType: item.mimeType };
          return item;
        }),
      };
    }

    return result as { content: HandlerResultContent[] };
  };
});

// Renamed describe block as it now only tests the handler
describe('handleReadPdfFunc Integration Tests', () => {
  beforeEach(() => {
    vi.resetAllMocks();
    // Reset mocks for pathUtils if we spy on it
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation((p) => p); // Simple mock for resolvePath

    mockReadFile.mockResolvedValue(Buffer.from('mock pdf content'));

    // Default document: 3 pages, each page yielding a single text item.
    const mockDocumentAPI = {
      numPages: 3,
      getMetadata: mockGetMetadata,
      getPage: mockGetPage,
    };
    const mockLoadingTaskAPI = { promise: Promise.resolve(mockDocumentAPI) };
    mockGetDocument.mockReturnValue(mockLoadingTaskAPI);
    mockGetMetadata.mockResolvedValue({
      info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
      metadata: {
        _metadataMap: new Map([['dc:format', 'application/pdf']]),
        get(key: string) {
          return this._metadataMap.get(key);
        },
        has(key: string) {
          return this._metadataMap.has(key);
        },
        getAll() {
          return Object.fromEntries(this._metadataMap);
        },
      },
    });

    // Removed unnecessary async and eslint-disable comment
    mockGetPage.mockImplementation((pageNum: number) => {
      if (pageNum > 0 && pageNum <= mockDocumentAPI.numPages) {
        return {
          getTextContent: vi.fn().mockResolvedValueOnce({
            items: [
              {
                str: `Mock page text ${String(pageNum)}`,
                transform: [1, 0, 0, 1, 0, 100 + pageNum * 10],
              },
            ],
          }),
          getOperatorList: vi.fn().mockResolvedValue({
            fnArray: [],
            argsArray: [],
          }),
          objs: {
            get: vi.fn(),
          },
        };
      }
      throw new Error(`Mock getPage error: Invalid page number ${String(pageNum)}`);
    });
  });

  // Removed unit tests for parsePageRanges

  // --- Integration Tests for handleReadPdfFunc ---

  it('should successfully read full text, metadata, and page count for a local file', async () => {
    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_metadata: true,
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            full_text: 'Mock page text 1\n\nMock page text 2\n\nMock page text 3',
          },
        },
      ],
    };

    expect(mockReadFile).toHaveBeenCalledWith(resolvePath('test.pdf'));
    expect(mockGetDocument).toHaveBeenCalledWith(new Uint8Array(Buffer.from('mock pdf content')));
    expect(mockGetMetadata).toHaveBeenCalled();
    expect(mockGetPage).toHaveBeenCalledTimes(3);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(result.content[0].type).toBe('text');
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should successfully read specific pages for a local file', async () => {
    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 3] }],
      include_metadata: false,
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            num_pages: 3,
            page_texts: [
              { page: 1, text: 'Mock page text 1' },
              { page: 3, text: 'Mock page text 3' },
            ],
          },
        },
      ],
    };

    expect(mockGetPage).toHaveBeenCalledTimes(2);
    expect(mockGetPage).toHaveBeenCalledWith(1);
    expect(mockGetPage).toHaveBeenCalledWith(3);
    expect(mockReadFile).toHaveBeenCalledWith(resolvePath('test.pdf'));
    expect(mockGetDocument).toHaveBeenCalledWith(new Uint8Array(Buffer.from('mock pdf content')));
    expect(mockGetMetadata).not.toHaveBeenCalled();

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(result.content[0].type).toBe('text');
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should successfully read specific pages using string range', async () => {
    const args = {
      sources: [{ path: 'test.pdf', pages: '1,3-3' }],
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [
              { page: 1, text: 'Mock page text 1' },
              { page: 3, text: 'Mock page text 3' },
            ],
          },
        },
      ],
    };

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should successfully read metadata only for a URL', async () => {
    const testUrl = 'http://example.com/test.pdf';
    const args = {
      sources: [{ url: testUrl }],
      include_full_text: false,
      include_metadata: true,
      include_page_count: false,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: testUrl,
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
          },
        },
      ],
    };

    expect(mockReadFile).not.toHaveBeenCalled();
    expect(mockGetDocument).toHaveBeenCalledWith({ url: testUrl });
    expect(mockGetMetadata).toHaveBeenCalled();
    expect(mockGetPage).not.toHaveBeenCalled();

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(result.content[0].type).toBe('text');
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should handle multiple sources with different options', async () => {
    const urlSource = 'http://example.com/another.pdf';
    const args = {
      sources: [{ path: 'local.pdf', pages: [1] }, { url: urlSource }],
      include_full_text: true,
      include_metadata: true,
      include_page_count: true,
    };

    // Setup mocks for the second source (URL)
    const secondMockGetPage = vi.fn().mockImplementation((pageNum: number) => {
      // Removed unnecessary async
      if (pageNum === 1)
        return {
          getTextContent: vi.fn().mockResolvedValue({
            items: [{ str: 'URL Mock page text 1', transform: [1, 0, 0, 1, 0, 200] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      if (pageNum === 2)
        return {
          getTextContent: vi.fn().mockResolvedValue({
            items: [{ str: 'URL Mock page text 2', transform: [1, 0, 0, 1, 0, 210] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      throw new Error(`Mock getPage error: Invalid page number ${String(pageNum)}`);
    });
    const secondMockGetMetadata = vi.fn().mockResolvedValue({
      // Separate metadata mock if needed
      info: { Title: 'URL PDF' },
      metadata: { getAll: () => ({ 'dc:creator': 'URL Author' }) },
    });
    const secondMockDocumentAPI = {
      numPages: 2,
      getMetadata: secondMockGetMetadata, // Use separate metadata mock
      getPage: secondMockGetPage,
    };
    const secondLoadingTaskAPI = { promise: Promise.resolve(secondMockDocumentAPI) };

    // Reset getDocument mock before setting implementation
    mockGetDocument.mockReset();
    // Mock getDocument based on input source
    mockGetDocument.mockImplementation((source: Buffer | { url: string }) => {
      // Check if source is not a Buffer and has the matching url property
      if (typeof source === 'object' && !Buffer.isBuffer(source) && source.url === urlSource) {
        return secondLoadingTaskAPI;
      }
      // Default mock for path-based source (local.pdf)
      const defaultMockDocumentAPI = {
        numPages: 3,
        getMetadata: mockGetMetadata, // Use original metadata mock
        getPage: mockGetPage, // Use original page mock
      };
      return { promise: Promise.resolve(defaultMockDocumentAPI) };
    });

    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'local.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [{ page: 1, text: 'Mock page text 1' }],
          },
        },
        {
          source: urlSource,
          success: true,
          data: {
            // Use the metadata returned by secondMockGetMetadata
            info: { Title: 'URL PDF' },
            metadata: { 'dc:creator': 'URL Author' },
            num_pages: 2,
            full_text: 'URL Mock page text 1\n\nURL Mock page text 2',
          },
        },
      ],
    };

    expect(mockReadFile).toHaveBeenCalledOnce();
    expect(mockReadFile).toHaveBeenCalledWith(resolvePath('local.pdf'));
    expect(mockGetDocument).toHaveBeenCalledTimes(2);
    expect(mockGetDocument).toHaveBeenCalledWith(new Uint8Array(Buffer.from('mock pdf content')));
    expect(mockGetDocument).toHaveBeenCalledWith({ url: urlSource });
    expect(mockGetPage).toHaveBeenCalledTimes(1); // Should be called once for local.pdf page 1
    expect(secondMockGetPage).toHaveBeenCalledTimes(2);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  // --- Error Handling Tests ---

  it('should throw error if local file not found', async () => {
    const error = new Error('Mock ENOENT') as NodeJS.ErrnoException;
    error.code = 'ENOENT';
    mockReadFile.mockRejectedValue(error);
    const args = { sources: [{ path: 'nonexistent.pdf' }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow("File not found at 'nonexistent.pdf'");
  });

  it('should throw error if pdfjs fails to load document', async () => {
    const loadError = new Error('Mock PDF loading failed');
    const failingLoadingTask = { promise: Promise.reject(loadError) };
    mockGetDocument.mockReturnValue(failingLoadingTask);
    const args = { sources: [{ path: 'bad.pdf' }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Mock PDF loading failed');
  });

  it('should throw PdfError for invalid input arguments (Zod error)', async () => {
    const args = { sources: [{ path: 'test.pdf' }], include_full_text: 'yes' };
    await expect(handler(args)).rejects.toThrow(PdfError);
    // Zod 4 format: "Invalid input: expected boolean, received string"
    await expect(handler(args)).rejects.toThrow(/include_full_text.*boolean.*string/i);
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Test case for the initial Zod parse failure
  it('should throw PdfError if top-level argument parsing fails', async () => {
    const invalidArgs = { invalid_prop: true }; // Completely wrong structure
    await expect(handler(invalidArgs)).rejects.toThrow(PdfError);
    // Zod 4 format: "Invalid input: expected array, received undefined"
    await expect(handler(invalidArgs)).rejects.toThrow(/sources.*array/i);
    await expect(handler(invalidArgs)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Updated test: Expect Zod validation to throw PdfError directly
  it('should throw PdfError for invalid page specification string (Zod)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: '1,abc,3' }] };
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      /Invalid arguments: sources.0.pages \(Page string must contain only numbers, commas, and hyphens.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Updated test: Expect Zod validation to throw PdfError directly
  it('should throw PdfError for invalid page specification array (non-positive - Zod)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: [1, 0, 3] }] };
    await expect(handler(args)).rejects.toThrow(PdfError);
    // Zod 4 format: "Too small: expected number to be >=1"
    await expect(handler(args)).rejects.toThrow(/sources\.0\.pages\.1.*>=1/i);
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Test case for resolvePath failure within the catch block
  it('should throw error if resolvePath fails', async () => {
    const resolveError = new Error('Mock resolvePath failed');
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation(() => {
      throw resolveError;
    });
    const args = { sources: [{ path: 'some/path' }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Mock resolvePath failed');
  });

  // Test case for the final catch block with a generic error
  it('should throw error when generic errors during processing', async () => {
    const genericError = new Error('Something unexpected happened');
    mockReadFile.mockRejectedValue(genericError); // Simulate error after path resolution
    const args = { sources: [{ path: 'generic/error/path' }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Something unexpected happened');
  });

  // Test case for the final catch block with a non-Error object
  it('should throw error with non-Error exceptions during processing', async () => {
    const nonError = { message: 'Just an object', code: 'UNEXPECTED' };
    mockReadFile.mockRejectedValue(nonError); // Simulate error after path resolution
    const args = { sources: [{ path: 'non/error/path' }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('non/error/path');
  });

  it('should include warnings for requested pages exceeding total pages', async () => {
    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 4, 5] }],
      include_page_count: true,
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [{ page: 1, text: 'Mock page text 1' }],
            warnings: ['Requested page numbers 4, 5 exceed total pages (3).'],
          },
        },
      ],
    };

    expect(mockGetPage).toHaveBeenCalledTimes(1);
    expect(mockGetPage).toHaveBeenCalledWith(1);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it('should handle errors during page processing gracefully when specific pages are requested', async () => {
    // Removed unnecessary async and eslint-disable comment
    mockGetPage.mockImplementation((pageNum: number) => {
      if (pageNum === 1)
        return {
          getTextContent: vi.fn().mockResolvedValueOnce({
            items: [{ str: `Mock page text 1`, transform: [1, 0, 0, 1, 0, 100] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      if (pageNum === 2) throw new Error('Failed to get page 2');
      if (pageNum === 3)
        return {
          getTextContent: vi.fn().mockResolvedValueOnce({
            items: [{ str: `Mock page text 3`, transform: [1, 0, 0, 1, 0, 120] }],
          }),
          getOperatorList: vi.fn().mockResolvedValue({ fnArray: [], argsArray: [] }),
          objs: { get: vi.fn() },
        };
      throw new Error(`Mock getPage error: Invalid page number ${String(pageNum)}`);
    });

    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 2, 3] }],
    };
    const result = await handler(args);
    const expectedData = {
      results: [
        {
          source: 'test.pdf',
          success: true,
          data: {
            info: { PDFFormatVersion: '1.7', Title: 'Mock PDF' },
            metadata: { 'dc:format': 'application/pdf' },
            num_pages: 3,
            page_texts: [
              { page: 1, text: 'Mock page text 1' },
              { page: 2, text: 'Error processing page: Failed to get page 2' },
              { page: 3, text: 'Mock page text 3' },
            ],
          },
        },
      ],
    };

    expect(mockGetPage).toHaveBeenCalledTimes(3);

    // Add check for content existence and access safely
    expect(result.content).toBeDefined();
    expect(result.content.length).toBeGreaterThan(0);
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      expect(JSON.parse(result.content[0].text) as ExpectedResultType).toEqual(expectedData);
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  // --- Additional Coverage Tests ---

  it('should throw error if pdfjs fails to load document from URL', async () => {
    const testUrl = 'http://example.com/bad-url.pdf';
    const loadError = new Error('Mock URL PDF loading failed');
    const failingLoadingTask = { promise: Promise.reject(loadError) };

    // Ensure getDocument is mocked specifically for this URL
    mockGetDocument.mockReset();
    mockGetDocument.mockImplementation((source: unknown) => {
      if (
        typeof source === 'object' &&
        source !== null &&
        Object.hasOwn(source, 'url') &&
        typeof (source as { url?: unknown }).url === 'string' &&
        (source as { url: string }).url === testUrl
      ) {
        return failingLoadingTask;
      }
      const mockDocumentAPI = { numPages: 1, getMetadata: vi.fn(), getPage: vi.fn() };
      return { promise: Promise.resolve(mockDocumentAPI) };
    });

    const args = { sources: [{ url: testUrl }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Mock URL PDF loading failed');
  });

  it('should not include page count when include_page_count is false', async () => {
    const args = {
      sources: [{ path: 'test.pdf' }],
      include_page_count: false, // Explicitly false
      include_metadata: false, // Keep it simple
      include_full_text: false,
    };
    const result = await handler(args);

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      const parsedResult = JSON.parse(result.content[0].text) as ExpectedResultType;
      expect(parsedResult.results[0]).toBeDefined();
      if (parsedResult.results[0]?.data) {
        expect(parsedResult.results[0].success).toBe(true);
        expect(parsedResult.results[0].data).not.toHaveProperty('num_pages');
        expect(parsedResult.results[0].data).not.toHaveProperty('metadata');
        expect(parsedResult.results[0].data).not.toHaveProperty('info');
      }
    } else {
      expect.fail('result.content[0] was undefined');
    }
    expect(mockGetMetadata).not.toHaveBeenCalled(); // Because include_metadata is false
  });

  it('should handle ENOENT error where resolvePath also fails in catch block', async () => {
    const enoentError = new Error('Mock ENOENT') as NodeJS.ErrnoException;
    enoentError.code = 'ENOENT';
    const targetPath = 'enoent/and/resolve/fails.pdf';

    // Mock resolvePath to return path as-is
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation((p) => p);
    mockReadFile.mockRejectedValue(enoentError);

    const args = { sources: [{ path: targetPath }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(`File not found at '${targetPath}'`);

    // Ensure readFile was called with the path that resolvePath returned
    expect(mockReadFile).toHaveBeenCalledWith(targetPath);
  });

  // --- Additional Error Coverage Tests ---

  it('should throw error for invalid page range string (e.g., 5-3)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: '1,5-3,7' }] };

    // When page parsing fails, it should throw
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(/Invalid page range values: 5-3/);
  });

  it('should throw PdfError for invalid page number string (e.g., 1,a,3)', async () => {
    const args = { sources: [{ path: 'test.pdf', pages: '1,a,3' }] };
    // Zod catches this first due to refine
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Escaped backslash for JSON
      /Invalid arguments: sources.0.pages \(Page string must contain only numbers, commas, and hyphens.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  // Test Zod refinement for path/url exclusivity
  it('should throw PdfError if source has both path and url', async () => {
    const args = { sources: [{ path: 'test.pdf', url: 'http://example.com' }] };
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Escaped backslash for JSON
      /Invalid arguments: sources.0 \(Each source must have either 'path' or 'url', but not both.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  it('should throw PdfError if source has neither path nor url', async () => {
    const args = { sources: [{ pages: [1] }] }; // Missing path and url
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow(
      // Escaped backslash for JSON
      /Invalid arguments: sources.0 \(Each source must have either 'path' or 'url', but not both.\)/
    );
    await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
  });

  it.skip('should handle non-Error exceptions during processing', async () => {
    // TODO: Fix this test - spy from previous test is persisting in Bun's test runner
    // Reset all mocks to ensure clean state
    vi.clearAllMocks();
    vi.spyOn(pathUtils, 'resolvePath')
      .mockClear()
      .mockImplementation((p) => p);

    // Reset mock functions
    mockReadFile.mockResolvedValue(Buffer.from('mock pdf content'));

    // Mock to throw non-Error at processSingleSource level
    // We need to throw something that's not Error or PdfError
    mockGetDocument.mockReset();
    mockGetDocument.mockImplementation(() => {
      throw { custom: 'object error' }; // Non-Error, non-PdfError
    });

    const args = { sources: [{ path: 'test.pdf' }] };
    const result = await handler(args);

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (result.content?.[0]) {
      const parsedResult = JSON.parse(result.content[0].text) as ExpectedResultType;
      expect(parsedResult.results[0]).toBeDefined();
      if (parsedResult.results[0]) {
        expect(parsedResult.results[0].success).toBe(false);
        expect(parsedResult.results[0].error).toContain('Unknown error');
        expect(parsedResult.results[0].error).toContain('custom');
      }
    } else {
      expect.fail('result.content[0] was undefined');
    }
  });

  it.skip('should extract images when include_images is true with full text', async () => {
    // TODO: Fix this test - Bun test runner handles image content differently
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89], // OPS.paintImageXObject value
        argsArray: [['img1', [1, 0, 0, 1, 0, 50]]],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback: (data: unknown) => void) => {
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Should have content parts: summary text + images
    expect(result.content.length).toBeGreaterThanOrEqual(2);

    // First part should be summary
    expect(result.content[0].type).toBe('text');
    expect(result.content[0].text).toBeDefined();

    // Check JSON format includes image_info
    const parsed = JSON.parse(result.content[0].text as string);
    expect(parsed.results[0].data.image_info).toBeDefined();

    // Should have image parts
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBeGreaterThan(0);
    expect(imageParts[0].data).toBeDefined();
    expect(imageParts[0].mimeType).toBeDefined();
  });

  it.skip('should extract images with page_texts preserving order', async () => {
    // TODO: Fix this test - Bun test runner handles image content differently
    const mockImageData = {
      width: 50,
      height: 50,
      data: new Uint8Array([128, 128, 128]),
      kind: 1,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'Page text', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89],
        argsArray: [['img1', [1, 0, 0, 1, 0, 50]]],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback: (data: unknown) => void) => {
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 2,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf', pages: [1, 2] }],
      include_images: true,
    };
    const result = await handler(args);

    // Should have: summary + (page1_images + page2_images)
    expect(result.content.length).toBeGreaterThan(1);

    // Check image parts exist
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(2); // One image per page
  });

  it('should handle image extraction timeout when callback never fires', async () => {
    // Reset resolvePath mock to not interfere
    vi.spyOn(pathUtils, 'resolvePath').mockImplementation((p) => p);

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89], // OPS.paintImageXObject
        argsArray: [['hanging_img']],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, _callback?: (data: unknown) => void) => {
          // Return undefined for sync call, never call callback for async
          return undefined;
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };

    // Should complete despite hanging callback (timeout after 10 seconds)
    const result = await handler(args);

    expect(result.content.length).toBeGreaterThanOrEqual(1);
    expect(result.content[0].type).toBe('text');

    // Image parts should be empty or missing since extraction timed out
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(0);
  }, 15000); // Set test timeout to 15 seconds (10s timeout + buffer)

  it('should extract different image formats (grayscale, rgb, rgba)', async () => {
    const mockGrayscaleImage = {
      width: 50,
      height: 50,
      data: new Uint8Array([128]),
      kind: 1, // grayscale
    };
    const mockRGBImage = {
      width: 100,
      height: 100,
      data: new Uint8Array([255, 0, 0]),
      kind: 2, // RGB
    };
    const mockRGBAImage = {
      width: 75,
      height: 75,
      data: new Uint8Array([0, 255, 0, 255]),
      kind: 3, // RGBA
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89, 89, 89], // Three images
        argsArray: [['img1'], ['img2'], ['img3']],
      }),
      objs: {
        get: vi.fn().mockImplementation((name: string, callback: (data: unknown) => void) => {
          if (name === 'img1') callback(mockGrayscaleImage);
          else if (name === 'img2') callback(mockRGBImage);
          else if (name === 'img3') callback(mockRGBAImage);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Check JSON includes image info
    const parsed = JSON.parse(result.content[0].text as string);
    expect(parsed.results[0].data.image_info).toHaveLength(3);
    expect(parsed.results[0].data.image_info[0].format).toBe('grayscale');
    expect(parsed.results[0].data.image_info[1].format).toBe('rgb');
    expect(parsed.results[0].data.image_info[2].format).toBe('rgba');

    // Check image parts with correct MIME types (all images are now PNG)
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(3);
    // All images should be PNG now
    expect(imageParts[0].mimeType).toBe('image/png');
    expect(imageParts[1].mimeType).toBe('image/png');
    expect(imageParts[2].mimeType).toBe('image/png');
  });

  it('should skip images with missing or invalid data', async () => {
    const mockValidImage = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89, 89, 89, 89], // Four images
        argsArray: [['valid_img'], ['no_data'], ['no_width'], ['invalid']],
      }),
      objs: {
        get: vi.fn().mockImplementation((name: string, callback: (data: unknown) => void) => {
          if (name === 'valid_img') {
            callback(mockValidImage);
          } else if (name === 'no_data') {
            callback({ width: 100, height: 50, kind: 2 }); // Missing data
          } else if (name === 'no_width') {
            callback({ data: new Uint8Array([0]), height: 50, kind: 2 }); // Missing width
          } else if (name === 'invalid') {
            callback(null); // Invalid data
          }
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Only valid image should be extracted
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(1);

    const parsed = JSON.parse(result.content[0].text as string);
    expect(parsed.results[0].data.image_info).toHaveLength(1);
  });

  it('should preserve Y-coordinate ordering for mixed text and images', async () => {
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({
        items: [
          { str: 'Top text', transform: [1, 0, 0, 1, 0, 200] }, // Y=200 (top)
          { str: 'Bottom text', transform: [1, 0, 0, 1, 0, 50] }, // Y=50 (bottom)
        ],
      }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89], // One image
        argsArray: [['img1', [1, 0, 0, 1, 0, 150]]], // Y=150 (middle) - transform in args
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback: (data: unknown) => void) => {
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Content order should be: summary, top_text, image, bottom_text
    // (sorted by Y-coordinate descending = top to bottom)
    expect(result.content.length).toBe(4);
    expect(result.content[0].type).toBe('text'); // Summary JSON
    expect(result.content[1].type).toBe('text'); // Top text (Y=200)
    expect(result.content[1].text).toBe('Top text');
    expect(result.content[2].type).toBe('image'); // Image (Y=150)
    expect(result.content[3].type).toBe('text'); // Bottom text (Y=50)
    expect(result.content[3].text).toBe('Bottom text');
  });

  it('should extract images from commonObjs with g_ prefix', async () => {
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89],
        argsArray: [['g_image1']], // Image with g_ prefix
      }),
      objs: {
        get: vi.fn().mockReturnValue(undefined), // Not in objs
      },
      commonObjs: {
        get: vi.fn().mockReturnValue(mockImageData), // Found in commonObjs
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Should have extracted the image from commonObjs
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(1);
    expect(mockPage.commonObjs.get).toHaveBeenCalledWith('g_image1');
  });

  it('should use sync objs.get when image is already loaded', async () => {
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89],
        argsArray: [['img1']],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback?: (data: unknown) => void) => {
          // Sync call - return immediately
          if (!callback) {
            return mockImageData;
          }
          // Should not reach async callback
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Should have extracted the image synchronously
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(1);

    // Verify sync call was made (without callback parameter)
    expect(mockPage.objs.get).toHaveBeenCalled();
  });

  it('should fallback to async when sync get returns undefined', async () => {
    const mockImageData = {
      width: 100,
      height: 50,
      data: new Uint8Array([255, 0, 0]),
      kind: 2,
    };

    const mockPage = {
      getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'test', transform: [1, 0, 0, 1, 0, 100] }] }),
      getOperatorList: vi.fn().mockResolvedValue({
        fnArray: [89],
        argsArray: [['img1']],
      }),
      objs: {
        get: vi.fn().mockImplementation((_name: string, callback?: (data: unknown) => void) => {
          // Sync call returns undefined
          if (!callback) {
            return undefined;
          }
          // Async callback provides the data
          callback(mockImageData);
        }),
      },
    };

    mockGetDocument.mockReset();
    mockGetDocument.mockReturnValue({
      promise: Promise.resolve({
        numPages: 1,
        getMetadata: vi.fn().mockResolvedValue({ info: {}, metadata: {} }),
        getPage: vi.fn().mockResolvedValue(mockPage),
      }),
    });

    const args = {
      sources: [{ path: 'test.pdf' }],
      include_full_text: true,
      include_images: true,
    };
    const result = await handler(args);

    // Should have extracted the image via async callback
    const imageParts = result.content.filter((c) => c.type === 'image');
    expect(imageParts.length).toBe(1);
  });

  it('should handle Error (not PdfError) during processing', async () => {
    // Mock getDocument to throw a regular Error (not PdfError)
    mockGetDocument.mockReturnValue({
      promise: Promise.reject(new Error('Regular error message')),
    });

    const args = { sources: [{ path: 'error.pdf' }] };

    // When all sources fail, handler now throws toolError
    await expect(handler(args)).rejects.toThrow(PdfError);
    await expect(handler(args)).rejects.toThrow('Regular error message');
  });
}); // End top-level describe

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/SylphxAI/pdf-reader-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.