read_pdf_pages
Extract text from a PDF by specifying a page range. Provide the file path and start page; optionally set an end page. Returns the text content from those pages.
Instructions
Extract text from specific pages or page range in a PDF
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| filePath | Yes | Absolute path to the PDF file | |
| startPage | Yes | Starting page number (1-indexed) | |
| endPage | No | Ending page number (optional, defaults to startPage) | startPage |
Implementation Reference
- src/index.ts:56-77 (registration) Tool registration: defines the 'read_pdf_pages' tool with an input schema requiring filePath and startPage, with optional endPage.
{ name: 'read_pdf_pages', description: 'Extract text from specific pages or page range in a PDF', inputSchema: { type: 'object', properties: { filePath: { type: 'string', description: 'Absolute path to the PDF file', }, startPage: { type: 'number', description: 'Starting page number (1-indexed)', }, endPage: { type: 'number', description: 'Ending page number (optional, defaults to startPage)', }, }, required: ['filePath', 'startPage'], }, }, - src/index.ts:218-240 (handler)Handler: dispatches the 'read_pdf_pages' tool call, constructs a PageRange from args, and delegates to extractTextFromPages() from pdf-tools.ts.
case 'read_pdf_pages': { const { filePath, startPage, endPage } = args as { filePath: string; startPage: number; endPage?: number; }; const pageRange: PageRange = { start: startPage, end: endPage ?? undefined, }; const text = await extractTextFromPages(filePath, pageRange); return { content: [ { type: 'text', text, }, ], }; } - src/pdf-tools.ts:83-108 (helper)Helper function that performs the actual PDF page text extraction using PDFParse, reading specified pages by number range.
/**
 * Extracts the text of an inclusive range of pages from a PDF file.
 *
 * @param filePath - Path of the PDF file to read.
 * @param pageRange - First page and optional last page. When `end` is
 *   missing (or falsy), only the `start` page is read.
 * @returns The text returned by the parser for the requested pages.
 * @throws Error with a `Failed to extract pages: ...` message when the range
 *   is invalid, the file cannot be read, or parsing fails.
 */
export async function extractTextFromPages(
  filePath: string,
  pageRange: PageRange
): Promise<string> {
  try {
    const start = pageRange.start;
    // Deliberately `||` rather than `??`: 0 is never a valid page number, so a
    // falsy end is treated the same as "not provided".
    const end = pageRange.end || start;

    // Fail fast on ranges that would otherwise produce an empty or fractional
    // page list and be handed to the parser silently.
    if (!Number.isInteger(start) || start < 1) {
      throw new Error(`Invalid start page: ${start} (must be an integer >= 1)`);
    }
    if (!Number.isInteger(end) || end < start) {
      throw new Error(`Invalid end page: ${end} (must be an integer >= start page ${start})`);
    }

    const dataBuffer = await fs.readFile(filePath);
    const parser = new PDFParse({ data: dataBuffer });

    try {
      // Every page number from start to end, inclusive.
      const pages = Array.from({ length: end - start + 1 }, (_, offset) => start + offset);
      const result = await parser.getText({ partial: pages });
      return result.text;
    } finally {
      // Release the parser even when text extraction throws.
      await parser.destroy();
    }
  } catch (error) {
    throw new Error(`Failed to extract pages: ${error instanceof Error ? error.message : String(error)}`);
  }
}
// - src/types.ts:19-22 (schema) Schema: PageRange type defining start and optional end page numbers used by the extractTextFromPages function.
/**
 * A span of PDF pages identified by page number.
 */
export interface PageRange {
  /** Page number at which the span begins. */
  start: number;
  /** Page number at which the span ends; may be omitted or explicitly `undefined`. */
  end?: number | undefined;
}