get_pdf_outline
Extract the table of contents or outline structure from a PDF, returning a hierarchical or flat list of sections with titles and page references.
Instructions
Extract the table of contents (TOC) or outline/bookmarks structure from a PDF file. Returns a hierarchical or flattened list of document sections with titles, page references, and navigation structure. Provide exactly one of absolute_path (a file in any location) or relative_path (a file in the ~/pdf-agent/ directory).
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| absolute_path | No | Absolute path to the PDF file (e.g., '/Users/john/documents/report.pdf') | |
| relative_path | No | Path relative to ~/pdf-agent/ directory (e.g., 'reports/annual.pdf') | |
| use_pdf_home | No | Use PDF agent home directory for relative paths (default: true) | true |
| include_destinations | No | Resolve internal destinations to page numbers when possible (default: true) | true |
| max_depth | No | Maximum nesting depth to process (1-10). Optional - limits deep hierarchies | |
| flatten_structure | No | Return flat list instead of hierarchical tree structure (default: false) | false |
Implementation Reference
- src/index.ts:160-172 (schema) Zod schema defining input parameters for get_pdf_outline: absolute_path, relative_path, use_pdf_home, include_destinations, max_depth, flatten_structure.
/**
 * Input validation for the get_pdf_outline tool.
 *
 * Both path fields are individually optional, but the refinement below
 * rejects input unless exactly one of them is a non-empty string.
 * `max_depth` is coerced to a number and must lie in the range 1-10.
 */
const GetPdfOutlineSchema = z
  .object({
    absolute_path: z.string().optional(),
    relative_path: z.string().optional(),
    use_pdf_home: z.boolean().default(true),
    include_destinations: z.boolean().default(true),
    max_depth: z.coerce.number().min(1).max(10).optional(),
    flatten_structure: z.boolean().default(false),
  })
  .refine(
    // Truthiness XOR: passes only when one path is set and the other is not.
    ({ absolute_path, relative_path }) =>
      Boolean(absolute_path) !== Boolean(relative_path),
    {
      message: "Exactly one of 'absolute_path' or 'relative_path' must be provided",
    }
  );
- src/index.ts:1629-1666 (registration)Tool registration in ListToolsRequestSchema: defines name 'get_pdf_outline', description, and JSON inputSchema for the tool.
// Tool descriptor advertised to clients for get_pdf_outline.
// Neither path property is listed as required here; the "exactly one of
// absolute_path / relative_path" rule is enforced by GetPdfOutlineSchema
// when the tool is called.
{
  name: "get_pdf_outline",
  description: "Extract the table of contents (TOC) or outline/bookmarks structure from a PDF file. Returns hierarchical or flattened list of document sections with titles, page references, and navigation structure. Use either absolute_path for any location or relative_path for files in ~/pdf-agent/ directory.",
  inputSchema: {
    type: "object",
    properties: {
      // Location of the PDF: one of the two path properties below.
      absolute_path: {
        type: "string",
        description: "Absolute path to the PDF file (e.g., '/Users/john/documents/report.pdf')",
      },
      relative_path: {
        type: "string",
        description: "Path relative to ~/pdf-agent/ directory (e.g., 'reports/annual.pdf')",
      },
      use_pdf_home: {
        type: "boolean",
        description: "Use PDF agent home directory for relative paths (default: true)",
        default: true,
      },
      // Extraction options.
      include_destinations: {
        type: "boolean",
        description: "Resolve internal destinations to page numbers when possible (default: true)",
        default: true,
      },
      // No default: when omitted, no depth limit is passed to the extractor.
      max_depth: {
        type: "number",
        description: "Maximum nesting depth to process (1-10). Optional - limits deep hierarchies",
        minimum: 1,
        maximum: 10,
      },
      flatten_structure: {
        type: "boolean",
        description: "Return flat list instead of hierarchical tree structure (default: false)",
        default: false,
      },
    },
  },
},
- src/index.ts:2370-2457 (handler)Tool handler (CallToolRequestSchema case 'get_pdf_outline'): parses args, resolves file path, reads PDF, calls extractPdfOutline(), and returns outline result.
case "get_pdf_outline": {
  // Validate the raw tool arguments. This runs outside the try block, so a
  // validation failure propagates to the enclosing request handler.
  const {
    absolute_path,
    relative_path,
    use_pdf_home,
    include_destinations,
    max_depth,
    flatten_structure,
  } = GetPdfOutlineSchema.parse(args);

  // Every failure below is reported as a JSON `{ error }` text payload.
  const errorResponse = (message: string) => ({
    content: [
      {
        type: "text" as const,
        text: JSON.stringify({ error: message }),
      },
    ],
  });

  try {
    // Resolve the final path based on parameters
    let resolvedPath: string;

    if (use_pdf_home && relative_path) {
      // Use relative path from PDF agent home directory
      const pdfAgentHome = await ensurePdfAgentHome();
      resolvedPath = join(pdfAgentHome, relative_path);
    } else if (absolute_path) {
      // Use absolute path directly
      if (!isAbsolute(absolute_path)) {
        return errorResponse(
          `Path '${absolute_path}' is not absolute. Use relative_path parameter for relative paths or provide a full absolute path.`
        );
      }
      resolvedPath = absolute_path;
    } else if (relative_path) {
      // relative_path was supplied but use_pdf_home is false, so there is no
      // base directory to resolve it against. Say so explicitly instead of
      // claiming that no path was provided.
      return errorResponse(
        `relative_path '${relative_path}' can only be resolved against the PDF agent home directory (~/pdf-agent/), but use_pdf_home is false. Set use_pdf_home to true or provide an absolute_path instead.`
      );
    } else {
      // Defensive fallback: the schema refinement should make this unreachable.
      return errorResponse(
        `Must provide either 'absolute_path' or 'relative_path'. Examples: {"absolute_path": "/Users/john/document.pdf"} or {"relative_path": "reports/annual.pdf"}`
      );
    }

    if (!(await fileExists(resolvedPath))) {
      const pathType = relative_path ? 'relative path' : 'absolute path';
      const homeInfo = relative_path ? ` (resolved from ~/pdf-agent/ to ${resolvedPath})` : '';
      return errorResponse(
        `PDF file not found at ${pathType} '${relative_path || absolute_path}'${homeInfo}. Please check the file path and ensure the file exists.`
      );
    }

    // Read PDF file
    const pdfBuffer = await safeReadFile(resolvedPath);

    // Extract PDF outline
    const outlineResult = await extractPdfOutline(pdfBuffer, resolvedPath, {
      includeDestinations: include_destinations,
      maxDepth: max_depth,
      flattenStructure: flatten_structure,
    });

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify(outlineResult, null, 2),
        },
      ],
    };
  } catch (e) {
    // Anything thrown while resolving, reading or parsing is returned to the
    // caller as an error payload rather than rethrown.
    const providedPath = relative_path || absolute_path || 'unknown';
    const pathType = relative_path ? 'relative path' : 'absolute path';
    return errorResponse(
      `Error extracting PDF outline at ${pathType} '${providedPath}': ${e}. Please ensure the file is a valid PDF and check the file path.`
    );
  }
}
- src/index.ts:1213-1278 (helper)extractPdfOutline() - Core outline extraction logic using pdfjsLib to get document outline, process items recursively, and return structured OutlineResult with summary statistics.
/**
 * Loads a PDF from an in-memory buffer and extracts its outline (bookmarks).
 *
 * @param pdfBuffer - Raw bytes of the PDF file.
 * @param filePath - Path the buffer came from; used in log output and echoed
 *   back as `file_path` in the result.
 * @param options - `includeDestinations` and `maxDepth` are forwarded to
 *   processOutlineItems(); `flattenStructure` additionally runs the processed
 *   items through flattenOutlineItems().
 * @returns The processed outline plus summary statistics. When the document
 *   has no outline, `has_outline` is false, `outline_items` is empty and all
 *   summary counters are zero.
 * @throws Error with the message prefix "PDF outline extraction failed:" for
 *   any failure while loading or processing the document.
 */
async function extractPdfOutline(
  pdfBuffer: Buffer,
  filePath: string,
  options: {
    includeDestinations: boolean;
    maxDepth?: number;
    flattenStructure: boolean;
  }
): Promise<OutlineResult> {
  try {
    log('info', `Extracting PDF outline from ${filePath}`);

    // Keep a handle on the loading task so the document can be released
    // once extraction has finished, whether it succeeded or failed.
    const loadingTask = pdfjsLib.getDocument({ data: new Uint8Array(pdfBuffer) });

    try {
      // Load PDF document
      const pdfDoc = await loadingTask.promise;

      // Get outline
      const outline = await pdfDoc.getOutline();

      if (!outline || outline.length === 0) {
        log('info', 'PDF has no outline/bookmarks');
        return {
          file_path: filePath,
          has_outline: false,
          outline_items: [],
          summary: {
            total_items: 0,
            max_depth: 0,
            items_with_pages: 0,
            items_with_urls: 0,
          },
        };
      }

      log('info', `Found ${outline.length} top-level outline items`);

      // Process outline items
      let processedItems = processOutlineItems(
        outline,
        0,
        options.maxDepth,
        pdfDoc,
        options.includeDestinations
      );

      // Flatten structure if requested
      if (options.flattenStructure) {
        processedItems = flattenOutlineItems(processedItems);
      }

      // Calculate statistics (computed on the flattened list when flattening
      // was requested)
      const summary = calculateOutlineStats(processedItems);

      log('info', `Processed outline: ${summary.total_items} items, max depth ${summary.max_depth}`);

      return {
        file_path: filePath,
        has_outline: true,
        outline_items: processedItems,
        summary,
      };
    } finally {
      // Release the document's resources. A cleanup failure is logged but
      // must not replace the extraction result or the original error.
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        log('warn', 'Failed to release PDF document resources', { error: cleanupError });
      }
    }
  } catch (error) {
    log('error', 'Failed to extract PDF outline', { error });
    throw new Error(`PDF outline extraction failed: ${error}`);
  }
}
- src/index.ts:901-966 (helper)processOutlineItems() - Recursively processes outline items to build hierarchical tree with titles, levels, page numbers, destinations, and children.
/**
 * Recursively converts raw outline nodes into OutlineItem objects.
 *
 * @param items - Raw outline nodes for the current nesting level.
 * @param level - Zero-based nesting level assigned to every item in `items`.
 * @param maxDepth - When set, levels at or beyond this value are dropped
 *   (the call returns an empty array for them).
 * @param pdfDoc - Loaded PDF document, passed through to parseDestination().
 * @param includeDestinations - When true (and `pdfDoc` is given), each item
 *   with a `dest` gets a `destination` string and, if it resolves, a `page`.
 * @returns The processed items for this level, each with its processed
 *   children. An item that throws while being processed is logged and skipped.
 */
function processOutlineItems(
  items: any[],
  level: number = 0,
  maxDepth?: number,
  pdfDoc?: any,
  includeDestinations: boolean = true
): OutlineItem[] {
  if (!items || items.length === 0) {
    return [];
  }

  if (maxDepth !== undefined && level >= maxDepth) {
    return [];
  }

  const processedItems: OutlineItem[] = [];

  for (const item of items) {
    try {
      const outlineItem: OutlineItem = {
        title: item.title || '',
        level,
        bold: item.bold || false,
        italic: item.italic || false,
      };

      // Add color if present. pdf.js reports outline colors as a
      // Uint8ClampedArray, which Array.isArray() rejects, so accept both
      // forms and store a plain array so the value serializes as [r, g, b].
      const rawColor: unknown = item.color;
      if (
        (Array.isArray(rawColor) || rawColor instanceof Uint8ClampedArray) &&
        rawColor.length === 3
      ) {
        outlineItem.color = Array.from(rawColor) as [number, number, number];
      }

      // Add URL if present
      if (item.url) {
        outlineItem.url = item.url;
      }

      // Parse destination to page number if requested
      if (includeDestinations && item.dest && pdfDoc) {
        const pageNum = parseDestination(item.dest, pdfDoc);
        if (pageNum !== undefined) {
          outlineItem.page = pageNum;
        }
        // Keep the raw destination alongside the resolved page number.
        outlineItem.destination = JSON.stringify(item.dest);
      }

      // Process children recursively. When maxDepth cuts off the next level
      // the recursive call returns [], so `children` is an empty array.
      if (item.items && item.items.length > 0) {
        outlineItem.children = processOutlineItems(
          item.items,
          level + 1,
          maxDepth,
          pdfDoc,
          includeDestinations
        );
      }

      processedItems.push(outlineItem);
    } catch (error) {
      // Best effort: skip the broken item and continue with the rest.
      // `item` may itself be null/undefined, so read the title defensively.
      log('warn', `Failed to process outline item: ${item?.title}`, { error });
    }
  }

  return processedItems;
}