get_pdf_outline

Extract the table of contents or outline structure from a PDF, returning a hierarchical or flat list of sections with titles and page references.

Instructions

Extract the table of contents (TOC) or outline/bookmarks structure from a PDF file. Returns a hierarchical or flattened list of document sections with titles, page references, and navigation structure. Use either absolute_path for any location or relative_path for files in the ~/pdf-agent/ directory.

Input Schema

Table | JSON Schema

Name	Required	Description
`absolute_path`	No	Absolute path to the PDF file (e.g., '/Users/john/documents/report.pdf')
`relative_path`	No	Path relative to ~/pdf-agent/ directory (e.g., 'reports/annual.pdf')
`use_pdf_home`	No	Use PDF agent home directory for relative paths (default: true)
`include_destinations`	No	Resolve internal destinations to page numbers when possible (default: true)
`max_depth`	No	Maximum nesting depth to process (1-10). Optional - limits deep hierarchies
`flatten_structure`	No	Return flat list instead of hierarchical tree structure (default: false)

Implementation Reference

src/index.ts:160-172 (schema)

Zod schema defining input parameters for get_pdf_outline: absolute_path, relative_path, use_pdf_home, include_destinations, max_depth, flatten_structure.

/**
 * Input validation for the `get_pdf_outline` tool.
 *
 * Both path fields are optional on their own; the trailing refinement
 * requires that exactly one of them is supplied with a non-empty value.
 * `max_depth` is coerced to a number and must lie in the range 1..10.
 */
const GetPdfOutlineSchema = z
  .object({
    absolute_path: z.string().optional(),
    relative_path: z.string().optional(),
    use_pdf_home: z.boolean().default(true),
    include_destinations: z.boolean().default(true),
    max_depth: z.coerce.number().min(1).max(10).optional(),
    flatten_structure: z.boolean().default(false),
  })
  .refine(
    // XOR on truthiness: one path must be set and the other must not be.
    ({ absolute_path, relative_path }) =>
      Boolean(absolute_path) !== Boolean(relative_path),
    { message: "Exactly one of 'absolute_path' or 'relative_path' must be provided" }
  );

src/index.ts:1629-1666 (registration)

Tool registration in ListToolsRequestSchema: defines name 'get_pdf_outline', description, and JSON inputSchema for the tool.

// Tool descriptor for `get_pdf_outline`: the name, the human-readable
// description, and the JSON Schema advertised for the tool's arguments.
// The defaults and numeric bounds below mirror GetPdfOutlineSchema.
{
  name: "get_pdf_outline",
  description: "Extract the table of contents (TOC) or outline/bookmarks structure from a PDF file. Returns hierarchical or flattened list of document sections with titles, page references, and navigation structure. Use either absolute_path for any location or relative_path for files in ~/pdf-agent/ directory.",
  inputSchema: {
    type: "object",
    // No `required` list is declared here: neither path is mandatory on its
    // own. The "exactly one of absolute_path / relative_path" rule is enforced
    // by the refinement on GetPdfOutlineSchema when the handler parses args.
    properties: {
      absolute_path: {
        type: "string",
        description: "Absolute path to the PDF file (e.g., '/Users/john/documents/report.pdf')",
      },
      relative_path: {
        type: "string",
        description: "Path relative to ~/pdf-agent/ directory (e.g., 'reports/annual.pdf')",
      },
      use_pdf_home: {
        type: "boolean",
        description: "Use PDF agent home directory for relative paths (default: true)",
        default: true,
      },
      include_destinations: {
        type: "boolean",
        description: "Resolve internal destinations to page numbers when possible (default: true)",
        default: true,
      },
      // Optional with no default: when omitted, the handler passes `undefined`
      // and processOutlineItems applies no depth limit.
      max_depth: {
        type: "number",
        description: "Maximum nesting depth to process (1-10). Optional - limits deep hierarchies",
        minimum: 1,
        maximum: 10,
      },
      flatten_structure: {
        type: "boolean",
        description: "Return flat list instead of hierarchical tree structure (default: false)",
        default: false,
      },
    },
  },
},

src/index.ts:2370-2457 (handler)

Tool handler (CallToolRequestSchema case 'get_pdf_outline'): parses args, resolves file path, reads PDF, calls extractPdfOutline(), and returns outline result.

case "get_pdf_outline": {
  // Validate the raw tool arguments and apply schema defaults. This runs
  // outside the try block, so a validation failure propagates to the caller
  // of this switch rather than being converted into a JSON error payload.
  const { absolute_path, relative_path, use_pdf_home, include_destinations, max_depth, flatten_structure } = GetPdfOutlineSchema.parse(args);

  // Builds the tool's error response: a single text content item whose body
  // is the JSON document `{ "error": message }`.
  const errorResult = (message: string) => ({
    content: [
      {
        type: "text" as const,
        text: JSON.stringify({ error: message }),
      },
    ],
  });

  try {
    // Resolve the final path based on parameters
    let resolvedPath: string;

    if (use_pdf_home && relative_path) {
      // Use relative path from PDF agent home directory
      const pdfAgentHome = await ensurePdfAgentHome();
      resolvedPath = join(pdfAgentHome, relative_path);
    } else if (absolute_path) {
      // Use absolute path directly, but refuse anything that is not absolute
      if (!isAbsolute(absolute_path)) {
        return errorResult(
          `Path '${absolute_path}' is not absolute. Use relative_path parameter for relative paths or provide a full absolute path.`
        );
      }
      resolvedPath = absolute_path;
    } else if (relative_path) {
      // A relative path was supplied but use_pdf_home is false, so there is no
      // base directory to resolve it against. Previously this fell through to
      // the "must provide a path" error below, which was misleading because a
      // path *was* provided.
      return errorResult(
        `Cannot resolve relative_path '${relative_path}' because 'use_pdf_home' is false. Set 'use_pdf_home' to true to resolve it against ~/pdf-agent/, or provide 'absolute_path' instead.`
      );
    } else {
      return errorResult(
        `Must provide either 'absolute_path' or 'relative_path'. Examples: {"absolute_path": "/Users/john/document.pdf"} or {"relative_path": "reports/annual.pdf"}`
      );
    }

    if (!(await fileExists(resolvedPath))) {
      const pathType = relative_path ? 'relative path' : 'absolute path';
      // For relative paths, include the resolved location to aid debugging.
      const homeInfo = relative_path ? ` (resolved from ~/pdf-agent/ to ${resolvedPath})` : '';
      return errorResult(
        `PDF file not found at ${pathType} '${relative_path || absolute_path}'${homeInfo}. Please check the file path and ensure the file exists.`
      );
    }

    // Read PDF file
    const pdfBuffer = await safeReadFile(resolvedPath);

    // Extract PDF outline
    const outlineResult = await extractPdfOutline(pdfBuffer, resolvedPath, {
      includeDestinations: include_destinations,
      maxDepth: max_depth,
      flattenStructure: flatten_structure,
    });

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify(outlineResult, null, 2),
        },
      ],
    };

  } catch (e) {
    // Any failure while resolving, reading, or parsing is reported to the
    // client as a JSON error payload instead of being rethrown.
    const providedPath = relative_path || absolute_path || 'unknown';
    const pathType = relative_path ? 'relative path' : 'absolute path';
    return errorResult(
      `Error extracting PDF outline at ${pathType} '${providedPath}': ${e}. Please ensure the file is a valid PDF and check the file path.`
    );
  }
}

src/index.ts:1213-1278 (helper)

extractPdfOutline() - Core outline extraction logic using pdfjsLib to get document outline, process items recursively, and return structured OutlineResult with summary statistics.

/**
 * Load a PDF from memory and return its outline (bookmarks) together with
 * summary statistics.
 *
 * @param pdfBuffer - Raw bytes of the PDF file.
 * @param filePath - Path the bytes were read from; used for logging and echoed
 *   back in the result's `file_path`.
 * @param options - `includeDestinations` and `maxDepth` are forwarded to
 *   processOutlineItems; `flattenStructure` runs the processed items through
 *   flattenOutlineItems before statistics are calculated.
 * @returns An OutlineResult. When the document has no outline, `has_outline`
 *   is false, `outline_items` is empty, and every summary counter is 0.
 * @throws Error prefixed with "PDF outline extraction failed:" when loading or
 *   processing fails.
 */
async function extractPdfOutline(
  pdfBuffer: Buffer, 
  filePath: string,
  options: {
    includeDestinations: boolean;
    maxDepth?: number;
    flattenStructure: boolean;
  }
): Promise<OutlineResult> {
  // Kept outside the try so the finally block can release it on every path.
  let loadingTask: ReturnType<typeof pdfjsLib.getDocument> | undefined;

  try {
    log('info', `Extracting PDF outline from ${filePath}`);
    
    // Load PDF document
    loadingTask = pdfjsLib.getDocument({ data: new Uint8Array(pdfBuffer) });
    const pdfDoc = await loadingTask.promise;
    
    // Get outline
    const outline = await pdfDoc.getOutline();
    
    if (!outline || outline.length === 0) {
      log('info', 'PDF has no outline/bookmarks');
      return {
        file_path: filePath,
        has_outline: false,
        outline_items: [],
        summary: {
          total_items: 0,
          max_depth: 0,
          items_with_pages: 0,
          items_with_urls: 0,
        },
      };
    }
    
    log('info', `Found ${outline.length} top-level outline items`);
    
    // Process outline items
    let processedItems = processOutlineItems(
      outline, 
      0, 
      options.maxDepth, 
      pdfDoc, 
      options.includeDestinations
    );
    
    // Flatten structure if requested
    if (options.flattenStructure) {
      processedItems = flattenOutlineItems(processedItems);
    }
    
    // Calculate statistics (on the flattened list when flattening was requested)
    const summary = calculateOutlineStats(processedItems);
    
    log('info', `Processed outline: ${summary.total_items} items, max depth ${summary.max_depth}`);
    
    return {
      file_path: filePath,
      has_outline: true,
      outline_items: processedItems,
      summary,
    };
    
  } catch (error) {
    log('error', 'Failed to extract PDF outline', { error });
    throw new Error(`PDF outline extraction failed: ${error}`);
  } finally {
    // Release the document held by pdf.js; without this every call keeps the
    // parsed document alive. Cleanup is best-effort: a failure here is logged
    // and must not replace the result or the original error.
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        log('warn', 'Failed to release PDF document resources', { error: cleanupError });
      }
    }
  }
}

src/index.ts:901-966 (helper)

processOutlineItems() - Recursively processes outline items to build hierarchical tree with titles, levels, page numbers, destinations, and children.

/**
 * Convert raw outline nodes into OutlineItem records, recursing into children.
 *
 * @param items - Raw outline nodes; each may carry `title`, `bold`, `italic`,
 *   `color`, `url`, `dest`, and nested `items`.
 * @param level - Depth assigned to every node in `items` (0 for the top level).
 * @param maxDepth - When set, nodes at `level >= maxDepth` are not processed
 *   and an empty array is returned for that level.
 * @param pdfDoc - Document handle passed through to parseDestination; when
 *   omitted, destinations are not resolved or recorded.
 * @param includeDestinations - When true (and `pdfDoc` is given), records the
 *   JSON-encoded destination and, if parseDestination yields one, the page.
 * @returns The processed items. A node that fails to process is logged at
 *   'warn' level and skipped; it does not abort its siblings.
 */
function processOutlineItems(
  items: any[], 
  level: number = 0,
  maxDepth?: number,
  pdfDoc?: any,
  includeDestinations: boolean = true
): OutlineItem[] {
  if (!items || items.length === 0) {
    return [];
  }
  
  if (maxDepth !== undefined && level >= maxDepth) {
    return [];
  }
  
  const processedItems: OutlineItem[] = [];
  
  for (const item of items) {
    try {
      const outlineItem: OutlineItem = {
        title: item.title || '',
        level,
        bold: item.bold || false,
        italic: item.italic || false,
      };
      
      // Add color if present. pdf.js reports colors as a Uint8ClampedArray,
      // which Array.isArray() rejects, so typed arrays are accepted too and
      // copied into a plain tuple that serialises as a JSON array.
      const color = item.color;
      if (color && (Array.isArray(color) || ArrayBuffer.isView(color)) && color.length === 3) {
        outlineItem.color = [color[0], color[1], color[2]];
      }
      
      // Add URL if present
      if (item.url) {
        outlineItem.url = item.url;
      }
      
      // Parse destination to page number if requested
      if (includeDestinations && item.dest && pdfDoc) {
        const pageNum = parseDestination(item.dest, pdfDoc);
        if (pageNum !== undefined) {
          outlineItem.page = pageNum;
        }
        outlineItem.destination = JSON.stringify(item.dest);
      }
      
      // Process children recursively. If the depth limit cuts the recursion
      // off, `children` is an empty array rather than being omitted.
      if (item.items && item.items.length > 0) {
        outlineItem.children = processOutlineItems(
          item.items, 
          level + 1, 
          maxDepth, 
          pdfDoc, 
          includeDestinations
        );
      }
      
      processedItems.push(outlineItem);
    } catch (error) {
      // `item` may itself be null/undefined (that is one way to land here),
      // so it must not be dereferenced unguarded inside the handler.
      log('warn', `Failed to process outline item: ${item?.title}`, { error });
    }
  }
  
  return processedItems;
}

PDF Agent MCP

get_pdf_outline

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API