[
{
"id": "/changelog",
"title": "Changelog",
"text": "All notable changes to this project will be documented in this file.",
"path": "/changelog"
},
{
"id": "/changelog#1",
"title": "Changelog > [Unreleased]",
"text": "",
"path": "/changelog",
"section": "[Unreleased]"
},
{
"id": "/changelog#2",
"title": "Changelog > Added",
"text": "Nothing yet.",
"path": "/changelog",
"section": "Added"
},
{
"id": "/changelog#3",
"title": "Changelog > [0.3.9] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.9] - 2025-04-05"
},
{
"id": "/changelog#4",
"title": "Changelog > Fixed",
"text": "Removed artifact download/extract steps from job in workflow, as Docker build needs the full source context provided by checkout.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#5",
"title": "Changelog > [0.3.8] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.8] - 2025-04-05"
},
{
"id": "/changelog#6",
"title": "Changelog > Fixed",
"text": "Removed duplicate entry in step in . Removed duplicate entry in step in .",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#7",
"title": "Changelog > [0.3.7] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.7] - 2025-04-05"
},
{
"id": "/changelog#8",
"title": "Changelog > Fixed",
"text": "Removed explicit from Dockerfile (rely on ). Removed explicit from Dockerfile (rely on ). Explicitly set in docker build-push action. Explicitly set in docker build-push action.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#9",
"title": "Changelog > [0.3.6] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.6] - 2025-04-05"
},
{
"id": "/changelog#10",
"title": "Changelog > Fixed",
"text": "Explicitly added before in Dockerfile to ensure it exists before build step. Explicitly added before in Dockerfile to ensure it exists before build step.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#11",
"title": "Changelog > [0.3.5] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.5] - 2025-04-05"
},
{
"id": "/changelog#12",
"title": "Changelog > Fixed",
"text": "Added before build step in Dockerfile to debug not found error. Added before build step in Dockerfile to debug not found error.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#13",
"title": "Changelog > [0.3.4] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.4] - 2025-04-05"
},
{
"id": "/changelog#14",
"title": "Changelog > Fixed",
"text": "Explicitly specify path in Dockerfile build step ( ) to debug build failure. Explicitly specify path in Dockerfile build step ( ) to debug build failure.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#15",
"title": "Changelog > [0.3.3] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.3] - 2025-04-05"
},
{
"id": "/changelog#16",
"title": "Changelog > Fixed",
"text": "Changed Dockerfile build step from to to debug build failure. Changed Dockerfile build step from to to debug build failure.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#17",
"title": "Changelog > [0.3.2] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.2] - 2025-04-05"
},
{
"id": "/changelog#18",
"title": "Changelog > Fixed",
"text": "Simplified script in to only run (removed ) to debug Docker build failure. Simplified script in to only run (removed ) to debug Docker build failure.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#19",
"title": "Changelog > [0.3.1] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.1] - 2025-04-05"
},
{
"id": "/changelog#20",
"title": "Changelog > Fixed",
"text": "Attempted various fixes for GitHub Actions workflow artifact upload issue ( ). Final attempt uses fixed artifact filename in upload/download steps. Attempted various fixes for GitHub Actions workflow artifact upload issue ( ). Final attempt uses fixed artifact filename in upload/download steps.",
"path": "/changelog",
"section": "Fixed"
},
{
"id": "/changelog#21",
"title": "Changelog > [0.3.0] - 2025-04-05",
"text": "",
"path": "/changelog",
"section": "[0.3.0] - 2025-04-05"
},
{
"id": "/changelog#22",
"title": "Changelog > Added",
"text": "file based on Keep a Changelog format. file based on Keep a Changelog format. file (MIT License). file (MIT License). Improved GitHub Actions workflow ( ): Triggers on push to branch and version tags ( ). Conditionally archives build artifacts only on tag pushes. Conditionally runs and jobs only on tag pushes. Added job to automatically create GitHub Releases from tags, using for the body. Improved GitHub Actions workflow ( ): Triggers on push to branch and version tags ( ). Triggers on push to branch and version tags ( ). Conditionally archives build artifacts only on tag pushes. Conditionally archives build artifacts only on tag pushes. Conditionally runs and jobs only on tag pushes. Conditionally runs and jobs only on tag pushes. Added job to automatically create GitHub Releases from tags, using for the body. Added job to automatically create GitHub Releases from tags, using for the body. Added version headers to Memory Bank files ( , ). Added version headers to Memory Bank files ( , ).",
"path": "/changelog",
"section": "Added"
},
{
"id": "/changelog#23",
"title": "Changelog > Changed",
"text": "Bumped version from 0.2.2 to 0.3.0.",
"path": "/changelog",
"section": "Changed"
},
{
"id": "/contributing",
"title": "Contributing to PDF Reader MCP Server",
"text": "Thank you for your interest in contributing!",
"path": "/contributing"
},
{
"id": "/contributing#1",
"title": "Contributing to PDF Reader MCP Server > How to Contribute",
"text": "We welcome contributions in various forms: Reporting Bugs: If you find a bug, please open an issue on GitHub detailing the problem, steps to reproduce, and your environment. Reporting Bugs: If you find a bug, please open an issue on GitHub detailing the problem, steps to reproduce, and your environment. Suggesting Enhancements: Have an idea for a new feature or improvement? Open an issue to discuss it. Suggesting Enhancements: Have an idea for a new feature or improvement? Open an issue to discuss it. Pull Requests: If you'd like to contribute code: Fork the repository. Create a new branch for your feature or bug fix ( or ). Make your changes, ensuring they adhere to the project's coding style and principles (see ). Add tests for any new functionality and ensure all tests pass ( ). Ensure code coverage remains high ( ). Make sure your code lints correctly ( ). Commit your changes using the Conventional Commits standard (e.g., , ). Push your branch to your fork ( ). Open a Pull Request against the branch of the original repository. Pull Requests: If you'd like to contribute code: Fork the repository. Fork the repository. Create a new branch for your feature or bug fix ( or ). Create a new branch for your feature or bug fix ( or ). Make your changes, ensuring they adhere to the project's coding style and principles (see ). Make your changes, ensuring they adhere to the project's coding style and principles (see ). Add tests for any new functionality and ensure all tests pass ( ). Add tests for any new functionality and ensure all tests pass ( ). Ensure code coverage remains high ( ). Ensure code coverage remains high ( ). Make sure your code lints correctly ( ). Make sure your code lints correctly ( ). Commit your changes using the Conventional Commits standard (e.g., , ). Commit your changes using the Conventional Commits standard (e.g., , ). Push your branch to your fork ( ). Push your branch to your fork ( ). 
Open a Pull Request against the branch of the original repository. Open a Pull Request against the branch of the original repository.",
"path": "/contributing",
"section": "How to Contribute"
},
{
"id": "/contributing#2",
"title": "Contributing to PDF Reader MCP Server > Development Setup",
"text": "Clone your fork. Clone your fork. Install dependencies: Install dependencies: Build the project: Build the project: Run in watch mode during development: Run in watch mode during development: Run tests: or Run tests: or",
"path": "/contributing",
"section": "Development Setup"
},
{
"id": "/contributing#3",
"title": "Contributing to PDF Reader MCP Server > Code Style",
"text": "Please ensure your code adheres to the formatting and linting rules defined in the project: Run to format your code with Prettier. Run to format your code with Prettier. Run to check for ESLint issues. Run to check for ESLint issues. Thank you for contributing!",
"path": "/contributing",
"section": "Code Style"
},
{
"id": "/",
"title": "Untitled",
"text": "",
"path": "/"
},
{
"id": "/license",
"title": "License",
"text": "MIT License",
"path": "/license"
},
{
"id": "/performance",
"title": "Performance",
"text": "Performance is a key consideration for the PDF Reader MCP Server, as slow responses can negatively impact the interaction flow of AI agents.",
"path": "/performance"
},
{
"id": "/performance#1",
"title": "Performance > Core Library: ",
"text": "The server relies on Mozilla's pdf.js (specifically the distribution) for the heavy lifting of PDF parsing. This library is widely used and generally considered performant for standard PDF documents. However, performance can vary depending on: PDF Complexity: Documents with many pages, complex graphics, large embedded fonts, or non-standard structures may take longer to parse. Requested Data: Extracting full text from a very large document will naturally take longer than just retrieving metadata or the page count. Requesting text from only a few specific pages is usually more efficient than extracting the entire text. Server Resources: The performance will also depend on the CPU and memory resources available to the Node.js process running the server.",
"path": "/performance",
"section": "Core Library: "
},
{
"id": "/performance#2",
"title": "Performance > Asynchronous Operations",
"text": "All potentially long-running operations, including file reading (for local PDFs), network requests (for URL PDFs), and PDF parsing itself, are handled asynchronously using . This prevents the server from blocking the Node.js event loop and allows it to handle other requests or tasks concurrently (though typically an MCP server handles one request at a time from its host).",
"path": "/performance",
"section": "Asynchronous Operations"
},
{
"id": "/performance#3",
"title": "Performance > Benchmarking (Planned)",
"text": "(Section to be added) Formal benchmarking is planned to quantify the performance characteristics of the tool under various conditions. Goals: Measure the time taken to extract metadata, page count, specific pages, and full text for PDFs of varying sizes and complexities. Measure the time taken to extract metadata, page count, specific pages, and full text for PDFs of varying sizes and complexities. Compare the performance of processing local files vs. URLs (network latency will be a factor for URLs). Compare the performance of processing local files vs. URLs (network latency will be a factor for URLs). Identify potential bottlenecks within the handler logic or the library usage. Identify potential bottlenecks within the handler logic or the library usage. Establish baseline performance metrics to track potential regressions in the future. Establish baseline performance metrics to track potential regressions in the future. Tools: We plan to use Vitest's built-in benchmarking ( function) or a dedicated library like . We plan to use Vitest's built-in benchmarking ( function) or a dedicated library like . Benchmark results will be published in this section once available.",
"path": "/performance",
"section": "Benchmarking (Planned)"
},
{
"id": "/performance#4",
"title": "Performance > Current Optimization Considerations",
"text": "Lazy Loading: The library loads pages on demand when is called. This means that if only metadata or page count is requested, the entire document's page content doesn't necessarily need to be parsed immediately. Lazy Loading: The library loads pages on demand when is called. This means that if only metadata or page count is requested, the entire document's page content doesn't necessarily need to be parsed immediately. Selective Extraction: The ability to request specific pages ( parameter) allows agents to avoid the cost of extracting text from the entire document if only a small portion is needed. Selective Extraction: The ability to request specific pages ( parameter) allows agents to avoid the cost of extracting text from the entire document if only a small portion is needed. (This section will be updated with concrete data and findings as benchmarking is performed.)",
"path": "/performance",
"section": "Current Optimization Considerations"
},
{
"id": "/principles",
"title": "Development Principles",
"text": "This project adheres to the following core principles, based on the provided TypeScript Project Development Guidelines:",
"path": "/principles"
},
{
"id": "/principles#1",
"title": "Development Principles > 1. Impact-Driven",
"text": "The primary goal is to solve the real problem of AI agents needing access to PDF content securely and efficiently. Features are added to serve this core purpose.",
"path": "/principles",
"section": "1. Impact-Driven"
},
{
"id": "/principles#2",
"title": "Development Principles > 2. Simplicity & Minimalism",
"text": "We aim for the most direct approach: A single, consolidated tool instead of multiple specific tools. Leveraging the robust library for core parsing. Avoiding unnecessary abstractions.",
"path": "/principles",
"section": "2. Simplicity & Minimalism"
},
{
"id": "/principles#3",
"title": "Development Principles > 3. Functional Programming Style (Influences)",
"text": "While not strictly functional, the code emphasizes: Pure helper functions where possible (like path resolution checks). Minimizing side effects within core logic (parsing doesn't alter files). Using standard asynchronous patterns ( ) effectively.",
"path": "/principles",
"section": "3. Functional Programming Style (Influences)"
},
{
"id": "/principles#4",
"title": "Development Principles > 4. Minimal Dependencies",
"text": "Core functionality relies on and . Core functionality relies on and . Development dependencies are standard tools (TypeScript, ESLint, Prettier, Vitest). Development dependencies are standard tools (TypeScript, ESLint, Prettier, Vitest). Dependencies like , , provide essential validation and utility. Dependencies like , , provide essential validation and utility. Unused dependencies inherited from the template ( , ) have been removed. Unused dependencies inherited from the template ( , ) have been removed.",
"path": "/principles",
"section": "4. Minimal Dependencies"
},
{
"id": "/principles#5",
"title": "Development Principles > 5. Code Quality & Consistency",
"text": "Strict TypeScript: Using the strictest compiler options ( , etc.). Rigorous Linting: Employing ESLint with recommended and strict type-checked rules. Consistent Formatting: Enforced by Prettier. Comprehensive Testing: Aiming for high test coverage (currently ~95%) using Vitest, with a 100% threshold configured.",
"path": "/principles",
"section": "5. Code Quality & Consistency"
},
{
"id": "/principles#6",
"title": "Development Principles > 6. Security Focus",
"text": "Path traversal prevention is critical. All file paths are resolved relative to the project root and validated.",
"path": "/principles",
"section": "6. Security Focus"
},
{
"id": "/principles#7",
"title": "Development Principles > 7. No Sponsorship",
"text": "This project does not accept financial contributions, and all related information has been removed.",
"path": "/principles",
"section": "7. No Sponsorship"
},
{
"id": "/testing",
"title": "Testing Strategy",
"text": "Robust testing is essential for ensuring the reliability, correctness, and security of the PDF Reader MCP Server. We employ a multi-faceted testing approach using Vitest.",
"path": "/testing"
},
{
"id": "/testing#1",
"title": "Testing Strategy > Framework: Vitest",
"text": "We use Vitest as our primary testing framework. Its key advantages include: Speed: Fast execution powered by Vite. Modern Features: Supports ES Modules, TypeScript out-of-the-box. Compatibility: Familiar API similar to Jest. Integrated Coverage: Built-in support for code coverage analysis using or .",
"path": "/testing",
"section": "Framework: Vitest"
},
{
"id": "/testing#2",
"title": "Testing Strategy > Goals & Approach",
"text": "Our testing strategy focuses on: High Code Coverage: Target: 100% statement, branch, function, and line coverage. Configuration: Enforced via in . Current Status: ~95%. The remaining uncovered lines are primarily in error handling paths that are difficult to trigger due to Zod's upfront validation or represent extreme edge cases. This level is currently accepted. Tool: Coverage reports generated using . High Code Coverage: Target: 100% statement, branch, function, and line coverage. Target: 100% statement, branch, function, and line coverage. Configuration: Enforced via in . Configuration: Enforced via in . Current Status: ~95%. The remaining uncovered lines are primarily in error handling paths that are difficult to trigger due to Zod's upfront validation or represent extreme edge cases. This level is currently accepted. Current Status: ~95%. The remaining uncovered lines are primarily in error handling paths that are difficult to trigger due to Zod's upfront validation or represent extreme edge cases. This level is currently accepted. Tool: Coverage reports generated using . Tool: Coverage reports generated using . Correctness & Functionality: Unit Tests: (Currently minimal, focus is on integration) Could test utility functions like in isolation. Integration Tests: The primary focus is testing the handler ( ) with mocked dependencies ( , ). These tests verify: Correct parsing of various input arguments (paths, URLs, page selections, flags). Successful extraction of full text, specific page text, metadata, and page counts. Handling of multiple sources (local and URL) within a single request. Correct formatting of the JSON response. Graceful error handling for invalid inputs (caught by Zod or handler logic). Correct error reporting for file-not-found errors. Correct error reporting for PDF loading/parsing failures (mocked). Proper handling of warnings (e.g., requested pages out of bounds). 
Security: Path resolution logic ( ) is tested separately ( ) to ensure it prevents path traversal and correctly handles relative paths within the project root. Correctness & Functionality: Unit Tests: (Currently minimal, focus is on integration) Could test utility functions like in isolation. Unit Tests: (Currently minimal, focus is on integration) Could test utility functions like in isolation. Integration Tests: The primary focus is testing the handler ( ) with mocked dependencies ( , ). These tests verify: Correct parsing of various input arguments (paths, URLs, page selections, flags). Successful extraction of full text, specific page text, metadata, and page counts. Handling of multiple sources (local and URL) within a single request. Correct formatting of the JSON response. Graceful error handling for invalid inputs (caught by Zod or handler logic). Correct error reporting for file-not-found errors. Correct error reporting for PDF loading/parsing failures (mocked). Proper handling of warnings (e.g., requested pages out of bounds). Integration Tests: The primary focus is testing the handler ( ) with mocked dependencies ( , ). These tests verify: Correct parsing of various input arguments (paths, URLs, page selections, flags). Correct parsing of various input arguments (paths, URLs, page selections, flags). Successful extraction of full text, specific page text, metadata, and page counts. Successful extraction of full text, specific page text, metadata, and page counts. Handling of multiple sources (local and URL) within a single request. Handling of multiple sources (local and URL) within a single request. Correct formatting of the JSON response. Correct formatting of the JSON response. Graceful error handling for invalid inputs (caught by Zod or handler logic). Graceful error handling for invalid inputs (caught by Zod or handler logic). Correct error reporting for file-not-found errors. Correct error reporting for file-not-found errors. 
Correct error reporting for PDF loading/parsing failures (mocked). Correct error reporting for PDF loading/parsing failures (mocked). Proper handling of warnings (e.g., requested pages out of bounds). Proper handling of warnings (e.g., requested pages out of bounds). Security: Path resolution logic ( ) is tested separately ( ) to ensure it prevents path traversal and correctly handles relative paths within the project root. Security: Path resolution logic ( ) is tested separately ( ) to ensure it prevents path traversal and correctly handles relative paths within the project root. Reliability & Consistency: Tests are designed to be independent and repeatable. Mocking is used extensively to isolate the handler logic from external factors. Reliability & Consistency: Tests are designed to be independent and repeatable. Tests are designed to be independent and repeatable. Mocking is used extensively to isolate the handler logic from external factors. Mocking is used extensively to isolate the handler logic from external factors.",
"path": "/testing",
"section": "Goals & Approach"
},
{
"id": "/testing#3",
"title": "Testing Strategy > Running Tests",
"text": "Use the following npm scripts: : Run all tests once. : Run all tests once. : Run tests in an interactive watch mode, re-running on file changes. : Run tests in an interactive watch mode, re-running on file changes. : Run all tests and generate a detailed coverage report in the directory (view in that directory for an interactive report). This command will fail if coverage thresholds are not met. : Run all tests and generate a detailed coverage report in the directory (view in that directory for an interactive report). This command will fail if coverage thresholds are not met.",
"path": "/testing",
"section": "Running Tests"
},
{
"id": "/testing#4",
"title": "Testing Strategy > Test File Structure",
"text": "Tests reside in the directory, mirroring the structure. Tests reside in the directory, mirroring the structure. Handler tests are in . Handler tests are in . Utility tests are in . Utility tests are in .",
"path": "/testing",
"section": "Test File Structure"
},
{
"id": "/testing#5",
"title": "Testing Strategy > Future Improvements",
"text": "Consider adding end-to-end tests using a test MCP client/host. Explore property-based testing for more robust input validation checks.",
"path": "/testing",
"section": "Future Improvements"
},
{
"id": "/api/README",
"title": "@sylphx/pdf-reader-mcp",
"text": "@sylphx/pdf-reader-mcp",
"path": "/api/README"
},
{
"id": "/comparison",
"title": "Comparison with Other Solutions",
"text": "When an AI agent needs to access information within PDF files, several approaches exist. Here's how the PDF Reader MCP Server compares:",
"path": "/comparison"
},
{
"id": "/design",
"title": "Design Philosophy",
"text": "The PDF Reader MCP Server is built upon several core principles:",
"path": "/design"
},
{
"id": "/guide/getting-started",
"title": "Getting Started",
"text": "This guide assumes you have an MCP client or host environment capable of launching and communicating with the PDF Reader MCP Server.",
"path": "/guide/getting-started"
},
{
"id": "/guide/getting-started#1",
"title": "Getting Started > 1. Launch the Server",
"text": "Ensure the server is launched with its working directory set to the root of the project containing the PDF files you want to access. If installed via npm/pnpm: Your MCP host might manage this automatically via . If installed via npm/pnpm: Your MCP host might manage this automatically via . If running standalone: If running standalone: If using Docker: If using Docker:",
"path": "/guide/getting-started",
"section": "1. Launch the Server"
},
{
"id": "/guide/getting-started#2",
"title": "Getting Started > 2. Using the Tool",
"text": "The server provides a single primary tool: . Tool Input Schema: The tool accepts an object with the following properties: (Array<Object>, required): An array of PDF sources to process. Each source object must contain either a or a . (string, optional): Relative path to the local PDF file within the project root. (string, optional): URL of the PDF file. (Array<number> | string, optional): Extract text only from specific pages (1-based) or ranges (e.g., ). If provided, is ignored for this source. (Array<Object>, required): An array of PDF sources to process. Each source object must contain either a or a . (string, optional): Relative path to the local PDF file within the project root. (string, optional): Relative path to the local PDF file within the project root. (string, optional): URL of the PDF file. (string, optional): URL of the PDF file. (Array<number> | string, optional): Extract text only from specific pages (1-based) or ranges (e.g., ). If provided, is ignored for this source. (Array<number> | string, optional): Extract text only from specific pages (1-based) or ranges (e.g., ). If provided, is ignored for this source. (boolean, optional, default: ): Include the full text content of each PDF (only if is not specified for that source). (boolean, optional, default: ): Include the full text content of each PDF (only if is not specified for that source). (boolean, optional, default: ): Include metadata and info objects for each PDF. (boolean, optional, default: ): Include metadata and info objects for each PDF. (boolean, optional, default: ): Include the total number of pages for each PDF. (boolean, optional, default: ): Include the total number of pages for each PDF. (See the API Reference (once generated) for the full JSON schema) Example MCP Request (Get metadata and page count for one PDF): Example MCP Request (Get text from page 2 of one PDF, full text of another):",
"path": "/guide/getting-started",
"section": "2. Using the Tool"
},
{
"id": "/guide/getting-started#3",
"title": "Getting Started > 3. Understanding the Response",
"text": "The response will be an array named , with each element corresponding to a source object in the request array. Each result object contains: (string): The original path or URL provided in the request. (string): The original path or URL provided in the request. (boolean): Indicates if processing this source was successful. (boolean): Indicates if processing this source was successful. (Object, optional): Present if is . Contains the requested data: (number, optional): Total page count (if was true). (Object, optional): PDF information dictionary (if was true). (Object, optional): PDF metadata (if was true). (Array<Object>, optional): Array of objects, each with (number) and (string), for pages where text was extracted (if was specified or was true without ). (Object, optional): Present if is . Contains the requested data: (number, optional): Total page count (if was true). (number, optional): Total page count (if was true). (Object, optional): PDF information dictionary (if was true). (Object, optional): PDF information dictionary (if was true). (Object, optional): PDF metadata (if was true). (Object, optional): PDF metadata (if was true). (Array<Object>, optional): Array of objects, each with (number) and (string), for pages where text was extracted (if was specified or was true without ). (Array<Object>, optional): Array of objects, each with (number) and (string), for pages where text was extracted (if was specified or was true without ). (Object, optional): Present if is . Contains: (string): An error code (e.g., , , , , ). (string): A description of the error. (Object, optional): Present if is . Contains: (string): An error code (e.g., , , , , ). (string): An error code (e.g., , , , , ). (string): A description of the error. (string): A description of the error. (See the API Reference (once generated) for detailed response structure and error codes.)",
"path": "/guide/getting-started",
"section": "3. Understanding the Response"
},
{
"id": "/guide",
"title": "Introduction",
"text": "Welcome to the PDF Reader MCP Server documentation!",
"path": "/guide"
},
{
"id": "/guide#1",
"title": "Introduction > What Problem Does It Solve?",
"text": "AI agents often need information from PDFs (reports, invoices, manuals). Directly feeding PDF content is impractical due to format and size. This server offers specific tools to extract: Full text content; Text from specific pages; Metadata (author, title, etc.); Total page count. All interactions happen securely within the defined project boundaries.",
"path": "/guide",
"section": "What Problem Does It Solve?"
},
{
"id": "/guide#2",
"title": "Introduction > Core Principles",
"text": "Security: Confined file access. Efficiency: Structured data retrieval, avoiding large raw content transfer. Simplicity: Easy integration into MCP-enabled agent workflows.",
"path": "/guide",
"section": "Core Principles"
},
{
"id": "/guide/installation",
"title": "Installation",
"text": "Node.js (>= 18.0.0 recommended)",
"path": "/guide/installation"
},
{
"id": "/guide/installation#0",
"title": "Installation > Prerequisites",
"text": "Node.js (>= 18.0.0 recommended); npm (comes with Node.js)",
"path": "/guide/installation",
"section": "Prerequisites"
},
{
"id": "/guide/installation#1",
"title": "Installation > Using npm (Recommended)",
"text": "To use the server in your project or MCP host environment, install it as a dependency:",
"path": "/guide/installation",
"section": "Using npm (Recommended)"
},
{
"id": "/guide/installation#2",
"title": "Installation > Running Standalone (for testing/development)",
"text": "Clone the repository: Clone the repository: Install dependencies: Install dependencies: Build the project: Build the project: Run the server: \nThe server communicates via stdio. You'll typically run it from an MCP host. Important: Ensure you run this command from the root directory of the project containing the PDFs you want the server to access. Run the server: \nThe server communicates via stdio. You'll typically run it from an MCP host. Important: Ensure you run this command from the root directory of the project containing the PDFs you want the server to access.",
"path": "/guide/installation",
"section": "Running Standalone (for testing/development)"
},
{
"id": "/guide/installation#3",
"title": "Installation > Using Docker",
"text": "A Docker image is available on Docker Hub. To run the container, you need to mount the project directory containing your PDFs into the container's working directory ( ): Replace with the actual absolute path to your project folder.",
"path": "/guide/installation",
"section": "Using Docker"
},
{
"id": "/performance",
"title": "Performance",
"text": "Performance is an important consideration for the PDF Reader MCP Server, especially when dealing with large or complex PDF documents. This page outlines the benchmarking approach and presents results from initial tests.",
"path": "/performance"
},
{
"id": "/performance#1",
"title": "Performance > Benchmarking Setup",
"text": "Benchmarks are conducted using the Vitest testing framework's built-in functionality. The tests measure the number of operations per second (hz) for different scenarios using the handler. Environment: Node.js (latest LTS), Windows 11 (as per user environment) Environment: Node.js (latest LTS), Windows 11 (as per user environment) Test File: A sample PDF located at . The exact characteristics of this file (size, page count, complexity) will influence the results. Test File: A sample PDF located at . The exact characteristics of this file (size, page count, complexity) will influence the results. Methodology: Each scenario is run for a fixed duration (1000ms) to determine the average operations per second. The benchmark code can be found in . Methodology: Each scenario is run for a fixed duration (1000ms) to determine the average operations per second. The benchmark code can be found in .",
"path": "/performance",
"section": "Benchmarking Setup"
},
{
"id": "/performance#2",
"title": "Performance > Initial Benchmark Results",
"text": "The following results were obtained on 2025-04-07 using the setup described above: (Higher hz indicates better performance) Interpretation: Handling errors for non-existent files is the fastest operation as it involves minimal I/O and no PDF parsing. Handling errors for non-existent files is the fastest operation as it involves minimal I/O and no PDF parsing. Extracting the full text was slightly faster than extracting specific pages or just metadata/page count in this particular test run. This might be influenced by the specific structure of and potential caching mechanisms within the library. Extracting the full text was slightly faster than extracting specific pages or just metadata/page count in this particular test run. This might be influenced by the specific structure of and potential caching mechanisms within the library. Extracting only metadata and page count was slightly slower than full text extraction for this file. Extracting only metadata and page count was slightly slower than full text extraction for this file. Note: These results are specific to the file and the testing environment used. Performance with different PDFs (varying sizes, complexities, versions, or structures) may differ significantly.",
"path": "/performance",
"section": "Initial Benchmark Results"
},
{
"id": "/performance#3",
"title": "Performance > Future Benchmarking Goals",
"text": "Further benchmarks are planned to measure: Parsing Time: Time taken to load and parse PDFs of varying sizes (e.g., 1 page, 10 pages, 100 pages, 1000 pages). Parsing Time: Time taken to load and parse PDFs of varying sizes (e.g., 1 page, 10 pages, 100 pages, 1000 pages). Text Extraction Speed: More detailed analysis across different page ranges and document structures. Text Extraction Speed: More detailed analysis across different page ranges and document structures. Memory Usage: Peak memory consumption during processing of different PDF sizes. Memory Usage: Peak memory consumption during processing of different PDF sizes. URL vs. Local File: Performance difference between processing local files and downloading/processing from URLs. URL vs. Local File: Performance difference between processing local files and downloading/processing from URLs. Comparison: Comparison with other PDF processing methods or libraries, if applicable. Comparison: Comparison with other PDF processing methods or libraries, if applicable. Results will be updated here as more comprehensive testing is completed.",
"path": "/performance",
"section": "Future Benchmarking Goals"
}
]