read_pdf
Use the PDF Reader MCP Server to extract text, metadata, and page count from PDF files (local paths or URLs), optionally limiting extraction to specific pages or page ranges.
Instructions
Reads content/metadata from one or more PDFs (local/URL). Each source can specify pages to extract.
Input Schema
Name | Required | Description | Default |
---|---|---|---|
include_full_text | No | Include the full text content of each PDF (only if 'pages' is not specified for that source). | false |
include_metadata | No | Include metadata and info objects for each PDF. | true |
include_page_count | No | Include the total number of pages for each PDF. | true |
sources | Yes | An array of PDF sources to process, each can optionally specify pages. | |
Input Schema (JSON Schema)
{
  "additionalProperties": false,
  "properties": {
    "include_full_text": {
      "default": false,
      "description": "Include the full text content of each PDF (only if 'pages' is not specified for that source).",
      "type": "boolean"
    },
    "include_metadata": {
      "default": true,
      "description": "Include metadata and info objects for each PDF.",
      "type": "boolean"
    },
    "include_page_count": {
      "default": true,
      "description": "Include the total number of pages for each PDF.",
      "type": "boolean"
    },
    "sources": {
      "description": "An array of PDF sources to process, each can optionally specify pages.",
      "items": {
        "additionalProperties": false,
        "properties": {
          "pages": {
            "anyOf": [
              {
                "items": {
                  "minimum": 1,
                  "type": "integer"
                },
                "minItems": 1,
                "type": "array"
              },
              {
                "minLength": 1,
                "type": "string"
              }
            ],
            "description": "Extract text only from specific pages (1-based) or ranges for *this specific source*. If provided, 'include_full_text' for the entire request is ignored for this source."
          },
          "path": {
            "description": "Relative path to the local PDF file.",
            "minLength": 1,
            "type": "string"
          },
          "url": {
            "description": "URL of the PDF file.",
            "format": "uri",
            "type": "string"
          }
        },
        "type": "object"
      },
      "minItems": 1,
      "type": "array"
    }
  },
  "required": [
    "sources"
  ],
  "type": "object"
}