document_reader
Read extractable text from non-image document files including PDF, DOCX, TXT, HTML, and CSV at specified paths.
Instructions
Read content from non-image document-files at specified paths, supporting various file formats: .pdf, .docx, .txt, .html, .csv
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| filePath | Yes | Path to the file to be read | |
Implementation Reference
- src/tools/documentReader.ts:83-118 (handler) Main handler function 'readFile' that dispatches to format-specific readers (.pdf, .docx, .txt, .html, .csv) based on file extension.
/**
 * Reads the text content of a document, choosing a reader by file extension.
 *
 * The extension is lower-cased before dispatch, so `.PDF` and `.pdf` are
 * handled alike. Supported extensions: `.pdf`, `.docx`, `.txt`, `.html`, `.csv`.
 *
 * Every failure inside the dispatch (including an unsupported extension) is
 * caught and reported through the returned object instead of being thrown.
 *
 * The return type is an explicit union tagged by the literal `success` flag so
 * callers can narrow to `data` or `error` after checking it. The optional
 * `undefined` members keep un-narrowed property access compiling as before.
 *
 * @param filePath - Path to the file to be read.
 * @returns `{ success: true, data }` with the extracted text, or
 *   `{ success: false, error }` with the failure message ("Unknown error" when
 *   the thrown value is not an `Error`).
 */
export async function readFile(
  filePath: string,
): Promise<
  | { success: true; data: string; error?: undefined }
  | { success: false; error: string; data?: undefined }
> {
  try {
    const ext = path.extname(filePath).toLowerCase();
    let content: string;

    switch (ext) {
      case ".pdf":
        content = await readPDFFile(filePath);
        break;
      case ".docx":
        content = await readDocxFile(filePath);
        break;
      case ".txt":
        content = await readTextFile(filePath);
        break;
      case ".html":
        content = await readHTMLFile(filePath);
        break;
      case ".csv":
        content = await readCSVFile(filePath);
        break;
      default:
        throw new Error(`Unsupported file format: ${ext}`);
    }

    return {
      success: true,
      data: content,
    };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : "Unknown error",
    };
  }
}
- src/tools/documentReader.ts:38-40 (helper)Helper function to read plain text (.txt) files.
/**
 * Reads a file as UTF-8 text and returns its contents unchanged.
 *
 * @param filePath - Path to the text file.
 * @returns The decoded file contents; the promise rejects if the read fails.
 */
async function readTextFile(filePath: string): Promise<string> {
  const text = await fs.readFile(filePath, "utf-8");
  return text;
}
- src/tools/documentReader.ts:42-59 (helper)Helper function to read PDF files using pdfreader library.
/**
 * Extracts the text of a PDF by feeding the file's bytes to a `PdfReader`.
 *
 * Each parsed item that carries text is appended to the result followed by a
 * single space, so the returned string ends with a space whenever at least one
 * text item was seen.
 *
 * @param filePath - Path to the PDF file.
 * @returns A promise that resolves with the concatenated text when the parser
 *   calls back with neither an error nor an item, and rejects when the file
 *   read fails or the parser reports an error.
 */
async function readPDFFile(filePath: string): Promise<string> {
  const pdfBytes = await fs.readFile(filePath);

  return new Promise<string>((resolve, reject) => {
    const pieces: string[] = [];

    const onItem = ((err: null | Error, item: Item | undefined) => {
      if (err) {
        reject(err);
        return;
      }
      if (!item) {
        // A callback with no error and no item is treated as the end of parsing.
        resolve(pieces.join(""));
        return;
      }
      if (item.text) {
        pieces.push(item.text + " ");
      }
    }) as ItemHandler;

    new PdfReader().parseBuffer(pdfBytes, onItem);
  });
}
- src/tools/documentReader.ts:61-65 (helper)Helper function to read DOCX files using mammoth library.
/**
 * Extracts the raw text of a DOCX file via `mammoth.extractRawText`.
 *
 * @param filePath - Path to the DOCX file.
 * @returns The `value` field of mammoth's extraction result; the promise
 *   rejects if the file read or the extraction fails.
 */
async function readDocxFile(filePath: string): Promise<string> {
  const docxBytes = await fs.readFile(filePath);
  const { value } = await mammoth.extractRawText({ buffer: docxBytes });
  return value;
}
- src/tools/documentReader.ts:77-81 (helper)Helper function to read HTML files using JSDOM to extract text content.
/**
 * Loads an HTML file as UTF-8, parses it with JSDOM, and returns the text
 * content of the document's `<body>`.
 *
 * @param filePath - Path to the HTML file.
 * @returns The body's `textContent`, or an empty string when it is null or empty.
 */
async function readHTMLFile(filePath: string): Promise<string> {
  const markup = await fs.readFile(filePath, "utf-8");
  const { document } = new JSDOM(markup).window;
  const bodyText = document.body.textContent;
  return bodyText ? bodyText : "";
}
- src/tools/documentReader.ts:13-27 (schema)Input schema and type definitions for the document_reader tool, requiring a 'filePath' string property.
inputSchema: { type: "object", properties: { filePath: { type: "string", description: "Path to the file to be read", }, }, required: ["filePath"], }, }; export interface FileReaderArgs { filePath: string; } - src/tools/documentReader.ts:9-23 (registration)Tool definition object DOCUMENT_READER_TOOL with name 'document_reader', description, and inputSchema.
/**
 * Tool definition for `document_reader`.
 *
 * Declares the tool's name, its human-readable description, and a JSON-schema
 * style input contract with a single required string property, `filePath`.
 */
export const DOCUMENT_READER_TOOL: Tool = {
  name: "document_reader",
  description:
    "Read content from non-image document-files at specified paths, supporting various file formats: .pdf, .docx, .txt, .html, .csv",
  inputSchema: {
    type: "object",
    properties: {
      filePath: {
        type: "string",
        description: "Path to the file to be read",
      },
    },
    required: ["filePath"],
  },
};
- src/tools/_index.ts:1-9 (registration)Re-export of DOCUMENT_READER_TOOL and tool registration in the tools array and barrel export.
import { DOCUMENT_READER_TOOL } from "./documentReader.js"; import { DOCX_TO_HTML_TOOL, DOCX_TO_PDF_TOOL } from "./docxTools.js"; import { EXCEL_READ_TOOL } from "./excelTools.js"; import { FORMAT_CONVERTER_TOOL } from "./formatConverterPlus.js"; import { HTML_CLEAN_TOOL, HTML_EXTRACT_RESOURCES_TOOL, HTML_FORMAT_TOOL, HTML_TO_MARKDOWN_TOOL, HTML_TO_TEXT_TOOL } from "./htmlTools.js"; import { PDF_MERGE_TOOL, PDF_SPLIT_TOOL } from "./pdfTools.js"; import { TEXT_DIFF_TOOL, TEXT_ENCODING_CONVERT_TOOL, TEXT_FORMAT_TOOL, TEXT_SPLIT_TOOL } from "./txtTools.js"; export const tools = [DOCUMENT_READER_TOOL, PDF_MERGE_TOOL, PDF_SPLIT_TOOL, DOCX_TO_PDF_TOOL, DOCX_TO_HTML_TOOL, HTML_CLEAN_TOOL, HTML_TO_TEXT_TOOL, HTML_TO_MARKDOWN_TOOL, HTML_EXTRACT_RESOURCES_TOOL, HTML_FORMAT_TOOL, TEXT_DIFF_TOOL, TEXT_SPLIT_TOOL, TEXT_FORMAT_TOOL, TEXT_ENCODING_CONVERT_TOOL, EXCEL_READ_TOOL, FORMAT_CONVERTER_TOOL]; - src/index.ts:59-75 (handler)Server request handler that validates args and calls readFile() for the 'document_reader' tool name.
// Dispatch branch for the "document_reader" tool: validate the arguments, read
// the file, and wrap the outcome in a text content response.
if (name === "document_reader") {
  // Malformed arguments are thrown rather than returned as an error response.
  if (!isFileReaderArgs(args)) {
    throw new Error("Invalid arguments for document_reader");
  }

  const outcome = await readFile(args.filePath);

  // readFile reports failure through its result, so it is surfaced here as an
  // error response with isError set instead of an exception.
  return outcome.success
    ? {
        content: [{ type: "text", text: outcome.data }],
        isError: false,
      }
    : {
        content: [{ type: "text", text: `Error: ${outcome.error}` }],
        isError: true,
      };
}