// index.js • 50.2 kB
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import fs from "fs/promises";
import path from "path";
import { createRequire } from "module";
import axios from "axios";
import { pdfToPng } from "pdf-to-png-converter";
import { PDFDocument } from "pdf-lib";
import crypto from "crypto";
import os from "os";
const require = createRequire(import.meta.url);
/**
 * Picks the per-user directory that stores OCR cache files.
 *
 * Windows keeps the location this server has always used
 * (`<home>/AppData/Local/pdf-ocr-cache`) so existing caches remain valid.
 * Other platforms used to get a stray `AppData/Local` folder inside the home
 * directory; they now use the platform's conventional cache location.
 *
 * @returns {string} Path of the cache directory (it is not created here).
 */
function resolveCacheDir() {
  const home = os.homedir();
  if (process.platform === 'win32') {
    return path.join(home, 'AppData', 'Local', 'pdf-ocr-cache');
  }
  if (process.platform === 'darwin') {
    return path.join(home, 'Library', 'Caches', 'pdf-ocr-cache');
  }
  // XDG_CACHE_HOME is only honoured when it is an absolute path.
  const xdgCacheHome = process.env.XDG_CACHE_HOME;
  const base = xdgCacheHome && path.isAbsolute(xdgCacheHome)
    ? xdgCacheHome
    : path.join(home, '.cache');
  return path.join(base, 'pdf-ocr-cache');
}

// Settings for the on-disk OCR result cache.
const CACHE_CONFIG = {
  // Size threshold (in MB) above which old cache files are pruned.
  MAX_SIZE_MB: 500,
  // Directory holding one `<hash>.json` file per processed PDF.
  CACHE_DIR: resolveCacheDir()
};
/**
 * Computes a quick MD5 fingerprint of a file without reading all of it.
 *
 * The digest covers the file size, its modification time, the first 64 KiB,
 * and - for files larger than two / three sample sizes respectively - a
 * 64 KiB window from the middle and one from the end.
 *
 * @param {string} filePath - File to fingerprint.
 * @returns {Promise<string>} Hex-encoded MD5 digest.
 * @throws {Error} Wrapping any stat/open/read failure.
 */
async function fastFileHash(filePath) {
  const SAMPLE_BYTES = 64 * 1024;
  try {
    const { size, mtimeMs } = await fs.stat(filePath);

    const digest = crypto.createHash('md5');
    digest.update(size.toString());
    digest.update(mtimeMs.toString());

    // Each entry is [length, position] of a window to feed into the digest.
    const windows = [[Math.min(SAMPLE_BYTES, size), 0]];
    if (size > SAMPLE_BYTES * 2) {
      windows.push([SAMPLE_BYTES, Math.floor((size - SAMPLE_BYTES) / 2)]);
    }
    if (size > SAMPLE_BYTES * 3) {
      windows.push([SAMPLE_BYTES, size - SAMPLE_BYTES]);
    }

    const handle = await fs.open(filePath, 'r');
    try {
      for (const [length, position] of windows) {
        const chunk = Buffer.alloc(length);
        await handle.read(chunk, 0, length, position);
        digest.update(chunk);
      }
      return digest.digest('hex');
    } finally {
      await handle.close();
    }
  } catch (error) {
    throw new Error(`计算文件哈希失败: ${error.message}`);
  }
}
/**
 * Makes sure the cache directory exists, creating it (and any missing
 * parents) only when the access check fails.
 *
 * @returns {Promise<void>}
 */
async function ensureCacheDir() {
  const dir = CACHE_CONFIG.CACHE_DIR;
  await fs.access(dir).catch(() => fs.mkdir(dir, { recursive: true }));
}
/**
 * Builds the path of the cache file that belongs to a file fingerprint.
 *
 * @param {string} fileHash - Fingerprint used as the file name stem.
 * @returns {string} `<cache dir>/<fileHash>.json`.
 */
function getCachePath(fileHash) {
  const fileName = `${fileHash}.json`;
  return path.join(CACHE_CONFIG.CACHE_DIR, fileName);
}
/**
 * Reads and parses the cache entry for a file fingerprint.
 *
 * @param {string} fileHash - Fingerprint identifying the cache file.
 * @returns {Promise<object|null>} Parsed cache content, or `null` when the
 *   file cannot be read or does not contain valid JSON.
 */
async function loadCache(fileHash) {
  let parsed = null;
  try {
    const raw = await fs.readFile(getCachePath(fileHash), 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
/**
 * Writes a cache entry as pretty-printed JSON and then triggers a cache
 * size check. Failures are logged to stderr and never propagate.
 *
 * @param {string} fileHash - Fingerprint identifying the cache file.
 * @param {object} cacheData - Content to persist.
 * @returns {Promise<void>}
 */
async function saveCache(fileHash, cacheData) {
  try {
    await ensureCacheDir();
    const target = getCachePath(fileHash);
    const serialized = JSON.stringify(cacheData, null, 2);
    await fs.writeFile(target, serialized, 'utf8');
    await cleanCacheIfNeeded();
  } catch (error) {
    console.error(`保存缓存失败: ${error.message}`);
  }
}
/**
 * Sums the sizes of all `.json` files in the cache directory.
 *
 * @returns {Promise<number>} Total size in megabytes, or 0 when the
 *   directory cannot be inspected.
 */
async function getCacheSize() {
  const BYTES_PER_MB = 1024 * 1024;
  try {
    await ensureCacheDir();
    const dir = CACHE_CONFIG.CACHE_DIR;
    const jsonNames = (await fs.readdir(dir)).filter((name) => name.endsWith('.json'));
    let bytes = 0;
    for (const name of jsonNames) {
      const { size } = await fs.stat(path.join(dir, name));
      bytes += size;
    }
    return bytes / BYTES_PER_MB;
  } catch {
    return 0;
  }
}
/**
 * Prunes the cache when it exceeds the configured size: the oldest 20%
 * (rounded up) of the `.json` files, by modification time, are deleted.
 * Failures are logged to stderr and never propagate.
 *
 * @returns {Promise<void>}
 */
async function cleanCacheIfNeeded() {
  try {
    const sizeMb = await getCacheSize();
    if (!(sizeMb > CACHE_CONFIG.MAX_SIZE_MB)) {
      return;
    }

    await ensureCacheDir();
    const dir = CACHE_CONFIG.CACHE_DIR;
    const entries = [];
    for (const name of await fs.readdir(dir)) {
      if (!name.endsWith('.json')) {
        continue;
      }
      const fullPath = path.join(dir, name);
      const { mtimeMs } = await fs.stat(fullPath);
      entries.push({ path: fullPath, mtime: mtimeMs });
    }

    // Oldest first, so the front of the list is what gets evicted.
    entries.sort((left, right) => left.mtime - right.mtime);
    const victims = entries.slice(0, Math.ceil(entries.length * 0.2));
    for (const victim of victims) {
      await fs.unlink(victim.path);
    }
  } catch (error) {
    console.error(`清理缓存失败: ${error.message}`);
  }
}
// MCP server instance; every tool below is registered on it via server.tool().
const server = new McpServer({
name: "PDF Reader",
version: "1.0.0"
});
/**
 * Reads a PDF from disk and parses it with pdf-parse.
 *
 * Files larger than 50 MB are rejected before being read. While pdf-parse
 * runs, `console.log/warn/error/info` are replaced by no-ops and restored
 * afterwards, so the parser cannot write to the process output.
 *
 * @param {string} filePath - PDF file to load.
 * @returns {Promise<object>} The pdf-parse result.
 * @throws {Error} Wrapping any size, read or parse failure.
 */
async function loadPDF(filePath) {
  const MAX_BYTES = 50 * 1024 * 1024;
  const SILENCED_METHODS = ['log', 'warn', 'error', 'info'];
  try {
    const { size } = await fs.stat(filePath);
    if (size > MAX_BYTES) {
      throw new Error(`文件过大: ${(size / 1024 / 1024).toFixed(2)}MB。最大支持 50MB。`);
    }

    const dataBuffer = await fs.readFile(filePath);
    const pdfParse = require("pdf-parse");

    const saved = SILENCED_METHODS.map((method) => [method, console[method]]);
    for (const method of SILENCED_METHODS) {
      console[method] = () => {};
    }
    try {
      return await pdfParse(dataBuffer);
    } finally {
      for (const [method, original] of saved) {
        console[method] = original;
      }
    }
  } catch (error) {
    throw new Error(`PDF 加载失败: ${error.message}`);
  }
}
/**
 * Verifies that a path points to an accessible file whose name ends in
 * `.pdf` (case-insensitive). Existence is checked before the extension.
 *
 * @param {string} filePath - Path supplied by the tool caller.
 * @returns {Promise<void>}
 * @throws {Error} When the file is not accessible or is not a `.pdf`.
 */
async function validatePDFPath(filePath) {
  const accessible = await fs.access(filePath).then(
    () => true,
    () => false
  );
  if (!accessible) {
    throw new Error(`文件不存在: ${filePath}`);
  }

  const hasPdfExtension = filePath.toLowerCase().endsWith('.pdf');
  if (!hasPdfExtension) {
    throw new Error(`文件必须是 .pdf 格式: ${filePath}`);
  }
}
/**
 * Extracts the text of one page by copying that page into a fresh
 * single-page PDF (pdf-lib) and running pdf-parse on the result.
 *
 * Console output is silenced while pdf-parse runs and restored afterwards.
 *
 * @param {string} filePath - Source PDF.
 * @param {number} pageNum - 1-based page number.
 * @returns {Promise<string>} Text reported by pdf-parse for that page.
 * @throws {Error} Wrapping any read, copy or parse failure.
 */
async function extractSinglePageText(filePath, pageNum) {
  const SILENCED_METHODS = ['log', 'warn', 'error', 'info'];
  try {
    const sourceBytes = await fs.readFile(filePath);
    const sourceDoc = await PDFDocument.load(sourceBytes);

    const singlePageDoc = await PDFDocument.create();
    const [onlyPage] = await singlePageDoc.copyPages(sourceDoc, [pageNum - 1]);
    singlePageDoc.addPage(onlyPage);
    const singlePageBytes = await singlePageDoc.save();

    const pdfParse = require("pdf-parse");
    const saved = SILENCED_METHODS.map((method) => [method, console[method]]);
    for (const method of SILENCED_METHODS) {
      console[method] = () => {};
    }
    try {
      const parsed = await pdfParse(Buffer.from(singlePageBytes));
      return parsed.text;
    } finally {
      for (const [method, original] of saved) {
        console[method] = original;
      }
    }
  } catch (error) {
    throw new Error(`提取第 ${pageNum} 页文本失败: ${error.message}`);
  }
}
/**
 * Reports whether a page references at least one image XObject.
 *
 * Only the page's own `/Resources -> /XObject` dictionary is inspected.
 * Detection is best-effort: any failure (unreadable file, malformed
 * structure) yields `false`.
 *
 * The previous implementation iterated the dictionary's internal Map and
 * then read `.context` from that Map, which is undefined; every entry
 * lookup threw inside a silent catch, so the function always returned
 * `false`. Entries are now resolved through the dictionary's own
 * `entries()` and the document context.
 *
 * @param {string} filePath - PDF file to inspect.
 * @param {number} pageNum - 1-based page number.
 * @returns {Promise<boolean>} `true` when an XObject with an image subtype
 *   is found on the page.
 */
async function pageContainsImages(filePath, pageNum) {
  try {
    const pdfBytes = await fs.readFile(filePath);
    const pdfDoc = await PDFDocument.load(pdfBytes);
    const { context } = pdfDoc;

    const resources = pdfDoc.getPage(pageNum - 1).node.Resources();
    if (!resources) return false;

    const xObjects = resources.lookup(context.obj('XObject'));
    if (!xObjects || typeof xObjects.entries !== 'function') return false;

    const subtypeKey = context.obj('Subtype');
    for (const [, ref] of xObjects.entries()) {
      try {
        const xObject = context.lookup(ref);
        const subtype = xObject?.dict?.get(subtypeKey);
        if (subtype && subtype.toString().includes('Image')) {
          return true;
        }
      } catch {
        // One malformed entry must not stop the remaining entries from
        // being checked; detection stays best-effort.
      }
    }
    return false;
  } catch {
    return false;
  }
}
/**
 * Heuristically rates the extracted text of one page.
 *
 * The text is 'poor' when it is missing or shorter than 100 characters,
 * when fewer than 40% of its characters are word characters or CJK
 * ideographs (U+4E00-U+9FA5), when it is blank, or when it is under 300
 * trimmed characters and has fewer than 30 whitespace-separated words.
 *
 * @param {string} text - Extracted page text.
 * @returns {'good'|'poor'} Quality verdict.
 */
function checkSinglePageQuality(text) {
  const POOR = 'poor';
  if (!text || text.length < 100) {
    return POOR;
  }

  const meaningfulChars = text.match(/[\w\u4e00-\u9fa5]/g)?.length ?? 0;
  if (meaningfulChars / text.length < 0.4) {
    return POOR;
  }

  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return POOR;
  }
  if (trimmed.length < 300 && trimmed.split(/\s+/).length < 30) {
    return POOR;
  }
  return 'good';
}
/**
 * Heuristically rates text extracted from a whole document.
 *
 * The text is 'poor' when it averages fewer than 50 characters per page,
 * when fewer than 30% of its characters are word characters or CJK
 * ideographs (U+4E00-U+9FA5), or when it is blank.
 *
 * @param {string} text - Extracted document text.
 * @param {number} pageCount - Number of pages the text came from.
 * @returns {'good'|'poor'} Quality verdict.
 */
function checkTextQuality(text, pageCount) {
  const minimumLength = 50 * pageCount;
  if (text.length < minimumLength) {
    return 'poor';
  }

  const meaningfulChars = text.match(/[\w\u4e00-\u9fa5]/g)?.length ?? 0;
  const tooNoisy = meaningfulChars / text.length < 0.3;
  if (tooNoisy) {
    return 'poor';
  }

  return text.trim().length === 0 ? 'poor' : 'good';
}
/**
 * Turns a page specification into a list of 1-based page numbers.
 *
 * Accepted forms (surrounding whitespace and keyword case are ignored):
 *   - 'first', 'last', 'all'
 *   - a single page: '7'
 *   - a range: '2-4'
 *   - a comma-separated mix of pages and ranges: '1-3, 5, 8-9'
 *
 * Order and duplicates are preserved as written.
 *
 * @param {string} pages - Page specification supplied by the caller.
 * @param {number} totalPages - Number of pages in the document.
 * @returns {number[]} Page numbers in the requested order.
 * @throws {Error} When the specification is malformed, a range is reversed,
 *   or a page lies outside 1..totalPages.
 */
function parseSmartPageRange(pages, totalPages) {
  const spec = String(pages).trim().toLowerCase();
  if (spec === 'first') return [1];
  if (spec === 'last') return [totalPages];
  if (spec === 'all') {
    return Array.from({ length: totalPages }, (_, i) => i + 1);
  }

  const invalid = () => new Error(`无效的页码范围: "${pages}"`);

  // Converts one token to a validated page number.
  const toPage = (token) => {
    const digits = token.trim();
    if (!/^\d+$/.test(digits)) throw invalid();
    const page = Number(digits);
    if (page < 1) throw invalid();
    if (Number.isFinite(totalPages) && page > totalPages) {
      throw new Error(`页码超出范围: ${page}(共 ${totalPages} 页)`);
    }
    return page;
  };

  const result = [];
  for (const part of spec.split(',')) {
    const bounds = part.split('-');
    if (bounds.length === 1) {
      result.push(toPage(bounds[0]));
    } else if (bounds.length === 2) {
      const start = toPage(bounds[0]);
      const end = toPage(bounds[1]);
      if (start > end) throw invalid();
      for (let page = start; page <= end; page++) {
        result.push(page);
      }
    } else {
      throw invalid();
    }
  }
  return result;
}
// In-memory cache of the most recently issued Baidu access token.
let cachedAccessToken = null;
// Epoch milliseconds at which the cached token expires.
let tokenExpireTime = 0;
// Credentials the cached token was issued for. Tools may be called with
// different key pairs, and a token must never be reused across them.
let cachedTokenCredentials = null;

/**
 * Returns a Baidu OAuth access token for the given credentials.
 *
 * A cached token is reused only when it was issued for the same key pair
 * and still has more than five minutes of validity left; otherwise a new
 * token is requested with the client-credentials grant.
 *
 * @param {string} apiKey - Baidu API key (`client_id`).
 * @param {string} secretKey - Baidu secret key (`client_secret`).
 * @returns {Promise<string>} Access token.
 * @throws {Error} When the request fails or the response has no token.
 */
async function getBaiduAccessToken(apiKey, secretKey) {
  const credentials = `${apiKey}\n${secretKey}`;
  const stillFresh = Date.now() < tokenExpireTime - 5 * 60 * 1000;
  if (cachedAccessToken && cachedTokenCredentials === credentials && stillFresh) {
    return cachedAccessToken;
  }

  try {
    const response = await axios.post(
      'https://aip.baidubce.com/oauth/2.0/token',
      null,
      {
        params: {
          grant_type: 'client_credentials',
          client_id: apiKey,
          client_secret: secretKey
        }
      }
    );

    const payload = response.data ?? {};
    if (!payload.access_token) {
      // Never cache an undefined token; surface the service's explanation.
      throw new Error(payload.error_description || '响应中缺少 access_token');
    }

    cachedAccessToken = payload.access_token;
    cachedTokenCredentials = credentials;
    // Falls back to 30 days when the response carries no expiry.
    tokenExpireTime = Date.now() + (payload.expires_in || 2592000) * 1000;
    return cachedAccessToken;
  } catch (error) {
    const detail = error.response?.data?.error_description || error.message;
    throw new Error(`获取百度访问令牌失败: ${detail}`);
  }
}
/**
 * Sends one base64-encoded image to Baidu's `general_basic` OCR endpoint.
 *
 * Error payloads delivered in the response body (an `error_code` field)
 * are raised as errors instead of being returned as if they were results.
 *
 * @param {string} imageBase64 - Image content, base64 encoded.
 * @param {string} accessToken - Token from getBaiduAccessToken.
 * @param {object} [options] - Optional recognition switches.
 * @param {string} [options.language_type] - Language hint sent as-is.
 * @param {boolean} [options.detect_direction] - Sends `detect_direction=true`.
 * @param {boolean} [options.detect_language] - Sends `detect_language=true`.
 * @param {boolean} [options.probability] - Sends `probability=true`.
 * @param {boolean} [options.paragraph] - Sends `paragraph=true`.
 * @returns {Promise<object>} Response body of the OCR service.
 * @throws {Error} When the request fails or the body reports an error.
 */
async function baiduOCR(imageBase64, accessToken, options = {}) {
  try {
    const form = new URLSearchParams();
    form.append('image', imageBase64);
    if (options.language_type) form.append('language_type', options.language_type);
    for (const flag of ['detect_direction', 'detect_language', 'probability', 'paragraph']) {
      if (options[flag]) form.append(flag, 'true');
    }

    const response = await axios.post(
      'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic',
      form.toString(),
      {
        // Passed as a query parameter so the token gets URL-encoded.
        params: { access_token: accessToken },
        headers: {
          'Content-Type': 'application/x-www-form-urlencoded'
        }
      }
    );

    const result = response.data;
    if (result?.error_code) {
      throw new Error(result.error_msg || `error_code ${result.error_code}`);
    }
    return result;
  } catch (error) {
    throw new Error(`百度 OCR 识别失败: ${error.response?.data?.error_msg || error.message}`);
  }
}
// Tool "read-pdf-smart": processes a PDF page by page. Pages whose OCR
// result is cached reuse it; pages that contain images or yield fewer than
// 50 characters of text go through Baidu OCR; all other pages use plain
// text extraction. Results and statistics are returned as one text report.
//
// Fixes over the previous version:
//  - a page that cannot be rendered to an image now raises an error instead
//    of pushing a null result that crashed the report loop;
//  - OCR results obtained before a later page fails are written to the
//    cache, so already-paid OCR calls are not repeated on retry;
//  - the two identical OCR branches share one local helper.
server.tool(
  "read-pdf-smart",
  {
    file: z.string().describe("PDF 文件路径"),
    pages: z.string().optional().describe("页码范围('all', 'first', 'last', '1-5', '1,3,5'),默认 'all'"),
    language_type: z.enum(["CHN_ENG", "ENG", "JAP", "KOR", "FRE", "SPA", "POR", "GER", "ITA", "RUS"]).optional().describe("OCR 语言类型(如需要),默认 CHN_ENG")
  },
  async ({ file, pages = "all", language_type = "CHN_ENG" }) => {
    const startTime = Date.now();
    try {
      await validatePDFPath(file);
      let response = `🎯 智能 PDF 识别: ${path.basename(file)}\n\n`;
      response += "📂 正在分析文件...\n";

      const fileHash = await fastFileHash(file);
      const pdfData = await loadPDF(file);
      const pageNumbers = parseSmartPageRange(pages, pdfData.numpages);
      const fileSize = (await fs.stat(file)).size;
      response += ` 文件大小: ${(fileSize / 1024).toFixed(2)} KB\n`;
      response += ` 总页数: ${pdfData.numpages}\n`;
      response += ` 处理页数: ${pageNumbers.length}\n\n`;

      const cache = await loadCache(fileHash);
      let textExtractCount = 0;
      let ocrCount = 0;
      let cacheHitCount = 0;
      let actualOcrCalls = 0;
      const apiKey = process.env.BAIDU_OCR_API_KEY;
      const secretKey = process.env.BAIDU_OCR_SECRET_KEY;
      let accessToken = null;
      const results = [];
      const cachePages = {};

      // Builds the result for a page served by plain text extraction.
      const textResult = (pageNum, pageText) => {
        textExtractCount++;
        return {
          page: pageNum,
          method: '文本提取',
          text: pageText,
          ocrUsed: false,
          cached: false,
          charCount: pageText.length
        };
      };

      // Renders one page to PNG, runs OCR on it, records the result in the
      // pending cache entries and returns the page result.
      const ocrPage = async (pageNum, method, missingCredentialsMessage) => {
        if (!apiKey || !secretKey) {
          throw new Error(missingCredentialsMessage);
        }
        if (!accessToken) {
          accessToken = await getBaiduAccessToken(apiKey, secretKey);
        }
        const pngPages = await pdfToPng(file, {
          pagesToProcess: [pageNum]
        });
        if (!pngPages || pngPages.length === 0) {
          throw new Error(`第 ${pageNum} 页渲染为图片失败,无法进行 OCR`);
        }
        const imageBase64 = pngPages[0].content.toString('base64');
        const ocrResult = await baiduOCR(imageBase64, accessToken, {
          language_type,
          detect_direction: true,
          detect_language: true
        });
        const extractedText = ocrResult.words_result
          ? ocrResult.words_result.map(item => item.words).join('\n')
          : '';
        const wordsCount = ocrResult.words_result_num || 0;
        ocrCount++;
        actualOcrCalls++;
        cachePages[pageNum] = {
          pageNum,
          method,
          ocrUsed: true,
          needOCR: true,
          text: extractedText,
          wordsCount,
          direction: ocrResult.direction,
          language: ocrResult.language
        };
        return {
          page: pageNum,
          method,
          text: extractedText,
          ocrUsed: true,
          cached: false,
          wordsCount,
          direction: ocrResult.direction,
          language: ocrResult.language
        };
      };

      // Merges this run's page entries into the cache file. saveCache logs
      // its own failures and does not throw.
      const persistCache = () => saveCache(fileHash, {
        fileHash,
        fileName: path.basename(file),
        fileSize,
        totalPages: pdfData.numpages,
        processedAt: Date.now(),
        pages: { ...cache?.pages, ...cachePages }
      });

      response += "🔄 开始逐页处理...\n\n";
      try {
        for (const pageNum of pageNumbers) {
          let pageResult = null;

          const pageCache = cache?.pages?.[pageNum];
          if (pageCache) {
            if (pageCache.ocrUsed && pageCache.text) {
              pageResult = {
                page: pageNum,
                method: 'OCR识别(缓存)💾',
                text: pageCache.text,
                ocrUsed: true,
                cached: true,
                wordsCount: pageCache.wordsCount,
                direction: pageCache.direction,
                language: pageCache.language
              };
              ocrCount++;
              cacheHitCount++;
            } else if (!pageCache.needOCR) {
              // The cache only remembers that this page is text-extractable;
              // the text itself is extracted again.
              pageResult = textResult(pageNum, await extractSinglePageText(file, pageNum));
            }
          }

          if (!pageResult) {
            const hasImages = await pageContainsImages(file, pageNum);
            if (hasImages) {
              pageResult = await ocrPage(
                pageNum,
                'OCR识别(图片页)',
                "检测到图片页,但未找到百度 OCR API 凭证。请在环境变量中设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY。"
              );
            } else {
              const pageText = await extractSinglePageText(file, pageNum);
              if (pageText.trim().length >= 50) {
                pageResult = textResult(pageNum, pageText);
                cachePages[pageNum] = {
                  pageNum,
                  method: '文本提取',
                  ocrUsed: false,
                  needOCR: false,
                  text: null
                };
              } else {
                pageResult = await ocrPage(
                  pageNum,
                  'OCR识别(文本提取失败)',
                  "页面文本提取失败(可能是扫描页),但未找到百度 OCR API 凭证。请在环境变量中设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY。"
                );
              }
            }
          }

          results.push(pageResult);
        }
      } catch (pageError) {
        // Keep what was already recognized before reporting the failure.
        if (Object.keys(cachePages).length > 0) {
          await persistCache();
        }
        throw pageError;
      }

      await persistCache();

      const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
      const savedOcrCalls = ocrCount - actualOcrCalls;
      const savedPercent = ocrCount > 0 ? Math.round((savedOcrCalls / ocrCount) * 100) : 0;
      const estimatedCost = actualOcrCalls * 0.002;
      const savedCost = savedOcrCalls * 0.002;

      response += `━━━━━━━━━━━━━━━━━━━━━━━━\n\n`;
      response += `📊 处理统计:\n`;
      response += ` 总页数: ${pageNumbers.length}\n`;
      response += ` 文本提取: ${textExtractCount} 页 ✅\n`;
      response += ` OCR 识别: ${ocrCount} 页 🔄\n`;
      response += ` 缓存命中: ${cacheHitCount} 页 💾\n\n`;
      response += `⚡ 性能统计:\n`;
      response += ` 处理时间: ${processingTime} 秒\n`;
      response += ` OCR 调用次数: ${actualOcrCalls} 次\n`;
      if (savedOcrCalls > 0) {
        response += ` 缓存节省: ${savedOcrCalls} 次调用(节省 ${savedPercent}%)💰\n`;
      }
      response += `\n💰 成本估算:\n`;
      response += ` 本次成本: ¥${estimatedCost.toFixed(4)}\n`;
      if (savedCost > 0) {
        response += ` 节省成本: ¥${savedCost.toFixed(4)}\n`;
      }
      response += `\n━━━━━━━━━━━━━━━━━━━━━━━━\n\n`;

      // Both lookups are indexed by the reported value + 1, so a reported
      // value of -1 selects the first label.
      const directions = ['未定义', '正向', '逆时针90度', '逆时针180度', '逆时针270度'];
      const languages = ['未定义', '英文', '日文', '韩文', '中文'];
      for (const result of results) {
        response += `=== 第 ${result.page} 页 ===\n`;
        response += `识别方式: ${result.method}\n`;
        if (result.ocrUsed) {
          response += `识别字数: ${result.wordsCount}\n`;
          if (result.direction !== undefined) {
            response += `方向: ${directions[result.direction + 1] || '未知'}\n`;
          }
          if (result.language !== undefined) {
            response += `语言: ${languages[result.language + 1] || '未知'}\n`;
          }
        } else {
          response += `字符数: ${result.charCount}\n`;
        }
        response += `\n文本内容:\n${result.text}\n\n`;
      }

      return {
        content: [{
          type: "text",
          text: response
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `智能 PDF 识别出错: ${error.message}`
        }]
      };
    }
  }
);
// Tool "read-pdf": plain text extraction plus optional metadata.
//
// Fixes over the previous version:
//  - `pages` was accepted but ignored; a value other than 'all' now limits
//    the returned text to the requested pages;
//  - `clean_text` collapsed every whitespace run (newlines included) before
//    trying to normalize blank lines, so paragraph breaks were always lost;
//  - the OCR hint named tools that are not registered by this server.
server.tool(
  "read-pdf",
  {
    file: z.string().describe("PDF 文件路径"),
    pages: z.string().optional().describe("页码范围(如 '1-5', '1,3,5', 'all'),默认 'all'"),
    include_metadata: z.boolean().optional().describe("是否包含 PDF 元数据,默认 true"),
    clean_text: z.boolean().optional().describe("是否清理和规范化文本,默认 false")
  },
  async ({ file, pages = "all", include_metadata = true, clean_text = false }) => {
    try {
      await validatePDFPath(file);
      const data = await loadPDF(file);

      // 'all' keeps the single-pass whole-document text; anything else is
      // extracted page by page and joined with a blank line.
      let selectedPages = null;
      let extractedText = data.text;
      if (pages.trim().toLowerCase() !== 'all') {
        selectedPages = parseSmartPageRange(pages, data.numpages);
        const pageTexts = [];
        for (const pageNum of selectedPages) {
          pageTexts.push(await extractSinglePageText(file, pageNum));
        }
        extractedText = pageTexts.join('\n\n');
      }

      if (clean_text) {
        extractedText = extractedText
          .replace(/[^\S\n]+/g, ' ')   // collapse spaces/tabs, keep newlines
          .replace(/ ?\n ?/g, '\n')    // drop spaces hugging a line break
          .replace(/\n{3,}/g, '\n\n')  // at most one blank line in a row
          .trim();
      }

      const result = {
        filename: path.basename(file),
        fileSize: `${((await fs.stat(file)).size / 1024).toFixed(2)} KB`,
        pages: data.numpages,
        text: extractedText,
        metadata: include_metadata ? {
          author: data.info?.Author || "未知",
          title: data.info?.Title || "未知",
          subject: data.info?.Subject || "未知",
          creator: data.info?.Creator || "未知",
          producer: data.info?.Producer || "未知",
          creationDate: data.info?.CreationDate || "未知",
          modificationDate: data.info?.ModDate || "未知",
          keywords: data.info?.Keywords || "未知"
        } : null
      };

      let response = `成功提取 PDF 文本: ${result.filename}\n`;
      response += `文件大小: ${result.fileSize}\n`;
      response += `页面数量: ${result.pages}\n`;
      if (selectedPages) {
        response += `提取页码: ${selectedPages.join(', ')}\n`;
      }

      const imageWarnings = await detectPDFImages(data);
      if (imageWarnings.length > 0) {
        response += `\n🖼️ 内容警告:\n`;
        for (const warning of imageWarnings) {
          response += warning + "\n";
        }
        response += "\n⚠️ 提示:此工具仅提取文本。如果图像包含重要信息,请使用 'read-pdf-smart' 或 'ocr-pdf-auto-legacy'。\n";
      }

      if (include_metadata && result.metadata) {
        const keyMap = {
          author: '作者',
          title: '标题',
          subject: '主题',
          creator: '创建者',
          producer: '生成器',
          creationDate: '创建日期',
          modificationDate: '修改日期',
          keywords: '关键词'
        };
        response += `\n元数据:\n`;
        for (const [key, value] of Object.entries(result.metadata)) {
          response += `- ${keyMap[key] || key}: ${value}\n`;
        }
      }

      response += `\n提取的文本:\n${result.text}`;
      return {
        content: [{
          type: "text",
          text: response
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `读取 PDF 文件出错: ${error.message}`
        }]
      };
    }
  }
);
// Tool "search-pdf": line-by-line literal search in the extracted text.
//
// Fixes over the previous version:
//  - case-insensitive searches reported the lower-cased line instead of the
//    original text; matching now uses the `i` flag on the original lines;
//  - the search expression is built once instead of once per line;
//  - an empty query (which matched every line) is rejected.
server.tool(
  "search-pdf",
  {
    file: z.string().describe("要搜索的 PDF 文件路径"),
    query: z.string().describe("要搜索的文本"),
    case_sensitive: z.boolean().optional().describe("是否区分大小写,默认 false"),
    whole_word: z.boolean().optional().describe("是否仅匹配完整单词,默认 false")
  },
  async ({ file, query, case_sensitive = false, whole_word = false }) => {
    try {
      await validatePDFPath(file);
      if (query.length === 0) {
        throw new Error("搜索文本不能为空");
      }
      const data = await loadPDF(file);

      // The query is always treated as literal text.
      const escapedQuery = query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const source = whole_word ? `\\b${escapedQuery}\\b` : escapedQuery;
      const pattern = new RegExp(source, case_sensitive ? 'g' : 'gi');

      const results = [];
      data.text.split('\n').forEach((line, index) => {
        // String#match with a global expression starts from index 0 each
        // time, so the shared pattern carries no state between lines.
        const found = line.match(pattern);
        if (found) {
          results.push({
            line: index + 1,
            content: line.trim(),
            matches: found.length
          });
        }
      });

      const totalMatches = results.reduce((sum, r) => sum + r.matches, 0);
      let response = `在 ${path.basename(file)} 中搜索 "${query}" 的结果:\n`;
      response += `找到 ${results.length} 个匹配行,共 ${totalMatches} 处匹配\n\n`;
      if (results.length > 0) {
        for (const result of results.slice(0, 20)) {
          response += `第 ${result.line} 行 (${result.matches} 处匹配): ${result.content}\n`;
        }
        if (results.length > 20) {
          response += `\n... 还有 ${results.length - 20} 个结果`;
        }
      } else {
        response += "未找到匹配项。";
      }

      return {
        content: [{
          type: "text",
          text: response
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `搜索 PDF 文件出错: ${error.message}`
        }]
      };
    }
  }
);
// Tool "pdf-metadata": reports file name, size, page count and the document
// information fields of a PDF as a list of "label: value" lines.
server.tool(
  "pdf-metadata",
  {
    file: z.string().describe("PDF 文件路径")
  },
  async ({ file }) => {
    try {
      await validatePDFPath(file);
      const data = await loadPDF(file);
      const { size } = await fs.stat(file);

      const info = data.info;
      const orUnknown = (value) => value || "未知";
      const filename = path.basename(file);

      // Rows are listed in the order they appear in the report.
      const rows = [
        ['文件名', filename],
        ['文件大小', `${(size / 1024).toFixed(2)} KB`],
        ['页面数', data.numpages],
        ['作者', orUnknown(info?.Author)],
        ['标题', orUnknown(info?.Title)],
        ['主题', orUnknown(info?.Subject)],
        ['创建者', orUnknown(info?.Creator)],
        ['生成器', orUnknown(info?.Producer)],
        ['创建日期', orUnknown(info?.CreationDate)],
        ['修改日期', orUnknown(info?.ModDate)],
        ['关键词', orUnknown(info?.Keywords)],
        ['是否加密', info?.IsEncrypted || false],
        ['PDF 版本', orUnknown(data.version)]
      ];

      const header = `PDF 元数据: ${filename}\n\n`;
      const lines = rows.map(([label, value]) => `${label}: ${value}\n`).join('');

      return {
        content: [{
          type: "text",
          text: header + lines
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `读取 PDF 元数据出错: ${error.message}`
        }]
      };
    }
  }
);
// Tool "ocr-pdf-legacy": OCR of selected pages using credentials passed as
// tool arguments. At most 10 pages are processed per call.
//
// Fixes over the previous version:
//  - page specifications that do not resolve to positive integers are
//    rejected instead of being passed on to the renderer;
//  - truncation to 10 pages is reported in the output;
//  - a page that renders to no image is reported as a per-page error
//    instead of silently disappearing from the results.
server.tool(
  "ocr-pdf-legacy",
  {
    file: z.string().describe("PDF 文件路径"),
    api_key: z.string().describe("百度 OCR API Key"),
    secret_key: z.string().describe("百度 OCR Secret Key"),
    pages: z.string().optional().describe("页码范围(如 '1-3', '1,3,5', 'all'),默认 '1'(仅第一页)"),
    language_type: z.enum(["CHN_ENG", "ENG", "JAP", "KOR", "FRE", "SPA", "POR", "GER", "ITA", "RUS"]).optional().describe("语言类型,默认 CHN_ENG"),
    detect_direction: z.boolean().optional().describe("是否检测图像方向,默认 false"),
    detect_language: z.boolean().optional().describe("是否检测语言,默认 false"),
    paragraph: z.boolean().optional().describe("是否输出段落信息,默认 false")
  },
  async ({ file, api_key, secret_key, pages = "1", language_type = "CHN_ENG", detect_direction = false, detect_language = false, paragraph = false }) => {
    const MAX_PAGES = 10;
    try {
      await validatePDFPath(file);
      const accessToken = await getBaiduAccessToken(api_key, secret_key);

      let pageNumbers = [];
      if (pages.toLowerCase() === 'all') {
        // The document is only parsed when the page count is needed.
        const pdfData = await loadPDF(file);
        pageNumbers = Array.from({ length: pdfData.numpages }, (_, i) => i + 1);
      } else if (pages.includes('-')) {
        const [start, end] = pages.split('-').map(Number);
        pageNumbers = Array.from({ length: end - start + 1 }, (_, i) => start + i);
      } else if (pages.includes(',')) {
        pageNumbers = pages.split(',').map(Number);
      } else {
        pageNumbers = [Number(pages)];
      }

      const hasInvalidPage = pageNumbers.some((n) => !Number.isInteger(n) || n < 1);
      if (pageNumbers.length === 0 || hasInvalidPage) {
        throw new Error(`无效的页码范围: "${pages}"`);
      }

      const requestedCount = pageNumbers.length;
      if (requestedCount > MAX_PAGES) {
        pageNumbers = pageNumbers.slice(0, MAX_PAGES);
      }

      const results = [];
      for (const pageNum of pageNumbers) {
        try {
          const pngPages = await pdfToPng(file, {
            pagesToProcess: [pageNum]
          });
          if (!pngPages || pngPages.length === 0) {
            throw new Error("页面渲染结果为空");
          }
          const imageBase64 = pngPages[0].content.toString('base64');
          const ocrResult = await baiduOCR(imageBase64, accessToken, {
            language_type,
            detect_direction,
            detect_language,
            paragraph
          });
          const extractedText = ocrResult.words_result
            ? ocrResult.words_result.map(item => item.words).join('\n')
            : '';
          results.push({
            page: pageNum,
            text: extractedText,
            wordsCount: ocrResult.words_result_num || 0,
            direction: ocrResult.direction,
            language: ocrResult.language
          });
        } catch (pageError) {
          // One failing page must not abort the remaining pages.
          results.push({
            page: pageNum,
            error: `处理第 ${pageNum} 页失败: ${pageError.message}`
          });
        }
      }

      let response = `OCR 识别结果: ${path.basename(file)}\n`;
      response += `已处理 ${results.length} 页\n`;
      if (requestedCount > MAX_PAGES) {
        response += `⚠️ 请求了 ${requestedCount} 页,单次最多处理 ${MAX_PAGES} 页,其余页面已跳过\n`;
      }
      response += `\n`;

      // Both lookups are indexed by the reported value + 1.
      const directions = ['未定义', '正向', '逆时针90度', '逆时针180度', '逆时针270度'];
      const languages = ['未定义', '英文', '日文', '韩文', '中文'];
      for (const result of results) {
        if (result.error) {
          response += `第 ${result.page} 页: ${result.error}\n\n`;
          continue;
        }
        response += `=== 第 ${result.page} 页 ===\n`;
        response += `识别字数: ${result.wordsCount}\n`;
        if (result.direction !== undefined) {
          response += `方向: ${directions[result.direction + 1] || '未知'}\n`;
        }
        if (result.language !== undefined) {
          response += `语言: ${languages[result.language + 1] || '未知'}\n`;
        }
        response += `\n文本内容:\n${result.text}\n\n`;
      }

      return {
        content: [{
          type: "text",
          text: response
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `OCR 识别出错: ${error.message}`
        }]
      };
    }
  }
);
// Tool "ocr-pdf-auto-legacy": OCR of selected pages using credentials from
// the BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY environment variables.
// At most 10 pages are processed per call.
//
// Fixes over the previous version:
//  - truncation to 10 pages is reported in the output;
//  - a page that renders to no image is reported as a per-page error
//    instead of silently disappearing from the results.
server.tool(
  "ocr-pdf-auto-legacy",
  {
    file: z.string().describe("PDF 文件路径"),
    pages: z.string().optional().describe("页码范围('first', 'last', 'all', '1-3', '1,3,5'),默认 'first'"),
    language_type: z.enum(["CHN_ENG", "ENG", "JAP", "KOR", "FRE", "SPA", "POR", "GER", "ITA", "RUS"]).optional().describe("语言类型,默认 CHN_ENG"),
    detect_direction: z.boolean().optional().describe("是否检测图像方向,默认 false"),
    detect_language: z.boolean().optional().describe("是否检测语言,默认 false")
  },
  async ({ file, pages = "first", language_type = "CHN_ENG", detect_direction = false, detect_language = false }) => {
    const MAX_PAGES = 10;
    try {
      const apiKey = process.env.BAIDU_OCR_API_KEY;
      const secretKey = process.env.BAIDU_OCR_SECRET_KEY;
      if (!apiKey || !secretKey) {
        throw new Error("未找到百度 OCR API 凭证。请在环境变量中设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY。");
      }
      await validatePDFPath(file);
      const accessToken = await getBaiduAccessToken(apiKey, secretKey);
      const pdfData = await loadPDF(file);
      const pageNumbers = parseSmartPageRange(pages, pdfData.numpages);
      const limitedPages = pageNumbers.slice(0, MAX_PAGES);

      const results = [];
      for (const pageNum of limitedPages) {
        try {
          const pngPages = await pdfToPng(file, {
            pagesToProcess: [pageNum]
          });
          if (!pngPages || pngPages.length === 0) {
            throw new Error("页面渲染结果为空");
          }
          const imageBase64 = pngPages[0].content.toString('base64');
          const ocrResult = await baiduOCR(imageBase64, accessToken, {
            language_type,
            detect_direction,
            detect_language
          });
          const extractedText = ocrResult.words_result
            ? ocrResult.words_result.map(item => item.words).join('\n')
            : '';
          results.push({
            page: pageNum,
            text: extractedText,
            wordsCount: ocrResult.words_result_num || 0,
            direction: ocrResult.direction,
            language: ocrResult.language
          });
        } catch (pageError) {
          // One failing page must not abort the remaining pages.
          results.push({
            page: pageNum,
            error: `处理第 ${pageNum} 页失败: ${pageError.message}`
          });
        }
      }

      let response = `OCR 识别结果: ${path.basename(file)}\n`;
      response += `已处理 ${results.length} 页\n`;
      if (pageNumbers.length > MAX_PAGES) {
        response += `⚠️ 请求了 ${pageNumbers.length} 页,单次最多处理 ${MAX_PAGES} 页,其余页面已跳过\n`;
      }
      response += `\n`;

      // Both lookups are indexed by the reported value + 1.
      const directions = ['未定义', '正向', '逆时针90度', '逆时针180度', '逆时针270度'];
      const languages = ['未定义', '英文', '日文', '韩文', '中文'];
      for (const result of results) {
        if (result.error) {
          response += `第 ${result.page} 页: ${result.error}\n\n`;
          continue;
        }
        response += `=== 第 ${result.page} 页 ===\n`;
        response += `检测到字数: ${result.wordsCount}\n`;
        if (result.direction !== undefined) {
          response += `方向: ${directions[result.direction + 1] || '未知'}\n`;
        }
        if (result.language !== undefined) {
          response += `语言: ${languages[result.language + 1] || '未知'}\n`;
        }
        response += `\n文本内容:\n${result.text}\n\n`;
      }

      return {
        content: [{
          type: "text",
          text: response
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `执行 OCR 失败: ${error.message}`
        }]
      };
    }
  }
);
/**
 * Produces warnings suggesting that a PDF may carry content that text
 * extraction missed, based on text density only.
 *
 * A warning is added when the average is below 100 characters per page, a
 * milder hint when it is below 300, and a further warning when the text
 * averages fewer than 5 lines per page.
 *
 * @param {{text: string, numpages: number}} pdfData - Parsed PDF data.
 * @returns {Promise<string[]>} Warning messages, possibly empty.
 */
async function detectPDFImages(pdfData) {
  const { text, numpages } = pdfData;
  const warnings = [];

  const charsPerPage = text.length / numpages;
  const roundedChars = Math.round(charsPerPage);
  if (charsPerPage < 100) {
    warnings.push(`⚠️ 警告:文本密度极低(${roundedChars} 字符/页)。此 PDF 可能包含重要的图像或图表内容,但未被提取。`);
  } else if (charsPerPage < 300) {
    warnings.push(`⚠️ 提示:文本密度较低(${roundedChars} 字符/页)。此 PDF 可能包含图像或图表。`);
  }

  const lineCount = text.split('\n').length;
  if (lineCount / numpages < 5) {
    warnings.push(`⚠️ 警告:每页文本行数很少。可能缺少重要的视觉内容。`);
  }

  return warnings;
}
// Tool "smart-read-pdf-legacy": tries whole-document text extraction first
// and falls back to Baidu OCR (at most 10 pages) when the text quality is
// poor, extraction fails, or `force_ocr` is set.
//
// Fixes over the previous version:
//  - the document parsed in step 1 is reused for the OCR step instead of
//    being read and parsed a second time;
//  - truncation to 10 pages is reported in the output;
//  - a page that renders to no image is reported as a per-page error
//    instead of silently disappearing from the results.
server.tool(
  "smart-read-pdf-legacy",
  {
    file: z.string().describe("PDF 文件路径"),
    pages: z.string().optional().describe("页码范围('all', 'first', 'last', '1-5', '1,3,5'),默认 'all'"),
    language_type: z.enum(["CHN_ENG", "ENG", "JAP", "KOR", "FRE", "SPA", "POR", "GER", "ITA", "RUS"]).optional().describe("OCR 语言类型(如需要),默认 CHN_ENG"),
    force_ocr: z.boolean().optional().describe("强制使用 OCR,默认 false")
  },
  async ({ file, pages = "all", language_type = "CHN_ENG", force_ocr = false }) => {
    const MAX_PAGES = 10;
    try {
      await validatePDFPath(file);
      let response = `智能 PDF 阅读: ${path.basename(file)}\n\n`;

      // Set when step 1 parsed the document, so step 2 can reuse it.
      let parsedPdf = null;

      if (!force_ocr) {
        try {
          response += "🔍 第一步:尝试文本提取...\n";
          const data = await loadPDF(file);
          parsedPdf = data;
          const quality = checkTextQuality(data.text, data.numpages);
          response += ` 文本质量: ${quality}\n`;
          response += ` 从 ${data.numpages} 页中提取了 ${data.text.length} 个字符\n\n`;

          if (quality === 'good') {
            response += "✅ 文本提取成功!使用提取的文本。\n\n";
            const imageWarnings = await detectPDFImages(data);
            if (imageWarnings.length > 0) {
              response += "🖼️ 内容警告:\n";
              for (const warning of imageWarnings) {
                response += warning + "\n";
              }
              response += "\n⚠️ 建议:如果此 PDF 包含重要的图表、图像或视觉内容,建议:\n";
              response += " 1. 使用 OCR 工具提取图像文字\n";
              response += " 2. 手动检查 PDF 以确认视觉内容\n";
              response += " 3. 询问用户图像是否重要\n\n";
            }
            response += `文件大小: ${((await fs.stat(file)).size / 1024).toFixed(2)} KB\n`;
            response += `页面数量: ${data.numpages}\n\n`;
            if (data.info) {
              response += `元数据:\n`;
              response += `- 标题: ${data.info.Title || "未知"}\n`;
              response += `- 作者: ${data.info.Author || "未知"}\n\n`;
            }
            response += `提取的文本:\n${data.text}`;
            return {
              content: [{
                type: "text",
                text: response
              }]
            };
          }
          response += "⚠️ 文本质量较差,切换到 OCR...\n\n";
        } catch (textError) {
          response += `⚠️ 文本提取失败: ${textError.message}\n`;
          response += " 切换到 OCR...\n\n";
        }
      } else {
        response += "🔧 强制 OCR 模式已启用,跳过文本提取。\n\n";
      }

      response += "🤖 步骤 2:使用百度 OCR...\n";
      const apiKey = process.env.BAIDU_OCR_API_KEY;
      const secretKey = process.env.BAIDU_OCR_SECRET_KEY;
      if (!apiKey || !secretKey) {
        throw new Error("未找到百度 OCR API 凭证。请在环境变量中设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY。");
      }
      const accessToken = await getBaiduAccessToken(apiKey, secretKey);
      const pdfData = parsedPdf || await loadPDF(file);
      const pageNumbers = parseSmartPageRange(pages, pdfData.numpages);
      const limitedPages = pageNumbers.slice(0, MAX_PAGES);
      response += ` 正在处理 ${limitedPages.length} 页: ${limitedPages.join(', ')}\n`;
      if (pageNumbers.length > MAX_PAGES) {
        response += ` ⚠️ 请求了 ${pageNumbers.length} 页,单次最多处理 ${MAX_PAGES} 页,其余页面已跳过\n`;
      }
      response += `\n`;

      const results = [];
      for (const pageNum of limitedPages) {
        try {
          const pngPages = await pdfToPng(file, {
            pagesToProcess: [pageNum]
          });
          if (!pngPages || pngPages.length === 0) {
            throw new Error("页面渲染结果为空");
          }
          const imageBase64 = pngPages[0].content.toString('base64');
          const ocrResult = await baiduOCR(imageBase64, accessToken, {
            language_type,
            detect_direction: true,
            detect_language: true
          });
          const extractedText = ocrResult.words_result
            ? ocrResult.words_result.map(item => item.words).join('\n')
            : '';
          results.push({
            page: pageNum,
            text: extractedText,
            wordsCount: ocrResult.words_result_num || 0,
            direction: ocrResult.direction,
            language: ocrResult.language
          });
        } catch (pageError) {
          // One failing page must not abort the remaining pages.
          results.push({
            page: pageNum,
            error: `处理第 ${pageNum} 页失败: ${pageError.message}`
          });
        }
      }

      response += `✅ OCR 完成!\n\n`;
      // Both lookups are indexed by the reported value + 1.
      const directions = ['未定义', '正向', '逆时针90度', '逆时针180度', '逆时针270度'];
      const languages = ['未定义', '英文', '日文', '韩文', '中文'];
      for (const result of results) {
        if (result.error) {
          response += `第 ${result.page} 页: ${result.error}\n\n`;
          continue;
        }
        response += `=== 第 ${result.page} 页 ===\n`;
        response += `检测到字数: ${result.wordsCount}\n`;
        if (result.direction !== undefined) {
          response += `方向: ${directions[result.direction + 1] || '未知'}\n`;
        }
        if (result.language !== undefined) {
          response += `语言: ${languages[result.language + 1] || '未知'}\n`;
        }
        response += `\n文本内容:\n${result.text}\n\n`;
      }

      return {
        content: [{
          type: "text",
          text: response
        }]
      };
    } catch (error) {
      return {
        content: [{
          type: "text",
          text: `智能 PDF 读取出错: ${error.message}`
        }]
      };
    }
  }
);
// Start-up: create a stdio transport and connect the server to it.
// Top-level await is used, so this file must be loaded as an ES module.
const transport = new StdioServerTransport();
await server.connect(transport);