import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import axios from "axios";
/**
 * Zod schema for the configuration accepted by this server.
 *
 * - `mineruApiKey`: required string, sent as the Bearer token on every
 *   Mineru API request.
 * - `mineruBaseUrl`: string that falls back to "https://mineru.net/api/v4"
 *   when the caller omits it.
 */
export const configSchema = z.object({
  mineruApiKey: z.string().describe("Mineru API Bearer token"),
  mineruBaseUrl: z
    .string()
    .default("https://mineru.net/api/v4")
    .describe("Mineru API base URL"),
});
/**
 * Lookup table from a Mineru error code (in string form) to a short message
 * and a remediation hint.
 *
 * The message and suggestion texts are Chinese runtime strings that end up in
 * the errors thrown by this module, so they must not be edited casually. The
 * comment above each entry is an English gloss for maintainers only.
 */
const ERROR_CODES = {
  // Token is invalid.
  "A0202": {
    message: "Token错误",
    suggestion: "检查Token是否正确,或者更换新Token",
  },
  // Token has expired.
  "A0211": {
    message: "Token过期",
    suggestion: "更换新Token",
  },
  // Wrong parameters were passed (type or Content-Type).
  "-500": {
    message: "传参错误",
    suggestion: "请确保参数类型及Content-Type正确",
  },
  // Service error.
  "-10001": {
    message: "服务异常",
    suggestion: "请稍后再试",
  },
  // Invalid request parameters.
  "-10002": {
    message: "请求参数错误",
    suggestion: "检查请求参数格式",
  },
  // Failed to generate an upload URL.
  "-60001": {
    message: "生成上传URL失败",
    suggestion: "请稍后再试",
  },
  // Could not determine a matching file format.
  "-60002": {
    message: "获取匹配的文件格式失败",
    suggestion: "检测文件类型失败,请求的文件名及链接中带有正确的后缀名,且文件为pdf,doc,docx,ppt,pptx,png,jp(e)g中的一种",
  },
  // Failed to read the file.
  "-60003": {
    message: "文件读取失败",
    suggestion: "请检查文件是否损坏并重新上传",
  },
  // Empty file.
  "-60004": {
    message: "空文件",
    suggestion: "请上传有效文件",
  },
  // File size exceeds the limit.
  "-60005": {
    message: "文件大小超出限制",
    suggestion: "检查文件大小,最大支持200MB",
  },
  // Page count exceeds the limit.
  "-60006": {
    message: "文件页数超过限制",
    suggestion: "请拆分文件后重试",
  },
  // Model service temporarily unavailable.
  "-60007": {
    message: "模型服务暂时不可用",
    suggestion: "请稍后重试或联系技术支持",
  },
  // Reading the file timed out.
  "-60008": {
    message: "文件读取超时",
    suggestion: "检查URL可访问",
  },
  // Task submission queue is full.
  "-60009": {
    message: "任务提交队列已满",
    suggestion: "请稍后再试",
  },
  // Parsing failed.
  "-60010": {
    message: "解析失败",
    suggestion: "请稍后再试",
  },
  // Failed to obtain a valid file.
  "-60011": {
    message: "获取有效文件失败",
    suggestion: "请确保文件已上传",
  },
  // Task not found.
  "-60012": {
    message: "找不到任务",
    suggestion: "请确保task_id有效且未删除",
  },
  // No permission to access the task.
  "-60013": {
    message: "没有权限访问该任务",
    suggestion: "只能访问自己提交的任务",
  },
  // Attempted to delete a running task.
  "-60014": {
    message: "删除运行中的任务",
    suggestion: "运行中的任务暂不支持删除",
  },
  // File conversion failed (suggests converting to PDF manually).
  "-60015": {
    message: "文件转换失败",
    suggestion: "可以手动转为pdf再上传",
  },
  // File conversion to the requested export format failed.
  "-60016": {
    message: "文件转换失败",
    suggestion: "文件转换为指定格式失败,可以尝试其他格式导出或重试",
  },
} as const;
// Types for Mineru API
/**
 * JSON body for a single-document extraction request; in this file it is the
 * payload POSTed to "/extract/task".
 *
 * Only `url` is required. The meaning of each option is defined by the Mineru
 * API, not by this module — the per-field notes are inferred from the field
 * names and should be confirmed against the API documentation.
 */
interface MineruTaskRequest {
  /** Remote location of the document to parse. */
  url: string;
  /** Presumably toggles OCR for the document — TODO confirm. */
  is_ocr?: boolean;
  /** Presumably toggles formula recognition — TODO confirm. */
  enable_formula?: boolean;
  /** Presumably toggles table recognition — TODO confirm. */
  enable_table?: boolean;
  /** Document language; this file always sends "auto". */
  language?: string;
  callback?: string;
  seed?: string;
  /** Extra export formats requested in addition to the service defaults. */
  extra_formats?: string[];
  page_ranges?: string;
  /** Parser model version; this file always sends "v2". */
  model_version?: string;
}
/**
 * JSON body for requesting upload URLs for a batch of local files; in this
 * file it is the payload POSTed to "/file-urls/batch".
 *
 * The batch-level options apply to the request as a whole, while each entry of
 * `files` identifies one file by name. Option semantics are defined by the
 * Mineru API — confirm against its documentation.
 */
interface MineruBatchRequest {
  enable_formula?: boolean;
  enable_table?: boolean;
  /** Document language; this file always sends "auto". */
  language?: string;
  /** One entry per file to be uploaded. */
  files: {
    /** File name. */
    name: string;
    is_ocr?: boolean;
    page_ranges?: string;
  }[];
  callback?: string;
  seed?: string;
  /** Extra export formats requested in addition to the service defaults. */
  extra_formats?: string[];
  /** Parser model version; this file always sends "v2". */
  model_version?: string;
}
/**
 * Envelope wrapped around every Mineru API response body.
 *
 * @typeParam T - Shape of the endpoint-specific `data` payload.
 */
interface MineruResponse<T> {
  /** Status code; the request helper in this file treats any value other than 0 as an error. */
  code: number;
  /** Message supplied by the service. */
  msg: string;
  /** Identifier echoed back to the user alongside results and errors. */
  trace_id: string;
  /** Endpoint-specific payload. */
  data: T;
}
/** `data` payload returned when a single parsing task is created. */
interface TaskResponse {
  /** Identifier of the new task; used afterwards to query its status. */
  task_id: string;
}
/**
 * `data` payload returned when querying a single task's status.
 *
 * Which optional fields are populated depends on `state`; the status tool in
 * this file reads `extract_progress` while running, `full_zip_url` when done,
 * and `err_msg` when failed.
 */
interface TaskStatusResponse {
  task_id: string;
  data_id?: string;
  /** Lifecycle state of the task. */
  state:
    | "pending"
    | "running"
    | "done"
    | "failed"
    | "converting";
  /** Download link for the result archive. */
  full_zip_url?: string;
  /** Failure description. */
  err_msg?: string;
  /** Page-level progress information. */
  extract_progress?: {
    extracted_pages: number;
    total_pages: number;
    start_time: string;
  };
}
/** `data` payload returned when upload URLs are requested for a batch of local files. */
interface BatchResponse {
  /** Identifier of the batch; used afterwards to query batch results. */
  batch_id: string;
  /** Upload URLs; the batch tool in this file lists them to the user in the order received. */
  file_urls: string[];
}
/**
 * JSON body for submitting a batch of remote documents by URL; in this file it
 * is the payload POSTed to "/extract/task/batch".
 *
 * Same layout as the upload-based batch request, except that each entry of
 * `files` carries a `url` instead of a `name`. Option semantics are defined by
 * the Mineru API — confirm against its documentation.
 */
interface MineruBatchUrlRequest {
  enable_formula?: boolean;
  enable_table?: boolean;
  /** Document language; this file always sends "auto". */
  language?: string;
  /** One entry per remote document. */
  files: {
    /** Remote location of the document. */
    url: string;
    is_ocr?: boolean;
    page_ranges?: string;
  }[];
  callback?: string;
  seed?: string;
  /** Extra export formats requested in addition to the service defaults. */
  extra_formats?: string[];
  /** Parser model version; this file always sends "v2". */
  model_version?: string;
}
/** `data` payload returned when a URL-based batch parsing task is created. */
interface BatchUrlResponse {
  /** Identifier of the batch; used afterwards to query batch results. */
  batch_id: string;
}
export default function createStatelessServer({
config,
}: {
config: z.infer<typeof configSchema>;
}) {
const server = new McpServer({
name: "Mineru Document Parser",
version: "1.0.0",
});
/** Message/suggestion pair stored for each known Mineru error code. */
type MineruErrorInfo = { readonly message: string; readonly suggestion: string };

/** True when `value` is a non-null object, i.e. something whose fields can be read. */
const isJsonObject = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null;

/** Render an envelope `code` as text; a missing code stays `undefined`. */
const codeToText = (code: unknown): string | undefined =>
  code === undefined || code === null ? undefined : String(code);

/** Look up a code in ERROR_CODES, ignoring inherited keys such as "constructor". */
const lookupMineruError = (codeText: string | undefined): MineruErrorInfo | undefined => {
  if (codeText === undefined || !Object.prototype.hasOwnProperty.call(ERROR_CODES, codeText)) {
    return undefined;
  }
  const table: Readonly<Record<string, MineruErrorInfo | undefined>> = ERROR_CODES;
  return table[codeText];
};

/** Build the error raised for a code that has an entry in ERROR_CODES. */
const knownMineruError = (
  codeText: string | undefined,
  info: MineruErrorInfo,
  traceId: unknown
): Error =>
  new Error(
    `Mineru API Error ${codeText}: ${info.message}\n\n解决建议: ${info.suggestion}\n\nTrace ID: ${traceId || 'N/A'}`
  );

/**
 * Send an authenticated JSON request to the Mineru API and return the
 * response envelope.
 *
 * @param endpoint - Path appended to `config.mineruBaseUrl`, e.g. "/extract/task".
 * @param method - HTTP verb; defaults to "GET".
 * @param data - Optional JSON request body.
 * @returns The response body. It is only checked to be an object whose `code`
 *   is 0; the rest of the shape `T` is trusted, not validated.
 * @throws Error "Mineru API Error <code>: ..." when the service reports a
 *   non-zero code (with a suggestion when the code is in ERROR_CODES).
 * @throws Error "HTTP <status>: ..." when the HTTP call fails with a response
 *   that carries no known error code.
 * @throws Error "Mineru API request failed: ..." when no response was received
 *   or the body is empty / not a JSON object.
 */
const makeMineruRequest = async <T>(
  endpoint: string,
  method: "GET" | "POST" = "GET",
  data?: unknown
): Promise<T> => {
  // Drop trailing slashes so a base URL like ".../v4/" does not yield "//path".
  const baseUrl = config.mineruBaseUrl.replace(/\/+$/, "");
  const url = `${baseUrl}${endpoint}`;
  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${config.mineruApiKey}`,
  };

  let body: unknown;
  try {
    const response = await axios({
      method,
      url,
      headers,
      data,
    });
    body = response.data;
  } catch (error: unknown) {
    // The server answered with a non-2xx status: prefer its error envelope.
    if (axios.isAxiosError(error) && error.response) {
      const errorBody: unknown = error.response.data;
      const fields: Record<string, unknown> = isJsonObject(errorBody) ? errorBody : {};
      const codeText = codeToText(fields.code);
      const known = lookupMineruError(codeText);
      if (known) {
        throw knownMineruError(codeText, known, fields.trace_id);
      }
      throw new Error(
        `HTTP ${error.response.status}: ${fields.msg || error.message}\n\nTrace ID: ${fields.trace_id || 'N/A'}`
      );
    }
    // No response at all (DNS failure, refused connection, ...).
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Mineru API request failed: ${reason}`);
  }

  // Callers dereference `.data` and `.trace_id`, so fail here with a clear
  // message instead of handing back an empty or non-object body.
  if (!isJsonObject(body)) {
    throw new Error("Mineru API request failed: response body is empty or not a JSON object");
  }

  // HTTP succeeded, but the envelope itself may still report a failure.
  if (body.code !== 0) {
    const codeText = codeToText(body.code);
    const known = lookupMineruError(codeText);
    if (known) {
      throw knownMineruError(codeText, known, body.trace_id);
    }
    throw new Error(
      `Mineru API Error ${codeText}: ${body.msg || 'Unknown error'}\n\nTrace ID: ${body.trace_id || 'N/A'}`
    );
  }

  // External data: the caller-chosen shape is asserted, not verified.
  return body as T;
};
// Tool 1: Create single file parsing task
// Registers the "create_parsing_task" tool: submits one remote document URL to
// POST /extract/task with fixed parsing options (formula and table recognition
// on, language "auto", model "v2") and reports the new task ID and trace ID.
server.tool(
  "create_parsing_task",
  "Create a document parsing task for a single remote file. This tool submits a document URL to Mineru for intelligent parsing, extracting text, tables, formulas, and structure. The document will be converted to markdown and JSON formats by default, with optional additional formats like DOCX, HTML, or LaTeX. Supports OCR for image-based documents and can handle multiple languages.",
  {
    url: z
      .string()
      .describe("File URL to parse (supports PDF, DOC, DOCX, PPT, PPTX, PNG, JPG, JPEG)"),
    extra_formats: z
      .array(z.enum(["docx", "html", "latex"]))
      .default(["html"])
      .describe("Additional export formats, default html"),
  },
  async ({ url, extra_formats }) => {
    const payload: MineruTaskRequest = {
      url,
      enable_formula: true,
      enable_table: true,
      language: "auto",
      model_version: "v2",
      extra_formats,
    };

    const created = await makeMineruRequest<MineruResponse<TaskResponse>>(
      "/extract/task",
      "POST",
      payload
    );

    // Blank entries produce the empty lines between the sections of the message.
    const text = [
      "Document parsing task created successfully!",
      "",
      `Task ID: ${created.data.task_id}`,
      `Trace ID: ${created.trace_id}`,
      "",
      "Use the task ID to check the parsing status.",
    ].join("\n");

    return {
      content: [{ type: "text", text }],
    };
  }
);
// Tool 2: Get task status and results
// Registers the "get_task_status" tool: fetches GET /extract/task/{task_id} and
// renders the task state plus whichever state-specific details are present
// (progress while running, download URL when done, error message when failed).
server.tool(
  "get_task_status",
  "Check the status and retrieve results of a document parsing task. This tool monitors the parsing progress, showing current status (pending, running, done, failed), progress information for running tasks, and download URLs for completed tasks. Use this to track your document parsing workflow and access the final parsed content.",
  {
    task_id: z.string().describe("Task ID returned from create_parsing_task"),
  },
  async ({ task_id }) => {
    // task_id is caller-supplied: percent-encode it so characters such as "/",
    // "?" or "#" cannot change which endpoint the request reaches.
    const response = await makeMineruRequest<MineruResponse<TaskStatusResponse>>(
      `/extract/task/${encodeURIComponent(task_id)}`
    );
    const task = response.data;

    // Collected line by line; an empty entry yields a blank separator line.
    const lines: string[] = [
      `Task Status: ${task.state}`,
      `Task ID: ${task.task_id}`,
    ];
    if (task.data_id) {
      lines.push(`Data ID: ${task.data_id}`);
    }

    if (task.state === "running" && task.extract_progress) {
      const { extracted_pages, total_pages, start_time } = task.extract_progress;
      lines.push(
        "",
        `Progress: ${extracted_pages}/${total_pages} pages processed`,
        `Start Time: ${start_time}`
      );
    } else if (task.state === "done" && task.full_zip_url) {
      lines.push(
        "",
        "✅ Parsing completed successfully!",
        `Download URL: ${task.full_zip_url}`
      );
    } else if (task.state === "failed" && task.err_msg) {
      lines.push("", `❌ Parsing failed: ${task.err_msg}`);
    }

    lines.push("", `Trace ID: ${response.trace_id}`);

    return {
      content: [{ type: "text", text: lines.join("\n") }],
    };
  }
);
// Tool 3: Create batch file parsing task
// Registers the "create_batch_parsing_task" tool: asks POST /file-urls/batch
// for upload URLs for the named local files (fixed options: formula and table
// recognition on, language "auto", model "v2") and lists the returned URLs.
server.tool(
  "create_batch_parsing_task",
  "Create a batch document parsing task for multiple local files. This tool generates upload URLs for local files that need to be parsed. After creating the batch task, you'll receive upload URLs for each file. Upload your files to these URLs using PUT requests, and the system will automatically start parsing once all files are uploaded. This is ideal for processing multiple local documents efficiently.",
  {
    files: z
      .array(
        z.object({
          name: z.string().describe("File name (supports PDF, DOC, DOCX, PPT, PPTX, PNG, JPG, JPEG)"),
        })
      )
      .describe("Array of files to parse"),
    extra_formats: z
      .array(z.enum(["docx", "html", "latex"]))
      .default(["html"])
      .describe("Additional export formats, default html"),
  },
  async ({ files, extra_formats }) => {
    const payload: MineruBatchRequest = {
      files,
      enable_formula: true,
      enable_table: true,
      language: "auto",
      model_version: "v2",
      extra_formats,
    };

    const created = await makeMineruRequest<MineruResponse<BatchResponse>>(
      "/file-urls/batch",
      "POST",
      payload
    );
    const uploadUrls = created.data.file_urls;

    // One numbered line per upload URL, each terminated by a newline.
    const numberedUrls = uploadUrls
      .map((uploadUrl, position) => `${position + 1}. ${uploadUrl}\n`)
      .join("");

    const resultText =
      `Batch parsing task created successfully!\n\nBatch ID: ${created.data.batch_id}` +
      `\nNumber of files: ${uploadUrls.length}` +
      `\n\nUpload URLs:\n` +
      numberedUrls +
      `\nTrace ID: ${created.trace_id}` +
      `\n\nNote: Upload your files to these URLs using PUT requests. The system will automatically start parsing once all files are uploaded.`;

    return {
      content: [{ type: "text", text: resultText }],
    };
  }
);
// Tool 5: Create batch URL parsing task
// Registers the "create_batch_url_parsing_task" tool: submits several remote
// document URLs to POST /extract/task/batch (fixed options: formula and table
// recognition on, language "auto", model "v2") and reports the batch ID.
server.tool(
  "create_batch_url_parsing_task",
  "Create a batch document parsing task for multiple remote file URLs. This tool submits multiple document URLs to Mineru for simultaneous parsing. Unlike the upload batch method, this directly processes remote files without requiring file uploads. All documents in the batch will be parsed with the same settings (OCR, formula recognition, language, etc.) and you can track the progress of all files together.",
  {
    files: z
      .array(
        z.object({
          url: z.string().describe("Remote file URL (PDF, DOC, etc.)"),
        })
      )
      .describe("Array of remote files to parse by URL"),
    extra_formats: z
      .array(z.enum(["docx", "html", "latex"]))
      .default(["html"])
      .describe("Additional export formats, default html"),
  },
  async ({ files, extra_formats }) => {
    const payload: MineruBatchUrlRequest = {
      files,
      enable_formula: true,
      enable_table: true,
      language: "auto",
      model_version: "v2",
      extra_formats,
    };

    const created = await makeMineruRequest<MineruResponse<BatchUrlResponse>>(
      "/extract/task/batch",
      "POST",
      payload
    );

    const resultText = [
      `Batch URL parsing task created successfully!\n\nBatch ID: ${created.data.batch_id}`,
      `\n\nUse this batch_id to query the batch parsing results.`,
      `\nTrace ID: ${created.trace_id}`,
    ].join("");

    return {
      content: [{ type: "text", text: resultText }],
    };
  }
);
// Tool 4: Get batch task results
// Registers the "get_batch_task_results" tool: fetches
// GET /extract-results/batch/{batch_id} and returns the raw `data` payload as
// pretty-printed JSON together with the trace ID.
server.tool(
  "get_batch_task_results",
  "Retrieve results for batch parsing tasks. This tool works for both URL-based batch parsing and local file upload batch parsing. It provides comprehensive status information for all files in the batch, including individual file progress, completion status, and download URLs for finished documents. Use this to monitor and collect results from your batch processing workflows.",
  {
    batch_id: z.string().describe("Batch ID returned from create_batch_url_parsing_task or create_batch_parsing_task"),
  },
  async ({ batch_id }) => {
    // batch_id is caller-supplied: percent-encode it so characters such as "/",
    // "?" or "#" cannot change which endpoint the request reaches.
    // The payload is only serialised, never inspected, so `unknown` suffices.
    const response = await makeMineruRequest<MineruResponse<unknown>>(
      `/extract-results/batch/${encodeURIComponent(batch_id)}`
    );

    const prettyResults = JSON.stringify(response.data, null, 2);

    return {
      content: [
        {
          type: "text",
          text: `Batch Task Results:\n${prettyResults}\n\nTrace ID: ${response.trace_id}`,
        },
      ],
    };
  }
);
return server.server;
}