OCR MCP Server

index.js•10.3 KiB

#!/usr/bin/env node
/**
 * ============================================================================
 * OCR MCP Server - main entry point
 * ============================================================================
 *
 * An OCR server speaking MCP (Model Context Protocol), the open protocol that
 * lets AI assistants (e.g. Cursor, Claude Desktop) talk to external tools in a
 * standardized way.
 *
 * What this server does:
 *   - accepts image-recognition requests from the MCP client
 *   - delegates the OCR work to the local `./ocr.js` module
 *   - returns the recognized text to the client
 *
 * Flow:
 *   1. The client talks to this process over stdio.
 *   2. The user asks the client to recognize an image.
 *   3. The client calls this server's `recognize_text` tool.
 *   4. The server runs OCR and returns the result.
 *
 * IMPORTANT: stdout carries the MCP protocol stream. Never use `console.log`
 * in this file; use `console.error` or the logger helpers instead.
 *
 * @author AI Assistant
 * @version 1.0.0
 */

// ============================================================================
// Imports
// ============================================================================

// MCP SDK: the server class and the stdio transport used to talk to the client.
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';

// MCP request schemas: one for listing tools, one for invoking a tool.
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';

// Local OCR module: the recognition function and the supported-language list.
import { recognizeText, getSupportedLanguages } from './ocr.js';

// Local logging helpers used for all diagnostics (stdout is reserved).
import { log, logError, logDebug } from './logger.js';

// ============================================================================
// Constants and small helpers
// ============================================================================

/** Languages used when the caller does not supply any. */
const DEFAULT_LANGUAGES = Object.freeze(['chi_sim', 'eng']);

/** Maximum number of characters of recognized text echoed to the debug log. */
const LOG_PREVIEW_LENGTH = 100;

/**
 * Tool catalogue returned for "tools/list".
 *
 * Each entry carries a unique `name`, a `description` (the AI relies on it to
 * decide when to call the tool) and a JSON Schema `inputSchema`.
 */
const TOOLS = [
  {
    name: 'recognize_text',
    description:
      '识别图片中的文字内容（OCR）。支持中文、英文等多种语言。支持 PNG、JPG、JPEG、BMP、GIF、WebP 格式。',
    inputSchema: {
      type: 'object',
      properties: {
        // Required: path of the image to recognize.
        image_path: {
          type: 'string',
          description: '图片文件的本地绝对路径，例如：/Users/xxx/Desktop/image.png',
        },
        // Optional: language codes to recognize with.
        languages: {
          type: 'array',
          items: { type: 'string' },
          description:
            '识别语言代码数组，可选。默认 ["chi_sim", "eng"]（简体中文+英文）。可用语言：chi_sim(简中)、chi_tra(繁中)、eng(英文)、jpn(日文)、kor(韩文)等',
          default: [...DEFAULT_LANGUAGES],
        },
      },
      required: ['image_path'],
    },
  },
  {
    name: 'list_ocr_languages',
    description: '列出 OCR 支持的语言及其代码',
    inputSchema: {
      type: 'object',
      properties: {}, // takes no parameters
      required: [],
    },
  },
];

/**
 * Build a tool result containing a single text item.
 *
 * @param {string} text - Text to return to the client.
 * @param {boolean} [isError=false] - When true the result is flagged with
 *   `isError: true` so the client can tell a failure from a success.
 * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}}
 */
function textResult(text, isError = false) {
  const result = { content: [{ type: 'text', text }] };
  if (isError) {
    result.isError = true;
  }
  return result;
}

/**
 * Normalize the optional `languages` argument.
 *
 * @param {unknown} value - Raw `languages` argument from the request.
 * @returns {string[] | null} The language codes to use (the defaults when the
 *   argument is missing, falsy or an empty array), or `null` when the argument
 *   is present but is not an array of non-empty strings.
 */
function resolveLanguages(value) {
  // Falsy values fall back to the defaults, as the previous `||` default did.
  if (!value) {
    return [...DEFAULT_LANGUAGES];
  }
  if (!Array.isArray(value)) {
    return null;
  }
  // An empty list would leave OCR with nothing to recognize with.
  if (value.length === 0) {
    return [...DEFAULT_LANGUAGES];
  }
  const isValid = value.every(
    (code) => typeof code === 'string' && code.trim() !== '',
  );
  return isValid ? value : null;
}

/**
 * Render a confidence value for display.
 *
 * @param {unknown} confidence - Confidence reported by the OCR module.
 * @returns {string} e.g. `"93.4%"`, or `"未知"` when no finite number is given.
 */
function formatConfidence(confidence) {
  return typeof confidence === 'number' && Number.isFinite(confidence)
    ? `${confidence.toFixed(1)}%`
    : '未知';
}

/**
 * Handle the `recognize_text` tool.
 *
 * @param {Record<string, unknown>} args - Tool arguments (`image_path`,
 *   optional `languages`).
 * @returns {Promise<object>} Tool result; flagged `isError` on invalid
 *   arguments or when the OCR module reports failure.
 */
async function handleRecognizeText(args) {
  const imagePath = args.image_path;
  if (typeof imagePath !== 'string' || imagePath.trim() === '') {
    return textResult('错误：请提供图片路径 (image_path 参数)', true);
  }

  const languages = resolveLanguages(args.languages);
  if (languages === null) {
    return textResult('错误：languages 参数必须是由语言代码字符串组成的数组', true);
  }

  log(`开始识别图片: ${imagePath}`);
  log(`使用语言: ${languages.join(', ')}`);

  // The OCR module is expected to resolve to { success, text, confidence, error }.
  const result = await recognizeText(imagePath, languages);

  if (!result.success) {
    logError(`识别失败: ${result.error}`);
    return textResult(`OCR 识别失败：${result.error}`, true);
  }

  const confidence = formatConfidence(result.confidence);
  const text = typeof result.text === 'string' ? result.text : '';

  // Only mark the preview as truncated when something was actually cut off.
  const preview =
    text.length > LOG_PREVIEW_LENGTH
      ? `${text.substring(0, LOG_PREVIEW_LENGTH)}...`
      : text;
  log(`识别成功，置信度: ${confidence}`);
  logDebug('识别文字:', preview);

  if (!text) {
    return textResult('图片中未识别到文字内容');
  }

  // Markdown so the result renders nicely in the client.
  return textResult(
    `## OCR 识别结果\n\n**置信度**: ${confidence}\n\n**识别内容**:\n\n${text}`,
  );
}

/**
 * Handle the `list_ocr_languages` tool.
 *
 * @returns {object} Tool result with a Markdown list of `code: name` pairs.
 */
function handleListLanguages() {
  const languageList = getSupportedLanguages()
    .map((lang) => `- \`${lang.code}\`: ${lang.name}`)
    .join('\n');

  return textResult(
    `## 支持的 OCR 语言\n\n${languageList}\n\n使用时将语言代码传入 \`languages\` 参数数组中，例如：\`["chi_sim", "eng"]\``,
  );
}

// ============================================================================
// MCP server setup
// ============================================================================

/**
 * The MCP server instance.
 *
 * First argument: server metadata (name and version).
 * Second argument: capability declaration; `tools: {}` announces that this
 * server provides tools, using the default configuration.
 */
const server = new Server(
  {
    name: 'ocr-mcp-server',
    version: '1.0.0',
  },
  {
    capabilities: {
      tools: {},
    },
  },
);

/**
 * "tools/list": tell the client which tools this server offers.
 */
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

/**
 * "tools/call": dispatch a tool invocation by name.
 *
 * The request carries `params.name` (the tool to run) and, optionally,
 * `params.arguments`. Every outcome is returned as
 * `{ content: [{ type: 'text', text }] }`; failures additionally carry
 * `isError: true` instead of escaping as exceptions.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  // stdout is the protocol channel, so diagnostics go to stderr / the logger.
  console.error('收到请求:', JSON.stringify(request, null, 2));
  log(`request: ${JSON.stringify(request)}`);

  const { name } = request.params;
  // `arguments` is optional (e.g. for parameterless tools); never assume it.
  const args = request.params.arguments ?? {};

  log(`工具调用: ${name}`);
  logDebug('调用参数:', args);

  try {
    switch (name) {
      case 'recognize_text':
        return await handleRecognizeText(args);
      case 'list_ocr_languages':
        return handleListLanguages();
      default:
        return textResult(`未知工具: ${name}`, true);
    }
  } catch (error) {
    // Report unexpected failures to the caller as a tool-level error.
    logError(`工具执行异常 (${name}):`, error);
    const reason = error instanceof Error ? error.message : String(error);
    return textResult(`工具执行异常：${reason}`, true);
  }
});

// ============================================================================
// Startup
// ============================================================================

/**
 * Connect the server to a stdio transport and start serving requests.
 *
 * @returns {Promise<void>} Resolves once the transport is connected.
 */
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);

  log('OCR MCP Server 已启动，等待连接...');
  log('日志文件位置: mcp-server.log');
}

// Exit with a non-zero code when startup fails.
main().catch((error) => {
  logError('服务器启动失败:', error);
  process.exit(1);
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/wenxint/ocp-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

index.js•10.3 KiB