GLM OCR MCP Server

Overview Schema Related Servers Score Discussions

glm-ocr-mcp
src
glm_ocr_mcp

ocr.py•5.69 KiB

"""GLM OCR MCP Server"""

import base64
import mimetypes
import os
import time
from typing import Union

import httpx
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before the configuration
# below is read from the environment.
load_dotenv()

DEFAULT_API_URL = "https://open.bigmodel.cn/api/paas/v4/layout_parsing"
API_URL = os.getenv("ZHIPU_OCR_API_URL", DEFAULT_API_URL)

# Signature used to recognise PDF content passed as raw bytes.
_PDF_MAGIC = b"%PDF-"
# Lenient readers accept the header anywhere near the start of the file, so a
# small window is searched rather than requiring it at offset 0.
_PDF_MAGIC_WINDOW = 1024
# Number of leading characters inspected (lower-cased) to classify string
# input; long enough for every prefix tested below, and it avoids
# lower-casing a multi-megabyte data URL.
_PREFIX_WINDOW = 32


class ZhipuOCR:
    """ZhipuAI OCR Client"""

    def __init__(self, api_key: str):
        """Store the API key and prepare the headers sent with every request."""
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def _encode_file(self, file_path: str) -> str:
        """Read the file at ``file_path`` and return its content as base64 text."""
        with open(file_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    def _file_to_data_url(self, file_path: str) -> str:
        """Convert local file path to data URL with detected MIME type.

        The MIME type is guessed from the file name; when it cannot be
        guessed, ``application/octet-stream`` is used.
        """
        mime_type, _ = mimetypes.guess_type(file_path)
        if not mime_type:
            mime_type = "application/octet-stream"
        return f"data:{mime_type};base64,{self._encode_file(file_path)}"

    def _extract_markdown(self, result: dict) -> str:
        """Normalize API response shape to markdown string.

        ``md_results`` may be a plain string (returned unchanged) or a list of
        dicts carrying the text under ``content`` or ``md``; non-empty list
        entries are joined with blank lines. Any other shape yields ``""``.
        """
        md_results = result.get("md_results", "")
        if isinstance(md_results, str):
            return md_results
        if isinstance(md_results, list):
            parts = []
            for item in md_results:
                if not isinstance(item, dict):
                    continue
                # Prefer "content"; fall back to "md" when "content" is
                # missing or is not a string.
                if "content" in item and isinstance(item["content"], str):
                    parts.append(item["content"])
                elif "md" in item and isinstance(item["md"], str):
                    parts.append(item["md"])
            return "\n\n".join(part for part in parts if part)
        return ""

    def _build_payload(
        self,
        file: Union[str, bytes],
        start_page_id: int | None = None,
        end_page_id: int | None = None,
    ) -> dict:
        """Build request payload and include PDF paging options when applicable.

        Args:
            file: Raw file bytes, an ``http(s)://`` URL, a ``data:image...`` /
                ``data:application...`` URL, or a path to an existing local
                file.
            start_page_id: Optional first page; only sent for PDF input.
            end_page_id: Optional last page; only sent for PDF input.

        Returns:
            The JSON-serialisable request body.

        Raises:
            ValueError: If ``file`` is none of the supported forms.
        """
        is_pdf = False
        if isinstance(file, bytes):
            file_data = (
                "data:application/octet-stream;base64,"
                f"{base64.b64encode(file).decode('utf-8')}"
            )
            # Sniff the PDF signature so paging options are honoured for
            # in-memory PDFs instead of being silently dropped.
            is_pdf = _PDF_MAGIC in file[:_PDF_MAGIC_WINDOW]
        else:
            text = str(file)
            # Prefixes are matched case-insensitively so that "HTTP://" or
            # "DATA:application/pdf" are classified like their lowercase forms.
            head = text[:_PREFIX_WINDOW].lower()
            if head.startswith(("http://", "https://")):
                file_data = text
                # Ignore both the query string and the fragment when looking
                # at the extension ("a.pdf?x=1", "a.pdf#page=2").
                bare_url = text.lower().split("?", 1)[0].split("#", 1)[0]
                is_pdf = bare_url.endswith(".pdf")
            elif head.startswith(("data:image", "data:application")):
                file_data = text
                is_pdf = head.startswith("data:application/pdf")
            elif os.path.isfile(text):
                file_data = self._file_to_data_url(text)
                is_pdf = text.lower().endswith(".pdf")
            else:
                raise ValueError(
                    f"Unsupported file format or path does not exist: {file}"
                )

        payload = {
            "model": "glm-ocr",
            "return_crop_images": True,
            "file": file_data,
        }
        if is_pdf:
            if start_page_id is not None:
                payload["start_page_id"] = start_page_id
            if end_page_id is not None:
                payload["end_page_id"] = end_page_id
        return payload

    def _post_layout_parsing(self, payload: dict, retries: int = 3) -> dict:
        """Call layout parsing API with retry for transient server/network failures.

        Args:
            payload: Request body as produced by ``_build_payload``.
            retries: Maximum number of attempts. Values below 1 are treated
                as 1 so that at least one request is always made.

        Returns:
            The decoded JSON response body.

        Raises:
            httpx.HTTPStatusError: Immediately for responses with a status
                below 500, or after the last attempt for 5xx responses.
            httpx.HTTPError: After the last attempt for other httpx failures.
        """
        attempts = max(1, retries)
        last_exc: Exception | None = None
        with httpx.Client(timeout=120) as client:
            for attempt in range(attempts):
                try:
                    response = client.post(
                        API_URL,
                        headers=self.headers,
                        json=payload,
                    )
                    response.raise_for_status()
                    return response.json()
                except httpx.HTTPStatusError as exc:
                    # Do not retry client-side invalid requests.
                    if exc.response.status_code < 500:
                        raise
                    last_exc = exc
                except httpx.HTTPError as exc:
                    # Must follow the HTTPStatusError clause, which is a
                    # subclass of HTTPError.
                    last_exc = exc
                # Linear backoff (0.6s, 1.2s, ...) between attempts; no sleep
                # after the final one.
                if attempt < attempts - 1:
                    time.sleep(0.6 * (attempt + 1))
        if last_exc is not None:
            raise last_exc
        # Defensive: every loop iteration either returns, raises or records
        # an exception, so this is not expected to be reached.
        raise RuntimeError("Unexpected empty response from OCR API")

    def parse(
        self,
        file: Union[str, bytes],
        start_page_id: int | None = None,
        end_page_id: int | None = None,
    ) -> str:
        """
        Call layout parsing API, return markdown content

        Args:
            file: Raw file bytes, local file path, data URL, or http(s) URL
            start_page_id: Optional first page, applied to PDF input only
            end_page_id: Optional last page, applied to PDF input only

        Returns:
            Parsed markdown content

        Raises:
            ValueError: If ``file`` is not a supported input.
            httpx.HTTPError: If the request ultimately fails.
        """
        payload = self._build_payload(
            file,
            start_page_id=start_page_id,
            end_page_id=end_page_id,
        )
        result = self._post_layout_parsing(payload)
        return self._extract_markdown(result)

    def parse_json(
        self,
        file: Union[str, bytes],
        start_page_id: int | None = None,
        end_page_id: int | None = None,
    ) -> dict:
        """
        Call layout parsing API and return structured JSON response.

        `md_results` is removed because markdown is provided by `parse`.

        Args:
            file: Raw file bytes, local file path, data URL, or http(s) URL
            start_page_id: Optional first page, applied to PDF input only
            end_page_id: Optional last page, applied to PDF input only

        Returns:
            The API response without the `md_results` key.

        Raises:
            ValueError: If ``file`` is not a supported input.
            httpx.HTTPError: If the request ultimately fails.
        """
        payload = self._build_payload(
            file,
            start_page_id=start_page_id,
            end_page_id=end_page_id,
        )
        result = self._post_layout_parsing(payload)
        result.pop("md_results", None)
        return result


def get_ocr_client() -> ZhipuOCR:
    """Get OCR client instance

    Raises:
        ValueError: If the ``ZHIPU_API_KEY`` environment variable is unset
            or empty.
    """
    api_key = os.getenv("ZHIPU_API_KEY")
    if not api_key:
        raise ValueError("Please set ZHIPU_API_KEY environment variable")
    return ZhipuOCR(api_key)

Loading blob content...

Implementation Reference

extract_text

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cs-qyzhang/glm-ocr-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

ocr.py•5.69 KiB