MCP Image Recognition Server

Verified
Image & Video Processing
MIT License
Overview Inspect Schema Related Servers Reviews Score
import logging
import os
from typing import Optional

import pytesseract  # type: ignore
from PIL import Image

logger = logging.getLogger(__name__)


class OCRError(Exception):
    """Signal that text extraction via OCR could not be completed.

    Carries no extra state; the human-readable reason is the exception
    message passed to the constructor.
    """


def extract_text_from_image(
    image: Image.Image, ocr_required: bool = False
) -> Optional[str]:
    """Extract text from an image using Tesseract OCR.

    If the ``TESSERACT_CMD`` environment variable is set to a non-blank
    value, it is used (with surrounding whitespace removed) as the path of
    the Tesseract executable for pytesseract.

    Args:
        image: PIL Image object to process.
        ocr_required: If True, raise ``OCRError`` when OCR fails. If False,
            log a warning and return None instead.

    Returns:
        The extracted text with leading/trailing whitespace stripped, or
        None when either (a) the image yielded no text, or (b) OCR failed
        and ``ocr_required`` is False. Note that an image with no
        detectable text returns None even when ``ocr_required`` is True;
        only an OCR failure raises.

    Raises:
        OCRError: If OCR fails and ``ocr_required`` is True. The original
            exception is attached as ``__cause__``.
    """
    try:
        # Honor a custom executable path; strip it so a value padded with
        # whitespace (common in .env files) does not become an invalid path.
        tesseract_cmd = os.getenv("TESSERACT_CMD", "").strip()
        if tesseract_cmd:
            pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

        text = pytesseract.image_to_string(image).strip()
    except Exception as e:
        # Deliberately broad: OCR is best-effort unless the caller asked for
        # it to be mandatory, so any failure is logged and optionally raised.
        detail = str(e)
        if "not installed" in detail or "not in your PATH" in detail:
            # Replace the raw message with actionable setup guidance when the
            # failure text indicates the Tesseract binary could not be found.
            error_msg = (
                "Tesseract OCR is not installed or not in PATH. "
                "Please install Tesseract and ensure it's in your system PATH, "
                "or set TESSERACT_CMD environment variable to the executable path."
            )
        else:
            error_msg = f"Failed to extract text using Tesseract: {detail}"

        logger.warning(error_msg)
        if ocr_required:
            # Chain explicitly so the underlying failure stays visible in
            # tracebacks as the direct cause.
            raise OCRError(error_msg) from e
        return None

    if not text:
        logger.info("No text found in image")
        return None

    logger.info("Successfully extracted text from image using Tesseract")
    logger.debug("Extracted text length: %d", len(text))
    return text