Skip to main content
Glama

Auto-Snap MCP

processing.py (9.13 kB)
""" Image processing utilities for screenshot enhancement and OCR. """ import os from typing import List, Optional, Dict from pathlib import Path from PIL import Image, ImageEnhance, ImageFilter import pytesseract import hashlib import logging logger = logging.getLogger(__name__) class ImageProcessor: """Handles image processing and OCR operations.""" def __init__(self): self.supported_formats = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.gif'} def enhance_image(self, image_path: str, output_path: Optional[str] = None) -> str: """ Enhance image quality for better OCR results. Args: image_path: Path to input image output_path: Path for enhanced image (optional) Returns: Path to enhanced image """ try: with Image.open(image_path) as img: # Convert to grayscale for better OCR if img.mode != 'L': img = img.convert('L') # Enhance contrast enhancer = ImageEnhance.Contrast(img) img = enhancer.enhance(1.5) # Enhance sharpness enhancer = ImageEnhance.Sharpness(img) img = enhancer.enhance(2.0) # Apply slight blur to reduce noise img = img.filter(ImageFilter.MedianFilter(size=3)) if output_path is None: name, ext = os.path.splitext(image_path) output_path = f"{name}_enhanced{ext}" img.save(output_path) logger.info(f"Image enhanced: {output_path}") return output_path except Exception as e: logger.error(f"Failed to enhance image {image_path}: {e}") raise def extract_text(self, image_path: str, language: str = 'eng') -> str: """ Extract text from image using OCR. 
Args: image_path: Path to image file language: OCR language (default: 'eng') Returns: Extracted text """ try: # Enhance image first for better OCR enhanced_path = self.enhance_image(image_path) # Extract text using pytesseract text = pytesseract.image_to_string( Image.open(enhanced_path), lang=language, config='--psm 6' # Assume uniform block of text ) # Clean up temporary enhanced image if enhanced_path != image_path: try: os.remove(enhanced_path) except OSError: pass logger.info(f"Text extracted from {image_path}: {len(text)} characters") return text.strip() except Exception as e: logger.error(f"Failed to extract text from {image_path}: {e}") raise def get_image_hash(self, image_path: str) -> str: """ Calculate hash of image for duplicate detection. Args: image_path: Path to image file Returns: MD5 hash of image """ try: with open(image_path, 'rb') as f: return hashlib.md5(f.read()).hexdigest() except Exception as e: logger.error(f"Failed to calculate hash for {image_path}: {e}") return "" def find_duplicates(self, image_paths: List[str]) -> Dict[str, List[str]]: """ Find duplicate images in a list of image paths. Args: image_paths: List of image file paths Returns: Dictionary mapping hash to list of duplicate file paths """ hash_to_files = {} for image_path in image_paths: if not os.path.exists(image_path): continue image_hash = self.get_image_hash(image_path) if image_hash: if image_hash not in hash_to_files: hash_to_files[image_hash] = [] hash_to_files[image_hash].append(image_path) # Return only groups with duplicates duplicates = {h: files for h, files in hash_to_files.items() if len(files) > 1} if duplicates: logger.info(f"Found {len(duplicates)} groups of duplicate images") return duplicates def resize_image(self, image_path: str, max_width: int = 1920, max_height: int = 1080, output_path: Optional[str] = None) -> str: """ Resize image while maintaining aspect ratio. 
Args: image_path: Path to input image max_width: Maximum width max_height: Maximum height output_path: Output path (optional) Returns: Path to resized image """ try: with Image.open(image_path) as img: # Calculate new size maintaining aspect ratio img.thumbnail((max_width, max_height), Image.Resampling.LANCZOS) if output_path is None: name, ext = os.path.splitext(image_path) output_path = f"{name}_resized{ext}" img.save(output_path, optimize=True, quality=90) logger.info(f"Image resized: {output_path}") return output_path except Exception as e: logger.error(f"Failed to resize image {image_path}: {e}") raise def process_batch(self, image_dir: str, operations: List[str] = None) -> Dict[str, List[str]]: """ Process a batch of images in a directory. Args: image_dir: Directory containing images operations: List of operations ('enhance', 'ocr', 'resize', 'deduplicate') Returns: Dictionary with results of operations """ if operations is None: operations = ['enhance'] image_paths = [] for ext in self.supported_formats: image_paths.extend(Path(image_dir).glob(f"*{ext}")) image_paths.extend(Path(image_dir).glob(f"*{ext.upper()}")) image_paths = [str(p) for p in image_paths] results = { 'processed_files': [], 'enhanced_files': [], 'ocr_results': [], 'resized_files': [], 'duplicates': {} } try: # Find duplicates first if 'deduplicate' in operations: results['duplicates'] = self.find_duplicates(image_paths) for image_path in image_paths: results['processed_files'].append(image_path) try: # Enhance image if 'enhance' in operations: enhanced_path = self.enhance_image(image_path) results['enhanced_files'].append(enhanced_path) # Extract text if 'ocr' in operations: text = self.extract_text(image_path) results['ocr_results'].append({ 'file': image_path, 'text': text }) # Resize image if 'resize' in operations: resized_path = self.resize_image(image_path) results['resized_files'].append(resized_path) except Exception as e: logger.error(f"Failed to process {image_path}: {e}") 
continue logger.info(f"Batch processed {len(results['processed_files'])} images") return results except Exception as e: logger.error(f"Failed to process batch in {image_dir}: {e}") raise def check_tesseract() -> bool: """ Check if Tesseract OCR is installed and accessible. Returns: True if Tesseract is available, False otherwise """ try: import signal def timeout_handler(signum, frame): raise TimeoutError("Tesseract check timed out") # Set timeout for tesseract check signal.signal(signal.SIGALRM, timeout_handler) signal.alarm(10) # 10 second timeout try: pytesseract.get_tesseract_version() return True finally: signal.alarm(0) # Cancel the alarm except (TimeoutError, Exception) as e: logger.error(f"Tesseract not available: {e}") return False

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PovedaAqui/auto-snap-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.