"""
Visual analysis tools for Percepta MCP server.
"""
import base64
import io
from typing import Dict, Any, Optional, List
from PIL import Image
import cv2
import numpy as np
import logging
from ..config import Settings
logger = logging.getLogger(__name__)
class VisualAnalysis:
"""Visual analysis and image processing tools."""
def __init__(self, settings: Settings):
self.settings = settings
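    @staticmethod
    def _decode_base64(image_data: str) -> bytes:
        """Decode raw base64 or a data URL into raw image bytes.
        Shared helper consolidating the identical decode blocks that the
        methods below previously repeated inline.
        """
        if image_data.startswith('data:'):
            # Strip the "data:<mime>;base64," prefix before decoding
            _, image_data = image_data.split(',', 1)
        return base64.b64decode(image_data)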
async def analyze_image(self, image_data: str, analysis_type: str = "general", prompt: Optional[str] = None) -> Dict[str, Any]:
"""Analyze an image using AI models."""
try:
logger.info(f"Analyzing image with type: {analysis_type}")
            # Decode base64 image (raw base64 or data URL)
            image_bytes = self._decode_base64(image_data)
# Open image to validate
image = Image.open(io.BytesIO(image_bytes))
width, height = image.size
# For now, return basic image info
# In a real implementation, this would use AI models for analysis
analysis_result: Dict[str, Any] = {
"width": width,
"height": height,
"format": image.format,
"mode": image.mode,
"analysis_type": analysis_type
}
if analysis_type == "text":
# Placeholder for OCR functionality
analysis_result["extracted_text"] = "OCR functionality would be implemented here"
elif analysis_type == "objects":
# Placeholder for object detection
analysis_result["detected_objects"] = ["Object detection would be implemented here"]
elif analysis_type == "general":
# Placeholder for general image analysis
analysis_result["description"] = "General image analysis would be implemented here"
if prompt:
analysis_result["prompt_response"] = f"Response to '{prompt}' would be implemented here"
return {
"success": True,
"analysis": analysis_result
}
except Exception as e:
logger.error(f"Image analysis error: {e}")
return {
"success": False,
"error": str(e)
}
async def extract_text(self, image_data: str, language: str = "eng") -> Dict[str, Any]:
"""Extract text from an image using OCR."""
try:
logger.info(f"Extracting text from image with language: {language}")
            # Decode base64 image (raw base64 or data URL)
            image_bytes = self._decode_base64(image_data)
# Convert to OpenCV format
image_array = np.frombuffer(image_bytes, np.uint8)
image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
if image is None: # type: ignore
raise ValueError("Invalid image data")
# Placeholder for actual OCR implementation
# In a real implementation, this would use tesseract or similar
extracted_text = "OCR text extraction would be implemented here using tesseract-ocr or similar library"
# Get image dimensions
height, width = image.shape[:2]
return {
"success": True,
"text": extracted_text,
"language": language,
"image_info": {
"width": width,
"height": height
}
}
except Exception as e:
logger.error(f"Text extraction error: {e}")
return {
"success": False,
"error": str(e)
}
async def compare_images(self, image1_data: str, image2_data: str, method: str = "structural") -> Dict[str, Any]:
"""Compare two images and return similarity metrics."""
try:
logger.info(f"Comparing images using method: {method}")
            # Decode both images (cv2.imdecode returns None on bad data,
            # hence the Optional return type)
            def decode_image(data: str) -> Optional[np.ndarray]:
                image_array = np.frombuffer(self._decode_base64(data), np.uint8)
                return cv2.imdecode(image_array, cv2.IMREAD_COLOR)
img1 = decode_image(image1_data)
img2 = decode_image(image2_data)
            if img1 is None or img2 is None:
raise ValueError("Invalid image data")
# Resize images to same size for comparison
height = min(img1.shape[0], img2.shape[0])
width = min(img1.shape[1], img2.shape[1])
img1_resized = cv2.resize(img1, (width, height))
img2_resized = cv2.resize(img2, (width, height))
if method == "structural":
# Calculate structural similarity
# Convert to grayscale
gray1 = cv2.cvtColor(img1_resized, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2_resized, cv2.COLOR_BGR2GRAY)
# Calculate mean squared error as a simple similarity metric
mse = np.mean((gray1 - gray2) ** 2)
similarity = 1.0 / (1.0 + mse / 10000.0) # Normalize to 0-1 range
elif method == "histogram":
# Compare histograms
hist1 = cv2.calcHist([img1_resized], [0, 1, 2], None, [50, 50, 50], [0, 256, 0, 256, 0, 256])
hist2 = cv2.calcHist([img2_resized], [0, 1, 2], None, [50, 50, 50], [0, 256, 0, 256, 0, 256])
similarity = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)
else:
# Default pixel-wise comparison
diff = cv2.absdiff(img1_resized, img2_resized)
similarity = 1.0 - (np.mean(diff) / 255.0)
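            # Hedged upgrade (assumes scikit-image is installed, an extra
            # dependency this module does not declare): SSIM is the standard
            # structural metric, so prefer it over the MSE proxy when present.
            if method == "structural":
                try:
                    from skimage.metrics import structural_similarity
                    similarity = float(structural_similarity(gray1, gray2))
                except ImportError:
                    pass  # keep the MSE-based approximation computed above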
return {
"success": True,
"similarity": float(similarity),
"method": method,
"image1_size": img1.shape[:2],
"image2_size": img2.shape[:2]
}
except Exception as e:
logger.error(f"Image comparison error: {e}")
return {
"success": False,
"error": str(e)
}
async def detect_objects(self, image_data: str, confidence_threshold: float = 0.5) -> Dict[str, Any]:
"""Detect objects in an image."""
try:
logger.info("Detecting objects in image")
            # Decode base64 image (raw base64 or data URL)
            image_bytes = self._decode_base64(image_data)
image_array = np.frombuffer(image_bytes, np.uint8)
image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
if image is None: # type: ignore
raise ValueError("Invalid image data")
# Placeholder for actual object detection
# In a real implementation, this would use YOLO, SSD, or similar
detected_objects: List[Dict[str, Any]] = [
{
"class": "placeholder",
"confidence": 0.9,
"bbox": [100, 100, 200, 200],
"description": "Object detection would be implemented here using YOLO or similar"
}
]
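            # Hedged sketch using only OpenCV built-ins: the bundled HOG
            # pedestrian detector needs no model download, so it can stand in
            # for YOLO/SSD here. Its scores are unnormalised SVM margins, not
            # probabilities, so comparing them to confidence_threshold is
            # approximate at best.
            hog = cv2.HOGDescriptor()
            hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
            rects, weights = hog.detectMultiScale(image, winStride=(8, 8))
            for (x, y, w, h), score in zip(rects, np.ravel(weights)):
                if float(score) >= confidence_threshold:
                    detected_objects.append({
                        "class": "person",
                        "confidence": float(score),
                        "bbox": [int(x), int(y), int(x + w), int(y + h)],
                        "description": "OpenCV built-in HOG pedestrian detector"
                    })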
return {
"success": True,
"objects": detected_objects,
"confidence_threshold": confidence_threshold,
"image_size": image.shape[:2]
}
except Exception as e:
logger.error(f"Object detection error: {e}")
return {
"success": False,
"error": str(e)
}
async def enhance_image(self, image_data: str, enhancement_type: str = "auto") -> Dict[str, Any]:
"""Enhance an image with various filters and adjustments."""
try:
logger.info(f"Enhancing image with type: {enhancement_type}")
            # Decode base64 image (raw base64 or data URL)
            image_bytes = self._decode_base64(image_data)
image_array = np.frombuffer(image_bytes, np.uint8)
image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
if image is None: # type: ignore
raise ValueError("Invalid image data")
# Apply enhancement based on type
if enhancement_type == "sharpen":
kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
enhanced = cv2.filter2D(image, -1, kernel)
elif enhancement_type == "blur":
enhanced = cv2.GaussianBlur(image, (15, 15), 0)
elif enhancement_type == "brightness":
enhanced = cv2.convertScaleAbs(image, alpha=1.2, beta=30)
elif enhancement_type == "contrast":
enhanced = cv2.convertScaleAbs(image, alpha=1.5, beta=0)
            else:  # auto enhancement
                # CLAHE on the lightness channel of LAB space boosts local
                # contrast without shifting colours
                lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
                l, a, b = cv2.split(lab)
                clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
                l = clahe.apply(l)
                enhanced = cv2.merge([l, a, b])
                enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)
# Encode enhanced image
_, buffer = cv2.imencode('.png', enhanced)
enhanced_b64 = base64.b64encode(buffer).decode('utf-8')
return {
"success": True,
"enhanced_image": enhanced_b64,
"enhancement_type": enhancement_type,
"mime_type": "image/png"
}
except Exception as e:
logger.error(f"Image enhancement error: {e}")
return {
"success": False,
"error": str(e)
}
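if __name__ == "__main__":
    # Minimal smoke test; a sketch, not part of the server. Run it with
    # "python -m <package>.visual_analysis" so the relative Settings import
    # resolves; the exact module path and a default-constructible Settings()
    # are assumptions about the surrounding project.
    import asyncio
    async def _demo() -> None:
        # Round-trip a tiny in-memory PNG through two of the tools
        img = Image.new("RGB", (64, 64), color=(128, 64, 32))
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
        tools = VisualAnalysis(Settings())
        print(await tools.analyze_image(b64))
        print(await tools.compare_images(b64, b64, method="histogram"))
    asyncio.run(_demo())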