Skip to main content
Glama

Image Description MCP Server

by 7etsuo
image-description-mcp_server.py (9.5 kB)
#!/usr/bin/env python3
"""
Simple image-description-mcp MCP Server - AI-powered image analysis using Grok API

Exposes three MCP tools over stdio:

* ``describe_image_url``   - describe an image fetched from a URL (Grok).
* ``describe_image_file``  - describe an image read from a local path (Grok).
* ``extract_text_from_image`` - OCR an image fetched from a URL (Tesseract).
"""
import asyncio
import base64
import io
import json
import logging
import os
import sys
from datetime import datetime, timezone

import cv2
import httpx
import numpy as np
import pytesseract
from PIL import Image
from mcp.server.fastmcp import FastMCP

# Configure logging to stderr (stdout is reserved for the stdio MCP transport)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stderr
)
logger = logging.getLogger("image-description-mcp-server")

# Initialize MCP server - NO PROMPT PARAMETER!
mcp = FastMCP("image-description-mcp")

# Configuration
GROK_API_KEY = os.environ.get("GROK_API_KEY", "")
# Chat-completions endpoint: this is the endpoint the vision request is
# actually sent to (the constant previously named the image *generation*
# endpoint and was never used).
GROK_API_URL = "https://api.x.ai/v1/chat/completions"
GROK_MODEL = "grok-4-0709"
GROK_MAX_TOKENS = 1000
GROK_TIMEOUT_SECONDS = 60
DOWNLOAD_TIMEOUT_SECONDS = 30
DEFAULT_MIME_TYPE = "image/jpeg"

# Prompt sent to Grok for each supported detail level.
_PROMPTS = {
    "basic": "Provide a clear, concise description of what you see in this image.",
    "detailed": (
        "Provide a detailed description of this image including: main subjects, "
        "setting, colors, composition, and key features."
    ),
    "comprehensive": (
        "Provide a comprehensive analysis of this image including: main subjects, "
        "colors, composition, mood, technical details, and any notable features. "
        "Be detailed and thorough."
    ),
}


# === UTILITY FUNCTIONS ===

def _read_image_bytes(image_path_or_url: str, is_url: bool = False) -> bytes:
    """Return the raw bytes of an image from a URL or a local path (blocking)."""
    if is_url:
        # Image hosts and CDNs frequently redirect; httpx does not follow
        # redirects unless asked to.
        response = httpx.get(
            image_path_or_url,
            timeout=DOWNLOAD_TIMEOUT_SECONDS,
            follow_redirects=True,
        )
        response.raise_for_status()
        return response.content
    with open(image_path_or_url, 'rb') as f:
        return f.read()


async def _download_image(url: str) -> bytes:
    """Download an image without blocking the event loop."""
    async with httpx.AsyncClient(follow_redirects=True) as client:
        response = await client.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
        response.raise_for_status()
        return response.content


def _inspect_image(image_data: bytes) -> tuple[dict, str]:
    """Return ``(metadata, mime_type)`` for already-loaded image bytes.

    Best-effort: if the bytes cannot be decoded, the metadata is
    ``{"error": "..."}`` and the MIME type falls back to DEFAULT_MIME_TYPE.
    """
    try:
        # The context manager releases the decoder/file handle promptly.
        with Image.open(io.BytesIO(image_data)) as img:
            metadata = {
                "format": img.format,
                "size": img.size,
                "mode": img.mode,
                "file_size_bytes": len(image_data),
            }
            mime_type = Image.MIME.get(img.format or "", DEFAULT_MIME_TYPE)
        return metadata, mime_type
    except Exception as e:
        logger.error("Error getting metadata: %s", e)
        return {"error": str(e)}, DEFAULT_MIME_TYPE


def _ocr_image_bytes(image_data: bytes) -> str:
    """Run Tesseract OCR on image bytes and return the stripped text.

    Raises whatever PIL / OpenCV / pytesseract raise; callers decide whether
    to swallow or report the failure.
    """
    with Image.open(io.BytesIO(image_data)) as img:
        # Normalise to RGB first so palette, alpha and 1-bit images all
        # become a 3-channel uint8 array that OpenCV can convert.
        rgb = np.array(img.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    return pytesseract.image_to_string(gray).strip()


def _resolve_detail_level(detail_level: str) -> tuple[str, str]:
    """Map a user-supplied detail level to ``(effective_level, prompt)``.

    Matching is case- and whitespace-insensitive; unknown values fall back
    to ``"basic"``.
    """
    level = (detail_level or "").strip().lower()
    if level not in _PROMPTS:
        level = "basic"
    return level, _PROMPTS[level]


def encode_image_to_base64(image_path_or_url: str, is_url: bool = False) -> str:
    """Convert image to base64 string for API transmission.

    Args:
        image_path_or_url: URL or local filesystem path of the image.
        is_url: True to download from a URL, False to read a local file.

    Returns:
        The image bytes encoded as an ASCII base64 string.

    Raises:
        Exception: any download or file error is logged and re-raised.
    """
    try:
        image_data = _read_image_bytes(image_path_or_url, is_url)
    except Exception as e:
        logger.error("Error encoding image: %s", e)
        raise
    return base64.b64encode(image_data).decode('utf-8')


def get_image_metadata(image_path_or_url: str, is_url: bool = False) -> dict:
    """Extract technical metadata from image.

    Args:
        image_path_or_url: URL or local filesystem path of the image.
        is_url: True to download from a URL, False to read a local file.

    Returns:
        A dict with ``format``, ``size``, ``mode`` and ``file_size_bytes``,
        or ``{"error": "..."}`` if the image could not be loaded or decoded.
    """
    try:
        image_data = _read_image_bytes(image_path_or_url, is_url)
    except Exception as e:
        logger.error("Error getting metadata: %s", e)
        return {"error": str(e)}
    metadata, _ = _inspect_image(image_data)
    return metadata


def extract_text_from_image_ocr(image_path_or_url: str, is_url: bool = False) -> str:
    """Extract text from image using OCR.

    Args:
        image_path_or_url: URL or local filesystem path of the image.
        is_url: True to download from a URL, False to read a local file.

    Returns:
        The recognised text with surrounding whitespace stripped, or an
        empty string if loading or OCR failed (the error is logged).
    """
    try:
        return _ocr_image_bytes(_read_image_bytes(image_path_or_url, is_url))
    except Exception as e:
        logger.error("OCR error: %s", e)
        return ""


async def _request_grok_description(image_base64: str, prompt: str, mime_type: str) -> str:
    """POST a vision request to Grok and return the reply text.

    Raises:
        httpx.HTTPStatusError: the API answered with a non-success status.
        Exception: transport errors or an unexpected response shape.
    """
    headers = {
        "Authorization": f"Bearer {GROK_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": GROK_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": GROK_MAX_TOKENS
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(
            GROK_API_URL,
            headers=headers,
            json=payload,
            timeout=GROK_TIMEOUT_SECONDS
        )
        response.raise_for_status()
        result = response.json()
    return result["choices"][0]["message"]["content"]


async def call_grok_api(image_base64: str, prompt: str, mime_type: str = DEFAULT_MIME_TYPE) -> str:
    """Call Grok API for image analysis.

    Args:
        image_base64: Base64-encoded image bytes.
        prompt: Instruction text sent alongside the image.
        mime_type: MIME type declared in the data URL (defaults to JPEG).

    Returns:
        The model's reply, or a string starting with "❌" describing the
        failure. This function never raises.
    """
    if not GROK_API_KEY:
        return "❌ Error: GROK_API_KEY environment variable not set"
    try:
        return await _request_grok_description(image_base64, prompt, mime_type)
    except httpx.HTTPStatusError as e:
        return f"❌ API Error: {e.response.status_code} - {e.response.text}"
    except Exception as e:
        return f"❌ Error calling Grok API: {str(e)}"


async def _describe_image_bytes(image_data: bytes, source: str, detail_level: str) -> str:
    """Describe already-loaded image bytes with Grok and format the tool reply.

    API status errors are returned as a "❌ API Error" message instead of
    being embedded in a success payload; other exceptions propagate to the
    calling tool.
    """
    metadata, mime_type = _inspect_image(image_data)
    level, prompt = _resolve_detail_level(detail_level)
    image_base64 = base64.b64encode(image_data).decode('utf-8')

    try:
        description = await _request_grok_description(image_base64, prompt, mime_type)
    except httpx.HTTPStatusError as e:
        return f"❌ API Error: {e.response.status_code} - {e.response.text}"

    result = {
        "description": description,
        "metadata": metadata,
        "source": source,
        "analysis_level": level
    }
    return f"✅ Image Analysis Complete:\n{json.dumps(result, indent=2, ensure_ascii=False)}"


# === MCP TOOLS ===
# NOTE: FastMCP publishes each tool's docstring as its description, so the
# summary lines below are kept stable.

@mcp.tool()
async def describe_image_url(url: str = "", detail_level: str = "basic") -> str:
    """Analyze image from URL and provide AI-generated description using Grok.

    Args:
        url: HTTP(S) URL of the image to analyze.
        detail_level: "basic", "detailed" or "comprehensive"; anything else
            is treated as "basic".
    """
    logger.info("Executing describe_image_url with %s, detail_level: %s", url, detail_level)

    url = url.strip()
    if not url:
        return "❌ Error: URL is required"

    if not GROK_API_KEY:
        return "❌ Error: GROK_API_KEY environment variable not set"

    try:
        # Download once and reuse the bytes for metadata and the API call.
        image_data = await _download_image(url)
        return await _describe_image_bytes(image_data, url, detail_level)
    except Exception as e:
        logger.exception("Error: %s", e)
        return f"❌ Error analyzing image: {str(e)}"


@mcp.tool()
async def describe_image_file(file_path: str = "", detail_level: str = "basic") -> str:
    """Analyze local image file and provide AI-generated description using Grok.

    Args:
        file_path: Path of the image file on the server's filesystem.
        detail_level: "basic", "detailed" or "comprehensive"; anything else
            is treated as "basic".
    """
    logger.info("Executing describe_image_file with %s, detail_level: %s", file_path, detail_level)

    if not file_path.strip():
        return "❌ Error: File path is required"

    # isfile (not exists) so that a directory path is rejected up front.
    if not os.path.isfile(file_path):
        return f"❌ Error: File not found: {file_path}"

    if not GROK_API_KEY:
        return "❌ Error: GROK_API_KEY environment variable not set"

    try:
        # Read on a worker thread so a slow disk does not stall the event loop.
        image_data = await asyncio.to_thread(_read_image_bytes, file_path, False)
        return await _describe_image_bytes(image_data, file_path, detail_level)
    except Exception as e:
        logger.exception("Error: %s", e)
        return f"❌ Error analyzing image file: {str(e)}"


@mcp.tool()
async def extract_text_from_image(url: str = "") -> str:
    """Extract readable text from image using OCR.

    Args:
        url: HTTP(S) URL of the image to run OCR on.
    """
    logger.info("Executing extract_text_from_image with %s", url)

    url = url.strip()
    if not url:
        return "❌ Error: URL is required"

    try:
        image_data = await _download_image(url)

        # OCR shells out to Tesseract and is CPU-heavy: keep it off the loop.
        # Failures propagate so they are reported as errors rather than as
        # "no text found".
        extracted_text = await asyncio.to_thread(_ocr_image_bytes, image_data)

        if not extracted_text:
            return "⚠️ No text found in the image"

        metadata, _ = _inspect_image(image_data)

        result = {
            "extracted_text": extracted_text,
            "metadata": metadata,
            "source": url,
            "method": "OCR"
        }
        return f"✅ Text Extraction Complete:\n{json.dumps(result, indent=2, ensure_ascii=False)}"
    except Exception as e:
        logger.exception("Error: %s", e)
        return f"❌ Error extracting text: {str(e)}"


# === SERVER STARTUP ===
if __name__ == "__main__":
    logger.info("Starting image-description-mcp MCP server...")

    # Check for required environment variables
    if not GROK_API_KEY:
        logger.warning("GROK_API_KEY not set - image analysis tools will not work")

    try:
        mcp.run(transport='stdio')
    except Exception as e:
        logger.error(f"Server error: {e}", exc_info=True)
        sys.exit(1)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/7etsuo/image-description-mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.