MCP Image Recognition Server

MIT License

Overview Inspect (New) Schema Related Servers Reviews Score

import logging
import os
from typing import Optional, List, Dict, Any, Union, Literal

import httpx
from openai import OpenAI, AsyncOpenAI
from openai import APIConnectionError, APITimeoutError
from openai.types.chat import ChatCompletion, ChatCompletionMessageParam
from openai.types.chat.completion_create_params import ResponseFormat

logger = logging.getLogger(__name__)


class OpenAIVision:
    """Async helper that asks an OpenAI chat model to describe an image.

    Configuration read from the environment:
        OPENAI_API_KEY: API key, used when none is passed to the constructor.
        OPENAI_BASE_URL: Optional alternative API endpoint.
        OPENAI_TIMEOUT: Request timeout in seconds (default 60).
        OPENAI_MODEL: Model name, read on every call (default "gpt-4o-mini").

    The instance owns an ``httpx.AsyncClient``; call :meth:`aclose` (or use
    ``async with``) when done so its connections are released.
    """

    # Timeout in seconds used when OPENAI_TIMEOUT is unset or blank.
    DEFAULT_TIMEOUT = 60.0

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI Vision client.

        Args:
            api_key: Optional API key. If not provided, will try to get from environment.

        Raises:
            ValueError: If no API key is available, or OPENAI_TIMEOUT is set
                to something that is not a number.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key not provided and not found in environment")

        self.base_url = os.getenv("OPENAI_BASE_URL")
        self.timeout = self._resolve_timeout()

        # Keep a handle on the HTTP client so aclose() can release it later.
        self._http_client = httpx.AsyncClient(
            timeout=httpx.Timeout(timeout=self.timeout)
        )

        self.client = AsyncOpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
            http_client=self._http_client,
        )

    @classmethod
    def _resolve_timeout(cls) -> float:
        """Return the request timeout (seconds) configured via OPENAI_TIMEOUT.

        An unset or blank variable yields ``DEFAULT_TIMEOUT``; a value that
        cannot be parsed as a float raises ``ValueError`` naming the variable.
        """
        raw = os.getenv("OPENAI_TIMEOUT")
        # Blank values are common in .env files; treat them like "unset".
        if raw is None or not raw.strip():
            return cls.DEFAULT_TIMEOUT
        try:
            return float(raw)
        except ValueError:
            raise ValueError(
                f"OPENAI_TIMEOUT must be a number of seconds, got {raw!r}"
            ) from None

    async def aclose(self) -> None:
        """Close the underlying HTTP client and release its connections."""
        await self._http_client.aclose()

    async def __aenter__(self) -> "OpenAIVision":
        """Enter an ``async with`` block, returning this instance."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Leave an ``async with`` block, closing the HTTP client."""
        await self.aclose()

    async def describe_image(
        self,
        image: str,
        prompt: str = "Please describe this image in detail.",
        mime_type: str = "image/png",
    ) -> str:
        """Describe an image using OpenAI's vision models.

        Args:
            image: String containing base64 encoded image.
            prompt: String containing the prompt.
            mime_type: MIME type of the image.

        Returns:
            str: Description of the image, or "No description available."
            when the response carries no content.

        Raises:
            Exception: If API call fails. The original error is attached as
                ``__cause__``.
        """
        # Get model from environment, default to gpt-4o-mini
        model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

        # The image travels inline as a data URL, followed by the text prompt.
        messages: List[ChatCompletionMessageParam] = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image}"
                        },
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ]

        try:
            response: ChatCompletion = await self.client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=1024,
            )
        # The OpenAI SDK wraps httpx transport failures in its own exception
        # types, so both families are handled here. APITimeoutError must be
        # checked before APIConnectionError because it is a subclass of it.
        except (APITimeoutError, httpx.TimeoutException) as e:
            logger.error("OpenAI API timeout: %s", e)
            raise Exception(f"Request timed out: {e}") from e
        except (APIConnectionError, httpx.RequestError) as e:
            logger.error("OpenAI API connection error: %s", e)
            raise Exception(f"Connection error: {e}") from e
        except Exception as e:
            logger.error("Unexpected error in OpenAI Vision: %s", e, exc_info=True)
            raise Exception(f"Error processing image: {e}") from e

        # Extract and return description
        if response.choices and response.choices[0].message.content:
            return response.choices[0].message.content
        return "No description available."

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mario-andreschak/mcp-image-recognition'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.