Skip to main content
Glama

image_metadata

Extract metadata from images by providing a URL or file path, generating descriptive captions using vision models for analysis and organization.

Input Schema

TableJSON Schema
Name | Required | Description | Default
caption_override | No | |
config_path | No | |
file_path | No | |
image_url | No | |
mode | No | 'double' (text metadata) or 'triple' (vision metadata) | double

Implementation Reference

  • Core handler for the 'image_metadata' tool. Validates inputs, handles caption overrides and modes ('double': text metadata, 'triple': vision metadata), delegates to runner helpers.
    @mcp.tool()
    def image_metadata(
        image_url: Optional[str] = None,
        file_path: Optional[str] = None,
        caption_override: Optional[str] = None,
        config_path: Optional[str] = None,
        mode: str = "double",
    ) -> dict:
        """Extract alt text, a caption, and structured metadata for an image.

        Args:
            image_url: URL of the image (mutually exclusive with file_path).
            file_path: Local path of the image (mutually exclusive with image_url).
            caption_override: If given, skip caption generation and build metadata
                from this caption instead.
            config_path: Optional JSON config forwarded to the pipeline runners.
            mode: "double" (text-only metadata from the caption) or
                "triple" (vision-based metadata from image + caption).

        Returns:
            Dict with "alt_text", "caption", and "metadata" keys.

        Raises:
            ValueError: If neither or both image references are given, or if
                mode is not "double"/"triple".
        """
        if not image_url and not file_path:
            raise ValueError("Provide either image_url or file_path")
        if image_url and file_path:
            raise ValueError("Provide only one of image_url or file_path, not both")
        # Validate mode once, up front, instead of in each branch below.
        if mode not in ("double", "triple"):
            raise ValueError("mode must be 'double' or 'triple'")
        image_ref = image_url or file_path  # type: ignore

        # Single source of truth for the schema location (was computed three times).
        schema_path = os.path.join(os.path.dirname(__file__), "metadata", "schema.json")

        if caption_override:
            if mode == "double":
                # Text-only metadata from provided caption
                from cv_mcp.metadata.runner import run_metadata_from_caption
                meta = run_metadata_from_caption(caption_override, schema_path=schema_path)
            else:
                # Vision+caption metadata
                meta = run_structured_json(image_ref, caption_override, schema_path=schema_path)
            alt = run_alt_text(image_ref)
            return {"alt_text": alt, "caption": caption_override, "metadata": meta}

        runner = run_pipeline_double if mode == "double" else run_pipeline_triple
        return runner(image_ref, config_path=config_path, schema_path=schema_path)
  • Registers the image_metadata function as an MCP tool using FastMCP decorator.
    @mcp.tool()
  • 'double' mode pipeline: generates alt_text + dense caption using vision LLM, then text-only structured metadata from caption.
    def run_pipeline_double(
        image_ref: str,
        *,
        config_path: Optional[Union[str, Path]] = None,
        schema_path: Union[str, Path] = Path(__file__).with_name("schema.json"),
    ) -> Dict[str, Any]:
        """Run the 'double' pipeline: vision alt-text + caption, then text-only metadata.

        Args:
            image_ref: URL or local path of the image.
            config_path: Optional path to a JSON config that replaces the module
                defaults (_CFG) entirely when given.
            schema_path: JSON schema used to structure the metadata.

        Returns:
            Dict with "alt_text", "caption", and "metadata" keys.

        Raises:
            RuntimeError: If config_path is given but cannot be read or parsed.
        """
        cfg = dict(_CFG)
        if config_path:
            try:
                cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
            except Exception as e:
                # Chain the original error so the root cause stays in the traceback.
                raise RuntimeError(f"Failed to read config from {config_path}: {e}") from e
        ac = run_alt_and_caption(image_ref, model=cfg.get("caption_model"))
        meta = run_metadata_from_caption(ac["caption"], schema_path=schema_path, model=cfg.get("metadata_text_model"))
        return {"alt_text": ac["alt_text"], "caption": ac["caption"], "metadata": meta}
  • 'triple' mode pipeline: generates alt_text + dense caption using vision LLM, then vision-based structured metadata using image + caption.
    def run_pipeline_triple(
        image_ref: str,
        *,
        config_path: Optional[Union[str, Path]] = None,
        schema_path: Union[str, Path] = Path(__file__).with_name("schema.json"),
    ) -> Dict[str, Any]:
        """Run the 'triple' pipeline: vision alt-text + caption, then vision-based metadata.

        Args:
            image_ref: URL or local path of the image.
            config_path: Optional path to a JSON config that replaces the module
                defaults (_CFG) entirely when given.
            schema_path: JSON schema used to structure the metadata.

        Returns:
            Dict with "alt_text", "caption", and "metadata" keys.

        Raises:
            RuntimeError: If config_path is given but cannot be read or parsed.
        """
        cfg = dict(_CFG)
        if config_path:
            try:
                cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
            except Exception as e:
                # Chain the original error so the root cause stays in the traceback.
                raise RuntimeError(f"Failed to read config from {config_path}: {e}") from e
        ac = run_alt_and_caption(image_ref, model=cfg.get("caption_model"))
        meta = run_structured_json(image_ref, ac["caption"], schema_path=schema_path, model=cfg.get("metadata_vision_model"))
        return {"alt_text": ac["alt_text"], "caption": ac["caption"], "metadata": meta}
  • Post-generation validation enforcing metadata schema: caps arrays, ensures required fields (media_type, objects, people, tags), generates tags if missing, cleans empty fields.
    def _post_validate(data: Dict[str, Any]) -> None:
        # Enforce array caps and build tags if missing
        def _cap(key: str, n: int):
            if isinstance(data.get(key), list) and len(data[key]) > n:
                data[key] = data[key][:n]
    
        for k, n in ("objects", 6), ("scene", 3), ("lighting", 3), ("style", 5), ("palette", 6), ("tags", 20):
            _cap(k, n)
    
        # Ensure people fields exist with defaults
        if not isinstance(data.get("people"), dict):
            data["people"] = {"count": 0, "faces_visible": False}
        else:
            data["people"].setdefault("count", 0)
            data["people"].setdefault("faces_visible", False)
    
        # Compute tags union if missing or empty
        if not isinstance(data.get("tags"), list) or not data.get("tags"):
            def norm_list(v):
                return v if isinstance(v, list) else []
            tags = []
            if isinstance(data.get("media_type"), str):
                tags.append(data["media_type"])
            for k in ("scene", "lighting", "style", "palette", "objects"):
                tags.extend(norm_list(data.get(k)))
            # Deduplicate while preserving order
            seen = set()
            uniq = []
            for t in tags:
                if isinstance(t, str) and t not in seen:
                    seen.add(t)
                    uniq.append(t)
            data["tags"] = uniq[:20]
    
        # Always include essential keys; drop empty/null fields for others
        essentials = {"media_type", "objects", "people", "tags"}
        to_delete = []
        for k, v in list(data.items()):
            if k in essentials:
                continue
            if v is None:
                to_delete.append(k)
            elif isinstance(v, list) and len(v) == 0:
                to_delete.append(k)
            elif isinstance(v, dict) and len(v.keys()) == 0:
                to_delete.append(k)
        for k in to_delete:
            try:
                del data[k]
            except Exception:
                pass

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/samhains/cv-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server