image_metadata

Extract metadata from images by providing a URL or file path, generating descriptive captions using vision models for analysis and organization.

Input Schema

Table | JSON Schema

Name	Required	Default
`caption_override`	No
`config_path`	No
`file_path`	No
`image_url`	No
`mode`	No	double

Implementation Reference

src/cv_mcp/mcp_server.py:107-152 (handler)

Core handler for the 'image_metadata' tool. Validates inputs, handles caption overrides and modes ('double': text metadata, 'triple': vision metadata), delegates to runner helpers.

@mcp.tool()
def image_metadata(
    image_url: Optional[str] = None,
    file_path: Optional[str] = None,
    caption_override: Optional[str] = None,
    config_path: Optional[str] = None,
    mode: str = "double",
) -> dict:
    """Produce alt text, a caption and structured metadata for one image.

    Args:
        image_url: Reference to the image as a URL. Exactly one of
            ``image_url`` and ``file_path`` must be given.
        file_path: Reference to the image as a file path.
        caption_override: When non-empty, this caption is used as-is instead
            of running a captioning pipeline; alt text is still generated
            from the image. ``config_path`` is not consulted on this path.
        config_path: Forwarded to the pipeline helpers when no caption
            override is given.
        mode: ``"double"`` derives metadata from the caption text only;
            ``"triple"`` passes both the image reference and the caption to
            the metadata step.

    Returns:
        With a caption override, a dict with the keys ``alt_text``,
        ``caption`` and ``metadata``; otherwise whatever the selected
        pipeline helper returns.

    Raises:
        ValueError: If neither or both image references are given, or if
            ``mode`` is not ``"double"`` or ``"triple"``.
    """
    if not (image_url or file_path):
        raise ValueError("Provide either image_url or file_path")
    if image_url and file_path:
        raise ValueError("Provide only one of image_url or file_path, not both")
    # Reject unknown modes before doing any work; both branches below may
    # then assume mode is one of the two supported values.
    if mode not in ("double", "triple"):
        raise ValueError("mode must be 'double' or 'triple'")

    source = image_url or file_path  # type: ignore
    schema_file = os.path.join(os.path.dirname(__file__), "metadata", "schema.json")

    if not caption_override:
        # No caption supplied: run the full pipeline for the requested mode.
        pipeline = run_pipeline_double if mode == "double" else run_pipeline_triple
        return pipeline(source, config_path=config_path, schema_path=schema_file)

    if mode == "double":
        # Text-only metadata from the provided caption.
        from cv_mcp.metadata.runner import run_metadata_from_caption
        metadata = run_metadata_from_caption(caption_override, schema_path=schema_file)
    else:
        # Vision + caption metadata.
        metadata = run_structured_json(source, caption_override, schema_path=schema_file)

    alt_text = run_alt_text(source)
    return {"alt_text": alt_text, "caption": caption_override, "metadata": metadata}

src/cv_mcp/mcp_server.py:107-107 (registration)
Registers the image_metadata function as an MCP tool using FastMCP decorator.
```
@mcp.tool()
```

src/cv_mcp/metadata/runner.py:405-420 (helper)

'double' mode pipeline: generates alt_text + dense caption using vision LLM, then text-only structured metadata from caption.

def run_pipeline_double(
    image_ref: str,
    *,
    config_path: Optional[Union[str, Path]] = None,
    schema_path: Union[str, Path] = Path(__file__).with_name("schema.json"),
) -> Dict[str, Any]:
    """Run the 'double' pipeline: alt text + caption, then caption-only metadata.

    Args:
        image_ref: Image reference passed to ``run_alt_and_caption``.
        config_path: Optional path to a UTF-8 JSON file. When given, its
            content replaces the module defaults (``_CFG``) entirely; the two
            are not merged.
        schema_path: Path to the metadata schema, forwarded to
            ``run_metadata_from_caption``.

    Returns:
        A dict with the keys ``alt_text``, ``caption`` and ``metadata``.

    Raises:
        RuntimeError: If the config file cannot be read or parsed, or if its
            top-level JSON value is not an object.
    """
    cfg = dict(_CFG)
    if config_path:
        try:
            cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
        except Exception as e:
            # Chain explicitly so the underlying I/O or parse error is
            # reported as the direct cause.
            raise RuntimeError(f"Failed to read config from {config_path}: {e}") from e
        # Valid JSON that is not an object (e.g. a list) would otherwise fail
        # below with an opaque AttributeError on ``cfg.get``.
        if not isinstance(cfg, dict):
            raise RuntimeError(
                f"Failed to read config from {config_path}: "
                f"expected a JSON object, got {type(cfg).__name__}"
            )
    ac = run_alt_and_caption(image_ref, model=cfg.get("caption_model"))
    meta = run_metadata_from_caption(
        ac["caption"],
        schema_path=schema_path,
        model=cfg.get("metadata_text_model"),
    )
    return {"alt_text": ac["alt_text"], "caption": ac["caption"], "metadata": meta}

src/cv_mcp/metadata/runner.py:422-436 (helper)

'triple' mode pipeline: generates alt_text + dense caption using vision LLM, then vision-based structured metadata using image + caption.

def run_pipeline_triple(
    image_ref: str,
    *,
    config_path: Optional[Union[str, Path]] = None,
    schema_path: Union[str, Path] = Path(__file__).with_name("schema.json"),
) -> Dict[str, Any]:
    """Run the 'triple' pipeline: alt text + caption, then image+caption metadata.

    Args:
        image_ref: Image reference passed to ``run_alt_and_caption`` and
            ``run_structured_json``.
        config_path: Optional path to a UTF-8 JSON file. When given, its
            content replaces the module defaults (``_CFG``) entirely; the two
            are not merged.
        schema_path: Path to the metadata schema, forwarded to
            ``run_structured_json``.

    Returns:
        A dict with the keys ``alt_text``, ``caption`` and ``metadata``.

    Raises:
        RuntimeError: If the config file cannot be read or parsed, or if its
            top-level JSON value is not an object.
    """
    cfg = dict(_CFG)
    if config_path:
        try:
            cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
        except Exception as e:
            # Chain explicitly so the underlying I/O or parse error is
            # reported as the direct cause.
            raise RuntimeError(f"Failed to read config from {config_path}: {e}") from e
        # Valid JSON that is not an object (e.g. a list) would otherwise fail
        # below with an opaque AttributeError on ``cfg.get``.
        if not isinstance(cfg, dict):
            raise RuntimeError(
                f"Failed to read config from {config_path}: "
                f"expected a JSON object, got {type(cfg).__name__}"
            )
    ac = run_alt_and_caption(image_ref, model=cfg.get("caption_model"))
    meta = run_structured_json(
        image_ref,
        ac["caption"],
        schema_path=schema_path,
        model=cfg.get("metadata_vision_model"),
    )
    return {"alt_text": ac["alt_text"], "caption": ac["caption"], "metadata": meta}

src/cv_mcp/metadata/runner.py:252-303 (schema)

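Post-generation normalization of the metadata dict, applied in place: caps array lengths, fills in default `people` fields, derives `tags` from the other fields when they are missing or empty, and drops empty/null fields. The essential keys (media_type, objects, people, tags) are never dropped, but only `people` and `tags` are guaranteed to be present afterwards.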

def _post_validate(data: Dict[str, Any]) -> None:
    # Enforce array caps and build tags if missing
    def _cap(key: str, n: int):
        if isinstance(data.get(key), list) and len(data[key]) > n:
            data[key] = data[key][:n]

    for k, n in ("objects", 6), ("scene", 3), ("lighting", 3), ("style", 5), ("palette", 6), ("tags", 20):
        _cap(k, n)

    # Ensure people fields exist with defaults
    if not isinstance(data.get("people"), dict):
        data["people"] = {"count": 0, "faces_visible": False}
    else:
        data["people"].setdefault("count", 0)
        data["people"].setdefault("faces_visible", False)

    # Compute tags union if missing or empty
    if not isinstance(data.get("tags"), list) or not data.get("tags"):
        def norm_list(v):
            return v if isinstance(v, list) else []
        tags = []
        if isinstance(data.get("media_type"), str):
            tags.append(data["media_type"])
        for k in ("scene", "lighting", "style", "palette", "objects"):
            tags.extend(norm_list(data.get(k)))
        # Deduplicate while preserving order
        seen = set()
        uniq = []
        for t in tags:
            if isinstance(t, str) and t not in seen:
                seen.add(t)
                uniq.append(t)
        data["tags"] = uniq[:20]

    # Always include essential keys; drop empty/null fields for others
    essentials = {"media_type", "objects", "people", "tags"}
    to_delete = []
    for k, v in list(data.items()):
        if k in essentials:
            continue
        if v is None:
            to_delete.append(k)
        elif isinstance(v, list) and len(v) == 0:
            to_delete.append(k)
        elif isinstance(v, dict) and len(v.keys()) == 0:
            to_delete.append(k)
    for k in to_delete:
        try:
            del data[k]
        except Exception:
            pass

Computer Vision MCP Server

image_metadata

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API