image_metadata

Extracts structured metadata from an image, given a URL or local file path, by using vision models to generate alt text and a descriptive caption for analysis and organization.

Input Schema

Name              Required  Description                                                   Default
caption_override  No        Caption to use instead of generating one                     null
config_path       No        Path to a JSON config file overriding the default models     null
file_path         No        Local path to the image file                                  null
image_url         No        URL of the image to analyze                                   null
mode              No        "double" (text-only metadata) or "triple" (vision metadata)  double

Input Schema (JSON Schema)

{ "properties": { "caption_override": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Caption Override" }, "config_path": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Config Path" }, "file_path": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "File Path" }, "image_url": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Image Url" }, "mode": { "default": "double", "title": "Mode", "type": "string" } }, "title": "image_metadataArguments", "type": "object" }

Implementation Reference

  • Core handler for the 'image_metadata' tool. Validates inputs, handles caption overrides, dispatches on mode ('double': text-only metadata, 'triple': vision metadata), and delegates to the runner helpers; a client-side invocation sketch follows this list.
    @mcp.tool()
    def image_metadata(
        image_url: Optional[str] = None,
        file_path: Optional[str] = None,
        caption_override: Optional[str] = None,
        config_path: Optional[str] = None,
        mode: str = "double",
    ) -> dict:
        if not image_url and not file_path:
            raise ValueError("Provide either image_url or file_path")
        if image_url and file_path:
            raise ValueError("Provide only one of image_url or file_path, not both")
        image_ref = image_url or file_path  # type: ignore

        if caption_override:
            schema_path = os.path.join(os.path.dirname(__file__), "metadata", "schema.json")
            if mode == "double":
                # Text-only metadata from provided caption
                from cv_mcp.metadata.runner import run_metadata_from_caption

                meta = run_metadata_from_caption(caption_override, schema_path=schema_path)
                alt = run_alt_text(image_ref)
                return {"alt_text": alt, "caption": caption_override, "metadata": meta}
            elif mode == "triple":
                # Vision+caption metadata
                meta = run_structured_json(image_ref, caption_override, schema_path=schema_path)
                alt = run_alt_text(image_ref)
                return {"alt_text": alt, "caption": caption_override, "metadata": meta}
            else:
                raise ValueError("mode must be 'double' or 'triple'")

        if mode == "double":
            return run_pipeline_double(
                image_ref,
                config_path=config_path,
                schema_path=os.path.join(os.path.dirname(__file__), "metadata", "schema.json"),
            )
        elif mode == "triple":
            return run_pipeline_triple(
                image_ref,
                config_path=config_path,
                schema_path=os.path.join(os.path.dirname(__file__), "metadata", "schema.json"),
            )
        else:
            raise ValueError("mode must be 'double' or 'triple'")
  • Registers the image_metadata function as an MCP tool using FastMCP decorator.
    @mcp.tool()
  • 'double' mode pipeline: generates alt_text and a dense caption with a vision LLM, then derives text-only structured metadata from the caption; an example override config is sketched after this list.
    def run_pipeline_double(
        image_ref: str,
        *,
        config_path: Optional[Union[str, Path]] = None,
        schema_path: Union[str, Path] = Path(__file__).with_name("schema.json"),
    ) -> Dict[str, Any]:
        cfg = dict(_CFG)
        if config_path:
            try:
                cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
            except Exception as e:
                raise RuntimeError(f"Failed to read config from {config_path}: {e}")
        ac = run_alt_and_caption(image_ref, model=cfg.get("caption_model"))
        meta = run_metadata_from_caption(ac["caption"], schema_path=schema_path, model=cfg.get("metadata_text_model"))
        return {"alt_text": ac["alt_text"], "caption": ac["caption"], "metadata": meta}
  • 'triple' mode pipeline: generates alt_text and a dense caption with a vision LLM, then vision-based structured metadata from the image plus the caption.
    def run_pipeline_triple(
        image_ref: str,
        *,
        config_path: Optional[Union[str, Path]] = None,
        schema_path: Union[str, Path] = Path(__file__).with_name("schema.json"),
    ) -> Dict[str, Any]:
        cfg = dict(_CFG)
        if config_path:
            try:
                cfg = json.loads(Path(config_path).read_text(encoding="utf-8"))
            except Exception as e:
                raise RuntimeError(f"Failed to read config from {config_path}: {e}")
        ac = run_alt_and_caption(image_ref, model=cfg.get("caption_model"))
        meta = run_structured_json(image_ref, ac["caption"], schema_path=schema_path, model=cfg.get("metadata_vision_model"))
        return {"alt_text": ac["alt_text"], "caption": ac["caption"], "metadata": meta}
  • Post-generation validation that enforces the metadata schema: caps array lengths, ensures required fields (media_type, objects, people, tags), builds tags from the other fields if missing, and drops empty non-essential fields; a small usage example follows this list.
    def _post_validate(data: Dict[str, Any]) -> None:
        # Enforce array caps and build tags if missing
        def _cap(key: str, n: int):
            if isinstance(data.get(key), list) and len(data[key]) > n:
                data[key] = data[key][:n]

        for k, n in ("objects", 6), ("scene", 3), ("lighting", 3), ("style", 5), ("palette", 6), ("tags", 20):
            _cap(k, n)

        # Ensure people fields exist with defaults
        if not isinstance(data.get("people"), dict):
            data["people"] = {"count": 0, "faces_visible": False}
        else:
            data["people"].setdefault("count", 0)
            data["people"].setdefault("faces_visible", False)

        # Compute tags union if missing or empty
        if not isinstance(data.get("tags"), list) or not data.get("tags"):
            def norm_list(v):
                return v if isinstance(v, list) else []

            tags = []
            if isinstance(data.get("media_type"), str):
                tags.append(data["media_type"])
            for k in ("scene", "lighting", "style", "palette", "objects"):
                tags.extend(norm_list(data.get(k)))
            # Deduplicate while preserving order
            seen = set()
            uniq = []
            for t in tags:
                if isinstance(t, str) and t not in seen:
                    seen.add(t)
                    uniq.append(t)
            data["tags"] = uniq[:20]

        # Always include essential keys; drop empty/null fields for others
        essentials = {"media_type", "objects", "people", "tags"}
        to_delete = []
        for k, v in list(data.items()):
            if k in essentials:
                continue
            if v is None:
                to_delete.append(k)
            elif isinstance(v, list) and len(v) == 0:
                to_delete.append(k)
            elif isinstance(v, dict) and len(v.keys()) == 0:
                to_delete.append(k)
        for k in to_delete:
            try:
                del data[k]
            except Exception:
                pass
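For orientation, here is a minimal sketch of invoking the tool from the official MCP Python SDK over stdio; the server launch command (python -m cv_mcp.server) is an assumption and may not match this project's actual entry point.

    # Minimal client sketch using the `mcp` Python SDK over stdio.
    # The server command below is an assumption, not the project's documented entry point.
    import asyncio

    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    async def main() -> None:
        server = StdioServerParameters(command="python", args=["-m", "cv_mcp.server"])  # hypothetical
        async with stdio_client(server) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                result = await session.call_tool(
                    "image_metadata",
                    {"image_url": "https://example.com/photo.jpg", "mode": "double"},
                )
                # The tool returns a dict with alt_text, caption, and metadata.
                print(result.content)

    asyncio.run(main())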
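The pipelines read the config file as plain JSON and, when config_path is given, replace the built-in defaults entirely rather than merging with them. Based on the keys the code looks up (caption_model, metadata_text_model, metadata_vision_model), an override might be written like the sketch below; the model identifiers are placeholders, not project defaults.

    # Hypothetical config override; only the keys the pipelines read are shown,
    # and the model names are placeholders.
    import json
    from pathlib import Path

    config = {
        "caption_model": "gpt-4o-mini",        # run_alt_and_caption
        "metadata_text_model": "gpt-4o-mini",  # run_metadata_from_caption ('double')
        "metadata_vision_model": "gpt-4o",     # run_structured_json ('triple')
    }
    Path("cv_mcp_config.json").write_text(json.dumps(config, indent=2), encoding="utf-8")

    # Pass its path via the tool's config_path argument, e.g.
    # image_metadata(image_url="https://example.com/photo.jpg",
    #                config_path="cv_mcp_config.json", mode="triple")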
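Finally, a small illustration of what _post_validate does to a raw model response: it mutates the dict in place, fills people defaults, builds the tags union, and drops empty non-essential fields. The import path is an assumption; adjust it to wherever the helper actually lives.

    from cv_mcp.metadata.runner import _post_validate  # hypothetical import path

    raw = {
        "media_type": "photo",
        "objects": ["bicycle", "wall"],
        "scene": ["street"],
        "lighting": None,   # null and non-essential -> removed
        "style": [],        # empty and non-essential -> removed
        "people": "two",    # not a dict -> replaced with defaults
        # no "tags" key -> built from media_type, scene, lighting, style, palette, objects
    }
    _post_validate(raw)

    assert raw["people"] == {"count": 0, "faces_visible": False}
    assert raw["tags"] == ["photo", "street", "bicycle", "wall"]
    assert "lighting" not in raw and "style" not in raw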
