harbor_cleanup_candidates
Identifies Harbor artifacts eligible for deletion—untagged, never pulled, or old versions—to help reclaim storage space.
Instructions
Suggest which artifacts could be deleted to reclaim space.
READ-ONLY — never deletes anything; just produces a list with
reasons. Use harbor_delete_artifact / harbor_delete_untagged /
harbor_delete_old_artifacts to act on the results.
Reasons emitted:
- untagged — artifact has no tags (orphaned layer)
- never_pulled — artifact has never been pulled (and is past
the keep_latest_per_repo cutoff)
- old_version — artifact is older than the keep_latest_per_repo
newest tagged artifacts
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| project_name | Yes | Harbor project name. | |
| include_untagged | No | Suggest deleting untagged artifacts (orphaned layers). | True |
| include_zero_pulls | No | Suggest deleting artifacts that have never been pulled. | True |
| keep_latest_per_repo | No | How many newest artifacts to always keep per repository. | 1 |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| project | Yes | ||
| candidates_count | Yes | ||
| total_reclaimable | Yes | ||
| total_reclaimable_bytes | Yes | ||
| candidates | Yes | ||
| hint | Yes |
Implementation Reference
- src/harbor_registry_mcp/tools.py:389-495 (registration) — Tool registration via @mcp.tool decorator with name='harbor_cleanup_candidates', annotations (title, readOnlyHint, destructiveHint, idempotentHint, openWorldHint), and structured_output=True
@mcp.tool( name="harbor_cleanup_candidates", annotations={ "title": "Cleanup Candidates", "readOnlyHint": True, "destructiveHint": False, "idempotentHint": True, "openWorldHint": True, }, structured_output=True, ) async def harbor_cleanup_candidates( project_name: Annotated[str, Field(min_length=1, max_length=255, description="Harbor project name.")], ctx: Context, include_untagged: Annotated[ bool, Field(default=True, description="Suggest deleting untagged artifacts (orphaned layers).") ] = True, include_zero_pulls: Annotated[ bool, Field(default=True, description="Suggest deleting artifacts that have never been pulled.") ] = True, keep_latest_per_repo: Annotated[ int, Field(default=1, ge=0, le=100, description="How many newest artifacts to always keep per repository.") ] = 1, ) -> CleanupCandidatesOutput: """Suggest which artifacts could be deleted to reclaim space. **READ-ONLY** — never deletes anything; just produces a list with reasons. Use ``harbor_delete_artifact`` / ``harbor_delete_untagged`` / ``harbor_delete_old_artifacts`` to act on the results. 
Reasons emitted: - ``untagged`` — artifact has no tags (orphaned layer) - ``never_pulled`` — artifact has never been pulled (and is past the ``keep_latest_per_repo`` cutoff) - ``old_version`` — artifact is older than the ``keep_latest_per_repo`` newest tagged artifacts """ try: client = get_client() await _report(ctx, 0.05, f"listing repositories in {project_name}") repos = await asyncio.to_thread(_list_repos, client, project_name) candidates: list[CleanupCandidate] = [] total_reclaimable = 0 for i, repo in enumerate(repos): short_name = repo["name"].replace(f"{project_name}/", "") await _report(ctx, 0.1 + 0.85 * (i / max(len(repos), 1)), f"scanning {short_name}") artifacts_raw = await asyncio.to_thread(_list_artifacts, client, project_name, short_name) sorted_arts = sorted(artifacts_raw, key=lambda a: a.get("push_time") or "", reverse=True) for idx, art in enumerate(sorted_arts): shaped = _shape_artifact(art) reasons: list[str] = [] is_untagged = not shaped["tags"] no_pulls = shaped["pull_time"] is None if include_untagged and is_untagged: reasons.append("untagged") if idx >= keep_latest_per_repo and include_zero_pulls and no_pulls: reasons.append("never_pulled") if idx >= keep_latest_per_repo and not is_untagged and len(sorted_arts) > keep_latest_per_repo: reasons.append("old_version") if reasons: total_reclaimable += shaped["size_bytes"] candidates.append( { "repository": short_name, "tags": shaped["tags"] or [], "digest": shaped["digest"], "size": shaped["size"], "size_bytes": shaped["size_bytes"], "push_time": shaped["push_time"], "reasons": reasons, } ) candidates.sort(key=lambda c: c["size_bytes"], reverse=True) await _report(ctx, 1.0, f"{len(candidates)} candidates found") result: CleanupCandidatesOutput = { "project": project_name, "candidates_count": len(candidates), "total_reclaimable": size_human(total_reclaimable), "total_reclaimable_bytes": total_reclaimable, "candidates": candidates, "hint": ( "Use harbor_delete_artifact for individual artifacts, " 
"harbor_delete_untagged for bulk untagged, or " "harbor_delete_old_artifacts (dry_run=True) for keep-N policies." ), } header = ( f"## Cleanup candidates in {project_name}\n\n" f"Reclaimable: **{size_human(total_reclaimable)}** " f"across {len(candidates)} artifacts\n\n" ) md = header + "\n".join( [ f"- **{c['repository']}** — " f"{','.join(c['tags']) or '(untagged)'} — " f"{c['size']} ({', '.join(c['reasons'])})" for c in candidates[:30] ] ) return output.ok(result, md) # type: ignore[return-value] except Exception as exc: output.fail(exc, f"finding cleanup candidates in {project_name}") - src/harbor_registry_mcp/tools.py:400-495 (handler)Main handler function 'harbor_cleanup_candidates' — lists all repositories in a project, fetches all artifacts per repo, evaluates each artifact against include_untagged/include_zero_pulls/keep_latest_per_repo rules, collects candidates with reasons ('untagged', 'never_pulled', 'old_version'), sorts by size descending, and returns CleanupCandidatesOutput with markdown summary.
async def harbor_cleanup_candidates( project_name: Annotated[str, Field(min_length=1, max_length=255, description="Harbor project name.")], ctx: Context, include_untagged: Annotated[ bool, Field(default=True, description="Suggest deleting untagged artifacts (orphaned layers).") ] = True, include_zero_pulls: Annotated[ bool, Field(default=True, description="Suggest deleting artifacts that have never been pulled.") ] = True, keep_latest_per_repo: Annotated[ int, Field(default=1, ge=0, le=100, description="How many newest artifacts to always keep per repository.") ] = 1, ) -> CleanupCandidatesOutput: """Suggest which artifacts could be deleted to reclaim space. **READ-ONLY** — never deletes anything; just produces a list with reasons. Use ``harbor_delete_artifact`` / ``harbor_delete_untagged`` / ``harbor_delete_old_artifacts`` to act on the results. Reasons emitted: - ``untagged`` — artifact has no tags (orphaned layer) - ``never_pulled`` — artifact has never been pulled (and is past the ``keep_latest_per_repo`` cutoff) - ``old_version`` — artifact is older than the ``keep_latest_per_repo`` newest tagged artifacts """ try: client = get_client() await _report(ctx, 0.05, f"listing repositories in {project_name}") repos = await asyncio.to_thread(_list_repos, client, project_name) candidates: list[CleanupCandidate] = [] total_reclaimable = 0 for i, repo in enumerate(repos): short_name = repo["name"].replace(f"{project_name}/", "") await _report(ctx, 0.1 + 0.85 * (i / max(len(repos), 1)), f"scanning {short_name}") artifacts_raw = await asyncio.to_thread(_list_artifacts, client, project_name, short_name) sorted_arts = sorted(artifacts_raw, key=lambda a: a.get("push_time") or "", reverse=True) for idx, art in enumerate(sorted_arts): shaped = _shape_artifact(art) reasons: list[str] = [] is_untagged = not shaped["tags"] no_pulls = shaped["pull_time"] is None if include_untagged and is_untagged: reasons.append("untagged") if idx >= keep_latest_per_repo and include_zero_pulls 
and no_pulls: reasons.append("never_pulled") if idx >= keep_latest_per_repo and not is_untagged and len(sorted_arts) > keep_latest_per_repo: reasons.append("old_version") if reasons: total_reclaimable += shaped["size_bytes"] candidates.append( { "repository": short_name, "tags": shaped["tags"] or [], "digest": shaped["digest"], "size": shaped["size"], "size_bytes": shaped["size_bytes"], "push_time": shaped["push_time"], "reasons": reasons, } ) candidates.sort(key=lambda c: c["size_bytes"], reverse=True) await _report(ctx, 1.0, f"{len(candidates)} candidates found") result: CleanupCandidatesOutput = { "project": project_name, "candidates_count": len(candidates), "total_reclaimable": size_human(total_reclaimable), "total_reclaimable_bytes": total_reclaimable, "candidates": candidates, "hint": ( "Use harbor_delete_artifact for individual artifacts, " "harbor_delete_untagged for bulk untagged, or " "harbor_delete_old_artifacts (dry_run=True) for keep-N policies." ), } header = ( f"## Cleanup candidates in {project_name}\n\n" f"Reclaimable: **{size_human(total_reclaimable)}** " f"across {len(candidates)} artifacts\n\n" ) md = header + "\n".join( [ f"- **{c['repository']}** — " f"{','.join(c['tags']) or '(untagged)'} — " f"{c['size']} ({', '.join(c['reasons'])})" for c in candidates[:30] ] ) return output.ok(result, md) # type: ignore[return-value] except Exception as exc: output.fail(exc, f"finding cleanup candidates in {project_name}") - TypedDict schemas CleanupCandidate (repository, tags, digest, size, size_bytes, push_time, reasons) and CleanupCandidatesOutput (project, candidates_count, total_reclaimable, total_reclaimable_bytes, candidates, hint)
class CleanupCandidate(TypedDict): repository: str tags: list[str] digest: str size: str size_bytes: int push_time: str | None reasons: list[str] class CleanupCandidatesOutput(TypedDict): project: str candidates_count: int total_reclaimable: str total_reclaimable_bytes: int candidates: list[CleanupCandidate] hint: str - Helper _shape_artifact — converts Harbor's raw artifact JSON into ArtifactSummary, used to extract tags, digest, size, timestamps for candidate evaluation.
def _shape_artifact(a: dict[str, Any]) -> ArtifactSummary: """Convert Harbor's artifact JSON into :class:`ArtifactSummary`. Extracts tag list, scan status / vulnerabilities from the first available scanner in ``scan_overview``. """ tags = [t["name"] for t in (a.get("tags") or []) if t.get("name")] scan_status: str | None = None vulnerabilities: dict[str, int] | None = None overview = a.get("scan_overview") or {} if overview: first = next(iter(overview.values()), {}) scan_status = first.get("scan_status") summary = (first.get("summary") or {}).get("summary") or {} vulnerabilities = summary or None size_bytes = int(a.get("size", 0) or 0) return { "tags": tags or [], "digest": a.get("digest", ""), "size": size_human(size_bytes), "size_bytes": size_bytes, "push_time": _normalize_ts(a.get("push_time")), "pull_time": _normalize_ts(a.get("pull_time")), "scan_status": scan_status, "vulnerabilities": vulnerabilities, } - Helper functions _list_artifacts and _list_repos — paginate through all artifacts/repositories via the Harbor API client's get_all_pages method.
def _list_artifacts(client: Any, project_name: str, repository_name: str) -> list[dict[str, Any]]: """Fetch every artifact for a repository across all pages.""" return client.get_all_pages( f"/projects/{project_name}/repositories/{encode_repo(repository_name)}/artifacts", page_size=100, extra_params={"with_tag": True, "with_scan_overview": True}, ) def _list_repos(client: Any, project_name: str) -> list[dict[str, Any]]: """Fetch every repository for a project across all pages.""" return client.get_all_pages( f"/projects/{project_name}/repositories", page_size=100, )