harbor_cleanup_candidates
Identifies Harbor artifacts eligible for deletion—untagged, never pulled, or old versions—to help reclaim storage space.
Instructions
Suggest which artifacts could be deleted to reclaim space.
READ-ONLY — never deletes anything; just produces a list with
reasons. Use harbor_delete_artifact / harbor_delete_untagged /
harbor_delete_old_artifacts to act on the results.
Reasons emitted:
- untagged — artifact has no tags (orphaned layer)
- never_pulled — artifact has never been pulled (and is past
the keep_latest_per_repo cutoff)
- old_version — artifact is older than the keep_latest_per_repo
newest tagged artifacts
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| project_name | Yes | Harbor project name. | |
| include_untagged | No | Suggest deleting untagged artifacts (orphaned layers). | True |
| include_zero_pulls | No | Suggest deleting artifacts that have never been pulled. | True |
| keep_latest_per_repo | No | How many newest artifacts to always keep per repository. | 1 |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| project | Yes | ||
| candidates_count | Yes | ||
| total_reclaimable | Yes | ||
| total_reclaimable_bytes | Yes | ||
| candidates | Yes | ||
| hint | Yes |
Implementation Reference
- src/harbor_registry_mcp/tools.py:389-495 (registration) — Tool registration via @mcp.tool decorator with name='harbor_cleanup_candidates', annotations (title, readOnlyHint, destructiveHint, idempotentHint, openWorldHint), and structured_output=True
@mcp.tool( name="harbor_cleanup_candidates", annotations={ "title": "Cleanup Candidates", "readOnlyHint": True, "destructiveHint": False, "idempotentHint": True, "openWorldHint": True, }, structured_output=True, ) async def harbor_cleanup_candidates( project_name: Annotated[str, Field(min_length=1, max_length=255, description="Harbor project name.")], ctx: Context, include_untagged: Annotated[ bool, Field(default=True, description="Suggest deleting untagged artifacts (orphaned layers).") ] = True, include_zero_pulls: Annotated[ bool, Field(default=True, description="Suggest deleting artifacts that have never been pulled.") ] = True, keep_latest_per_repo: Annotated[ int, Field(default=1, ge=0, le=100, description="How many newest artifacts to always keep per repository.") ] = 1, ) -> CleanupCandidatesOutput: """Suggest which artifacts could be deleted to reclaim space. **READ-ONLY** — never deletes anything; just produces a list with reasons. Use ``harbor_delete_artifact`` / ``harbor_delete_untagged`` / ``harbor_delete_old_artifacts`` to act on the results. 
Reasons emitted: - ``untagged`` — artifact has no tags (orphaned layer) - ``never_pulled`` — artifact has never been pulled (and is past the ``keep_latest_per_repo`` cutoff) - ``old_version`` — artifact is older than the ``keep_latest_per_repo`` newest tagged artifacts """ try: client = get_client() await _report(ctx, 0.05, f"listing repositories in {project_name}") repos = await asyncio.to_thread(_list_repos, client, project_name) candidates: list[CleanupCandidate] = [] total_reclaimable = 0 for i, repo in enumerate(repos): short_name = repo["name"].replace(f"{project_name}/", "") await _report(ctx, 0.1 + 0.85 * (i / max(len(repos), 1)), f"scanning {short_name}") artifacts_raw = await asyncio.to_thread(_list_artifacts, client, project_name, short_name) sorted_arts = sorted(artifacts_raw, key=lambda a: a.get("push_time") or "", reverse=True) for idx, art in enumerate(sorted_arts): shaped = _shape_artifact(art) reasons: list[str] = [] is_untagged = not shaped["tags"] no_pulls = shaped["pull_time"] is None if include_untagged and is_untagged: reasons.append("untagged") if idx >= keep_latest_per_repo and include_zero_pulls and no_pulls: reasons.append("never_pulled") if idx >= keep_latest_per_repo and not is_untagged and len(sorted_arts) > keep_latest_per_repo: reasons.append("old_version") if reasons: total_reclaimable += shaped["size_bytes"] candidates.append( { "repository": short_name, "tags": shaped["tags"] or [], "digest": shaped["digest"], "size": shaped["size"], "size_bytes": shaped["size_bytes"], "push_time": shaped["push_time"], "reasons": reasons, } ) candidates.sort(key=lambda c: c["size_bytes"], reverse=True) await _report(ctx, 1.0, f"{len(candidates)} candidates found") result: CleanupCandidatesOutput = { "project": project_name, "candidates_count": len(candidates), "total_reclaimable": size_human(total_reclaimable), "total_reclaimable_bytes": total_reclaimable, "candidates": candidates, "hint": ( "Use harbor_delete_artifact for individual artifacts, " 
"harbor_delete_untagged for bulk untagged, or " "harbor_delete_old_artifacts (dry_run=True) for keep-N policies." ), } header = ( f"## Cleanup candidates in {project_name}\n\n" f"Reclaimable: **{size_human(total_reclaimable)}** " f"across {len(candidates)} artifacts\n\n" ) md = header + "\n".join( [ f"- **{c['repository']}** — " f"{','.join(c['tags']) or '(untagged)'} — " f"{c['size']} ({', '.join(c['reasons'])})" for c in candidates[:30] ] ) return output.ok(result, md) # type: ignore[return-value] except Exception as exc: output.fail(exc, f"finding cleanup candidates in {project_name}") - src/harbor_registry_mcp/tools.py:400-495 (handler)Main handler function 'harbor_cleanup_candidates' — lists all repositories in a project, fetches all artifacts per repo, evaluates each artifact against include_untagged/include_zero_pulls/keep_latest_per_repo rules, collects candidates with reasons ('untagged', 'never_pulled', 'old_version'), sorts by size descending, and returns CleanupCandidatesOutput with markdown summary.
async def harbor_cleanup_candidates( project_name: Annotated[str, Field(min_length=1, max_length=255, description="Harbor project name.")], ctx: Context, include_untagged: Annotated[ bool, Field(default=True, description="Suggest deleting untagged artifacts (orphaned layers).") ] = True, include_zero_pulls: Annotated[ bool, Field(default=True, description="Suggest deleting artifacts that have never been pulled.") ] = True, keep_latest_per_repo: Annotated[ int, Field(default=1, ge=0, le=100, description="How many newest artifacts to always keep per repository.") ] = 1, ) -> CleanupCandidatesOutput: """Suggest which artifacts could be deleted to reclaim space. **READ-ONLY** — never deletes anything; just produces a list with reasons. Use ``harbor_delete_artifact`` / ``harbor_delete_untagged`` / ``harbor_delete_old_artifacts`` to act on the results. Reasons emitted: - ``untagged`` — artifact has no tags (orphaned layer) - ``never_pulled`` — artifact has never been pulled (and is past the ``keep_latest_per_repo`` cutoff) - ``old_version`` — artifact is older than the ``keep_latest_per_repo`` newest tagged artifacts """ try: client = get_client() await _report(ctx, 0.05, f"listing repositories in {project_name}") repos = await asyncio.to_thread(_list_repos, client, project_name) candidates: list[CleanupCandidate] = [] total_reclaimable = 0 for i, repo in enumerate(repos): short_name = repo["name"].replace(f"{project_name}/", "") await _report(ctx, 0.1 + 0.85 * (i / max(len(repos), 1)), f"scanning {short_name}") artifacts_raw = await asyncio.to_thread(_list_artifacts, client, project_name, short_name) sorted_arts = sorted(artifacts_raw, key=lambda a: a.get("push_time") or "", reverse=True) for idx, art in enumerate(sorted_arts): shaped = _shape_artifact(art) reasons: list[str] = [] is_untagged = not shaped["tags"] no_pulls = shaped["pull_time"] is None if include_untagged and is_untagged: reasons.append("untagged") if idx >= keep_latest_per_repo and include_zero_pulls 
and no_pulls: reasons.append("never_pulled") if idx >= keep_latest_per_repo and not is_untagged and len(sorted_arts) > keep_latest_per_repo: reasons.append("old_version") if reasons: total_reclaimable += shaped["size_bytes"] candidates.append( { "repository": short_name, "tags": shaped["tags"] or [], "digest": shaped["digest"], "size": shaped["size"], "size_bytes": shaped["size_bytes"], "push_time": shaped["push_time"], "reasons": reasons, } ) candidates.sort(key=lambda c: c["size_bytes"], reverse=True) await _report(ctx, 1.0, f"{len(candidates)} candidates found") result: CleanupCandidatesOutput = { "project": project_name, "candidates_count": len(candidates), "total_reclaimable": size_human(total_reclaimable), "total_reclaimable_bytes": total_reclaimable, "candidates": candidates, "hint": ( "Use harbor_delete_artifact for individual artifacts, " "harbor_delete_untagged for bulk untagged, or " "harbor_delete_old_artifacts (dry_run=True) for keep-N policies." ), } header = ( f"## Cleanup candidates in {project_name}\n\n" f"Reclaimable: **{size_human(total_reclaimable)}** " f"across {len(candidates)} artifacts\n\n" ) md = header + "\n".join( [ f"- **{c['repository']}** — " f"{','.join(c['tags']) or '(untagged)'} — " f"{c['size']} ({', '.join(c['reasons'])})" for c in candidates[:30] ] ) return output.ok(result, md) # type: ignore[return-value] except Exception as exc: output.fail(exc, f"finding cleanup candidates in {project_name}") - TypedDict schemas CleanupCandidate (repository, tags, digest, size, size_bytes, push_time, reasons) and CleanupCandidatesOutput (project, candidates_count, total_reclaimable, total_reclaimable_bytes, candidates, hint)
class CleanupCandidate(TypedDict): repository: str tags: list[str] digest: str size: str size_bytes: int push_time: str | None reasons: list[str] class CleanupCandidatesOutput(TypedDict): project: str candidates_count: int total_reclaimable: str total_reclaimable_bytes: int candidates: list[CleanupCandidate] hint: str - Helper _shape_artifact — converts Harbor's raw artifact JSON into ArtifactSummary, used to extract tags, digest, size, timestamps for candidate evaluation.
def _shape_artifact(a: dict[str, Any]) -> ArtifactSummary: """Convert Harbor's artifact JSON into :class:`ArtifactSummary`. Extracts tag list, scan status / vulnerabilities from the first available scanner in ``scan_overview``. """ tags = [t["name"] for t in (a.get("tags") or []) if t.get("name")] scan_status: str | None = None vulnerabilities: dict[str, int] | None = None overview = a.get("scan_overview") or {} if overview: first = next(iter(overview.values()), {}) scan_status = first.get("scan_status") summary = (first.get("summary") or {}).get("summary") or {} vulnerabilities = summary or None size_bytes = int(a.get("size", 0) or 0) return { "tags": tags or [], "digest": a.get("digest", ""), "size": size_human(size_bytes), "size_bytes": size_bytes, "push_time": _normalize_ts(a.get("push_time")), "pull_time": _normalize_ts(a.get("pull_time")), "scan_status": scan_status, "vulnerabilities": vulnerabilities, } - Helper functions _list_artifacts and _list_repos — paginate through all artifacts/repositories via the Harbor API client's get_all_pages method.
def _list_artifacts(client: Any, project_name: str, repository_name: str) -> list[dict[str, Any]]: """Fetch every artifact for a repository across all pages.""" return client.get_all_pages( f"/projects/{project_name}/repositories/{encode_repo(repository_name)}/artifacts", page_size=100, extra_params={"with_tag": True, "with_scan_overview": True}, ) def _list_repos(client: Any, project_name: str) -> list[dict[str, Any]]: """Fetch every repository for a project across all pages.""" return client.get_all_pages( f"/projects/{project_name}/repositories", page_size=100, )