pod_create
Provision a GPU pod with hard spend caps and quote freshness validation. Use confirm=false for a dry-run preview or confirm=true to execute.
Instructions
Provision a Prime Intellect GPU pod (or preview the provisioning).
With confirm=False: returns a dry-run preview describing what would happen. With confirm=True: validates spend caps + quote freshness, then provisions.
The server enforces:
quote_token must be fresh (TTL 60s)
hourly_usd ≤ PRIME_MAX_HOURLY_USD
hourly_usd × max_lifetime_hours ≤ PRIME_MAX_TOTAL_USD
estimated total ≤ wallet balance
On success, the pod is recorded in local state.json so pod_check_runaway can warn about overdue pods later.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| quote_token | Yes | Token returned by pod_quote. | |
| name | Yes | Human-readable pod name. | |
| max_lifetime_hours | No | Soft budget cap: hourly_usd × this must fit under PRIME_MAX_TOTAL_USD. | 8 |
| confirm | No | Required True to actually provision. False returns a dry-run preview. | False |
| env_vars | No | Optional env vars to inject into the pod. | None |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- Handler for the pod_create MCP tool. It always validates max_lifetime_hours and inspects the quote token first, so an unknown or expired token is rejected even for a dry run. If confirm=False, it then returns a dry-run PodCreatePreview without provisioning anything. If confirm=True, it fetches the wallet balance on a best-effort basis, checks the spend caps, consumes the quote token, fills name/env_vars into the cached pod config, calls clients.pods.create(), records the tracked pod, and returns the created pod details.
@mcp.tool async def pod_create( quote_token: str = Field(..., description="Token returned by pod_quote."), name: str = Field(..., description="Human-readable pod name."), max_lifetime_hours: int = Field( default=8, description="Soft budget cap: hourly_usd × this must fit under PRIME_MAX_TOTAL_USD.", ), confirm: bool = Field( default=False, description="Required True to actually provision. False returns a dry-run preview.", ), env_vars: dict[str, str] | None = Field( default=None, description="Optional env vars to inject into the pod.", ), ) -> dict[str, Any]: """Provision a Prime Intellect GPU pod (or preview the provisioning). With confirm=False: returns a dry-run preview describing what *would* happen. With confirm=True: validates spend caps + quote freshness, then provisions. The server enforces: - quote_token must be fresh (TTL 60s) - hourly_usd ≤ PRIME_MAX_HOURLY_USD - hourly_usd × max_lifetime_hours ≤ PRIME_MAX_TOTAL_USD - estimated total ≤ wallet balance On success, the pod is recorded in local state.json so pod_check_runaway can warn about overdue pods later. """ if max_lifetime_hours < 1: raise ToolError("max_lifetime_hours must be ≥ 1.") cached = inspect_quote(quote_token) if cached is None: raise ToolError( "Quote token is unknown or expired. Call pod_quote again to get a fresh one." 
) hourly = float(cached["hourly_usd"]) estimated_total = hourly * max_lifetime_hours if not confirm: gpu = cached["gpu_summary"] preview = PodCreatePreview( quote=PodQuote( quote_token=quote_token, expires_at_unix=time.time(), # informational gpu_type=gpu["gpu_type"], gpu_count=gpu["gpu_count"], gpu_memory_gb=gpu["gpu_memory"], provider=gpu.get("provider"), region=gpu.get("data_center"), country=gpu.get("country"), is_spot=gpu.get("is_spot"), stock_status=gpu.get("stock_status"), disk_size_gb=cached["request"]["disk_size_gb"], vcpus=cached["request"]["vcpus"], memory_gb=cached["request"]["memory_gb"], image=cached["request"]["image"], hourly_usd=hourly, wallet_balance_usd=None, ), estimated_total_usd=estimated_total, max_lifetime_hours=max_lifetime_hours, ) return preview.model_dump() # Hard caps + wallet check. wallet_balance: float | None = None with contextlib.suppress(Exception): clients_for_wallet = await get_clients() wallet_balance = float(clients_for_wallet.wallet.get(limit=1).balance_usd) try: check_caps(hourly, max_lifetime_hours, wallet_balance) except SpendCapExceeded as exc: append_audit( "create_blocked", reason=str(exc), hourly_usd=hourly, max_lifetime_hours=max_lifetime_hours, ) raise ToolError(str(exc)) from exc # Consume the token (single-use after this point). try: payload = consume_quote(quote_token) except (QuoteExpired, QuoteUnknown) as exc: raise ToolError(str(exc)) from exc # Fill in the name + env_vars on the cached config. 
pod_config = payload["pod_config"] pod_config["pod"]["name"] = name if env_vars: pod_config["pod"]["envVars"] = env_vars clients = await get_clients() try: pod = clients.pods.create(pod_config) except Exception as e: append_audit("create_failed", error=str(e), hourly_usd=hourly) raise _err(e) from e record_provisioned( TrackedPod( pod_id=pod.id, name=pod.name, hourly_usd=hourly, started_at_unix=time.time(), max_lifetime_hours=max_lifetime_hours, ) ) append_audit( "create_succeeded", pod_id=pod.id, name=pod.name, hourly_usd=hourly, max_lifetime_hours=max_lifetime_hours, ) return pod.model_dump() - Schema for the dry-run preview returned by pod_create when confirm=False.
class PodCreatePreview(BaseModel):
    """Returned by pod_create when confirm=False — the same data plus a clear gate message."""

    # The quote this preview was built from (rebuilt by pod_create from the cached quote).
    quote: PodQuote
    # Defaults to False; pod_create's dry-run path constructs the preview without overriding it.
    will_provision: bool = False
    estimated_total_usd: float = Field(
        ..., description="hourly_usd × max_lifetime_hours (the budget we'd commit)."
    )
    # The max_lifetime_hours value the caller passed to pod_create.
    max_lifetime_hours: int
    # Fixed gate message telling the caller how to move from preview to a real provision.
    message: str = (
        "Dry run only. Re-call pod_create with confirm=True to actually provision."
    )

- Schema for the quote object returned by pod_quote and embedded in the PodCreatePreview returned by pod_create.
class PodQuote(BaseModel):
    """A non-binding price + provisioning preview returned by `pod_quote`.

    The `quote_token` is required by `pod_create` to provision. Tokens expire
    60 seconds after issue so the model cannot act on stale prices.
    """

    quote_token: str = Field(..., description="Pass this back to pod_create(confirm=True) to provision.")
    # NOTE(review): pod_create's dry-run preview fills this with time.time() at
    # preview time ("informational"), not the token's real expiry — confirm intent.
    expires_at_unix: float = Field(..., description="Unix timestamp when this quote stops being valid.")

    # --- GPU offer details ---
    gpu_type: str
    gpu_count: int
    # pod_create populates this from the cached gpu summary's "gpu_memory" key.
    gpu_memory_gb: int
    provider: str | None = None
    region: str | None = None  # data_center
    country: str | None = None
    is_spot: bool | None = None
    stock_status: str | None = None

    # --- Requested machine shape (pod_create reads these from the cached request) ---
    disk_size_gb: int
    vcpus: int
    memory_gb: int
    image: str

    # --- Pricing ---
    hourly_usd: float = Field(..., description="Per-hour cost in USD at current pricing.")
    currency: str = "USD"
    is_variable_price: bool | None = None

    # --- Wallet context (optional; pod_create's dry-run preview passes None for the balance) ---
    wallet_balance_usd: float | None = Field(
        default=None,
        description="Current wallet balance in USD; None if not retrievable.",
    )
    estimated_runway_hours: float | None = Field(
        default=None,
        description="wallet_balance_usd / hourly_usd, if both known.",
    )

    # Static reminder text included with every quote.
    note: str = Field(
        default=(
            "This is a non-binding quote. To provision, call pod_create with this "
            "quote_token and confirm=True. Quote expires in 60 seconds."
        )
    )

- src/prime_intellect_mcp/server.py:287-288 (registration)Registration of pod_create as an MCP tool via the @mcp.tool decorator on the FastMCP server instance.
@mcp.tool async def pod_create( - Helper imports used by pod_create: quote token management (consume_quote, inspect_quote), spend cap enforcement (check_caps, SpendCapExceeded), and local state tracking (TrackedPod, record_provisioned, append_audit).
cheapest_match, make_clients, normalise_ssh, price_per_hour, translate_api_error, ) from .models import PodCreatePreview, PodQuote, RunawayPod, TerminateResult from .quotes import QuoteExpired, QuoteUnknown, consume_quote, inspect_quote, issue_quote