Skip to main content
Glama
token_budget.py6.38 kB
"""Token budget models for context window management. Defines Pydantic models for tracking and enforcing token limits. Based on data-model.md entity definitions. """ from pydantic import BaseModel, Field, computed_field class TokenBudget(BaseModel): """Per-request token limit tracking and threshold enforcement. Tracks estimated token usage against configured thresholds and determines when summarization should be triggered. Attributes: threshold: Configured token limit (default: 4000) hard_cap: Absolute maximum token limit (default: 12000) estimated_tokens: Estimated tokens in candidate response """ threshold: int = Field( default=4000, gt=0, description="Configured token limit", ) hard_cap: int = Field( default=12000, gt=0, description="Absolute maximum token limit", ) estimated_tokens: int = Field( ge=0, description="Estimated tokens in response", ) @computed_field @property def budget_used(self) -> float: """Calculate percentage of threshold used (0.0-1.0+). Returns value > 1.0 if over threshold. Returns: Budget usage ratio (estimated_tokens / threshold) """ return self.estimated_tokens / self.threshold @computed_field @property def summary_mode(self) -> bool: """Determine if summarization should be triggered. Returns: True if estimated_tokens exceeds threshold """ return self.estimated_tokens > self.threshold @computed_field @property def exceeds_hard_cap(self) -> bool: """Check if response exceeds hard cap. Returns: True if estimated_tokens exceeds hard_cap """ return self.estimated_tokens > self.hard_cap def model_post_init(self, __context) -> None: """Validate that hard_cap >= threshold.""" if self.hard_cap < self.threshold: raise ValueError("hard_cap must be >= threshold") class BudgetMetadata(BaseModel): """Metadata about token budget enforcement. Captures decision-making details for observability. Attributes: threshold_used: Configured threshold for this request estimated_tokens: Actual estimated token count budget_ratio: Ratio of tokens to threshold action_taken: Action taken (e.g., "paginated", "summarized", "allowed") reason: Human-readable explanation """ threshold_used: int = Field(gt=0, description="Threshold applied") estimated_tokens: int = Field(ge=0, description="Estimated tokens") budget_ratio: float = Field(ge=0.0, description="Usage ratio") action_taken: str = Field(description="Action taken") reason: str = Field(description="Explanation") class TokenEstimationResult(BaseModel): """Result of token estimation for a response. Attributes: text_length: Length of text in characters estimated_tokens: Estimated token count estimation_method: Method used (e.g., "character-based") overhead_tokens: Estimated envelope overhead """ text_length: int = Field(ge=0, description="Text length in chars") estimated_tokens: int = Field(ge=0, description="Estimated tokens") estimation_method: str = Field( default="character-based", description="Estimation method", ) overhead_tokens: int = Field( default=200, ge=0, description="Envelope overhead", ) @computed_field @property def total_tokens(self) -> int: """Calculate total tokens including overhead. Returns: Sum of estimated_tokens and overhead_tokens """ return self.estimated_tokens + self.overhead_tokens class TokenBudgetConfig(BaseModel): """Configuration for token budget enforcement. Supports per-endpoint overrides and global defaults. Attributes: output_token_threshold: Default threshold (default: 4000) hard_output_token_cap: Absolute maximum (default: 12000) default_page_size: Default pagination page size (default: 50) max_page_size: Maximum allowed page size (default: 200) enable_summarization: Enable automatic summarization (default: True) enable_pagination: Enable automatic pagination (default: True) """ output_token_threshold: int = Field( default=4000, gt=0, description="Default token threshold", ) hard_output_token_cap: int = Field( default=12000, gt=0, description="Absolute maximum tokens", ) default_page_size: int = Field( default=50, ge=1, le=200, description="Default page size", ) max_page_size: int = Field( default=200, ge=1, description="Maximum page size", ) enable_summarization: bool = Field( default=True, description="Enable summarization", ) enable_pagination: bool = Field( default=True, description="Enable pagination", ) def model_post_init(self, __context) -> None: """Validate configuration constraints.""" if self.hard_output_token_cap < self.output_token_threshold: raise ValueError("hard_output_token_cap must be >= output_token_threshold") if self.max_page_size < self.default_page_size: raise ValueError("max_page_size must be >= default_page_size") class EndpointBudgetOverride(BaseModel): """Per-endpoint token budget override. Allows customizing token limits for specific endpoints. Attributes: endpoint_pattern: Endpoint pattern (e.g., "/api/v1/listings") threshold: Custom threshold for this endpoint hard_cap: Custom hard cap for this endpoint page_size: Custom default page size summarization_enabled: Enable/disable summarization pagination_enabled: Enable/disable pagination """ endpoint_pattern: str = Field(description="Endpoint pattern") threshold: int | None = Field(default=None, gt=0) hard_cap: int | None = Field(default=None, gt=0) page_size: int | None = Field(default=None, ge=1, le=200) summarization_enabled: bool | None = Field(default=None) pagination_enabled: bool | None = Field(default=None)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/darrentmorgan/hostaway-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server