Keboola Explorer MCP Server

list_bucket_info

Retrieve detailed information about all buckets within a Keboola project using the Storage API, enabling efficient management and organization of data resources.
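
Under the hood, the tool reads bucket metadata from the Keboola Storage API. The sketch below shows a roughly equivalent direct call, assuming the requests package is available; the stack hostname and token are placeholders you must supply, and the exact query parameters the server uses may differ.

    # Rough, illustrative equivalent of the Storage API call behind this tool.
    # Replace YOUR_STACK_HOST (e.g. connection.keboola.com) and YOUR_TOKEN with real values.
    import requests

    resp = requests.get(
        'https://YOUR_STACK_HOST/v2/storage/buckets',
        headers={'X-StorageApi-Token': 'YOUR_TOKEN'},
        params={'include': 'metadata'},  # the handler also requests bucket metadata
        timeout=30,
    )
    resp.raise_for_status()
    for bucket in resp.json():
        print(bucket['id'], bucket['stage'])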

Instructions

List information about all buckets in the project.

Input Schema


No arguments
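
Since the tool takes no arguments, an MCP client invokes it with an empty arguments object. A minimal sketch of the JSON-RPC tools/call request, written as a Python dict; the request id and transport framing are illustrative:

    import json

    # Illustrative MCP tools/call request for this tool; the id and framing
    # depend on your client and transport.
    request = {
        'jsonrpc': '2.0',
        'id': 1,
        'method': 'tools/call',
        'params': {
            'name': 'list_bucket_info',
            'arguments': {},  # this tool accepts no arguments
        },
    }
    print(json.dumps(request, indent=2))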

Implementation Reference

  • Handler function that lists all buckets in the project, handling production and development branches, calculating counts by stage, and returning structured BucketDetail objects.
    async def list_buckets(ctx: Context) -> ListBucketsOutput:
        """Retrieves information about all buckets in the project."""
        client = KeboolaClient.from_state(ctx.session.state)
        links_manager = await ProjectLinksManager.from_client(client)
        raw_bucket_data = await client.storage_client.bucket_list(include=['metadata'])

        # group buckets by their ID as it would appear on the production branch
        buckets_by_prod_id: dict[str, list[BucketDetail]] = defaultdict(list)
        for raw in raw_bucket_data:
            bucket = BucketDetail.model_validate(raw)
            if bucket.branch_id and bucket.branch_id != client.branch_id:
                # a dev branch bucket from a different branch
                continue
            buckets_by_prod_id[bucket.prod_id].append(bucket)

        buckets: list[BucketDetail] = []
        for prod_id, group in buckets_by_prod_id.items():
            prod_bucket: BucketDetail | None = None
            dev_buckets: list[BucketDetail] = []
            for b in group:
                if b.branch_id:
                    dev_buckets.append(b)
                else:
                    prod_bucket = b
            if not prod_bucket and not dev_buckets:
                # should not happen
                raise Exception(f'No buckets in the group: prod_id={prod_id}')
            else:
                bucket = await _combine_buckets(client, links_manager, prod_bucket, next(iter(dev_buckets), None))
                buckets.append(bucket.model_copy(update={'links': None}))  # no links when listing buckets

        # Count buckets by stage (only count input, derive output)
        total_count = len(buckets)
        input_count = sum(1 for bucket in buckets if bucket.stage == 'in')
        output_count = total_count - input_count
        bucket_counts = BucketCounts(total_buckets=total_count, input_buckets=input_count, output_buckets=output_count)

        return ListBucketsOutput(
            buckets=buckets,
            bucket_counts=bucket_counts,
            links=[links_manager.get_bucket_dashboard_link()]
        )
  • Output schema defining the structure of the list buckets response: list of BucketDetail, bucket counts, and relevant links.
    class ListBucketsOutput(BaseModel):
        buckets: list[BucketDetail] = Field(..., description='List of buckets.')
        bucket_counts: BucketCounts = Field(..., description='Bucket counts by stage.')
        links: list[Link] = Field(..., description='Links relevant to the bucket listing.')
  • Pydantic model defining detailed bucket information including ID, name, description, stage, size, table count, and links. Its shade_by method references a _sum helper that is not shown here; a sketch follows this list.
    class BucketDetail(BaseModel):
        id: str = Field(description='Unique identifier for the bucket.')
        name: str = Field(description='Name of the bucket.')
        display_name: str = Field(
            description='The display name of the bucket.',
            validation_alias=AliasChoices('displayName', 'display_name', 'display-name'),
            serialization_alias='displayName',
        )
        description: Optional[str] = Field(None, description='Description of the bucket.')
        stage: str = Field(description='Stage of the bucket (in for input stage, out for output stage).')
        created: str = Field(description='Creation timestamp of the bucket.')
        data_size_bytes: Optional[int] = Field(
            None,
            description='Total data size of the bucket in bytes.',
            validation_alias=AliasChoices('dataSizeBytes', 'data_size_bytes', 'data-size-bytes'),
            serialization_alias='dataSizeBytes',
        )
        tables_count: Optional[int] = Field(
            default=None,
            description='Number of tables in the bucket.',
            validation_alias=AliasChoices('tablesCount', 'tables_count', 'tables-count'),
            serialization_alias='tablesCount',
        )
        links: Optional[list[Link]] = Field(default=None, description='The links relevant to the bucket.')
        source_project: str | None = Field(
            default=None, description='The source Keboola project of the linked bucket, None otherwise.'
        )

        # these are internal fields not meant to be exposed to LLMs
        branch_id: Optional[str] = Field(
            default=None, exclude=True, description='The ID of the branch the bucket belongs to.'
        )
        prod_id: str = Field(default='', exclude=True, description='The ID of the production branch bucket.')
        # TODO: add prod_name too to strip the '{branch_id}-' prefix from the name

        def shade_by(
            self, other: 'BucketDetail', branch_id: str | None, links: list[Link] | None = None
        ) -> 'BucketDetail':
            if self.branch_id:
                raise ValueError(
                    f'Dev branch buckets cannot be shaded: '
                    f'bucket.id={self.id}, bucket.branch_id={self.branch_id}'
                )
            if not other.branch_id:
                raise ValueError(
                    f'Prod branch buckets cannot shade others: '
                    f'bucket.id={other.id}, bucket.branch_id={other.branch_id}'
                )
            if other.branch_id != branch_id:
                raise ValueError(
                    f'Dev branch mismatch: '
                    f'bucket.id={other.id}, bucket.branch_id={other.branch_id}, branch_id={branch_id}'
                )
            if other.prod_id != self.id:
                raise ValueError(f'Prod and dev buckets mismatch: prod_bucket.id={self.id}, dev_bucket.id={other.id}')

            changes: dict[str, int | None | list[Link] | str] = {
                # TODO: The name and display_name of a branch bucket typically contains the branch ID
                #   and we may not want to show that.
                # 'name': other.name,
                # 'display_name': other.display_name,
                # 'description': other.description,
                # TODO: These bytes and counts are approximated by summing the values of the two buckets.
                'data_size_bytes': _sum(self.data_size_bytes, other.data_size_bytes),
                'tables_count': _sum(self.tables_count, other.tables_count),
            }
            if links:
                changes['links'] = links
            return self.model_copy(update=changes)

        @model_validator(mode='before')
        @classmethod
        def set_table_count(cls, values: dict[str, Any]) -> dict[str, Any]:
            if isinstance(values.get('tables'), list):
                values['tables_count'] = len(values['tables'])
            else:
                values['tables_count'] = None
            return values

        @model_validator(mode='before')
        @classmethod
        def set_description(cls, values: dict[str, Any]) -> dict[str, Any]:
            values['description'] = _extract_description(values)
            return values

        @model_validator(mode='before')
        @classmethod
        def set_branch_id(cls, values: dict[str, Any]) -> dict[str, Any]:
            branch_id = get_metadata_property(values.get('metadata', []), MetadataField.FAKE_DEVELOPMENT_BRANCH)
            if branch_id:
                values['branch_id'] = branch_id
                values['prod_id'] = values['id'].replace(f'c-{branch_id}-', 'c-')
            else:
                values['branch_id'] = None
                values['prod_id'] = values['id']
            return values

        @model_validator(mode='before')
        @classmethod
        def set_source_project(cls, values: dict[str, Any]) -> dict[str, Any]:
            if source_project_raw := cast(dict[str, Any], get_nested(values, 'sourceBucket.project')):
                values['source_project'] = f'{source_project_raw["name"]} (ID: {source_project_raw["id"]})'
            return values
  • Registration of the list_buckets tool with the MCP server, including a read-only hint, a custom serializer, and the storage tag.
    mcp.add_tool(
        FunctionTool.from_function(
            list_buckets,
            annotations=ToolAnnotations(readOnlyHint=True),
            serializer=toon_serializer,
            tags={STORAGE_TOOLS_TAG},
        )
    )
  • Helper function to find production and development branch buckets for a given bucket ID.
    async def _find_buckets(client: KeboolaClient, bucket_id: str) -> tuple[BucketDetail | None, BucketDetail | None]:
        prod_bucket: BucketDetail | None = None
        dev_bucket: BucketDetail | None = None

        if raw := await _get_bucket_detail(client.storage_client, bucket_id):
            bucket = BucketDetail.model_validate(raw)
            if not bucket.branch_id:
                prod_bucket = bucket
            elif bucket.branch_id == client.branch_id:
                dev_bucket = bucket

        if client.branch_id:
            if not dev_bucket:
                dev_id = bucket_id.replace('c-', f'c-{client.branch_id}-')
                if raw := await _get_bucket_detail(client.storage_client, dev_id):
                    bucket = BucketDetail.model_validate(raw)
                    if bucket.branch_id == client.branch_id:
                        dev_bucket = bucket
            if not prod_bucket and f'.c-{client.branch_id}-' in bucket_id:
                prod_id = bucket_id.replace(f'c-{client.branch_id}-', 'c-')
                if raw := await _get_bucket_detail(client.storage_client, prod_id):
                    bucket = BucketDetail.model_validate(raw)
                    if not bucket.branch_id:
                        prod_bucket = bucket

        return prod_bucket, dev_bucket
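
The shade_by method above relies on a _sum helper that is not reproduced on this page. A minimal sketch, assuming it performs None-tolerant addition to approximate the combined data size and table count; the project's actual implementation may differ:

    from typing import Optional

    def _sum(a: Optional[int], b: Optional[int]) -> Optional[int]:
        # Assumed behavior: keep the result missing if both inputs are missing,
        # otherwise treat a missing value as 0 and add.
        if a is None and b is None:
            return None
        return (a or 0) + (b or 0)

Under this assumption, shading a production bucket holding 1000 bytes with a dev-branch bucket holding 250 bytes reports 1250 bytes, while a pair of missing values stays missing.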

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keboola/keboola-mcp-server'
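
The same lookup in Python, assuming the requests package; the response schema is not documented on this page, so the example simply prints the raw JSON:

    import json

    import requests

    resp = requests.get(
        'https://glama.ai/api/mcp/v1/servers/keboola/keboola-mcp-server',
        timeout=30,
    )
    resp.raise_for_status()
    print(json.dumps(resp.json(), indent=2))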

If you have feedback or need assistance with the MCP directory API, please join our Discord server.