Keboola Explorer MCP Server

list_bucket_info

Retrieve detailed information about all buckets within a Keboola project using the Storage API, enabling efficient management and organization of data resources.
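
Under the hood, the tool reads bucket metadata from the Keboola Storage API. The sketch below shows a roughly equivalent direct call, assuming the requests package is available; the stack hostname and token are placeholders you must supply, and the exact query parameters the server uses may differ.

    # Rough, illustrative equivalent of the Storage API call behind this tool.
    # Replace YOUR_STACK_HOST (e.g. connection.keboola.com) and YOUR_TOKEN with real values.
    import requests

    resp = requests.get(
        'https://YOUR_STACK_HOST/v2/storage/buckets',
        headers={'X-StorageApi-Token': 'YOUR_TOKEN'},
        params={'include': 'metadata'},  # the handler also requests bucket metadata
        timeout=30,
    )
    resp.raise_for_status()
    for bucket in resp.json():
        print(bucket['id'], bucket['stage'])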

Instructions

List information about all buckets in the project.

Input Schema


No arguments
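
Since the tool takes no arguments, an MCP client invokes it with an empty arguments object. A minimal sketch of the JSON-RPC tools/call request, written as a Python dict; the request id and transport framing are illustrative:

    import json

    # Illustrative MCP tools/call request for this tool; the id and framing
    # depend on your client and transport.
    request = {
        'jsonrpc': '2.0',
        'id': 1,
        'method': 'tools/call',
        'params': {
            'name': 'list_bucket_info',
            'arguments': {},  # this tool accepts no arguments
        },
    }
    print(json.dumps(request, indent=2))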

Implementation Reference

  • Handler function that lists all buckets in the project, handling production and development branches, calculating counts by stage, and returning structured BucketDetail objects.
    async def list_buckets(ctx: Context) -> ListBucketsOutput:
        """Retrieves information about all buckets in the project."""
        client = KeboolaClient.from_state(ctx.session.state)
        links_manager = await ProjectLinksManager.from_client(client)
        raw_bucket_data = await client.storage_client.bucket_list(include=['metadata'])

        # group buckets by their ID as it would appear on the production branch
        buckets_by_prod_id: dict[str, list[BucketDetail]] = defaultdict(list)
        for raw in raw_bucket_data:
            bucket = BucketDetail.model_validate(raw)
            if bucket.branch_id and bucket.branch_id != client.branch_id:
                # a dev branch bucket from a different branch
                continue
            buckets_by_prod_id[bucket.prod_id].append(bucket)

        buckets: list[BucketDetail] = []
        for prod_id, group in buckets_by_prod_id.items():
            prod_bucket: BucketDetail | None = None
            dev_buckets: list[BucketDetail] = []
            for b in group:
                if b.branch_id:
                    dev_buckets.append(b)
                else:
                    prod_bucket = b
            if not prod_bucket and not dev_buckets:
                # should not happen
                raise Exception(f'No buckets in the group: prod_id={prod_id}')
            else:
                bucket = await _combine_buckets(client, links_manager, prod_bucket, next(iter(dev_buckets), None))
                buckets.append(bucket.model_copy(update={'links': None}))  # no links when listing buckets

        # Count buckets by stage (only count input, derive output)
        total_count = len(buckets)
        input_count = sum(1 for bucket in buckets if bucket.stage == 'in')
        output_count = total_count - input_count
        bucket_counts = BucketCounts(total_buckets=total_count, input_buckets=input_count, output_buckets=output_count)

        return ListBucketsOutput(
            buckets=buckets,
            bucket_counts=bucket_counts,
            links=[links_manager.get_bucket_dashboard_link()]
        )
  • Output schema defining the structure of the list buckets response: list of BucketDetail, bucket counts, and relevant links.
    class ListBucketsOutput(BaseModel):
        buckets: list[BucketDetail] = Field(..., description='List of buckets.')
        bucket_counts: BucketCounts = Field(..., description='Bucket counts by stage.')
        links: list[Link] = Field(..., description='Links relevant to the bucket listing.')
  • Pydantic model defining detailed bucket information including ID, name, description, stage, size, table count, and links. Its shade_by method references a _sum helper that is not shown here; a sketch follows this list.
    class BucketDetail(BaseModel):
        id: str = Field(description='Unique identifier for the bucket.')
        name: str = Field(description='Name of the bucket.')
        display_name: str = Field(
            description='The display name of the bucket.',
            validation_alias=AliasChoices('displayName', 'display_name', 'display-name'),
            serialization_alias='displayName',
        )
        description: Optional[str] = Field(None, description='Description of the bucket.')
        stage: str = Field(description='Stage of the bucket (in for input stage, out for output stage).')
        created: str = Field(description='Creation timestamp of the bucket.')
        data_size_bytes: Optional[int] = Field(
            None,
            description='Total data size of the bucket in bytes.',
            validation_alias=AliasChoices('dataSizeBytes', 'data_size_bytes', 'data-size-bytes'),
            serialization_alias='dataSizeBytes',
        )
        tables_count: Optional[int] = Field(
            default=None,
            description='Number of tables in the bucket.',
            validation_alias=AliasChoices('tablesCount', 'tables_count', 'tables-count'),
            serialization_alias='tablesCount',
        )
        links: Optional[list[Link]] = Field(default=None, description='The links relevant to the bucket.')
        source_project: str | None = Field(
            default=None, description='The source Keboola project of the linked bucket, None otherwise.'
        )

        # these are internal fields not meant to be exposed to LLMs
        branch_id: Optional[str] = Field(
            default=None, exclude=True, description='The ID of the branch the bucket belongs to.'
        )
        prod_id: str = Field(default='', exclude=True, description='The ID of the production branch bucket.')
        # TODO: add prod_name too to strip the '{branch_id}-' prefix from the name

        def shade_by(
            self, other: 'BucketDetail', branch_id: str | None, links: list[Link] | None = None
        ) -> 'BucketDetail':
            if self.branch_id:
                raise ValueError(
                    f'Dev branch buckets cannot be shaded: '
                    f'bucket.id={self.id}, bucket.branch_id={self.branch_id}'
                )
            if not other.branch_id:
                raise ValueError(
                    f'Prod branch buckets cannot shade others: '
                    f'bucket.id={other.id}, bucket.branch_id={other.branch_id}'
                )
            if other.branch_id != branch_id:
                raise ValueError(
                    f'Dev branch mismatch: '
                    f'bucket.id={other.id}, bucket.branch_id={other.branch_id}, branch_id={branch_id}'
                )
            if other.prod_id != self.id:
                raise ValueError(f'Prod and dev buckets mismatch: prod_bucket.id={self.id}, dev_bucket.id={other.id}')

            changes: dict[str, int | None | list[Link] | str] = {
                # TODO: The name and display_name of a branch bucket typically contains the branch ID
                #   and we may not want to show that.
                # 'name': other.name,
                # 'display_name': other.display_name,
                # 'description': other.description,
                # TODO: These bytes and counts are approximated by summing the values of the two buckets.
                'data_size_bytes': _sum(self.data_size_bytes, other.data_size_bytes),
                'tables_count': _sum(self.tables_count, other.tables_count),
            }
            if links:
                changes['links'] = links
            return self.model_copy(update=changes)

        @model_validator(mode='before')
        @classmethod
        def set_table_count(cls, values: dict[str, Any]) -> dict[str, Any]:
            if isinstance(values.get('tables'), list):
                values['tables_count'] = len(values['tables'])
            else:
                values['tables_count'] = None
            return values

        @model_validator(mode='before')
        @classmethod
        def set_description(cls, values: dict[str, Any]) -> dict[str, Any]:
            values['description'] = _extract_description(values)
            return values

        @model_validator(mode='before')
        @classmethod
        def set_branch_id(cls, values: dict[str, Any]) -> dict[str, Any]:
            branch_id = get_metadata_property(values.get('metadata', []), MetadataField.FAKE_DEVELOPMENT_BRANCH)
            if branch_id:
                values['branch_id'] = branch_id
                values['prod_id'] = values['id'].replace(f'c-{branch_id}-', 'c-')
            else:
                values['branch_id'] = None
                values['prod_id'] = values['id']
            return values

        @model_validator(mode='before')
        @classmethod
        def set_source_project(cls, values: dict[str, Any]) -> dict[str, Any]:
            if source_project_raw := cast(dict[str, Any], get_nested(values, 'sourceBucket.project')):
                values['source_project'] = f'{source_project_raw["name"]} (ID: {source_project_raw["id"]})'
            return values
  • Registration of the list_buckets tool with the MCP server, including a read-only hint, a custom serializer, and the storage tag.
    mcp.add_tool(
        FunctionTool.from_function(
            list_buckets,
            annotations=ToolAnnotations(readOnlyHint=True),
            serializer=toon_serializer,
            tags={STORAGE_TOOLS_TAG},
        )
    )
  • Helper function to find production and development branch buckets for a given bucket ID.
    async def _find_buckets(client: KeboolaClient, bucket_id: str) -> tuple[BucketDetail | None, BucketDetail | None]:
        prod_bucket: BucketDetail | None = None
        dev_bucket: BucketDetail | None = None

        if raw := await _get_bucket_detail(client.storage_client, bucket_id):
            bucket = BucketDetail.model_validate(raw)
            if not bucket.branch_id:
                prod_bucket = bucket
            elif bucket.branch_id == client.branch_id:
                dev_bucket = bucket

        if client.branch_id:
            if not dev_bucket:
                dev_id = bucket_id.replace('c-', f'c-{client.branch_id}-')
                if raw := await _get_bucket_detail(client.storage_client, dev_id):
                    bucket = BucketDetail.model_validate(raw)
                    if bucket.branch_id == client.branch_id:
                        dev_bucket = bucket
            if not prod_bucket and f'.c-{client.branch_id}-' in bucket_id:
                prod_id = bucket_id.replace(f'c-{client.branch_id}-', 'c-')
                if raw := await _get_bucket_detail(client.storage_client, prod_id):
                    bucket = BucketDetail.model_validate(raw)
                    if not bucket.branch_id:
                        prod_bucket = bucket

        return prod_bucket, dev_bucket
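
The shade_by method above relies on a _sum helper that is not reproduced on this page. A minimal sketch, assuming it performs None-tolerant addition to approximate the combined data size and table count; the project's actual implementation may differ:

    from typing import Optional

    def _sum(a: Optional[int], b: Optional[int]) -> Optional[int]:
        # Assumed behavior: keep the result missing if both inputs are missing,
        # otherwise treat a missing value as 0 and add.
        if a is None and b is None:
            return None
        return (a or 0) + (b or 0)

Under this assumption, shading a production bucket holding 1000 bytes with a dev-branch bucket holding 250 bytes reports 1250 bytes, while a pair of missing values stays missing.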

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/keboola/keboola-mcp-server'
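
The same lookup in Python, assuming the requests package; the response schema is not documented on this page, so the example simply prints the raw JSON:

    import json

    import requests

    resp = requests.get(
        'https://glama.ai/api/mcp/v1/servers/keboola/keboola-mcp-server',
        timeout=30,
    )
    resp.raise_for_status()
    print(json.dumps(resp.json(), indent=2))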

If you have feedback or need assistance with the MCP directory API, please join our Discord server.