Data Intelligence MCP Server

metadata_enrichment.py•14.6 KiB

# Copyright [2025] [IBM]
# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# See the LICENSE file in the project root for license information.
"""Pydantic models and URL constants for the metadata enrichment (MDE) tools.

The module defines three groups of names:

* URL constants and ``${placeholder}`` URL templates built from the base URLs
  exposed by ``tool_helper_service``.
* Request models (``MetadataEnrichment*Request``) that take human-readable
  project / dataset / category names.
* Payload and response models describing a metadata enrichment asset, its
  objectives, sampling, data-quality options, and operation results.

NOTE: class-level notes below are ``#`` comments rather than docstrings on
purpose. Pydantic copies a model's docstring into the ``description`` of its
JSON schema, so adding docstrings would change the schemas these models emit.
"""

from enum import Enum
from typing import Any, Optional

from pydantic import BaseModel, Field

from app.shared.utils.tool_helper_service import tool_helper_service

BASE_URL = str(tool_helper_service.base_url)
UI_BASE_URL = str(tool_helper_service.ui_base_url)

# Service endpoints. The ``${name}`` markers are placeholders that callers must
# substitute before use (presumably via ``string.Template`` -- confirm at the
# call sites).
METADATA_ENRICHMENT_SERVICE_URL = BASE_URL + "/metadata_enrichment/v3"
MDE_START_SELECTIVE_ASSETS_TEMPLATE = (
    METADATA_ENRICHMENT_SERVICE_URL
    + "/metadata_enrichment_assets/${mde_id}/start_selective_enrichment"
)
CAMS_ASSET_BASE_URL = BASE_URL + "/v2/assets"
JOBS_BASE_URL = BASE_URL + "/v2/jobs"

# UI deep links.
MDE_UI_DISPLAY_URL = UI_BASE_URL + "/gov/metadata-enrichments/display"
MDE_UI_URL_TEMPLATE = (
    MDE_UI_DISPLAY_URL
    + "/${mde_id}/structured/columns?project_id=${project_id}&context=df"
)


# Request to create a metadata enrichment asset in a project.
class MetadataEnrichmentCreationRequest(BaseModel):
    project_name: str = Field(
        ...,
        description="The name of the project you want to create a metadata enrichment asset into.",
    )
    metadata_enrichment_name: str = Field(
        ...,
        description="The name of the asset you want to create a metadata enrichment asset for",
    )
    category_names: list[str] | str = Field(
        ...,
        description=(
            "Names of the categories for which data quality analysis is required. "
            "If a single category name is provided, it should be a string. "
            "If multiple categories are specified, they should be provided as a list of strings."
        ),
    )
    objective_names: list[str] | str = Field(
        ...,
        # The closing quote after 'semantic_expansion' was missing originally.
        description=(
            "List of names of objectives used in the enrichment job. "
            "Supported objectives are 'profile', 'dq_gen_constraints', "
            "'analyze_quality', and 'semantic_expansion'."
        ),
    )
    # Defaults to None so the field is genuinely optional; without a default
    # pydantic treats an ``Optional[...]`` field as required. This matches
    # MetadataEnrichmentExecutionRequest.dataset_names.
    dataset_names: Optional[list[str] | str] = Field(
        None,
        description="Dataset names of target datasets to be enriched with metadata.",
    )


# Request to execute an existing metadata enrichment, optionally restricted to
# specific datasets.
class MetadataEnrichmentExecutionRequest(BaseModel):
    project_name: str = Field(
        ...,
        description="The name of the project you want to execute a metadata enrichment.",
    )
    metadata_enrichment_name: str = Field(
        ...,
        description="The name of the metadata enrichment you want to execute.",
    )
    dataset_names: Optional[list[str] | str] = Field(
        None,
        description="Dataset names of the specified datasets to be enriched with metadata.",
    )


# Request to run an analysis over the given datasets and categories.
class MetadataEnrichmentAnalysisRequest(BaseModel):
    project_name: str = Field(
        ...,
        description="The name of the project for which the analysis is to be performed.",
    )
    dataset_names: list[str] | str = Field(
        ...,
        description="Dataset names of the specified datasets to be enriched with metadata.",
    )
    category_names: list[str] | str = Field(
        ...,
        description=(
            "Names of the categories for which data quality analysis is required. "
            "If a single category name is provided, it should be a string. "
            "If multiple categories are specified, they should be provided as a list of strings."
        ),
    )


# Objective names; the values match the names listed in
# MetadataEnrichmentCreationRequest.objective_names.
class MetadataEnrichmentObjective(str, Enum):
    PROFILE = "profile"
    DQ_GEN_CONSTRAINTS = "dq_gen_constraints"
    ANALYZE_QUALITY = "analyze_quality"
    SEMANTIC_EXPANSION = "semantic_expansion"


# Job descriptor embedded in a MetadataEnrichmentAsset.
class MetadataEnrichmentAssetEnrichmentJob(BaseModel):
    name: str = Field(description="The name of the metadata enrichment job.")


# Containers whose assets are added to a data scope.
class ContainerAssets(BaseModel):
    metadata_imports: Optional[list[str]] = Field(
        None,
        description=(
            "A list of metadata import asset identifiers to add to a new "
            "metadata enrichment asset."
        ),
    )


# Data scope of a metadata enrichment asset: explicit assets and/or containers.
class MetadataEnrichmentAssetDataScope(BaseModel):
    enrichment_assets: Optional[list[str]] = Field(
        None,
        description="A list of data asset identifiers to add to a new Metadata Enrichment Asset.",
    )
    container_assets: Optional[ContainerAssets] = Field(
        None,
        description=(
            "A set of containers containing assets. "
            "Currently, only containers of type metadata import asset are supported."
        ),
    )


# Per-feature on/off switches for structured data; every flag defaults to False.
class EnrichmentOptionsStructured(BaseModel):
    profile: bool = Field(
        False,
        description="Flag that indicates whether data profiling should be executed.",
    )
    assign_terms: bool = Field(
        False,
        description="Flag that indicates whether term assignment should be executed.",
    )
    analyze_quality: bool = Field(
        False,
        description="Flag that indicates whether data quality analysis should be executed.",
    )
    analyze_relationships: bool = Field(
        False,
        description="Flag that indicates whether primary key analysis should be executed.",
    )
    semantic_expansion: bool = Field(
        False,
        description="Flag that indicates whether semantic expansion should be executed.",
    )
    data_search: bool = Field(
        False,
        description="Flag that indicates whether data search should be executed.",
    )
    dq_sla_assessment: bool = Field(
        False,
        description=(
            "Flag that indicates whether service level agreement assessments "
            "should be executed."
        ),
    )
    dq_gen_constraints: bool = Field(
        False,
        description=(
            "Flag that indicates whether data quality constraints should be "
            "generated or not."
        ),
    )


# Wrapper holding the structured-data enrichment options.
class EnrichmentOptions(BaseModel):
    structured: EnrichmentOptionsStructured = Field(
        EnrichmentOptionsStructured(),
        description="Enrichment options for structured data.",
    )


# Allowed ``type`` values of a governance scope entry.
class GovernanceScopeCategoryTypeEnum(str, Enum):
    CATEGORY = "category"


# A category referenced from a metadata enrichment asset's governance scope.
class GovernanceScopeCategory(BaseModel):
    id: str = Field(description="Identifier of the category.")
    type: GovernanceScopeCategoryTypeEnum = Field(
        GovernanceScopeCategoryTypeEnum.CATEGORY,
        description="A category used in a metadata enrichment asset's governance scope.",
    )


# How rows are picked for sampling.
class SamplingMethodEnum(str, Enum):
    RANDOM = "random"
    TOP = "top"


# Whether the sample size is a fixed row count or a percentage.
class SamplingAnalysisMethodEnum(str, Enum):
    FIXED = "fixed"
    PERCENTAGE = "percentage"


# Sample-size options for the "fixed" analysis method.
class SamplingStructuredSampleSizeOptions(BaseModel):
    # NOTE(review): the description mentions a "missing" value, but the field
    # has no default and is therefore required by this model.
    row_number: int = Field(
        description=(
            "The maximum number of rows to profile. "
            "A missing or zero value indicates that the full set of rows must be profiled."
        )
    )
    classify_value_number: int = Field(
        100,
        description=(
            "The maximum size of the various distributions produced by the "
            "profiling process. A zero value is mapped to the default value."
        ),
    )


# Sample-size options for the "percentage" analysis method.
class SamplingStructuredSampleSizePercentageOptions(BaseModel):
    decimal_value: float = Field(
        description="The sample percentage expressed as decimal value."
    )
    row_number_min: int = Field(description="The minimum number of rows to profile.")
    row_number_max: int = Field(
        description=(
            "The maximum number of rows to profile. "
            "A missing or zero value indicates that the full set of rows must be profiled."
        )
    )
    classify_value_number: int = Field(
        100,
        description=(
            "The maximum size of the various distributions produced by the "
            "profiling process."
        ),
    )


# Sample-size configuration; which options object applies depends on the
# analysis method (see the field descriptions).
class SamplingStructuredSampleSize(BaseModel):
    name: Optional[str] = Field(
        None,
        description="An optional name for the sample size configuration.",
    )
    options: Optional[SamplingStructuredSampleSizeOptions] = Field(
        None,
        description=(
            "Sample size options for structured data assets of a metadata "
            "enrichment asset. Required if sampling method is 'fixed'."
        ),
    )
    percentage_options: Optional[SamplingStructuredSampleSizePercentageOptions] = Field(
        None,
        description=(
            "Initial sample size percentage options for structured data assets "
            "in a metadata enrichment asset. Required if sampling method is 'percentage'."
        ),
    )


# Sampling settings for structured data assets.
class SamplingStructured(BaseModel):
    method: SamplingMethodEnum = Field(description="The sampling method.")
    analysis_method: SamplingAnalysisMethodEnum = Field(
        SamplingAnalysisMethodEnum.FIXED,
        description="The sampling analysis method.",
    )
    sample_size: SamplingStructuredSampleSize = Field(
        description=(
            "Initial metadata enrichment asset sample size for structured data assets."
        )
    )


# Sampling wrapper. The default is "top" sampling with a fixed sample of
# 1000 rows and 100 classified values.
class Sampling(BaseModel):
    structured: SamplingStructured = Field(
        SamplingStructured(
            method=SamplingMethodEnum.TOP,
            analysis_method=SamplingAnalysisMethodEnum.FIXED,
            sample_size=SamplingStructuredSampleSize(
                options=SamplingStructuredSampleSizeOptions(
                    row_number=1000,
                    classify_value_number=100,
                )
            ),
        ),
        description=(
            "Initialization information for metadata enrichment asset sampling "
            "options for structured data assets."
        ),
    )


# Data scope used by job reruns after the initial full enrichment.
class DatascopeOfRerunsEnum(str, Enum):
    ALL = "all"
    DELTA = "delta"


# A suggested data quality check and whether it is enabled.
class SuggestedDataQualityCheck(BaseModel):
    id: str = Field(..., description="The id of the suggested data quality check.")
    enabled: bool = Field(
        ...,
        description="The flag whether the suggested data quality check is enabled or not.",
    )


# Sources on which data quality check suggestions are based; all flags required.
class QualityOrigins(BaseModel):
    profiling: bool = Field(
        ...,
        description="Flag that indicates whether data profiling should be executed.",
    )
    business_terms: bool = Field(
        ...,
        description=(
            "Flag that indicates whether business terms should be used for "
            "data quality checks."
        ),
    )
    relationships: bool = Field(
        ...,
        description=(
            "Flag that indicates whether relationships should be used for "
            "data quality checks."
        ),
    )


# Data quality settings for structured data assets.
class DataQualityStructured(BaseModel):
    dq_checks_suggested: list[SuggestedDataQualityCheck] = Field(
        [],
        description=(
            "List of suggested Data Quality Checks. "
            "Each DQCheck consists of 2 fields id and a flag whether it is enabled or not."
        ),
    )
    quality_origins: QualityOrigins = Field(
        QualityOrigins(profiling=True, business_terms=False, relationships=False),
        description=(
            "Options that allow to define on which sources suggestions for "
            "data quality checks should be based."
        ),
    )


# Wrapper holding the structured-data quality settings.
class DataQuality(BaseModel):
    structured: DataQualityStructured = Field(
        DataQualityStructured(),
        description=(
            "Initialization information for the data quality objectives for "
            "structured data assets in a metadata enrichment."
        ),
    )


# Objectives of a metadata enrichment asset; every field has a default.
class MetadataEnrichmentAssetObjective(BaseModel):
    enrichment_options: EnrichmentOptions = Field(
        EnrichmentOptions(),
        description="Enrichment options of metadata enrichment asset.",
    )
    governance_scope: list[GovernanceScopeCategory] = Field(
        [],
        description="A list of categories to be used for metadata enrichment.",
    )
    sampling: Sampling = Field(
        Sampling(),
        description=(
            "Initialization information for the metadata enrichment asset "
            "sampling options."
        ),
    )
    datascope_of_reruns: DatascopeOfRerunsEnum = Field(
        DatascopeOfRerunsEnum.ALL,
        description=(
            "The type of data scope to be used in metadata enrichment job "
            "reruns after the initial full enrichment."
        ),
    )
    data_quality: DataQuality = Field(
        DataQuality(),
        description=(
            "Initialization information for the data quality objectives for "
            "metadata Enrichment"
        ),
    )


# Payload describing a metadata enrichment asset to be created.
class MetadataEnrichmentAsset(BaseModel):
    name: str = Field(
        description="The name of the metadata enrichment asset to be created."
    )
    job: MetadataEnrichmentAssetEnrichmentJob = Field(
        description=(
            "Initialization information for the metadata enrichment asset enrichment job"
        )
    )
    data_scope: MetadataEnrichmentAssetDataScope = Field(
        MetadataEnrichmentAssetDataScope(),
        description=(
            "Initialization information for a metadata enrichment asset's "
            "data scope definition."
        ),
    )
    objective: MetadataEnrichmentAssetObjective = Field(
        MetadataEnrichmentAssetObjective(),
        description="Initialization information for metadata enrichment asset objectives.",
    )

    def __init__(self, name: str, **data: Any) -> None:
        """Build the asset, deriving the job from ``name`` unless one is given.

        Args:
            name: Name of the asset; also used as the job name when no
                explicit ``job`` is supplied.
            **data: Optional values for the remaining fields (``job``,
                ``data_scope``, ``objective``). Omitted fields keep their
                declared defaults, so ``MetadataEnrichmentAsset(name)``
                behaves exactly as before.
        """
        if "job" not in data:
            data["job"] = MetadataEnrichmentAssetEnrichmentJob(name=name)
        super().__init__(name=name, **data)


# Status values of a metadata enrichment asset operation.
class OperationStatusEnum(str, Enum):
    ACCEPTED = "accepted"
    NOT_STARTED = "not_started"
    IN_PROGRESS = "in_progress"
    FAILED = "failed"
    CANCELED = "canceled"
    SUCCEEDED = "succeeded"
    SUCCEEDED_WITH_ERRORS = "succeeded_with_errors"


# A data scope operation and the resource it targets.
class DataScopeOperation(BaseModel):
    id: str = Field(description="The unique identifier of this resource.")
    status: OperationStatusEnum = Field(
        description="Status of a metadata enrichment asset operation."
    )
    target_resource_id: Optional[str] = Field(
        None,
        description="The identifier of the target resource.",
    )
    target_resource_location: Optional[str] = Field(
        None,
        description="The target resource location.",
    )


# Patchable subset of the objectives: enrichment options and governance scope.
class MetadataEnrichmentAssetObjectivePatch(BaseModel):
    enrichment_options: EnrichmentOptions = Field(
        EnrichmentOptions(),
        description="Patch for the enrichment options of a metadata enrichment asset.",
    )
    governance_scope: list[GovernanceScopeCategory] = Field(
        [],
        description="A list of categories to be used for metadata enrichment.",
    )


# Patch payload for a metadata enrichment asset.
class MetadataEnrichmentAssetPatch(BaseModel):
    objective: MetadataEnrichmentAssetObjectivePatch = Field(
        MetadataEnrichmentAssetObjectivePatch(),
        description="Objective patch of a metadata enrichment asset.",
    )


# A selection of data assets by identifier.
class DataScopeAssetSelection(BaseModel):
    ids: list[str] = Field(..., description="A list of data asset identifiers.")


# Request to add assets to a metadata enrichment asset's data scope.
class MetadataEnrichmentAssetDataScopeUpdateRequest(BaseModel):
    assets_to_add: DataScopeAssetSelection = Field(
        ...,
        description="A subset of assets in a metadata enrichment asset.",
    )


# Identifier and name carried by a patch response.
class MetadataEnrichmentAssetPatchResponse(BaseModel):
    id: str = Field(..., description="The unique identifier of this resource.")
    name: str = Field(..., description="The name of the metadata enrichment asset.")


# Identifiers of a metadata enrichment job run plus a UI link to the asset.
class MetadataEnrichmentRun(BaseModel):
    metadata_enrichment_id: str = Field(
        ...,
        description="The unique identifier of the parent metadata enrichment.",
    )
    job_id: str = Field(
        ...,
        description="The unique identifier of the metadata enrichment job.",
    )
    job_run_id: str = Field(
        ...,
        description="The unique identifier of the metadata enrichment job run.",
    )
    project_id: str = Field(..., description="The unique identifier of the project.")
    metadata_enrichment_ui_url: str = Field(
        ...,
        description="The URL to the metadata enrichment asset in the UI.",
    )


# A metadata enrichment paired with the datasets it refers to.
class MetadataEnrichmentAssetInfo(BaseModel):
    metadata_enrichment_id: str = Field(
        ...,
        description="The unique identifier of the parent metadata enrichment.",
    )
    dataset_ids: list[str] = Field(..., description="The list of dataset identifiers.")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hingaibm/data-intelligence-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

metadata_enrichment.py•14.6 KiB