CreateDataQualityRule
Define and configure data quality rules to monitor and validate datasets, set thresholds for critical alerts, and automate error handling for ensuring data accuracy and reliability.
Instructions
创建质量规则。此Tool有MCP Resource,请查看CreateDataQualityRule(MCP Resource)获取更多使用此Tool的示例详情。
Input Schema
Name | Required | Description | Default |
---|---|---|---|
CheckingConfig | No | 样本校验设置 | |
Description | No | 规则描述信息,最长500个字符 | |
Enabled | No | 质量规则是否启用 | |
ErrorHandlers | No | 质量规则校验问题处理器列表 | |
Name | Yes | 规则名称 | |
ProjectId | No | DataWorks工作空间ID | |
SamplingConfig | No | 样本采集所需的设置 | |
Severity | No | 规则对于业务的等级(对应页面上的强弱规则),可选的枚举值:Normal、High | |
Target | Yes | 规则所监控的对象 | |
TemplateCode | Yes | 规则所引用的规则模板唯一标识 | |
Input Schema (JSON Schema)
{
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"properties": {
"CheckingConfig": {
"additionalProperties": false,
"description": "样本校验设置",
"properties": {
"ReferencedSamplesFilter": {
"description": "有些类型的阈值需要查询出一些参考样本,然后对参考样本的值进行汇总得出进行比较的阈值,这里使用一个表达式来表示参考样本的查询方式",
"type": "string"
},
"Thresholds": {
"additionalProperties": false,
"description": "阈值设置",
"properties": {
"Critical": {
"additionalProperties": false,
"description": "严重警告的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:波动上升大于0.01:$checkValue > 0.01;波动下降大于0.01:$checkValue < -0.01;波动率绝对值:abs($checkValue) > 0.01。固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符:>、>=、<、<=、!=、=",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
},
"Expected": {
"additionalProperties": false,
"description": "期望的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:波动上升大于0.01:$checkValue > 0.01;波动下降大于0.01:$checkValue < -0.01;波动率绝对值:abs($checkValue) > 0.01。固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符:>、>=、<、<=、!=、=",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
},
"Warned": {
"additionalProperties": false,
"description": "普通警告的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:波动上升大于0.01:$checkValue > 0.01;波动下降大于0.01:$checkValue < -0.01;波动率绝对值:abs($checkValue) > 0.01。固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符:>、>=、<、<=、!=、=",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
}
},
"type": "object"
},
"Type": {
"description": "阈值计算方式,使用模版时可不设置。可选值:Fixed、Fluctation、FluctationDiscreate、Auto、Average、Variance",
"type": "string"
}
},
"type": "object"
},
"Description": {
"description": "规则描述信息,最长500个字符",
"type": "string"
},
"Enabled": {
"description": "质量规则是否启用",
"type": "boolean"
},
"ErrorHandlers": {
"description": "质量规则校验问题处理器列表",
"items": {
"additionalProperties": false,
"properties": {
"ErrorDataFilter": {
"description": "如果是自定义SQL规则,需要用户指定SQL来过滤问题数据",
"type": "string"
},
"Type": {
"description": "处理器类型:SaveErrorData",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"Name": {
"description": "规则名称",
"type": "string"
},
"ProjectId": {
"description": "DataWorks工作空间ID"
},
"SamplingConfig": {
"additionalProperties": false,
"description": "样本采集所需的设置",
"properties": {
"Metric": {
"description": "采样的指标名称,使用模版时可不设置。可选值:Count(表行数)、Min(字段最小值)、Max(字段最大值)、Avg(字段均值)、DistinctCount(字段唯一值个数)、DistinctPercent(字段唯一值个数与数据行数占比)、DuplicatedCount(字段重复值个数)、DuplicatedPercent(字段重复值个数与数据行数占比)、TableSize(表大小)、NullValueCount(字段为空的行数)、NullValuePercent(字段为空的比例)、GroupCount(按字段值聚合后每个值与对应的数据行数)、CountNotIn(枚举值不匹配行数)、CountDistinctNotIn(枚举值不匹配唯一值个数)、UserDefinedSql(通过自定义SQL做样本采集)",
"type": "string"
},
"MetricParameters": {
"description": "样本采集时,所需的参数",
"type": "string"
},
"SamplingFilter": {
"description": "采样时,对不关注的数据进行二次过滤的条件,最多16777215个字符",
"type": "string"
},
"SettingConfig": {
"description": "具体执行采样语句前,插入执行的一些运行时参数设置语句,最长1000个字符。目前只支持MaxCompute",
"type": "string"
}
},
"type": "object"
},
"Severity": {
"description": "规则对于业务的等级(对应页面上的强弱规则),可选的枚举值:Normal、High",
"type": "string"
},
"Target": {
"additionalProperties": false,
"description": "规则所监控的对象",
"properties": {
"DatabaseType": {
"description": "表类型的数据集,表所属的数据库类型。可选值:maxcompute、emr、cdh、hologres、analyticdb_for_postgresql、analyticdb_for_mysql、starrocks",
"type": "string"
},
"PartitionSpec": {
"description": "分区表的分区设置",
"type": "string"
},
"TableGuid": {
"description": "规则所作用的表在数据地图中的唯一ID",
"type": "string"
},
"Type": {
"description": "监控对象类型。可选值:Table",
"type": "string"
}
},
"required": [
"DatabaseType",
"TableGuid"
],
"type": "object"
},
"TemplateCode": {
"description": "规则所引用的规则模板唯一标识",
"type": "string"
}
},
"required": [
"Name",
"Target",
"TemplateCode"
],
"type": "object"
}