CreateDataQualityEvaluationTask
Create data quality evaluation tasks by defining rules, thresholds, and conditions to monitor and ensure data integrity within DataWorks MCP Server.
Instructions
创建数据质量监控。注意:此Tool有MCP Resource,请查看CreateDataQualityEvaluationTask(MCP Resource)获取更多使用此Tool的示例详情。
Input Schema
Name | Required | Description | Default |
---|---|---|---|
DataQualityRules | No | 数据质量监控关联的数据质量规则列表。如果设置了DataQualityRule.Id,则把Id对应的规则关联到新建质量监控中;如果没有设置,则用其他字段创建一个新的规则,关联到新建的质量监控中 | |
DataSourceId | No | 数据源ID | |
Description | No | 质量监控任务描述 | |
Hooks | No | 回调设置 | |
Name | Yes | 质量监控任务名称 | |
Notifications | No | 通知订阅配置 | |
ProjectId | No | DataWorks工作空间的ID | |
RuntimeConf | No | 扩展配置,JSON格式的字符串,只对EMR类型的数据质量监控生效。支持的key:queue(执行EMR数据质量校验时使用的yarn队列,默认为本项目配置的队列);sqlEngine(执行EMR数据校验时采用的SQL引擎,可选值:HIVE_SQL、SPARK_SQL)。 | |
Target | Yes | 数据质量监控对象 | |
Trigger | No | 数据质量校验任务的触发配置 | |
Input Schema (JSON Schema)
{
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"properties": {
"DataQualityRules": {
"description": "数据质量监控关联的数据质量规则列表。如果设置了DataQualityRule.Id,则把Id对应的规则关联到新建质量监控中;如果没有设置,则用其他字段创建一个新的规则,关联到新建的质量监控中",
"items": {
"additionalProperties": false,
"properties": {
"CheckingConfig": {
"additionalProperties": false,
"description": "样本校验设置",
"properties": {
"ReferencedSamplesFilter": {
"description": "有些类型的阈值需要查询出一些参考样本,然后对参考样本的值进行汇总得出进行比较的阈值,这里使用一个表达式来表示参考样本的查询方式",
"type": "string"
},
"Thresholds": {
"additionalProperties": false,
"description": "校验阈值设置",
"properties": {
"Critical": {
"additionalProperties": false,
"description": "严重警告的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01: $checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符-\\>-\\>=-<-<=-!=-=",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
},
"Expected": {
"additionalProperties": false,
"description": "期望的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01: $checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符-\\>-\\>=-<-<=-!=-=",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
},
"Warned": {
"additionalProperties": false,
"description": "普通警告的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01: $checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符-\\>-\\>=-<-<=-!=-=",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
}
},
"type": "object"
},
"Type": {
"description": "阈值计算方式-Fixed-Fluctation-FluctationDiscreate-Auto-Average",
"type": "string"
}
},
"type": "object"
},
"Description": {
"description": "数据质量规则描述",
"type": "string"
},
"Enabled": {
"description": "质量规则是否启用",
"type": "boolean"
},
"ErrorHandlers": {
"description": "质量规则校验问题处理器列表",
"items": {
"additionalProperties": false,
"properties": {
"ErrorDataFilter": {
"description": "如果是自定义SQL规则,需要用户指定SQL来过滤问题数据",
"type": "string"
},
"Type": {
"description": "处理器类型:- SaveErrorData:保留问题数据",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"Id": {
"description": "规则ID"
},
"Name": {
"description": "数据质量规则名称",
"type": "string"
},
"SamplingConfig": {
"additionalProperties": false,
"description": "样本采集时,所需的参数",
"properties": {
"Metric": {
"description": "采样的指标名称- Count:表行数- Min:字段最小值- Max:字段最大值- Avg:字段均值- DistinctCount:字段唯一值个数- DistinctPercent:字段唯一值个数与数据行数占比- DuplicatedCount:字段重复值个数- DuplicatedPercent:字段重复值个数与数据行数占比- TableSize:表大小- NullValueCount:字段为空的行数- NullValuePercent:字段为空的比例- GroupCount:按字段值聚合后每个值与对应的数据行数- CountNotIn:枚举值不匹配行数- CountDistinctNotIn:枚举值不匹配唯一值个数- UserDefinedSql:通过自定义SQL做样本采集",
"type": "string"
},
"MetricParameters": {
"description": "样本采集时,所需的参数",
"type": "string"
},
"SamplingFilter": {
"description": "采样时,对不关注的数据进行二次过滤的条件,最多16777215个字符",
"type": "string"
},
"SettingConfig": {
"description": "具体执行采样语句前,插入执行的一些运行时参数设置语句,最长1000个字符。目前只支持MaxCompute",
"type": "string"
}
},
"type": "object"
},
"Severity": {
"description": "规则对于业务的等级(对应页面上的强弱规则),可选的枚举值:- Normal- High",
"type": "string"
},
"TemplateCode": {
"description": "规则所引用的规则模板唯一标识",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"DataSourceId": {
"description": "数据源ID"
},
"Description": {
"description": "质量监控任务描述",
"type": "string"
},
"Hooks": {
"description": "回调设置",
"items": {
"additionalProperties": false,
"properties": {
"Condition": {
"description": "Hook触发条件,当满足这个条件时,会触发hook动作。目前只能支持两种条件表达式:1. 只指定一组规则严重类型和规则校验状态,如`${severity} == \"High\" AND ${status} == \"Critical\"`,代表执行的规则中,如果有severity为High的规则校验结果是Critical,则满足条件2. 指定多组规则严重类型和规则校验状态,如`(${severity} == \"High\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Error\")`,代表执行的规则中,如果有severity为High的规则校验结果是Critical、或者severity为Normal的规则校验结果是Critical、或者severity为Normal的规则校验结果是Error,则满足条件,条件表达式中severity的枚举与DataQualityRule中severity的枚举一致、status的枚举与DataQualityResult中的status一致",
"type": "string"
},
"Type": {
"description": "Hook类型,目前只支持一种:- BlockTaskInstance:阻塞调度任务继续运行,数据质量监控是由调度任务触发的,那么在数据质量监控运行完成之后,会根据Hook.Condition来判断是否阻塞调度任务继续运行",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"Name": {
"description": "质量监控任务名称",
"type": "string"
},
"Notifications": {
"additionalProperties": false,
"description": "通知订阅配置",
"properties": {
"Condition": {
"description": "通知触发条件,当满足这个条件时,会触发消息通知。目前只能支持两种条件表达式:只指定一组规则严重类型和规则校验状态,如`${severity} == \"High\" AND ${status} == \"Critical\"`,代表执行的规则中,如果有severity为High的规则校验结果是Critical,则满足条件指定多组规则严重类型和规则校验状态,如`(${severity} == \"High\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Error\")`,代表执行的规则中,如果有severity为High的规则校验结果是Critical、或者severity为Normal的规则校验结果是Critical、或者severity为Normal的规则校验结果是Error,则满足条件,条件表达式中severity的枚举与DataQualityRule中severity的枚举一致、status的枚举与DataQualityResult中的status一致",
"type": "string"
},
"Notifications": {
"description": "通知设置",
"items": {
"additionalProperties": false,
"properties": {
"NotificationChannels": {
"description": "通知方式",
"items": {
"additionalProperties": false,
"properties": {
"Channels": {
"description": "通知方式",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"type": "array"
},
"NotificationReceivers": {
"description": "告警接收人设置",
"items": {
"additionalProperties": false,
"properties": {
"Extension": {
"description": "告警发送时的额外参数设置,json格式,支持的key如下:- atAll:发送钉钉告警时,是否需要在群里@所有人。ReceiverType为DingdingUrl时生效",
"type": "string"
},
"ReceiverType": {
"description": "告警接收人类型- WebhookUrl:自定义webhook地址- FeishuUrl:飞书告警地址- DingdingUrl:钉钉告警地址- WeixinUrl:企业微信告警地址- AliUid:阿里云用户ID",
"type": "string"
},
"ReceiverValues": {
"description": "告警接收人",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"ProjectId": {
"description": "DataWorks工作空间的ID"
},
"RuntimeConf": {
"description": "扩展配置,JSON格式的字符串,只对EMR类型的数据质量监控生效。- queue:执行EMR数据质量校验时,使用的yarn队列,默认为本项目配置的队列- sqlEngine:执行EMR的数据校验时,采用的SQL引擎 + HIVE_SQL + SPARK_SQL",
"type": "string"
},
"Target": {
"additionalProperties": false,
"description": "数据质量监控对象",
"properties": {
"DatabaseType": {
"description": "表所属的数据库类型-maxcompute-hologres-cdh-analyticdb_for_mysql-starrocks-emr-analyticdb_for_postgresql",
"type": "string"
},
"PartitionSpec": {
"description": "分区表的分区设置",
"type": "string"
},
"TableGuid": {
"description": "表在数据地图中的唯一ID",
"type": "string"
}
},
"required": [
"DatabaseType",
"TableGuid"
],
"type": "object"
},
"Trigger": {
"additionalProperties": false,
"description": "数据质量校验任务的触发配置",
"properties": {
"TaskIds": {
"description": "调度任务Id列表,在Type为ByScheduledTaskInstance时有效",
"type": "array"
},
"Type": {
"description": "质量监控触发类型:- ByManual:手动触发,默认值- ByScheduledTaskInstance:关联调度任务触发",
"type": "string"
}
},
"type": "object"
}
},
"required": [
"Target",
"Name"
],
"type": "object"
}