UpdateDataQualityEvaluationTask
Modifies data quality monitoring rules by updating thresholds, sampling settings, and notification triggers to ensure accuracy and compliance in DataWorks MCP Server.
Instructions
更新数据质量监控 *此Tool有MCP Resource,请查看UpdateDataQualityEvaluationTask(MCP Resource)获取更多使用此Tool的示例详情。
Input Schema
Name | Required | Description | Default |
---|---|---|---|
DataQualityRules | No | 数据质量监控关联的数据质量规则列表 | |
DataSourceId | No | 数据源ID | |
Description | No | 质量监控任务描述 | |
Hooks | No | 回调设置 | |
Id | No | 数据质量监控ID | |
Name | No | 质量监控任务名称 | |
Notifications | No | 通知订阅配置 | |
ProjectId | No | 项目空间Id | |
RuntimeConf | No | 扩展配置,JSON格式的字符串,只对EMR类型的数据质量监控生效。- queue:执行EMR数据质量校验时,使用的yarn队列,默认为本项目配置的队列 - sqlEngine:执行EMR的数据校验时,采用的SQL引擎,可选值:HIVE_SQL、SPARK_SQL | |
Target | No | 数据质量监控对象 | |
Trigger | No | 数据质量校验任务的触发配置 | |
Input Schema (JSON Schema)
{
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"properties": {
"DataQualityRules": {
"description": "数据质量监控关联的数据质量规则列表",
"items": {
"additionalProperties": false,
"properties": {
"CheckingConfig": {
"additionalProperties": false,
"description": "样本校验设置",
"properties": {
"ReferencedSamplesFilter": {
"description": "有些类型的阈值需要查询出一些参考样本,然后对参考样本的值进行汇总得出进行比较的阈值,这里使用一个表达式来表示参考样本的查询方式",
"type": "string"
},
"Thresholds": {
"additionalProperties": false,
"description": "校验阈值设置",
"properties": {
"Critical": {
"additionalProperties": false,
"description": "严重警告的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01:$checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01。固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符:- > - >= - < - <= - != - =",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
},
"Expected": {
"additionalProperties": false,
"description": "期望的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01:$checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01。固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符:- > - >= - < - <= - != - =",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
},
"Warned": {
"additionalProperties": false,
"description": "普通警告的阈值设置",
"properties": {
"Expression": {
"description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01:$checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01。固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
"type": "string"
},
"Operator": {
"description": "比较符:- > - >= - < - <= - != - =",
"type": "string"
},
"Value": {
"description": "阈值数值",
"type": "string"
}
},
"type": "object"
}
},
"type": "object"
},
"Type": {
"description": "阈值计算方式- Fluctation:波动范围校验- Auto:智能阈值校验- FluctationDiscreate:离散值波动范围校验- Average:均值波动范围校验- Fixed:固定值校验",
"type": "string"
}
},
"type": "object"
},
"Description": {
"description": "数据质量规则描述信息",
"type": "string"
},
"Enabled": {
"description": "数据质量规则是否启用",
"type": "boolean"
},
"ErrorHandlers": {
"description": "质量规则校验问题处理器",
"items": {
"additionalProperties": false,
"properties": {
"ErrorDataFilter": {
"description": "如果是自定义SQL规则,需要用户指定SQL来过滤问题数据",
"type": "string"
},
"Type": {
"description": "处理器类型:- SaveErrorData:保留问题数据",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"Id": {
"description": "校验规则的ID"
},
"Name": {
"description": "数据质量规则名称",
"type": "string"
},
"SamplingConfig": {
"additionalProperties": false,
"description": "样本采集时,所需的参数",
"properties": {
"Metric": {
"description": "采样的指标名称- Count:表行数- Min:字段最小值- Max:字段最大值- Avg:字段均值- DistinctCount:字段唯一值个数- DistinctPercent:字段唯一值个数与数据行数占比- DuplicatedCount:字段重复值个数- DuplicatedPercent:字段重复值个数与数据行数占比- TableSize:表大小- NullValueCount:字段为空的行数- NullValuePercent:字段为空的比例- GroupCount:按字段值聚合后每个值与对应的数据行数- CountNotIn:枚举值不匹配行数- CountDistinctNotIn:枚举值不匹配唯一值个数- UserDefinedSql:通过自定义SQL做样本采集",
"type": "string"
},
"MetricParameters": {
"description": "样本采集时,所需的参数",
"type": "string"
},
"SamplingFilter": {
"description": "采样时,对不关注的数据进行二次过滤的条件,最多16777215个字符",
"type": "string"
},
"SettingConfig": {
"description": "具体执行采样语句前,插入执行的一些运行时参数设置语句,最长1000个字符。目前只支持MaxCompute",
"type": "string"
}
},
"type": "object"
},
"Severity": {
"description": "规则对于业务的等级(对应页面上的强弱规则),可选的枚举值:- Normal- High",
"type": "string"
},
"TemplateCode": {
"description": "规则所引用的规则模板唯一标识",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"DataSourceId": {
"description": "数据源ID"
},
"Description": {
"description": "质量监控任务描述",
"type": "string"
},
"Hooks": {
"description": "回调设置",
"items": {
"additionalProperties": false,
"properties": {
"Condition": {
"description": "Hook触发条件,当满足这个条件时,会触发hook动作。目前只能支持两种条件表达式:- 只指定一组规则严重类型和规则校验状态,如`${severity} == \"High\" AND ${status} == \"Critical\"`,代表执行的规则中,如果有severity为High的规则校验结果是Critical,则满足条件。- 指定多组规则严重类型和规则校验状态,如`(${severity} == \"High\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Error\")`,代表执行的规则中,如果有severity为High的规则校验结果是Critical、或者severity为Normal的规则校验结果是Critical、或者severity为Normal的规则校验结果是Error,则满足条件,条件表达式中severity的枚举与DataQualityRule中severity的枚举一致、status的枚举与DataQualityResult中的status一致",
"type": "string"
},
"Type": {
"description": "数据质量校验结束后,执行的Hook动作- BlockTaskInstance:阻塞调度任务",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"Id": {
"description": "数据质量监控ID"
},
"Name": {
"description": "质量监控任务名称",
"type": "string"
},
"Notifications": {
"additionalProperties": false,
"description": "通知订阅配置",
"properties": {
"Condition": {
"description": "通知触发条件,当满足这个条件时,会触发消息通知。目前只能支持两种条件表达式:- 只指定一组规则严重类型和规则校验状态,如`${severity} == \"High\" AND ${status} == \"Critical\"`,代表执行的规则中,如果有severity为High的规则校验结果是Critical,则满足条件- 指定多组规则严重类型和规则校验状态,如`(${severity} == \"High\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Critical\") OR (${severity} == \"Normal\" AND ${status} == \"Error\")`,代表执行的规则中,如果有severity为High的规则校验结果是Critical、或者severity为Normal的规则校验结果是Critical、或者severity为Normal的规则校验结果是Error,则满足条件,条件表达式中severity的枚举与DataQualityRule中severity的枚举一致、status的枚举与DataQualityResult中的status一致",
"type": "string"
},
"Notifications": {
"description": "通知设置",
"items": {
"additionalProperties": false,
"properties": {
"NotificationChannels": {
"description": "通知方式",
"items": {
"additionalProperties": false,
"properties": {
"Channels": {
"description": "通知方式",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"type": "array"
},
"NotificationReceivers": {
"description": "告警接收人设置",
"items": {
"additionalProperties": false,
"properties": {
"Extension": {
"description": "告警发送时的额外参数设置,json格式,支持的key如下:- atAll:发送钉钉告警时,是否需要在群里@所有人。ReceiverType为DingdingUrl时生效",
"type": "string"
},
"ReceiverType": {
"description": "告警接收人类型",
"type": "string"
},
"ReceiverValues": {
"description": "告警接收人",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"ProjectId": {
"description": "项目空间Id"
},
"RuntimeConf": {
"description": "扩展配置,JSON格式的字符串,只对EMR类型的数据质量监控生效。- queue:执行EMR数据质量校验时,使用的yarn队列,默认为本项目配置的队列 - sqlEngine:执行EMR的数据校验时,采用的SQL引擎,可选值:HIVE_SQL、SPARK_SQL",
"type": "string"
},
"Target": {
"additionalProperties": false,
"description": "数据质量监控对象",
"properties": {
"DatabaseType": {
"description": "表所属的数据库类型:- maxcompute - hologres - cdh - analyticdb_for_mysql - starrocks - emr - analyticdb_for_postgresql",
"type": "string"
},
"PartitionSpec": {
"description": "分区表的分区设置",
"type": "string"
},
"TableGuid": {
"description": "表在数据地图中的唯一ID",
"type": "string"
}
},
"type": "object"
},
"Trigger": {
"additionalProperties": false,
"description": "数据质量校验任务的触发配置",
"properties": {
"TaskIds": {
"description": "调度任务Id列表,在Type为ByScheduledTaskInstance时有效",
"type": "array"
},
"Type": {
"description": "质量监控触发类型。- ByScheduledTaskInstance:关联调度任务触发- ByManual:手动触发",
"type": "string"
}
},
"type": "object"
}
},
"type": "object"
}