"""
纯 DoWhy 建模工具 - 100% 使用 DoWhy 原生方法,不重复造轮子
"""
import logging
from typing import Any, Dict, List, Optional
import pandas as pd
import numpy as np
import dowhy
from mcp.server.fastmcp import FastMCP
from ..utils.data_processor import load_and_validate_data
# Module-level logger registered under the "dowhy-mcp-server.modeling" name.
logger = logging.getLogger("dowhy-mcp-server.modeling")
# Magnitude above which causal_graph_validator flags an estimated effect as
# "significant". This is a fixed effect-size cut-off, not a statistical test.
_EFFECT_SIZE_THRESHOLD = 0.1


def _dedupe(names: List[str]) -> List[str]:
    """Return *names* without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(names))


def _failure(method: str, message: str) -> Dict[str, Any]:
    """Build the failure payload every tool in this module returns on error."""
    return {"success": False, "error": message, "method": method}


def register_modeling_tools(server: "FastMCP") -> None:
    """Register the DoWhy modeling tools on *server*.

    Six tools are registered through ``server.tool()``:
    ``causal_graph_builder``, ``structural_causal_model_builder``,
    ``graphical_causal_model_builder``, ``causal_mechanism_learner``,
    ``model_specification_builder`` and ``causal_graph_validator``.

    None of the tools raises: any exception is caught and reported as
    ``{"success": False, "error": <message>, "method": <tool label>}``.
    """

    @server.tool()
    def causal_graph_builder(
        data_path: str,
        variables: List[str],
        treatment: str,
        outcome: str,
        confounders: Optional[List[str]] = None,
        graph_type: str = "dag",
    ) -> Dict[str, Any]:
        """Build a simple causal graph description backed by a DoWhy model.

        Args:
            data_path: Location of the dataset, forwarded to
                ``load_and_validate_data``.
            variables: Extra columns to validate alongside the treatment,
                outcome and confounders.
            treatment: Name of the treatment column.
            outcome: Name of the outcome column.
            confounders: Optional common causes of treatment and outcome.
            graph_type: Echoed back in the result; it does not influence how
                the graph is built.

        Returns:
            On success, a dict with the de-duplicated variable list, the edge
            list (each confounder -> treatment, each confounder -> outcome,
            treatment -> outcome) and the sample size. On failure, the common
            failure payload.
        """
        try:
            # Order-preserving de-duplication keeps the output stable across runs.
            all_vars = _dedupe([treatment, outcome] + (confounders or []) + variables)
            data = load_and_validate_data(data_path, all_vars)

            # The model object itself is not needed afterwards; it is
            # constructed so that an error raised by DoWhy for this
            # specification is reported to the caller.
            dowhy.CausalModel(
                data=data,
                treatment=treatment,
                outcome=outcome,
                common_causes=confounders,
            )

            edges = []
            for confounder in confounders or []:
                edges.append({"from": confounder, "to": treatment})
                edges.append({"from": confounder, "to": outcome})
            edges.append({"from": treatment, "to": outcome})

            return {
                "success": True,
                "method": "DoWhy Causal Graph",
                "variables": all_vars,
                "treatment": treatment,
                "outcome": outcome,
                "confounders": confounders or [],
                "edges": edges,
                "graph_type": graph_type,
                "sample_size": int(len(data)),
            }
        except Exception as e:
            return _failure("DoWhy Causal Graph Builder", str(e))

    @server.tool()
    def structural_causal_model_builder(
        data_path: str,
        variables: List[str],
        causal_relationships: List[Dict[str, str]],
    ) -> Dict[str, Any]:
        """Build a DoWhy model from a list of cause/effect relationships.

        Only the first relationship is modelled: its ``"cause"`` becomes the
        treatment and its ``"effect"`` the outcome. Every other entry of
        *variables* is treated as a common cause.

        Args:
            data_path: Location of the dataset, forwarded to
                ``load_and_validate_data``.
            variables: Columns to validate and to draw confounders from.
            causal_relationships: Dicts with ``"cause"`` and ``"effect"`` keys;
                must not be empty.

        Returns:
            On success, a dict with the chosen treatment/outcome, the
            confounders, the identified estimand rendered as a string and the
            sample size. On failure, the common failure payload.
        """
        method = "DoWhy SCM Builder"
        try:
            # Reject empty input before touching the dataset so the caller
            # sees this error rather than an unrelated data-loading failure.
            if not causal_relationships:
                return _failure(method, "需要至少一个因果关系")

            data = load_and_validate_data(data_path, variables)

            main_rel = causal_relationships[0]
            treatment = main_rel["cause"]
            outcome = main_rel["effect"]
            confounders = [v for v in variables if v not in (treatment, outcome)]

            model = dowhy.CausalModel(
                data=data,
                treatment=treatment,
                outcome=outcome,
                common_causes=confounders or None,
            )
            identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)

            return {
                "success": True,
                "method": "DoWhy Structural Causal Model",
                "variables": variables,
                "causal_relationships": causal_relationships,
                "main_treatment": treatment,
                "main_outcome": outcome,
                "confounders": confounders,
                "estimand": str(identified_estimand),
                "sample_size": int(len(data)),
            }
        except Exception as e:
            return _failure(method, str(e))

    @server.tool()
    def graphical_causal_model_builder(
        data_path: str,
        variables: List[str],
        edges: List[Dict[str, str]],
    ) -> Dict[str, Any]:
        """Build a DoWhy model from an explicit edge list.

        The first edge defines treatment (``"from"``) and outcome (``"to"``).
        Variables that appear in *variables* and in at least one edge, other
        than the treatment and outcome, are used as common causes.

        Args:
            data_path: Location of the dataset, forwarded to
                ``load_and_validate_data``.
            variables: Columns to validate.
            edges: Dicts with ``"from"`` and ``"to"`` keys; must not be empty.

        Returns:
            On success, a dict echoing the inputs plus the derived treatment,
            outcome, confounders and sample size. On failure, the common
            failure payload.
        """
        method = "DoWhy Graphical Model Builder"
        try:
            # Validate the cheap precondition before loading any data.
            if not edges:
                return _failure(method, "需要至少一条边")

            data = load_and_validate_data(data_path, variables)

            first_edge = edges[0]
            treatment = first_edge["from"]
            outcome = first_edge["to"]

            edge_vars = set()
            for edge in edges:
                edge_vars.add(edge["from"])
                edge_vars.add(edge["to"])
            confounders = [
                v for v in variables
                if v in edge_vars and v not in (treatment, outcome)
            ]

            # Constructed only so that an error raised by DoWhy for this
            # specification is reported; the model object is not used further.
            dowhy.CausalModel(
                data=data,
                treatment=treatment,
                outcome=outcome,
                common_causes=confounders or None,
            )

            return {
                "success": True,
                "method": "DoWhy Graphical Causal Model",
                "variables": variables,
                "edges": edges,
                "treatment": treatment,
                "outcome": outcome,
                "confounders": confounders,
                "sample_size": int(len(data)),
            }
        except Exception as e:
            return _failure(method, str(e))

    @server.tool()
    def causal_mechanism_learner(
        data_path: str,
        target_variable: str,
        parent_variables: List[str],
        mechanism_type: str = "linear",
    ) -> Dict[str, Any]:
        """Estimate the effect of the first parent on the target variable.

        The first entry of *parent_variables* is the treatment; the remaining
        entries are common causes. The estimate always uses
        ``backdoor.linear_regression``.

        Args:
            data_path: Location of the dataset, forwarded to
                ``load_and_validate_data``.
            target_variable: Outcome column.
            parent_variables: Candidate parents; must not be empty.
            mechanism_type: Only echoed in the result and in the ``method``
                label; it does not select a different estimator.

        Returns:
            On success, a dict with the estimated ``causal_effect`` as a float
            and the sample size. On failure, the common failure payload.
        """
        method = "DoWhy Mechanism Learner"
        try:
            # Checked first so that a missing parent list is reported as such
            # instead of surfacing as a concatenation or data-loading error.
            if not parent_variables:
                return _failure(method, "需要至少一个父变量")

            data = load_and_validate_data(data_path, [target_variable] + parent_variables)

            treatment = parent_variables[0]
            confounders = parent_variables[1:] or None

            model = dowhy.CausalModel(
                data=data,
                treatment=treatment,
                outcome=target_variable,
                common_causes=confounders,
            )
            identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
            estimate = model.estimate_effect(
                identified_estimand,
                method_name="backdoor.linear_regression",
            )

            return {
                "success": True,
                "method": f"DoWhy Mechanism Learning ({mechanism_type})",
                "target_variable": target_variable,
                "parent_variables": parent_variables,
                "mechanism_type": mechanism_type,
                "causal_effect": float(estimate.value),
                "sample_size": int(len(data)),
            }
        except Exception as e:
            return _failure(method, str(e))

    @server.tool()
    def model_specification_builder(
        treatment: str,
        outcome: str,
        confounders: List[str],
        instruments: Optional[List[str]] = None,
        mediators: Optional[List[str]] = None,
        effect_modifiers: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Assemble a model specification from the named variable roles.

        No data is loaded and no DoWhy call is made; the inputs are only
        collected and summarised.

        Args:
            treatment: Name of the treatment variable.
            outcome: Name of the outcome variable.
            confounders: Common causes (may be an empty list).
            instruments: Optional instrumental variables.
            mediators: Optional mediators.
            effect_modifiers: Optional effect modifiers.

        Returns:
            On success, a dict with the specification, the de-duplicated list
            of all variables in first-seen order, and an
            ``identification_strategy`` chosen by precedence: ``"backdoor"``
            if there are confounders, else ``"frontdoor"`` if there are
            mediators, else ``"instrumental"`` if there are instruments, else
            ``"none"``. On failure, the common failure payload.
        """
        try:
            specification = {
                "treatment": treatment,
                "outcome": outcome,
                "confounders": confounders,
                "instruments": instruments or [],
                "mediators": mediators or [],
                "effect_modifiers": effect_modifiers or [],
            }

            all_vars = [treatment, outcome] + confounders
            for role_members in (instruments, mediators, effect_modifiers):
                if role_members:
                    all_vars.extend(role_members)

            if confounders:
                strategy = "backdoor"
            elif mediators:
                strategy = "frontdoor"
            elif instruments:
                strategy = "instrumental"
            else:
                strategy = "none"

            return {
                "success": True,
                "method": "DoWhy Model Specification",
                "specification": specification,
                # Order-preserving so the output is identical across runs.
                "all_variables": _dedupe(all_vars),
                "identification_strategy": strategy,
            }
        except Exception as e:
            return _failure("DoWhy Model Specification Builder", str(e))

    @server.tool()
    def causal_graph_validator(
        data_path: str,
        graph_edges: List[Dict[str, str]],
        validation_tests: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """Check each edge of a graph by estimating its effect with DoWhy.

        For every edge, the ``"from"`` variable is the treatment, the ``"to"``
        variable is the outcome and all other variables mentioned in the graph
        are common causes. The effect is estimated with
        ``backdoor.linear_regression``.

        Args:
            data_path: Location of the dataset, forwarded to
                ``load_and_validate_data``.
            graph_edges: Dicts with ``"from"`` and ``"to"`` keys; must not be
                empty.
            validation_tests: Accepted for interface compatibility; currently
                not used.

        Returns:
            On success, a dict whose ``validation_results`` maps
            ``"cause->effect"`` to ``valid``, ``effect_size`` and
            ``significant`` (absolute effect above a fixed 0.1 cut-off).
            An edge whose estimation fails is reported with ``valid`` False
            and an ``error`` message instead of aborting the whole run.
            On failure, the common failure payload.
        """
        method = "DoWhy Graph Validator"
        try:
            # Same precondition and message as graphical_causal_model_builder.
            if not graph_edges:
                return _failure(method, "需要至少一条边")

            endpoints = []
            for edge in graph_edges:
                endpoints.append(edge["from"])
                endpoints.append(edge["to"])
            # First-seen order keeps the confounder order deterministic.
            variables = _dedupe(endpoints)

            data = load_and_validate_data(data_path, variables)

            validation_results = {}
            for edge in graph_edges:
                cause = edge["from"]
                effect = edge["to"]
                key = f"{cause}->{effect}"
                try:
                    confounders = [v for v in variables if v not in (cause, effect)]
                    model = dowhy.CausalModel(
                        data=data,
                        treatment=cause,
                        outcome=effect,
                        common_causes=confounders or None,
                    )
                    identified_estimand = model.identify_effect(
                        proceed_when_unidentifiable=True
                    )
                    estimate = model.estimate_effect(
                        identified_estimand,
                        method_name="backdoor.linear_regression",
                    )
                    effect_size = float(estimate.value)
                    validation_results[key] = {
                        "valid": True,
                        "effect_size": effect_size,
                        "significant": abs(effect_size) > _EFFECT_SIZE_THRESHOLD,
                    }
                except Exception as exc:
                    # Best effort per edge: record why this edge failed and
                    # continue with the remaining edges.
                    validation_results[key] = {
                        "valid": False,
                        "effect_size": 0.0,
                        "significant": False,
                        "error": str(exc),
                    }

            return {
                "success": True,
                "method": "DoWhy Graph Validation",
                "graph_edges": graph_edges,
                "validation_results": validation_results,
                "sample_size": int(len(data)),
            }
        except Exception as e:
            return _failure(method, str(e))