"""
纯 DoWhy 因果发现工具 - 100% 使用 DoWhy 原生方法,不重复造轮子
"""
import logging
from typing import Any, Dict, List
import pandas as pd
import numpy as np
import dowhy
from mcp.server.fastmcp import FastMCP
from ..utils.data_processor import load_and_validate_data
logger = logging.getLogger("dowhy-mcp-server.discovery")
# Short, user-friendly score names accepted by ``ges_algorithm`` mapped to the
# ``score_func`` identifiers that causal-learn's ``ges`` understands.
_GES_SCORE_ALIASES = {
    "bic": "local_score_BIC",
    "bic_from_cov": "local_score_BIC_from_cov",
    "bdeu": "local_score_BDeu",
    "cv_general": "local_score_CV_general",
    "marginal_general": "local_score_marginal_general",
    "cv_multi": "local_score_CV_multi",
    "marginal_multi": "local_score_marginal_multi",
}

# Accepted ``method`` values for ``fcm_discovery`` mapped to the estimator
# class name inside ``causallearn.search.FCMBased.lingam``.
_LINGAM_ESTIMATORS = {
    "lingam": "ICALiNGAM",
    "ica": "ICALiNGAM",
    "ica_lingam": "ICALiNGAM",
    "icalingam": "ICALiNGAM",
    "direct": "DirectLiNGAM",
    "direct_lingam": "DirectLiNGAM",
    "directlingam": "DirectLiNGAM",
}


def _edges_from_endpoint_matrix(
    graph: np.ndarray,
    variables: List[str],
) -> List[Dict[str, str]]:
    """Convert a causal-learn endpoint matrix into a list of edge records.

    causal-learn graphs store endpoint marks rather than a plain adjacency
    matrix: ``graph[i, j] == -1 and graph[j, i] == 1`` encodes ``i -> j``,
    ``-1`` on both sides encodes an undirected edge ``i - j`` and ``1`` on
    both sides encodes a bidirected edge ``i <-> j``.

    Args:
        graph: Square endpoint matrix indexed in the same order as
            ``variables``.
        variables: Column names corresponding to the matrix rows/columns.

    Returns:
        One dict per edge with the keys ``"from"``, ``"to"`` and ``"type"``
        (``"directed"``, ``"undirected"`` or ``"bidirected"``). For the
        symmetric edge types the lower-indexed variable is listed as
        ``"from"``. Any other mark combination is ignored.
    """
    edges: List[Dict[str, str]] = []
    count = len(variables)
    # Visit each unordered pair once; both marks of the pair are needed to
    # decide the edge type.
    for i in range(count):
        for j in range(i + 1, count):
            mark_ij = graph[i, j]
            mark_ji = graph[j, i]
            if mark_ij == -1 and mark_ji == 1:
                edges.append(
                    {"from": variables[i], "to": variables[j], "type": "directed"}
                )
            elif mark_ij == 1 and mark_ji == -1:
                edges.append(
                    {"from": variables[j], "to": variables[i], "type": "directed"}
                )
            elif mark_ij == -1 and mark_ji == -1:
                edges.append(
                    {"from": variables[i], "to": variables[j], "type": "undirected"}
                )
            elif mark_ij == 1 and mark_ji == 1:
                edges.append(
                    {"from": variables[i], "to": variables[j], "type": "bidirected"}
                )
    return edges


def register_discovery_tools(server: FastMCP) -> None:
    """Register the causal discovery tools on ``server``.

    Three tools are registered: ``pc_algorithm``, ``ges_algorithm`` and
    ``fcm_discovery``. Each one loads the requested columns through
    ``load_and_validate_data``, runs the matching causal-learn algorithm and
    returns a plain dict. Failures are reported as ``{"success": False, ...}``
    instead of being raised.

    Args:
        server: The FastMCP server the tools are attached to.
    """

    @server.tool()
    def pc_algorithm(
        data_path: str,
        variables: List[str],
        significance_level: float = 0.05
    ) -> Dict[str, Any]:
        """Run the causal-learn PC algorithm on the selected columns.

        Args:
            data_path: Location of the dataset, passed to
                ``load_and_validate_data``.
            variables: Column names to include in the discovery.
            significance_level: Alpha used by the Fisher-Z conditional
                independence test.

        Returns:
            On success a dict with ``edges`` (each carrying ``from``, ``to``
            and ``type``), the raw endpoint ``adjacency_matrix``, the
            ``significance_level`` and ``sample_size``. On failure a dict
            with ``success`` set to ``False`` and an ``error`` message.
        """
        try:
            data = load_and_validate_data(data_path, variables)

            # Guard only the import so that an ImportError raised while the
            # algorithm is running is not misreported as a missing package.
            try:
                from causallearn.search.ConstraintBased.PC import pc
            except ImportError:
                return {
                    "success": False,
                    "error": "causal-learn 库未安装。PC算法需要causal-learn库。请安装: pip install causal-learn",
                    "method": "PC Algorithm",
                    "recommendation": "安装causal-learn库以使用DoWhy支持的因果发现算法"
                }

            data_matrix = data[variables].values
            # ``pc`` expects the *name* of the independence test and builds
            # the test object itself.
            cg = pc(data_matrix, alpha=significance_level, indep_test="fisherz")
            endpoint_matrix = cg.G.graph

            return {
                "success": True,
                "method": "PC Algorithm (causal-learn)",
                "variables": variables,
                "edges": _edges_from_endpoint_matrix(endpoint_matrix, variables),
                "adjacency_matrix": endpoint_matrix.tolist(),
                "significance_level": significance_level,
                "sample_size": len(data)
            }
        except Exception as e:
            # Tool boundary: log the traceback, report the failure as data.
            logger.exception("PC algorithm failed")
            return {
                "success": False,
                "error": str(e),
                "method": "PC Algorithm"
            }

    @server.tool()
    def ges_algorithm(
        data_path: str,
        variables: List[str],
        score_type: str = "bic"
    ) -> Dict[str, Any]:
        """Run the causal-learn GES algorithm on the selected columns.

        Args:
            data_path: Location of the dataset, passed to
                ``load_and_validate_data``.
            variables: Column names to include in the discovery.
            score_type: Score to optimise. Accepts a short alias (``bic``,
                ``bic_from_cov``, ``bdeu``, ``cv_general``,
                ``marginal_general``, ``cv_multi``, ``marginal_multi``,
                case-insensitive) or a causal-learn ``score_func`` name such
                as ``local_score_BIC``, which is passed through unchanged.

        Returns:
            On success a dict with ``edges`` (each carrying ``from``, ``to``
            and ``type``), the raw endpoint ``adjacency_matrix``, the
            requested ``score_type``, the resolved ``score_function``, the
            final ``score`` and ``sample_size``. On failure a dict with
            ``success`` set to ``False`` and an ``error`` message.
        """
        try:
            data = load_and_validate_data(data_path, variables)

            # Guard only the import so that an ImportError raised while the
            # algorithm is running is not misreported as a missing package.
            try:
                from causallearn.search.ScoreBased.GES import ges
            except ImportError:
                return {
                    "success": False,
                    "error": "causal-learn 库未安装。GES算法需要causal-learn库。请安装: pip install causal-learn",
                    "method": "GES Algorithm",
                    "recommendation": "安装causal-learn库以使用DoWhy支持的因果发现算法"
                }

            # Translate short aliases; anything else is handed to ``ges``
            # as-is so native score names keep working.
            score_func = _GES_SCORE_ALIASES.get(score_type.strip().lower(), score_type)

            data_matrix = data[variables].values
            record = ges(data_matrix, score_func=score_func)
            endpoint_matrix = record['G'].graph

            return {
                "success": True,
                "method": "GES Algorithm (causal-learn)",
                "variables": variables,
                "edges": _edges_from_endpoint_matrix(endpoint_matrix, variables),
                "adjacency_matrix": endpoint_matrix.tolist(),
                "score_type": score_type,
                "score_function": score_func,
                "score": float(record.get('score', 0)),
                "sample_size": len(data)
            }
        except Exception as e:
            # Tool boundary: log the traceback, report the failure as data.
            logger.exception("GES algorithm failed")
            return {
                "success": False,
                "error": str(e),
                "method": "GES Algorithm"
            }

    @server.tool()
    def fcm_discovery(
        data_path: str,
        variables: List[str],
        method: str = "lingam"
    ) -> Dict[str, Any]:
        """Discover a linear functional causal model with causal-learn LiNGAM.

        Args:
            data_path: Location of the dataset, passed to
                ``load_and_validate_data``.
            variables: Column names to include in the discovery.
            method: Estimator to use. ``lingam``, ``ica``, ``ica_lingam`` or
                ``icalingam`` select ICALiNGAM; ``direct``, ``direct_lingam``
                or ``directlingam`` select DirectLiNGAM. Matching is
                case-insensitive and treats ``-`` as ``_``.

        Returns:
            On success a dict with ``edges`` (each carrying ``from``, ``to``
            and ``coefficient``), the fitted ``adjacency_matrix``, the
            ``causal_order`` as variable names, the ``estimator`` class name
            and ``sample_size``. On failure, including an unsupported
            ``method``, a dict with ``success`` set to ``False`` and an
            ``error`` message.
        """
        try:
            method_key = method.strip().lower().replace("-", "_")
            estimator_name = _LINGAM_ESTIMATORS.get(method_key)
            if estimator_name is None:
                supported = ", ".join(sorted(_LINGAM_ESTIMATORS))
                return {
                    "success": False,
                    "error": f"不支持的 method: {method!r}。可选值: {supported}",
                    "method": "FCM Discovery"
                }

            data = load_and_validate_data(data_path, variables)

            # Guard only the import so that an ImportError raised while the
            # estimator is running is not misreported as a missing package.
            try:
                from causallearn.search.FCMBased import lingam
            except ImportError:
                return {
                    "success": False,
                    "error": "causal-learn 库未安装。FCM发现需要causal-learn库。请安装: pip install causal-learn",
                    "method": "FCM Discovery",
                    "recommendation": "安装causal-learn库以使用DoWhy支持的函数因果模型发现算法"
                }

            data_matrix = data[variables].values
            model = getattr(lingam, estimator_name)()
            model.fit(data_matrix)

            adjacency_matrix = model.adjacency_matrix_
            causal_order = model.causal_order_

            # LiNGAM fits x = Bx + e, so B[i, j] is the weight of x_j in the
            # equation for x_i: the column is the cause, the row the effect.
            edges = []
            for effect_idx, effect in enumerate(variables):
                for cause_idx, cause in enumerate(variables):
                    weight = adjacency_matrix[effect_idx, cause_idx]
                    if weight != 0:
                        edges.append({
                            "from": cause,
                            "to": effect,
                            "coefficient": float(weight)
                        })

            return {
                "success": True,
                "method": f"LiNGAM ({method})",
                "estimator": estimator_name,
                "variables": variables,
                "edges": edges,
                "adjacency_matrix": adjacency_matrix.tolist(),
                "causal_order": [variables[i] for i in causal_order],
                "sample_size": len(data)
            }
        except Exception as e:
            # Tool boundary: log the traceback, report the failure as data.
            logger.exception("FCM discovery failed")
            return {
                "success": False,
                "error": str(e),
                "method": "FCM Discovery"
            }