genome-mcp

Overview Schema Related Servers Score Discussions

tools.py•23.7 KiB

#!/usr/bin/env python3
"""
MCP tools module.

Implements every MCP tool and resource exposed by the server, including the
main genomic data query and analysis tools.
"""

import asyncio
import json
from datetime import date
from typing import Any

from fastmcp import FastMCP

from .errors import ValidationError, format_simple_error
from .evolution_tools import analyze_gene_evolution as _analyze_gene_evolution_internal
from .evolution_tools import (
    build_phylogenetic_profile as _build_phylogenetic_profile_internal,
)
from .query_executor import QueryExecutor
from .query_parser import QueryParser
from .types import (
    AdvancedQueryResult,
    EvolutionResult,
    KEGGResult,
    PhylogeneticProfileResult,
    SearchResult,
    ToolResult,
)
from .validation import (
    validate_common_params,
    validate_gene_params,
    validate_kegg_params,
    validate_search_params,
)

# Module-wide query executor shared by every tool registered below.
_query_executor = QueryExecutor()

# Longest summary (in characters) kept by the "simple" NCBI format.
_MAX_SUMMARY_LENGTH = 200

# data_type values that override an "auto" query type in get_data.
_DATA_TYPE_OVERRIDES = frozenset({"protein", "gene_protein", "ortholog", "evolution"})


def _format_simple_result(result: ToolResult) -> ToolResult:
    """Reduce a raw executor result to its "simple" representation.

    Args:
        result: Result dictionary returned by the query executor.

    Returns:
        - the input unchanged when it carries an "error" key or when its
          shape is not recognised;
        - for batch results ("batch_size" and "results" present), only the
          entries without an "error" key plus a success count;
        - for "cache", "ncbi" and "integrated" sources, a trimmed dictionary
          holding the fields selected below.
    """
    if "error" in result:
        return result

    # Batch query: drop the failed entries and report how many succeeded.
    if "batch_size" in result and "results" in result:
        succeeded = {
            key: value
            for key, value in result["results"].items()
            if "error" not in value
        }
        return {
            "batch_size": result["batch_size"],
            "successful_count": len(succeeded),
            "results": succeeded,
        }

    source = result.get("source")

    if source == "cache":
        return {"gene_id": result["gene_id"], "data": result["data"]}

    if source == "ncbi":
        gene_data = result.get("data", {})
        summary = gene_data.get("summary", "")
        # Truncate long summaries so the simple format stays compact.
        if len(summary) > _MAX_SUMMARY_LENGTH:
            summary = summary[:_MAX_SUMMARY_LENGTH] + "..."
        return {
            "gene_id": result.get("gene_id"),
            "name": gene_data.get("name", ""),
            "description": gene_data.get("description", ""),
            "chromosome": gene_data.get("chromosome", ""),
            "summary": summary,
        }

    if source == "integrated":
        integration_info = result.get("integration_info", {})
        return {
            "gene_query": result.get("gene_query"),
            "gene_found": integration_info.get("gene_found", False),
            "protein_count": integration_info.get("protein_count", 0),
        }

    return result


def _apply_filters(query: str, filters: dict[str, Any] | None = None) -> str:
    """Append search-filter terms to a query string.

    Args:
        query: Base search query.
        filters: Optional mapping. Recognised keys are "species" and
            "gene_type"; any other key is ignored.

    Returns:
        The query with each generated term joined by " AND ", or the query
        unchanged when no term applies.
    """
    if not filters:
        return query

    terms: list[str] = []

    # Species filter. "human" adds no term, and a missing/empty species is
    # skipped so that a bare "[organism]" term is never emitted.
    species = filters.get("species")
    if species:
        species = str(species).lower()
        if species != "human":
            terms.append(f"{species}[organism]")

    # Gene type filter: only "protein_coding" is translated into a term.
    if filters.get("gene_type") == "protein_coding":
        terms.append("protein_coding[Properties]")

    if terms:
        return f"{query} AND {' AND '.join(terms)}"
    return query


def create_mcp_resources(mcp: FastMCP) -> None:
    """Create and register the MCP resources on ``mcp``.

    Args:
        mcp: FastMCP server instance that the resources are attached to.
    """

    @mcp.resource("genome://status/databases")
    async def database_status() -> str:
        """Return database status information.

        Takes no arguments.

        Returns:
            str: JSON string describing each backing database.
        """
        # NOTE(review): the statuses and "last_checked" dates are static
        # literals, not the outcome of a live health check.
        status = {
            "ncbi_gene": {
                "name": "NCBI Gene",
                "status": "operational",
                "description": "Comprehensive gene information database",
                "last_checked": "2025-10-28",
            },
            "uniprot": {
                "name": "UniProt",
                "status": "operational",
                "description": "Protein sequence and functional information",
                "last_checked": "2025-10-28",
            },
            "ensembl": {
                "name": "Ensembl",
                "status": "operational",
                "description": "Vertebrate genomics and homology data",
                "last_checked": "2025-10-28",
            },
            "kegg": {
                "name": "KEGG",
                "status": "operational",
                "description": "Pathway and metabolic network analysis",
                "last_checked": "2025-10-28",
            },
        }
        return json.dumps(status, indent=2, ensure_ascii=False)

    @mcp.resource("genome://help/id-formats")
    async def id_formats() -> str:
        """Return a description of the supported identifier formats.

        Takes no arguments.

        Returns:
            str: JSON string describing gene, protein and species identifiers.
        """
        formats = {
            "gene_identifiers": {
                "gene_symbol": {
                    "format": "TP53, BRCA1",
                    "description": "Standard gene symbols (case-sensitive)",
                    "examples": ["TP53", "BRCA1", "EGFR"],
                },
                "entrez_id": {
                    "format": "7157, 672",
                    "description": "NCBI Entrez Gene ID",
                    "examples": ["7157", "672", "1956"],
                },
                "ensembl_id": {
                    "format": "ENSG00000141510",
                    "description": "Ensembl Gene ID",
                    "examples": ["ENSG00000141510", "ENSG00000012048"],
                },
            },
            "protein_identifiers": {
                "uniprot_accession": {
                    "format": "P04637, P38398",
                    "description": "UniProt accession number",
                    "examples": ["P04637", "P38398", "P00533"],
                },
                "uniprot_id": {
                    "format": "P53_HUMAN, EGFR_HUMAN",
                    "description": "UniProt identifier",
                    "examples": ["P53_HUMAN", "EGFR_HUMAN"],
                },
            },
            "species_codes": {
                "common_names": [
                    "human",
                    "mouse",
                    "rat",
                    "zebrafish",
                    "fruitfly",
                    "worm",
                ],
                "taxid_codes": ["9606", "10090", "10116", "7955", "7227", "6239"],
                "kegg_codes": ["hsa", "mmu", "rno", "dre", "dme", "cel"],
            },
        }
        return json.dumps(formats, indent=2, ensure_ascii=False)

    @mcp.resource("genome://help/query-examples")
    async def query_examples() -> str:
        """Return example queries.

        Takes no arguments.

        Returns:
            str: JSON string with example queries grouped by category.
        """
        examples = {
            "basic_gene_queries": ["TP53", "BRCA1", "EGFR"],
            "protein_queries": ["P04637", "P38398", "TP53_HUMAN"],
            "functional_searches": [
                "tumor suppressor",
                "protein kinase",
                "DNA repair",
                "cell cycle",
            ],
            "genomic_regions": [
                "chr17:7565097-7590856",
                "chr13:32315082-32400266",
                "chrX:153694058-153697843",
            ],
            "batch_queries": ["TP53, BRCA1, BRCA2", "P04637, P38398, P00533"],
            "complex_queries": [
                "breast cancer genes on chromosome 17",
                "TP53 protein interactions",
                "DNA repair pathways",
            ],
        }
        return json.dumps(examples, indent=2, ensure_ascii=False)


def create_mcp_tools(mcp: FastMCP) -> None:
    """Create and register every MCP tool on ``mcp``.

    Args:
        mcp: FastMCP server instance that the tools are attached to.
    """

    @mcp.tool()
    async def get_data(
        query: str | list[str],
        query_type: str = "auto",
        data_type: str = "gene",
        format: str = "simple",
        species: str = "human",
        max_results: int = 20,
    ) -> ToolResult:
        """
        Smart data retrieval interface - one entry point for every query type.

        The query type is detected automatically:
        - "TP53" → gene information lookup
        - "P04637" → detailed protein information lookup
        - "cancer" → gene search
        - "protein kinase" → protein function search
        - "chr17:7565097-7590856" → region search
        - "TP53, BRCA1" → batch gene information
        - "breast cancer genes" → smart search
        - "TP53 homologs" → homologous gene lookup
        - "evolutionary conservation" → evolutionary analysis query

        Args:
            query: Query content (gene ID, protein ID, search term, region,
                list of IDs, or an evolution-related query)
            query_type: Query type
                (auto/info/search/region/protein/gene_protein/ortholog/evolution)
            data_type: Data type (gene/protein/gene_protein/ortholog/evolution)
            format: Return format (simple/detailed/raw)
            species: Species (default: human; supports 9606/human/mouse/rat, etc.)
            max_results: Maximum number of results (default: 20)

        Returns:
            Result dictionary containing gene and/or protein information

        Examples:
            # Gene information lookup
            get_data("TP53")
            get_data("TP53", format="detailed")

            # Batch query
            get_data(["TP53", "BRCA1", "BRCA2"])

            # Region search
            get_data("chr17:7565097-7590856")

            # Protein lookup
            get_data("P04637", data_type="protein")

            # Integrated gene-protein query
            get_data("TP53", data_type="gene_protein")

            # Protein function search
            get_data("tumor suppressor", data_type="protein")
        """
        try:
            # Validate the parameters shared by all query tools.
            validated_max_results, validated_species, validated_query_type = (
                validate_common_params(
                    max_results=max_results, species=species, query_type=query_type
                )
            )

            # An explicit data_type only takes effect when the query type is
            # still "auto"; data_type "gene" keeps automatic detection.
            if validated_query_type == "auto" and data_type in _DATA_TYPE_OVERRIDES:
                validated_query_type = data_type

            # Parse the query intent.
            parsed = QueryParser.parse(query, validated_query_type)

            # Fall back to the validated species when the parser set none.
            if "organism" not in parsed.params:
                parsed.params["organism"] = validated_species

            # Execute the query.
            result = await _query_executor.execute(
                parsed, max_results=validated_max_results
            )

            # Only "simple" post-processes; every other format value returns
            # the executor result untouched.
            if format == "simple":
                return _format_simple_result(result)
            return result

        except ValidationError as e:
            return format_simple_error(e, query=query, operation="get_data")
        except Exception as e:
            return format_simple_error(e, query=query, operation="get_data")

    @mcp.tool()
    async def advanced_query(
        queries: list[dict[str, Any]],
        strategy: str = "parallel",
        delay: float = 0.34,  # spacing chosen for the NCBI API rate limit
    ) -> AdvancedQueryResult:
        """
        Advanced batch query - supports different execution strategies.

        Args:
            queries: List of queries, each element shaped like
                {"query": str, "type": str}
            strategy: Execution strategy (parallel/sequential); any value
                other than "parallel" runs sequentially
            delay: Spacing between queries, in seconds. In parallel mode the
                query starts are staggered by this amount; in sequential
                mode it is the pause between consecutive queries.

        Returns:
            Batch result with the strategy, the total and successful query
            counts, and the per-query results keyed by list index

        Examples:
            advanced_query([
                {"query": "TP53", "type": "info"},
                {"query": "BRCA1", "type": "info"},
                {"query": "cancer", "type": "search"}
            ])
        """
        results: dict[int, Any] = {}

        async def execute_single_query(
            index: int, query_dict: dict[str, Any]
        ) -> None:
            """Run one query and store its result (or formatted error)."""
            try:
                parsed = QueryParser.parse_by_type(
                    query_dict["query"], query_dict.get("type", "auto")
                )
                # NOTE(review): the whole query dict, including its "query"
                # and "type" keys, is forwarded as keyword arguments —
                # confirm QueryExecutor.execute expects that.
                results[index] = await _query_executor.execute(parsed, **query_dict)
            except ValidationError as e:
                results[index] = format_simple_error(
                    e, query=query_dict.get("query", ""), operation="advanced_query"
                )
            except Exception as e:
                results[index] = format_simple_error(
                    e, query=query_dict.get("query", ""), operation="advanced_query"
                )

        async def execute_staggered(index: int, query_dict: dict[str, Any]) -> None:
            """Start one query ``index * delay`` seconds after the batch began."""
            # Sleeping *before* the request spaces out the request starts;
            # sleeping after it would let every request fire at once.
            await asyncio.sleep(index * delay)
            await execute_single_query(index, query_dict)

        if strategy == "parallel":
            # Concurrent execution with rate-limited start times.
            await asyncio.gather(
                *[execute_staggered(i, q) for i, q in enumerate(queries)]
            )
        else:
            # Sequential execution (suited to dependent queries).
            for i, query_dict in enumerate(queries):
                if i:
                    await asyncio.sleep(delay)  # respect the rate limit
                await execute_single_query(i, query_dict)

        return {
            "strategy": strategy,
            "total_queries": len(queries),
            "successful": len([r for r in results.values() if "error" not in r]),
            "results": results,
        }

    @mcp.tool()
    async def smart_search(
        description: str,
        context: str = "genomics",
        filters: dict[str, Any] | None = None,
        max_results: int = 20,
    ) -> SearchResult:
        """
        Smart semantic search - runs a query from a natural-language description.

        Examples of what is understood:
        - "breast cancer genes on chromosome 17" → breast cancer genes on
          chromosome 17
        - "TP53 protein interactions" → TP53 protein interactions
        - "tumor suppressor genes" → tumor suppressor genes
        - "genes related to DNA repair" → genes related to DNA repair

        Args:
            description: Natural-language description
            context: Search context (genomics/proteomics/pathway)
            filters: Filter conditions
            max_results: Maximum number of results

        Returns:
            Search result, extended with a "smart_search_info" entry

        Examples:
            smart_search("breast cancer genes on chromosome 17")
            smart_search("TP53 protein interactions", context="proteomics")
            smart_search("DNA repair genes", filters={"species": "human"})
        """
        try:
            # Validate the search parameters.
            validated_description, validated_context, validated_max_results = (
                validate_search_params(
                    description=description, context=context, max_results=max_results
                )
            )

            # Fold the filters into the query string.
            query = _apply_filters(validated_description, filters)

            # Pick the query type from the search context.
            if validated_context == "proteomics":
                query_type = "protein"
            elif validated_context == "pathway":
                query_type = "search"
            else:
                query_type = "auto"

            # Parse the query intent.
            parsed = QueryParser.parse(query, query_type)

            # Execute through the shared executor directly rather than by
            # calling another MCP tool.
            result = await _query_executor.execute(
                parsed, max_results=validated_max_results
            )

            # Attach information about how the request was interpreted.
            result["smart_search_info"] = {
                "description": validated_description,
                "context": validated_context,
                "parsed_query": query,
                "filters_applied": filters is not None,
            }

            return result

        except ValidationError as e:
            return format_simple_error(e, query=description, operation="smart_search")
        except Exception as e:
            return format_simple_error(e, query=description, operation="smart_search")

    @mcp.tool()
    async def analyze_gene_evolution(
        gene_symbol: str,
        target_species: list[str] | None = None,
        analysis_level: str = "Eukaryota",
        include_sequence_info: bool = True,
    ) -> EvolutionResult:
        """
        Gene evolution analysis tool - MCP wrapper.

        Args:
            gene_symbol: Gene symbol (e.g. TP53, BRCA1)
            target_species: Target species list
                (e.g. ["mouse", "rat", "zebrafish"])
            analysis_level: Analysis level (e.g. Eukaryota, Metazoa, Vertebrata)
            include_sequence_info: Whether to include sequence information

        Returns:
            Evolution analysis result

        Examples:
            # Analyse the evolution of TP53 in mammals
            analyze_gene_evolution("TP53", ["human", "mouse", "rat", "dog"])
        """
        try:
            # Validate the gene analysis parameters.
            (
                validated_gene_symbol,
                validated_target_species,
                validated_analysis_level,
            ) = validate_gene_params(
                gene_symbol=gene_symbol,
                target_species=target_species,
                analysis_level=analysis_level,
            )

            return await _analyze_gene_evolution_internal(
                validated_gene_symbol,
                validated_target_species,
                validated_analysis_level,
                include_sequence_info,
                _query_executor,
            )
        except ValidationError as e:
            return format_simple_error(
                e, query=gene_symbol, operation="analyze_gene_evolution"
            )
        except Exception as e:
            return format_simple_error(
                e, query=gene_symbol, operation="analyze_gene_evolution"
            )

    @mcp.tool()
    async def build_phylogenetic_profile(
        gene_symbols: list[str],
        species_set: list[str] | None = None,
        include_domain_info: bool = True,
    ) -> PhylogeneticProfileResult:
        """
        Phylogenetic profile construction tool - MCP wrapper.

        Args:
            gene_symbols: List of gene symbols
            species_set: Species set (defaults to common model organisms)
            include_domain_info: Whether to include domain information

        Returns:
            Phylogenetic profile data

        Examples:
            # Analyse the distribution of the p53 family in vertebrates
            build_phylogenetic_profile(
                ["TP53", "TP63", "TP73"], ["human", "mouse", "zebrafish"]
            )
        """
        # NOTE(review): unlike the sibling tools, the arguments are passed to
        # the internal implementation without a validation step here.
        try:
            return await _build_phylogenetic_profile_internal(
                gene_symbols, species_set, include_domain_info, _query_executor
            )
        except ValidationError as e:
            return format_simple_error(
                e, query=str(gene_symbols), operation="build_phylogenetic_profile"
            )
        except Exception as e:
            return format_simple_error(
                e, query=str(gene_symbols), operation="build_phylogenetic_profile"
            )

    @mcp.tool()
    async def kegg_pathway_enrichment(
        gene_list: list[str],
        organism: str = "hsa",
        pvalue_threshold: float = 0.05,
        min_gene_count: int = 2,
    ) -> KEGGResult:
        """
        KEGG pathway enrichment analysis tool - MVP version.

        Analyses how a gene list is enriched in KEGG pathways and identifies
        significantly associated biological pathways.

        Args:
            gene_list: Gene list (e.g. ["TP53", "BRCA1", "BRCA2"])
            organism: Organism code (default "hsa", human)
            pvalue_threshold: p-value significance threshold (default 0.05)
            min_gene_count: Minimum number of genes in a pathway (default 2)

        Returns:
            Pathway enrichment analysis result, containing:
            - the list of significantly enriched pathways
            - p-values and FDR-corrected statistical significance
            - fold enrichment and gene count information
            - analysis parameters and metadata

        Examples:
            # Pathway enrichment of cancer-related genes
            kegg_pathway_enrichment(["TP53", "BRCA1", "BRCA2", "EGFR"])

            # Pathway enrichment of mouse genes
            kegg_pathway_enrichment(["Trp53", "Brca1"], organism="mmu")

            # Use a stricter significance threshold
            kegg_pathway_enrichment(["TP53", "BRCA1"], pvalue_threshold=0.01)
        """
        try:
            # Validate the KEGG analysis parameters.
            (
                validated_gene_list,
                validated_organism,
                validated_pvalue_threshold,
                validated_min_gene_count,
            ) = validate_kegg_params(
                gene_list=gene_list,
                organism=organism,
                pvalue_threshold=pvalue_threshold,
                min_gene_count=min_gene_count,
            )

            # Parse as a pathway enrichment query.
            parsed = QueryParser.parse(
                validated_gene_list, query_type="pathway_enrichment"
            )

            # Overwrite the parsed parameters with the validated values.
            parsed.params.update(
                {
                    "gene_list": validated_gene_list,
                    "organism": validated_organism,
                    "pvalue_threshold": validated_pvalue_threshold,
                    "min_gene_count": validated_min_gene_count,
                }
            )

            # Execute the query.
            result = await _query_executor.execute(parsed)

            if "result" in result:
                enrichment_data = result["result"]

                # Record what was analysed and when. The date is taken at
                # call time so it reflects the actual analysis run.
                enrichment_data["query_info"] = {
                    "gene_list": validated_gene_list,
                    "analysis_date": date.today().isoformat(),
                    "organism": validated_organism,
                    "method": "KEGG Pathway Enrichment",
                    "parameters": {
                        "pvalue_threshold": validated_pvalue_threshold,
                        "min_gene_count": validated_min_gene_count,
                    },
                }

                return enrichment_data

            if "error" in result:
                return {
                    "error": result["error"],
                    "query_genes": gene_list,
                    "organism": organism,
                    "suggestions": [
                        "检查基因ID格式是否正确",
                        "确认生物体代码是否支持",
                        "验证网络连接是否正常",
                    ],
                }

            # Neither a result nor an error was reported by the executor.
            return {
                "error": "Unknown error occurred during pathway enrichment analysis",
                "query_genes": gene_list,
                "organism": organism,
            }

        except ValidationError as e:
            return format_simple_error(
                e, query=str(gene_list), operation="kegg_pathway_enrichment"
            )
        except Exception as e:
            return format_simple_error(
                e, query=str(gene_list), operation="kegg_pathway_enrichment"
            )

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gqy20/genome-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

tools.py•23.7 KiB