find_protein_structures_tool

Search, retrieve, and validate protein structures from the Protein Data Bank (PDB) using keywords, categories, or specific PDB IDs for bioinformatics research.

Instructions

蛋白质结构发现工具 - 搜索、示例、验证的统一入口

这是蛋白质研究的起点，帮助你发现和验证PDB结构。

Args:

- keywords: 搜索关键词 (如: "hemoglobin", "kinase", "DNA")
- category: 预设类别 ("癌症靶点", "病毒蛋白", "酶类", "抗体", "膜蛋白", "核糖体")
- pdb_id: 直接验证或查看特定PDB ID (如: "1A3N")
- max_results: 搜索结果最大数量 (默认10，最大100)
- ctx: FastMCP Context，用于进度反馈和日志记录

Returns: 包含PDB结构列表、验证结果、示例数据的综合响应

Examples:

# 搜索血红蛋白相关结构
find_protein_structures(keywords="hemoglobin")

# 获取癌症靶点示例
find_protein_structures(category="癌症靶点")

# 验证PDB ID
find_protein_structures(pdb_id="1A3N")

Input Schema

Table | JSON Schema

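Name	Required	Description	Default
`keywords`	No	Search keywords (e.g. "hemoglobin", "kinase", "DNA")	None
`category`	No	Preset category ("癌症靶点", "病毒蛋白", "酶类", "抗体", "膜蛋白", "核糖体")	None
`pdb_id`	No	PDB ID to validate or look up directly (e.g. "1A3N")	None
`max_results`	No	Maximum number of search results (clamped to the range 1–100)	10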

Implementation Reference

src/protein_mcp/tools.py:603-636 (handler)

The primary handler function for the 'find_protein_structures_tool' MCP tool. It defines the input schema via its type hints and docstring, is registered with the @mcp.tool() decorator (applied inside register_all_tools), and delegates execution to the core helper function find_protein_structures.

async def find_protein_structures_tool(
    keywords: str | None = None,
    category: str | None = None,
    pdb_id: str | None = None,
    max_results: int = 10,
    ctx: Context | None = None,
) -> dict[str, Any]:
    """
    Protein structure discovery tool: one entry point for search, examples and validation.

    This handler performs no work of its own; it forwards every argument
    unchanged to ``find_protein_structures`` and returns its result.

    Args:
        keywords: Search keywords (e.g. "hemoglobin", "kinase", "DNA").
        category: Preset category ("癌症靶点", "病毒蛋白", "酶类", "抗体", "膜蛋白", "核糖体").
        pdb_id: A specific PDB ID to validate or look up (e.g. "1A3N").
        max_results: Maximum number of search results (default 10, at most 100).
        ctx: FastMCP Context used for progress feedback and logging.

    Returns:
        A combined response containing the PDB structure list, the validation
        result, or example data, depending on which argument was supplied.

    Examples:
        # Search for hemoglobin-related structures
        find_protein_structures(keywords="hemoglobin")

        # Get the cancer-target examples
        find_protein_structures(category="癌症靶点")

        # Validate a PDB ID
        find_protein_structures(pdb_id="1A3N")
    """
    response = await find_protein_structures(
        keywords=keywords,
        category=category,
        pdb_id=pdb_id,
        max_results=max_results,
        ctx=ctx,
    )
    return response

src/protein_mcp/tools.py:90-269 (helper)

Core helper function implementing the tool logic: PDB ID validation, category-based examples, RCSB search, and default popular examples. Handles all execution paths and error formatting.

async def find_protein_structures(
    keywords: str | None = None,
    category: str | None = None,
    pdb_id: str | None = None,
    max_results: int = 10,
    ctx: Context | None = None,
) -> dict[str, Any]:
    """
    Protein structure discovery: one entry point for validation, examples and search.

    Exactly one mode runs per call, selected by the first truthy argument in
    the order ``pdb_id`` -> ``category`` -> ``keywords``. When none of them is
    set, a fixed collection of popular examples plus usage tips is returned.

    Args:
        keywords: Search keywords (e.g. "hemoglobin", "kinase", "DNA").
        category: Preset category ("癌症靶点", "病毒蛋白", "酶类", "抗体", "膜蛋白", "核糖体").
        pdb_id: A specific PDB ID to validate or look up (e.g. "1A3N").
        max_results: Maximum number of search results; clamped to 1..100.
        ctx: Optional FastMCP Context used for progress feedback and logging.

    Returns:
        The value built by ``format_success_response`` or
        ``format_error_response`` for the selected mode. Any exception raised
        while processing is reported as an error response, not propagated.
    """

    async def _info(message: str) -> None:
        # Progress messages are only sent when a context was supplied.
        if ctx:
            await ctx.info(message)

    async def _error(message: str) -> None:
        # Same guard as _info, but on the context's error channel.
        if ctx:
            await ctx.error(message)

    try:
        await _info("🔍 开始蛋白质结构搜索...")
        # Clamp the requested result count into the range [1, 100].
        max_results = min(max(max_results, 1), 100)

        # Mode 1: validate a specific PDB ID.
        if pdb_id:
            await _info(f"📋 验证 PDB ID: {pdb_id}")

            if not validate_pdb_id(pdb_id):
                await _error(f"❌ 无效的PDB ID格式: {pdb_id}")
                return format_error_response(
                    "无效的PDB ID格式",
                    f"期望格式: 4位字符 (首位数字，后三位可数字可字母)，实际: {pdb_id}",
                )

            if not _validate_pdb_exists(pdb_id):
                await _error(f"❌ PDB ID {pdb_id} 不存在")
                return format_error_response(
                    "PDB ID不存在", f"PDB ID {pdb_id} 在RCSB数据库中未找到"
                )

            await _info("🔍 获取蛋白质详细信息...")
            entry = _get_entry_info(pdb_id)
            # Fall back to the placeholder title when the entry has no "struct" section.
            title = "未知标题"
            if entry and "struct" in entry:
                title = entry["struct"].get("title", "未知标题")

            await _info(f"✅ PDB ID {pdb_id} 验证成功")
            validation_payload = {
                "mode": "validation",
                "pdb_id": pdb_id,
                "exists": True,
                "title": title,
                "validation_result": "PDB ID有效且存在于RCSB数据库中",
            }
            return format_success_response(
                validation_payload,
                f"PDB ID {pdb_id} 验证成功，结构存在于RCSB数据库",
            )

        # Mode 2: curated examples for a preset category.
        if category:
            await _info(f"📚 获取 {category} 类别的蛋白质示例...")

            # Built-in table: category -> protein name -> example PDB IDs.
            category_examples = {
                "癌症靶点": {
                    "EGFR": ["1M14", "4MNF"],
                    "KRAS": ["4OBE", "6OIM"],
                    "p53": ["1TUP", "2OCJ"],
                    "BCR-ABL": ["1IEP", "1XBB"],
                },
                "病毒蛋白": {
                    "SARS-CoV-2刺突蛋白": ["6VSB", "6Y2E"],
                    "HIV蛋白酶": ["1HHP", "3PHV"],
                    "流感病毒血凝素": ["1RU7", "4WE8"],
                },
                "酶类": {
                    "溶菌酶": ["1HEW", "1LZ1"],
                    "DNA聚合酶": ["1KLN", "3K0M"],
                    "激酶": ["1ATP", "2SRC"],
                },
                "抗体": {
                    "单克隆抗体": ["1HZH", "1IGT"],
                    "抗体片段": ["1FVC", "3HFM"],
                },
                "膜蛋白": {
                    "G蛋白偶联受体": ["1U19", "3DQB"],
                    "离子通道": ["1BL8", "2A9W"],
                },
                "核糖体": {
                    "核糖体亚基": ["1FJG", "2VQE"],
                    "核糖体相关因子": ["1EF1", "2AW4"],
                },
            }

            if category not in category_examples:
                # Iterating the dict yields its keys in insertion order.
                supported = ", ".join(category_examples)
                return format_error_response("不支持的类别", f"可用类别: {supported}")

            examples = category_examples[category]
            example_count = len(examples)
            await _info(f"✅ 找到 {example_count} 个 {category} 示例")
            category_payload = {
                "mode": "category_examples",
                "category": category,
                "examples": examples,
                "total_proteins": example_count,
            }
            return format_success_response(
                category_payload,
                f"获取 {category} 类别的蛋白质结构示例",
            )

        # Mode 3: keyword search.
        if keywords:
            await _info(f"🔍 搜索关键词: {keywords}")

            results = _search_rcsb_structures(keywords, max_results)
            if not results:
                await _error(f"❌ 使用关键词 '{keywords}' 未找到匹配结果")
                return format_error_response(
                    "未找到匹配结果", f"使用关键词 '{keywords}' 未找到匹配的PDB结构"
                )

            hit_count = len(results)
            await _info(f"✅ 找到 {hit_count} 个匹配的结构")
            search_payload = {
                "mode": "search",
                "keywords": keywords,
                "results": results,
                "total_found": hit_count,
            }
            return format_success_response(
                search_payload,
                f"找到 {hit_count} 个匹配的结构",
            )

        # Mode 4 (default): no selector given, return popular examples and tips.
        await _info("📋 提供热门蛋白质结构示例...")

        popular_examples = {
            "经典蛋白质": {
                "血红蛋白": ["1A3N", "2HHB"],
                "胰岛素": ["1ZNJ", "4INS"],
                "溶菌酶": ["1HEW", "1LZ1"],
                "DNA聚合酶": ["1KLN", "3K0M"],
            },
            "热门研究领域": {
                "癌症研究": ["1M14", "4MNF", "1TUP"],
                "病毒研究": ["6VSB", "6Y2E", "1HHP"],
                "神经科学": ["2BEG", "5O3L", "1XQ8"],
            },
        }
        usage_tips = {
            "search": "使用 keywords 参数搜索特定蛋白质",
            "category": "使用 category 参数获取分类示例",
            "validate": "使用 pdb_id 参数验证特定结构",
        }

        await _info("✅ 返回热门示例和使用指南")
        default_payload = {
            "mode": "default_examples",
            "popular_examples": popular_examples,
            "total_categories": len(popular_examples),
            "usage_tips": usage_tips,
        }
        return format_success_response(
            default_payload,
            "提供热门蛋白质结构示例和使用指南",
        )

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        await _error(f"❌ 搜索失败: {str(e)}")
        return format_error_response("工具执行错误", f"find_protein_structures 执行失败: {str(e)}")

src/protein_mcp/tools.py:588-715 (registration)

The register_all_tools function defines and registers the find_protein_structures_tool (and others) using the @mcp.tool() decorator on the handler function.

def register_all_tools(mcp) -> None:
    """
    Register the three consolidated core tools on a FastMCP server.

    Tools registered:
    1. find_protein_structures_tool - protein structure discovery
    2. get_protein_data_tool - combined protein data retrieval
    3. download_structure_tool - structure file download

    Each tool is a thin async wrapper that delegates to the same-named helper
    function without the ``_tool`` suffix.

    Args:
        mcp: FastMCP server instance whose ``tool()`` decorator is used.
    """

    # Tool 1: structure discovery - combines search, examples and validation.
    @mcp.tool()
    async def find_protein_structures_tool(
        keywords: str | None = None,
        category: str | None = None,
        pdb_id: str | None = None,
        max_results: int = 10,
        ctx: Context | None = None,
    ) -> dict[str, Any]:
        """
        Protein structure discovery tool: one entry point for search, examples and validation.

        This is the starting point for protein research; it helps you find
        and validate PDB structures.

        Args:
            keywords: Search keywords (e.g. "hemoglobin", "kinase", "DNA").
            category: Preset category ("癌症靶点", "病毒蛋白", "酶类", "抗体", "膜蛋白", "核糖体").
            pdb_id: A specific PDB ID to validate or look up (e.g. "1A3N").
            max_results: Maximum number of search results (default 10, at most 100).
            ctx: FastMCP Context used for progress feedback and logging.

        Returns:
            A combined response containing the PDB structure list, the
            validation result, or example data.

        Examples:
            # Search for hemoglobin-related structures
            find_protein_structures(keywords="hemoglobin")

            # Get the cancer-target examples
            find_protein_structures(category="癌症靶点")

            # Validate a PDB ID
            find_protein_structures(pdb_id="1A3N")
        """
        response = await find_protein_structures(
            keywords=keywords,
            category=category,
            pdb_id=pdb_id,
            max_results=max_results,
            ctx=ctx,
        )
        return response

    # Tool 2: combined protein data - fetch all requested information in one call.
    @mcp.tool()
    async def get_protein_data_tool(
        pdb_id: str,
        data_types: list[str] | None = None,
        chain_id: str | None = None,
        ctx: Context | None = None,
    ) -> dict[str, Any]:
        """
        Combined protein data tool: fetch a complete protein information package.

        This tool is the core of protein data retrieval; it gets everything
        you need in a single call.

        Args:
            pdb_id: PDB ID (e.g. "5G53").
            data_types: List of data types to fetch. When omitted, the list
                ["basic", "sequence", "structure"] is used.
                - "basic": basic information (title, method, resolution, ...)
                - "sequence": amino-acid sequence information
                - "structure": secondary-structure analysis
                - "all": fetch all data
            chain_id: Specific chain ID (e.g. "A", optional).
            ctx: FastMCP Context used for progress feedback and logging.

        Returns:
            The complete protein data package containing every requested data type.

        Examples:
            # Fetch all data
            get_protein_data("5G53", ["all"])

            # Fetch only basic information and the sequence
            get_protein_data("1A3N", ["basic", "sequence"])

            # Fetch data for a specific chain
            get_protein_data("2HHB", ["all"], "A")
        """
        # No explicit selection: fall back to the three standard data types.
        requested_types = (
            ["basic", "sequence", "structure"] if data_types is None else data_types
        )
        return await get_protein_data(pdb_id, requested_types, chain_id, ctx)

    # Tool 3: structure files - download and manage protein structure files.
    @mcp.tool()
    async def download_structure_tool(
        pdb_id: str,
        file_format: str = "pdb",
        save_local: bool = False,
        ctx: Context | None = None,
    ) -> dict[str, Any]:
        """
        Structure file tool: download and manage protein structure files.

        This tool handles all file-related operations, from downloading to
        format descriptions.

        Args:
            pdb_id: PDB ID (e.g. "5G53").
            file_format: File format.
                - "pdb": standard PDB format (recommended, human readable)
                - "mmcif": macromolecular Crystallographic Information File (modern standard)
                - "cif": Crystallographic Information File format
                - "mmtf": Macromolecular Transmission Format (binary, fast)
            save_local: Whether to save to a local file (default False returns the content).
            ctx: FastMCP Context used for progress feedback and logging.

        Returns:
            The file content or download information, plus format notes and a usage guide.

        Examples:
            # Get the PDB file content
            download_structure("1A3N")

            # Download in mmCIF format and save locally
            download_structure("2HHB", "mmcif", True)

            # Get the fast MMTF format
            download_structure("6VSB", "mmtf")
        """
        outcome = await download_structure(pdb_id, file_format, save_local, ctx)
        return outcome

src/protein_mcp/server.py:17-20 (registration)
Invocation of register_all_tools(mcp) in the create_server function, which performs the actual tool registrations.
```
# 注册所有工具
register_all_tools(mcp)

return mcp
```

Protein MCP Server