Skip to main content
Glama

PubChem Chemical Safety MCP Server

by liueic
analyze_toxicity_structure.py (5.58 kB)
"""Inspect the detailed structure of PubChem toxicity data.

Loads a raw PubChem JSON record (a ``Record`` -> ``Section`` tree), prints a
summary of every section whose ``TOCHeading`` mentions "Toxicity", and saves
sample sections as standalone JSON files for closer inspection.
"""

import json
from pathlib import Path

# Ethanol record; the original script notes it is the most detailed sample.
DEFAULT_JSON_FILE = "pubchem_raw_702_Ethanol.json"


def _clip(value, limit):
    """Return *value* as text truncated to at most *limit* characters.

    Values that are not strings (``None``, numbers, ...) are converted with
    ``str`` first so that truncation never raises ``TypeError``.
    """
    text = value if isinstance(value, str) else str(value)
    return text[:limit]


def _dump_json(obj, path):
    """Write *obj* to *path* as indented, non-ASCII-escaped UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as out:
        json.dump(obj, out, ensure_ascii=False, indent=2)


def _print_information_preview(infos, max_items=5):
    """Print the first *max_items* ``Information`` entries of a section."""
    print(f"\n直接包含 {len(infos)} 条Information:")
    for index, info in enumerate(infos[:max_items], start=1):
        print(f"\n [{index}] Name: {info.get('Name', 'N/A')}")
        print(f" Description: {_clip(info.get('Description', 'N/A'), 100)}")
        print(f" StringValue: {_clip(info.get('StringValue', 'N/A'), 100)}")

        # Inspect the Value field and, when present, its first markup string.
        if "Value" in info:
            value = info["Value"]
            print(f" Value类型: {type(value)}")
            if isinstance(value, dict):
                print(f" Value keys: {list(value.keys())}")
                markup = value.get("StringWithMarkup")
                if isinstance(markup, list) and markup:
                    print(" 第一个StringWithMarkup内容:")
                    first_markup = markup[0]
                    if isinstance(first_markup, dict):
                        print(f" String: {_clip(first_markup.get('String', 'N/A'), 100)}")

        if "ReferenceNumber" in info:
            print(f" ReferenceNumber: {info['ReferenceNumber']}")


def _print_subsection_overview(subsections):
    """Print one line per direct subsection with its entry counts."""
    print(f"\n包含 {len(subsections)} 个子Section:")
    for subsection in subsections:
        sub_heading = subsection.get("TOCHeading", "")
        info_count = len(subsection.get("Information", []))
        subsub_count = len(subsection.get("Section", []))
        print(f" - {sub_heading} (Info: {info_count}, SubSections: {subsub_count})")


def find_toxicity_section(sections, depth=0):
    """Recursively print every section whose heading mentions "Toxicity".

    For each matching section a banner is printed, followed by a preview of
    its ``Information`` entries and an overview of its direct subsections.
    Children are always visited, so a matching section nested below a
    non-matching parent is still reported.

    Args:
        sections: List of section dicts. Each may carry ``TOCHeading``,
            ``Information`` and nested ``Section`` entries. ``None`` or an
            empty list produces no output.
        depth: Current recursion level. Kept for backward compatibility; it
            is only passed along and does not influence the output.

    Returns:
        None. All results are written to standard output.
    """
    for section in sections or []:
        heading = section.get("TOCHeading") or ""

        if "Toxicity" in heading:
            print("\n" + "=" * 80)
            print(f"发现Toxicity Section: {heading}")
            print("=" * 80)

            if "Information" in section:
                _print_information_preview(section["Information"])
            if "Section" in section:
                _print_subsection_overview(section["Section"])

        # Descend regardless of whether this heading matched.
        children = section.get("Section")
        if children:
            find_toxicity_section(children, depth + 1)


def main(json_file=DEFAULT_JSON_FILE, output_dir="."):
    """Analyse a PubChem JSON record and save sample toxicity sections.

    Args:
        json_file: Path of the JSON record to analyse. Defaults to the
            Ethanol sample the script was written for.
        output_dir: Directory that receives ``toxicity_section_sample.json``
            and ``non_human_toxicity_values_sample.json``. Defaults to the
            current directory; it is created if missing.

    Raises:
        OSError: If *json_file* cannot be read or an output cannot be written.
        json.JSONDecodeError: If *json_file* is not valid JSON.
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    print(f"分析文件: {json_file}")

    with open(json_file, "r", encoding="utf-8") as source:
        data = json.load(source)

    record = data.get("Record", {})
    sections = record.get("Section", [])

    find_toxicity_section(sections)

    print("\n" + "=" * 80)
    print("现在提取一个具体的Toxicity Information示例:")
    print("=" * 80)

    # Locate the top-level "Toxicity" section and extract detailed samples.
    for section in sections:
        if section.get("TOCHeading") != "Toxicity":
            continue

        # Save the whole Toxicity section to its own file.
        toxicity_path = out_dir / "toxicity_section_sample.json"
        _dump_json(section, toxicity_path)
        print(f"完整的Toxicity Section已保存到: {toxicity_path}")

        for subsection in section.get("Section", []):
            sub_heading = subsection.get("TOCHeading", "")

            # Only the "Non-Human Toxicity Values" subsection is sampled.
            if "Non-Human Toxicity Values" not in sub_heading:
                continue

            print(f"\n找到 {sub_heading}:")
            values_path = out_dir / "non_human_toxicity_values_sample.json"
            _dump_json(subsection, values_path)
            print(f" 已保存到: {values_path}")

            if "Information" not in subsection:
                continue

            # Show the first three records of the subsection.
            infos = subsection["Information"]
            print(f"\n 包含 {len(infos)} 条记录,前3条:")
            for index, info in enumerate(infos[:3], start=1):
                print(f"\n 记录 {index}:")
                print(f" Name: {info.get('Name', 'N/A')}")
                print(f" Description: {_clip(info.get('Description', 'N/A'), 150)}")

                value = info.get("Value")
                if isinstance(value, dict) and "StringWithMarkup" in value:
                    markup_list = value["StringWithMarkup"]
                    if isinstance(markup_list, list):
                        # Only the first markup entry is displayed.
                        for markup in markup_list[:1]:
                            if isinstance(markup, dict):
                                print(f" Value String: {_clip(markup.get('String', 'N/A'), 200)}")


if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/liueic/PubChem-MCP-Server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.