# debug_pubchem.py (3.45 kB)
#!/usr/bin/env python3
"""
调试PubChem API数据结构
"""
import asyncio
import aiohttp
import json
def _info_text(info):
    """Return the display text of one PUG View ``Information`` item.

    The legacy ``StringValue`` key is honoured first so older payloads print
    exactly as before. Otherwise the text is taken from the ``Value`` object
    (``StringWithMarkup`` strings, or ``Number`` with an optional ``Unit``),
    which is where the PUG View documentation places values.
    Returns an empty string when no textual value is found.
    """
    if "StringValue" in info:
        return str(info["StringValue"])

    value = info.get("Value")
    if not isinstance(value, dict):
        return ""

    markup = value.get("StringWithMarkup")
    if markup:
        return "; ".join(
            str(part.get("String", ""))
            for part in markup
            if isinstance(part, dict)
        )

    numbers = value.get("Number")
    if numbers:
        joined = ", ".join(str(number) for number in numbers)
        unit = value.get("Unit")
        return f"{joined} {unit}" if unit else joined

    return ""


def _preview(text, limit=50):
    """Return *text* cut to *limit* characters, adding ``...`` only if cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _print_info_items(container, limit):
    """Print the count and the first *limit* ``Information`` items of a section."""
    info_items = container.get("Information", [])
    if not info_items:
        return
    print(f" 信息项数量: {len(info_items)}")
    for index, info in enumerate(info_items[:limit], start=1):
        name = info.get("Name", "Unknown")
        print(f" {index}. {name}: {_preview(_info_text(info))}")


async def debug_pubchem_data(cid: int = 11057) -> None:
    """Fetch a compound's PUG View record and print an outline of its structure.

    Prints the top-level ``Record`` keys, every top-level section heading, up
    to three sub-sections per section (with up to two information items each)
    or, for sections without sub-sections, up to three information items.
    On HTTP 200 the full JSON payload is also written to
    ``debug_cid_<cid>.json`` in the current working directory; on any other
    status the status code and response body are printed instead.

    Args:
        cid: PubChem compound identifier. Defaults to 11057, which the
            original script labels as Crystal Violet.
    """
    print("🔍 调试PubChem API数据结构")
    print("=" * 50)

    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cid}/JSON"

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # Guard clause: report the failure and stop before any parsing.
            if response.status != 200:
                print(f"❌ API请求失败: {response.status}")
                text = await response.text()
                print(f"错误信息: {text}")
                return

            data = await response.json()
            print(f"🔬 化合物CID: {cid}")
            print("📊 数据结构分析:")

            # Outline the Record object.
            record = data.get("Record", {})
            print(f"\n📋 Record键: {list(record.keys())}")

            # Outline the top-level sections.
            sections = record.get("Section", [])
            print(f"\n📑 Section数量: {len(sections)}")

            for number, section in enumerate(sections, start=1):
                heading = section.get("TOCHeading", "Unknown")
                print(f"\n{number}. Section: {heading}")

                sub_sections = section.get("Section", [])
                if not sub_sections:
                    # No nesting: the information items hang off the section itself.
                    _print_info_items(section, limit=3)
                    continue

                print(f" 子Section数量: {len(sub_sections)}")
                # Only the first three sub-sections are shown to keep output short.
                for sub_number, sub_section in enumerate(sub_sections[:3], start=1):
                    sub_heading = sub_section.get("TOCHeading", "Unknown")
                    print(f" {sub_number}. 子Section: {sub_heading}")
                    _print_info_items(sub_section, limit=2)

            # Persist the complete payload for offline inspection.
            with open(f"debug_cid_{cid}.json", "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
            print(f"\n💾 完整数据已保存到 debug_cid_{cid}.json")
if __name__ == "__main__":
    # Script entry point: build the debugging coroutine and run it to
    # completion on a fresh event loop.
    entry_coroutine = debug_pubchem_data()
    asyncio.run(entry_coroutine)