Europe PMC Literature Search MCP Server

test_six_tools_integration.py
#!/usr/bin/env python3
"""Integration tests for the 6-tool architecture.

Tests the complete end-to-end workflows.
"""

import asyncio
import json
import sys
import tempfile
from pathlib import Path
from unittest.mock import Mock
from unittest.mock import patch

# Add the src directory to the Python path
project_root = Path(__file__).parent.parent
src_path = project_root / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

import pytest  # noqa: E402

from article_mcp.cli import create_mcp_server  # noqa: E402
from tests.utils.test_helpers import TestTimer  # noqa: E402


class TestEndToEndWorkflow:
    """End-to-end workflow tests."""

    @pytest.mark.integration
    @pytest.mark.slow
    def test_complete_literature_search_workflow(self):
        """Test the complete literature search workflow."""
        with TestTimer() as timer:
            # 1. Create the MCP server
            with patch.multiple(
                "article_mcp.cli",
                create_europe_pmc_service=Mock(return_value=Mock()),
                create_pubmed_service=Mock(return_value=Mock()),
                CrossRefService=Mock(),
                OpenAlexService=Mock(),
                create_reference_service=Mock(return_value=Mock()),
                create_literature_relation_service=Mock(return_value=Mock()),
                create_arxiv_service=Mock(return_value=Mock()),
            ):
                mcp = create_mcp_server()

                # Verify the server was created
                assert mcp is not None

            # Verify creation time
            assert timer.stop() < 5.0
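
    # NOTE: apart from the server-creation test above, the workflow tests in
    # this class assert against hand-built fixture dictionaries rather than
    # live API responses; they validate the expected response shapes of the
    # six tools, not network behaviour.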

    @pytest.mark.integration
    def test_search_to_export_workflow(self):
        """Test the complete search-to-export workflow."""
        # 1. Simulate complete search results
        search_results = {
            "success": True,
            "keyword": "machine learning healthcare",
            "sources_used": ["europe_pmc", "pubmed"],
            "merged_results": [
                {
                    "title": "Machine Learning in Healthcare: A Systematic Review",
                    "authors": ["AI Researcher", "Healthcare Specialist"],
                    "doi": "10.1234/ml.healthcare.2023",
                    "journal": "Journal of Medical AI",
                    "publication_date": "2023-06-15",
                    "abstract": "This systematic review examines the applications of machine learning in healthcare...",
                    "pmid": "37891234",
                    "source": "europe_pmc",
                },
                {
                    "title": "Deep Learning for Medical Image Analysis",
                    "authors": ["Computer Vision Expert", "Radiologist"],
                    "doi": "10.5678/dl.medical.2023",
                    "journal": "Medical Imaging Today",
                    "publication_date": "2023-04-20",
                    "abstract": "Deep learning techniques have revolutionized medical image analysis...",
                    "pmid": "37654321",
                    "source": "pubmed",
                },
            ],
            "total_count": 2,
            "search_time": 1.23,
        }

        # 2. Fetch article details (using the first result)
        first_article = search_results["merged_results"][0]
        article_details = {
            "success": True,
            "identifier": first_article["doi"],
            "id_type": "doi",
            "article": {
                **first_article,
                "full_text_available": True,
                "open_access": True,
                "quality_metrics": {
                    "impact_factor": 8.5,
                    "quartile": "Q1",
                    "jci": 2.1,
                },
            },
        }

        # 3. Fetch the references
        references = {
            "success": True,
            "identifier": first_article["doi"],
            "id_type": "doi",
            "references_by_source": {
                "europe_pmc": [
                    {
                        "title": "Foundations of Machine Learning",
                        "authors": ["ML Pioneer"],
                        "doi": "10.1111/foundations.2020",
                        "journal": "Machine Learning Journal",
                        "publication_date": "2020-01-01",
                    }
                ]
            },
            "merged_references": [
                {
                    "title": "Foundations of Machine Learning",
                    "authors": ["ML Pioneer"],
                    "doi": "10.1111/foundations.2020",
                    "journal": "Machine Learning Journal",
                    "publication_date": "2020-01-01",
                    "source": "europe_pmc",
                }
            ],
            "total_count": 15,
            "processing_time": 0.89,
        }

        # 4. Fetch journal quality information
        journal_quality = {
            "success": True,
            "journal_name": first_article["journal"],
            "quality_metrics": {
                "impact_factor": 8.5,
                "quartile": "Q1",
                "jci": 2.1,
                "分区": "中科院一区",  # CAS journal tier 1
                "citescore": 12.3,
            },
            "ranking_info": {
                "field": "Medicine",
                "rank": 25,
                "total_journals": 150,
                "percentile": 83.3,
            },
        }

        # 5. Export the results
        with tempfile.TemporaryDirectory() as temp_dir:
            export_path = Path(temp_dir) / "literature_search_results.json"

            # Simulate the export functionality
            export_data = {
                "search_results": search_results,
                "article_details": article_details,
                "references": references,
                "journal_quality": journal_quality,
                "export_metadata": {
                    "export_time": "2023-12-07 10:30:00",
                    "total_records": len(search_results["merged_results"]),
                    "workflow": "search_to_export",
                },
            }

            # Write to file
            with open(export_path, "w", encoding="utf-8") as f:
                json.dump(export_data, f, ensure_ascii=False, indent=2)

            # Verify the export result
            assert export_path.exists()

            with open(export_path, encoding="utf-8") as f:
                loaded_data = json.load(f)

            assert "search_results" in loaded_data
            assert "article_details" in loaded_data
            assert "references" in loaded_data
            assert "journal_quality" in loaded_data
            assert loaded_data["search_results"]["total_count"] == 2

    @pytest.mark.integration
    def test_literature_relation_analysis_workflow(self):
        """Test the literature relation analysis workflow."""
        # 1. Input multiple seed articles
        seed_articles = [
            {"identifier": "10.1234/seed1.2023", "title": "Seed Article 1"},
            {"identifier": "10.5678/seed2.2023", "title": "Seed Article 2"},
        ]

        # 2. Fetch the literature relations
        relations = {
            "success": True,
            "identifiers": [article["identifier"] for article in seed_articles],
            "relations": {
                "references": [
                    {
                        "title": "Common Reference Article",
                        "doi": "10.9999/common.ref.2020",
                        "authors": ["Common Author"],
                        "journal": "Prestigious Journal",
                        "publication_date": "2020-03-15",
                    }
                ],
                "similar": [
                    {
                        "title": "Similar Research Article",
                        "doi": "10.8888/similar.2023",
                        "authors": ["Similar Researcher"],
                        "journal": "Related Journal",
                        "publication_date": "2023-02-28",
                    }
                ],
                "citing": [
                    {
                        "title": "Citing Article 2023",
                        "doi": "10.7777/citing.2023",
                        "authors": ["Citing Researcher"],
                        "journal": "Modern Journal",
                        "publication_date": "2023-11-01",
                    }
                ],
            },
            "statistics": {
                "references_count": 1,
                "similar_count": 1,
                "citing_count": 1,
                "total_relations": 3,
            },
        }

        # 3. Build the network analysis
        network_analysis = {
            "success": True,
            "network_data": {
                "nodes": [
                    {"id": "10.1234/seed1.2023", "type": "seed", "label": "Seed Article 1"},
                    {"id": "10.5678/seed2.2023", "type": "seed", "label": "Seed Article 2"},
                    {
                        "id": "10.9999/common.ref.2020",
                        "type": "reference",
                        "label": "Common Reference",
                    },
                    {"id": "10.8888/similar.2023", "type": "similar", "label": "Similar Research"},
                    {"id": "10.7777/citing.2023", "type": "citing", "label": "Citing Article"},
                ],
                "edges": [
                    {"source": 0, "target": 2, "type": "references", "weight": 1.0},
                    {"source": 1, "target": 2, "type": "references", "weight": 1.0},
                    {"source": 0, "target": 3, "type": "similar", "weight": 0.8},
                    {"source": 1, "target": 4, "type": "citing", "weight": 0.9},
                ],
                "clusters": {
                    "seed_papers": [0, 1],
                    "references": [2],
                    "similar": [3],
                    "citing": [4],
                },
            },
            "analysis_metrics": {
                "total_nodes": 5,
                "total_edges": 4,
                "average_degree": 1.6,
                "network_density": 0.4,
                "cluster_count": 4,
            },
        }

        # Verify the relation analysis results
        assert relations["success"]
        assert len(relations["relations"]) == 3
        assert relations["statistics"]["total_relations"] == 3

        # Verify the network analysis results
        assert network_analysis["success"]
        assert len(network_analysis["network_data"]["nodes"]) == 5
        assert len(network_analysis["network_data"]["edges"]) == 4
        assert network_analysis["analysis_metrics"]["average_degree"] == 1.6
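
    # Consistency check for the network fixture above: with 5 nodes and 4
    # undirected edges, average degree = 2 * 4 / 5 = 1.6 and network density
    # = 4 / C(5, 2) = 4 / 10 = 0.4, matching the asserted metrics.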

    @pytest.mark.integration
    def test_quality_assessment_workflow(self):
        """Test the quality assessment workflow."""
        # 1. Batch article data
        articles_batch = [
            {
                "title": "High Impact Research",
                "journal": "Nature",
                "doi": "10.1038/high.2023",
                "publication_date": "2023-01-15",
            },
            {
                "title": "Medium Impact Study",
                "journal": "Regional Journal",
                "doi": "10.5678/medium.2023",
                "publication_date": "2023-03-20",
            },
            {
                "title": "Emerging Research",
                "journal": "New Journal",
                "doi": "10.9999/emerging.2023",
                "publication_date": "2023-06-10",
            },
        ]

        # 2. Batch quality assessment
        quality_assessment = {
            "success": True,
            "total_articles": len(articles_batch),
            "evaluated_articles": [
                {
                    "article": articles_batch[0],
                    "quality_score": 95.5,
                    "quality_metrics": {
                        "impact_factor": 42.5,
                        "quartile": "Q1",
                        "jci": 25.8,
                        "分区": "中科院一区",  # CAS journal tier 1
                    },
                    "quality_grade": "Excellent",
                },
                {
                    "article": articles_batch[1],
                    "quality_score": 65.2,
                    "quality_metrics": {
                        "impact_factor": 3.2,
                        "quartile": "Q2",
                        "jci": 1.1,
                        "分区": "中科院二区",  # CAS journal tier 2
                    },
                    "quality_grade": "Good",
                },
                {
                    "article": articles_batch[2],
                    "quality_score": 45.8,
                    "quality_metrics": {
                        "impact_factor": 1.5,
                        "quartile": "Q3",
                        "jci": 0.4,
                        "分区": "中科院三区",  # CAS journal tier 3
                    },
                    "quality_grade": "Fair",
                },
            ],
            "quality_distribution": {
                "excellent": 1,
                "good": 1,
                "fair": 1,
                "poor": 0,
            },
            "average_quality_score": 68.8,
        }

        # 3. Field ranking analysis
        field_ranking = {
            "success": True,
            "field_name": "Computer Science",
            "ranking_type": "journal_impact",
            "top_journals": [
                {
                    "name": "Nature",
                    "impact_factor": 42.5,
                    "rank": 1,
                    "quartile": "Q1",
                },
                {
                    "name": "Regional Journal",
                    "impact_factor": 3.2,
                    "rank": 45,
                    "quartile": "Q2",
                },
                {
                    "name": "New Journal",
                    "impact_factor": 1.5,
                    "rank": 120,
                    "quartile": "Q3",
                },
            ],
            "field_statistics": {
                "total_journals": 200,
                "average_impact_factor": 2.8,
                "median_impact_factor": 1.9,
            },
        }

        # Verify the quality assessment results
        assert quality_assessment["success"]
        assert quality_assessment["total_articles"] == 3
        assert len(quality_assessment["evaluated_articles"]) == 3
        assert quality_assessment["average_quality_score"] == 68.8

        # Verify the field ranking results
        assert field_ranking["success"]
        assert len(field_ranking["top_journals"]) == 3
        assert field_ranking["field_statistics"]["total_journals"] == 200
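
    # Consistency check for the quality fixture above: the mean of the three
    # scores, (95.5 + 65.2 + 45.8) / 3 = 68.83..., is reported rounded to 68.8.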

    @pytest.mark.integration
    def test_error_handling_integration(self):
        """Test error handling integration."""
        # 1. Search error
        search_error = {
            "success": False,
            "error": "API rate limit exceeded",
            "error_type": "RateLimitError",
            "retry_after": 60,
            "suggestion": "Please retry later or reduce the search frequency",
        }

        # 2. Details retrieval error
        details_error = {
            "success": False,
            "error": "Article not found",
            "error_type": "NotFoundError",
            "identifier": "10.9999/nonexistent.2023",
            "suggestion": "Please check that the article identifier is correct",
        }

        # 3. Export error
        export_error = {
            "success": False,
            "error": "Insufficient disk space",
            "error_type": "StorageError",
            "required_space": "50MB",
            "available_space": "10MB",
            "suggestion": "Please free up disk space or choose another export location",
        }

        # Verify error handling consistency
        errors = [search_error, details_error, export_error]
        for error in errors:
            assert not error["success"]
            assert "error" in error
            assert "error_type" in error
            assert "suggestion" in error

        # Verify error propagation
        def propagate_error(source_error):
            return {
                "success": False,
                "original_error": source_error["error"],
                "error_type": source_error["error_type"],
                "user_message": source_error["suggestion"],
                "context": "integration_test",
            }

        propagated_search_error = propagate_error(search_error)
        assert propagated_search_error["original_error"] == "API rate limit exceeded"
        assert "integration_test" in propagated_search_error["context"]


class TestPerformanceIntegration:
    """Performance integration tests."""

    @pytest.mark.integration
    @pytest.mark.slow
    def test_large_batch_processing_performance(self):
        """Test large-batch processing performance."""
        with TestTimer() as timer:
            # Simulate a large batch of data
            large_batch = {
                "success": True,
                "merged_results": [
                    {
                        "title": f"Research Article {i}",
                        "authors": [f"Author {i}"],
                        "doi": f"10.1234/article.{i}.2023",
                        "journal": f"Journal {i % 10}",  # 10 different journals
                        "publication_date": f"2023-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}",
                    }
                    for i in range(100)  # 100 articles
                ],
                "total_count": 100,
                "processing_time": 0,
            }

            # Simulate batch processing
            processed_results = []
            for article in large_batch["merged_results"]:
                # Simulate processing a single article
                processed_article = {
                    **article,
                    "processed": True,
                    "quality_score": 50 + (hash(article["doi"]) % 50),  # pseudo-random score
                }
                processed_results.append(processed_article)

            processing_time = timer.stop()

        # Verify the performance metrics
        assert len(processed_results) == 100
        assert processing_time < 5.0  # should finish within 5 seconds
        assert processing_time < len(processed_results) * 0.1  # under 0.1 s per article

    @pytest.mark.integration
    @pytest.mark.slow
    def test_concurrent_tool_execution(self):
        """Test concurrent tool execution."""

        async def simulate_concurrent_execution():
            with TestTimer() as timer:
                # Simulate running several tools concurrently
                tasks = [
                    asyncio.sleep(0.1),   # search tool
                    asyncio.sleep(0.15),  # details tool
                    asyncio.sleep(0.12),  # references tool
                    asyncio.sleep(0.08),  # relation analysis tool
                    asyncio.sleep(0.05),  # quality assessment tool
                    asyncio.sleep(0.03),  # export tool
                ]
                await asyncio.gather(*tasks)
                return timer.stop()

        # Run the concurrency test
        execution_time = asyncio.run(simulate_concurrent_execution())

        # Verify concurrent execution performance.
        # Serial execution would take 0.1 + 0.15 + 0.12 + 0.08 + 0.05 + 0.03 = 0.53 s;
        # concurrent execution should be close to the slowest task's duration.
        assert execution_time < 0.2  # concurrency should beat serial by a wide margin
        assert execution_time > 0.15  # but cannot finish before the slowest task

    @pytest.mark.integration
    def test_memory_usage_optimization(self):
        """Test memory usage optimization."""
        import os

        import psutil

        process = psutil.Process(os.getpid())
        initial_memory = process.memory_info().rss / 1024 / 1024  # MB

        with TestTimer() as timer:
            # Simulate processing of large datasets
            large_datasets = []
            for i in range(10):  # 10 datasets
                dataset = {
                    "results": [
                        {
                            "title": f"Article {j}",
                            "content": "x" * 1000,  # 1 KB per article
                            "metadata": {"key": "value" * 100},  # extra metadata
                        }
                        for j in range(1000)  # 1000 articles
                    ],
                    "dataset_id": i,
                }
                large_datasets.append(dataset)

            # Simulate processing and cleanup
            processed_count = 0
            for dataset in large_datasets:
                # Process the data
                for article in dataset["results"]:
                    article["processed"] = True
                    processed_count += 1

            # Simulate memory cleanup: drop the original datasets (deleting
            # only the loop variable would not release them, since the list
            # still holds references)
            large_datasets.clear()

            final_memory = process.memory_info().rss / 1024 / 1024  # MB
            memory_increase = final_memory - initial_memory
            processing_time = timer.stop()

        # Verify memory usage
        assert processed_count == 10000  # 10,000 articles processed
        assert memory_increase < 100  # memory growth should stay under 100 MB
        assert processing_time < 10.0  # processing time should be reasonable
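

# TestTimer is imported from tests.utils.test_helpers and is not shown in this
# file. Based purely on its usage above (a context manager whose stop() returns
# the elapsed seconds, callable inside or after the with block), a minimal
# compatible implementation might look like the sketch below -- an assumption
# about the helper, not the project's actual code:
#
#     import time
#
#     class TestTimer:
#         def __enter__(self):
#             self._start = time.perf_counter()
#             return self
#
#         def __exit__(self, *exc_info):
#             pass
#
#         def stop(self):
#             return time.perf_counter() - self._start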


class TestRealWorldScenarios:
    """Real-world scenario tests."""

    @pytest.mark.integration
    def test_researcher_literature_review_scenario(self):
        """Test a researcher's literature review scenario."""
        # A researcher wants to run a literature review on machine learning
        # applications in healthcare.

        # 1. Initial search
        initial_search = {
            "keyword": "machine learning healthcare diagnosis",
            "max_results": 50,
            "search_type": "comprehensive",
            "sources": ["europe_pmc", "pubmed", "arxiv"],
        }

        # 2. High-quality filtering
        quality_filter = {
            "min_impact_factor": 2.0,
            "min_publication_year": 2020,
            "include_only_open_access": True,
            "language": "english",
        }

        # 3. Relation analysis
        relation_analysis = {
            "include_references": True,
            "include_citing": True,
            "max_depth": 2,
            "focus_on_recent": True,
        }

        # 4. Report export
        export_requirements = {
            "format": "excel",
            "include_abstracts": True,
            "include_quality_metrics": True,
            "include_network_analysis": True,
            "group_by_journal": True,
        }

        # Verify scenario completeness
        assert "machine learning" in initial_search["keyword"]
        assert "healthcare" in initial_search["keyword"]
        assert initial_search["max_results"] == 50
        assert quality_filter["min_impact_factor"] == 2.0
        assert relation_analysis["max_depth"] == 2
        assert export_requirements["format"] == "excel"

    @pytest.mark.integration
    def test_student_assignment_scenario(self):
        """Test a student assignment scenario."""
        # A student needs to complete a course assignment on "AI ethics".

        # 1. Basic search
        student_search = {
            "keyword": "artificial intelligence ethics bias fairness",
            "max_results": 20,
            "search_type": "recent",  # favor the latest research
            "sources": ["europe_pmc", "arxiv"],  # academic preprints and journals
        }

        # 2. Simplified filtering
        simple_filter = {
            "include_review_articles": True,
            "min_publication_year": 2021,
            "prefer_open_access": True,
        }

        # 3. Basic relation analysis
        simple_relations = {
            "include_references": False,  # no deep dive into references needed
            "include_similar": True,
            "max_depth": 1,
        }

        # 4. Simple export
        simple_export = {
            "format": "csv",
            "include_abstracts": True,
            "include_basic_info": True,
            "exclude_technical_details": True,
        }

        # Verify the student scenario
        assert student_search["max_results"] == 20  # a moderate number
        assert simple_filter["min_publication_year"] == 2021  # fairly recent research
        assert simple_relations["max_depth"] == 1  # shallow analysis
        assert simple_export["format"] == "csv"  # a simple format

    @pytest.mark.integration
    def test_clinician_evidence_search_scenario(self):
        """Test a clinician's evidence search scenario."""
        # A clinician needs the latest evidence on a specific treatment.

        # 1. Precise clinical search
        clinical_search = {
            "keyword": "immunotherapy lung cancer checkpoint inhibitors",
            "max_results": 30,
            "search_type": "clinical_focus",
            "sources": ["pubmed", "europe_pmc"],  # primary medical databases
            "filter_clinical_trials": True,
        }

        # 2. Clinical quality filtering
        clinical_filter = {
            "min_evidence_level": "randomized_controlled_trial",
            "min_sample_size": 100,
            "prefer_recent": True,
            "exclude_preprints": True,
        }

        # 3. Clinical relation analysis
        clinical_relations = {
            "include_clinical_guidelines": True,
            "include_systematic_reviews": True,
            "focus_on_treatment_outcomes": True,
        }

        # 4. Clinical report export
        clinical_export = {
            "format": "json",
            "include_patient_outcomes": True,
            "include_adverse_effects": True,
            "include_dosage_info": True,
            "clinical_summary": True,
        }

        # Verify the clinical scenario
        assert "immunotherapy" in clinical_search["keyword"]
        assert "lung cancer" in clinical_search["keyword"]
        assert clinical_filter["min_evidence_level"] == "randomized_controlled_trial"
        assert clinical_relations["include_clinical_guidelines"] is True
        assert clinical_export["clinical_summary"] is True
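
To run just this suite, the `integration` and `slow` markers used above need to be registered in the project's pytest configuration; assuming they are, and that the file lives under tests/, a minimal sketch for invoking it programmatically:

import sys

import pytest

# Run only the fast integration tests; drop "and not slow" to include the
# performance tests marked @pytest.mark.slow as well.
sys.exit(pytest.main(["-m", "integration and not slow", "tests/test_six_tools_integration.py"]))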
