Skip to main content
Glama
h-lu
by h-lu
test_repec.py (11.3 kB)
# tests/test_repec.py
"""Tests for the RePEc/IDEAS searcher.

Exercises searching for economics papers through the IDEAS front end
(ideas.repec.org). Tests that need the live website are skipped when the
site cannot be reached.
"""

import unittest

import requests

from paper_search_mcp.academic_platforms.repec import RePECSearcher


def check_ideas_accessible():
    """Return True if the IDEAS home page answers with HTTP 200.

    Request-level failures (timeout, connection error, ...) are reported as
    ``False`` so that callers can skip online tests instead of failing.
    """
    try:
        response = requests.get(
            "https://ideas.repec.org/",
            timeout=10,
            headers={'User-Agent': 'Mozilla/5.0'},
        )
    except requests.RequestException:
        # Swallow only request failures: a bare ``except`` would also hide
        # KeyboardInterrupt and SystemExit.
        return False
    return response.status_code == 200


class TestRePECSearcher(unittest.TestCase):
    """Tests for the RePEc/IDEAS searcher."""

    @classmethod
    def setUpClass(cls):
        """Probe the IDEAS website once and remember whether it is reachable."""
        cls.ideas_accessible = check_ideas_accessible()
        if not cls.ideas_accessible:
            print("\nWarning: IDEAS website is not accessible, some tests will be skipped")

    def setUp(self):
        """Create a fresh searcher for each test."""
        self.searcher = RePECSearcher()

    def _require_ideas(self):
        """Skip the running test when the IDEAS website is not reachable."""
        if not self.ideas_accessible:
            self.skipTest("IDEAS website is not accessible")

    def test_search_basic(self):
        """Basic search returns papers with a title and the 'repec' source."""
        self._require_ideas()

        papers = self.searcher.search("machine learning", max_results=5)

        print(f"\nFound {len(papers)} papers for query 'machine learning':")
        for i, paper in enumerate(papers, 1):
            print(f"{i}. {paper.title[:60]}...")
            print(f" ID: {paper.paper_id}")
            print(f" Authors: {', '.join(paper.authors[:2]) if paper.authors else 'N/A'}")
            print(f" Year: {paper.published_date.year if paper.published_date else 'N/A'}")
            print()

        self.assertGreater(len(papers), 0, "Should return at least one paper")
        # The assertion above guarantees papers[0] exists.
        self.assertTrue(papers[0].title, "Paper should have a title")
        self.assertEqual(papers[0].source, "repec", "Source should be 'repec'")

    def test_search_with_year_filter(self):
        """Search accepts a year range."""
        self._require_ideas()

        papers = self.searcher.search(
            "inflation",
            max_results=3,
            year_from=2023,
            year_to=2025,
        )

        print(f"\nFound {len(papers)} papers with year filter (2023-2025):")
        for paper in papers:
            year = paper.published_date.year if paper.published_date else None
            print(f" - {paper.title[:50]}... (Year: {year})")

        # The year filter may not be 100% accurate (it depends on the site's
        # data), so this is only a smoke check that the call completes.
        self.assertGreaterEqual(len(papers), 0, "Search should complete without error")

    def test_search_empty_query(self):
        """An empty query yields no papers."""
        papers = self.searcher.search("", max_results=5)
        self.assertEqual(len(papers), 0, "Empty query should return no papers")

    def test_download_not_supported(self):
        """download_pdf returns an explanatory message."""
        result = self.searcher.download_pdf("RePEc:nbr:nberwo:32000", "./downloads")

        self.assertIn("does not host PDF", result)
        self.assertIn("ALTERNATIVES", result)
        print(f"\nDownload message:\n{result}")

    def test_read_not_supported(self):
        """read_paper returns an explanatory message."""
        result = self.searcher.read_paper("RePEc:nbr:nberwo:32000", "./downloads")

        self.assertIn("cannot be read directly", result)
        self.assertIn("ALTERNATIVES", result)
        print(f"\nRead message:\n{result}")

    def test_extract_repec_handle(self):
        """RePEc handles are extracted from IDEAS URLs."""
        # Working-paper URL.
        url1 = "https://ideas.repec.org/p/nbr/nberwo/32000.html"
        handle1 = self.searcher._extract_repec_handle(url1)
        self.assertEqual(handle1, "RePEc:nbr:nberwo:32000")

        # Journal-article URL.
        url2 = "https://ideas.repec.org/a/aea/aecrev/v110y2020i1p1-40.html"
        handle2 = self.searcher._extract_repec_handle(url2)
        self.assertEqual(handle2, "RePEc:aea:aecrev:v110y2020i1p1-40")

        print(f"\nExtracted handles:\n {url1} -> {handle1}\n {url2} -> {handle2}")

    def test_extract_year(self):
        """Years are extracted from free text; None when absent."""
        self.assertEqual(self.searcher._extract_year("Published in 2023"), 2023)
        self.assertEqual(self.searcher._extract_year("Working Paper 2020-01"), 2020)
        self.assertIsNone(self.searcher._extract_year("No year here"))

    def test_session_headers(self):
        """The session carries a browser-like User-Agent header."""
        user_agent = self.searcher.session.headers.get('User-Agent', '')

        self.assertGreater(len(user_agent), 0, "Should have User-Agent header")
        self.assertIn("Mozilla", user_agent, "User-Agent should look like a browser")

    def test_search_field_options(self):
        """SEARCH_FIELDS contains every expected search field."""
        expected_fields = ['all', 'abstract', 'keywords', 'title', 'author']
        for field in expected_fields:
            self.assertIn(field, self.searcher.SEARCH_FIELDS, f"Missing search field: {field}")

        print(f"\nAvailable search fields: {list(self.searcher.SEARCH_FIELDS.keys())}")

    def test_sort_options(self):
        """SORT_OPTIONS contains the main sort options."""
        expected_sorts = ['relevance', 'newest', 'oldest', 'citations']
        for sort in expected_sorts:
            self.assertIn(sort, self.searcher.SORT_OPTIONS, f"Missing sort option: {sort}")

        print(f"\nAvailable sort options: {list(self.searcher.SORT_OPTIONS.keys())}")

    def test_doc_type_options(self):
        """DOC_TYPES contains every expected document type."""
        expected_types = ['all', 'articles', 'papers', 'chapters', 'books', 'software']
        for doc_type in expected_types:
            self.assertIn(doc_type, self.searcher.DOC_TYPES, f"Missing doc type: {doc_type}")

        print(f"\nAvailable doc types: {list(self.searcher.DOC_TYPES.keys())}")

    def test_search_with_sort_option(self):
        """Search accepts a sort option."""
        self._require_ideas()

        papers = self.searcher.search(
            "inflation",
            max_results=3,
            sort_by='newest',
        )

        print(f"\nFound {len(papers)} papers sorted by newest:")
        for paper in papers:
            print(f" - {paper.title[:50]}...")

        # Smoke check only: the call must complete and return a sized result.
        self.assertGreaterEqual(len(papers), 0, "Search with sort should complete without error")

    def test_search_with_doc_type(self):
        """Search accepts a document-type filter."""
        self._require_ideas()

        papers = self.searcher.search(
            "monetary policy",
            max_results=3,
            doc_type='papers',  # working papers only
        )

        print(f"\nFound {len(papers)} working papers for 'monetary policy':")
        for paper in papers:
            # Check whether the URL contains /p/ (the working-papers path).
            if '/p/' in paper.url:
                print(f" ✓ {paper.title[:50]}...")
            else:
                print(f" ? {paper.title[:50]}... (URL: {paper.url[:50]})")

        # Smoke check only: the call must complete and return a sized result.
        self.assertGreaterEqual(len(papers), 0, "Search with doc_type should complete without error")

    def test_search_by_author(self):
        """Search accepts the 'author' search field."""
        self._require_ideas()

        papers = self.searcher.search(
            "Acemoglu",
            max_results=3,
            search_field='author',
        )

        print(f"\nFound {len(papers)} papers by author 'Acemoglu':")
        for paper in papers:
            print(f" - {paper.title[:60]}...")

        # Smoke check only: the call must complete and return a sized result.
        self.assertGreaterEqual(len(papers), 0, "Search by author should complete without error")

    def test_get_paper_details_from_url(self):
        """get_paper_details returns a populated paper for a known URL."""
        self._require_ideas()

        # A known paper page.
        url = "https://ideas.repec.org/a/sae/inrsre/v49y2026i1p62-90.html"
        paper = self.searcher.get_paper_details(url)

        print("\nPaper details from URL:")
        if not paper:
            self.fail("get_paper_details should return a paper")

        print(f" Title: {paper.title[:60]}...")
        print(f" Authors: {paper.authors}")
        print(f" Abstract: {paper.abstract[:100]}..." if paper.abstract else " Abstract: N/A")
        print(f" Keywords: {paper.keywords}")
        print(f" URL: {paper.url}")

        self.assertTrue(paper.title, "Paper should have a title")
        self.assertTrue(paper.abstract, "Paper should have an abstract")
        self.assertGreater(len(paper.authors), 0, "Paper should have authors")
        self.assertEqual(paper.source, "repec")

    def test_get_paper_details_extracts_metadata(self):
        """The detail page yields metadata fields of the expected types."""
        self._require_ideas()

        url = "https://ideas.repec.org/a/sae/inrsre/v49y2026i1p62-90.html"
        paper = self.searcher.get_paper_details(url)

        # Fail explicitly rather than pass vacuously when nothing came back,
        # matching test_get_paper_details_from_url.
        if not paper:
            self.fail("get_paper_details should return a paper")

        # Field types.
        self.assertIsInstance(paper.authors, list)
        self.assertIsInstance(paper.keywords, list)
        self.assertIsInstance(paper.abstract, str)

        # At least one author is expected.
        self.assertGreater(len(paper.authors), 0, "Should have at least one author")

        # Keywords are optional; show them when present.
        if paper.keywords:
            print(f"\nKeywords found: {paper.keywords}")

    def test_series_options(self):
        """SERIES contains the main institutions and journals."""
        expected_series = ['nber', 'imf', 'fed', 'aer', 'qje']
        for s in expected_series:
            self.assertIn(s, self.searcher.SERIES, f"Missing series: {s}")

        print(f"\nAvailable series: {list(self.searcher.SERIES.keys())[:10]}...")

    def test_search_with_series_nber(self):
        """Filtering by the NBER series returns only NBER working papers."""
        self._require_ideas()

        papers = self.searcher.search(
            "inflation",
            max_results=5,
            series='nber',
        )

        print(f"\nFound {len(papers)} NBER papers for 'inflation':")
        # Collect offending URLs so a failure shows exactly which results
        # fell outside the NBER working-paper path.
        non_nber_urls = []
        for paper in papers:
            is_nber = '/p/nbr/nberwo/' in paper.url
            print(f" {'✓' if is_nber else '?'} {paper.title[:50]}...")
            if not is_nber:
                non_nber_urls.append(paper.url)

        self.assertGreater(len(papers), 0, "Should find NBER papers")
        self.assertEqual(non_nber_urls, [], "All papers should be from NBER")


if __name__ == '__main__':
    unittest.main(verbosity=2)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/h-lu/paper-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.