arXiv MCP Server

test_services.py•12.2 kB

"""Integration tests for arXiv services. These tests make real API calls to arXiv.org. They are slower but ensure the service works with the actual API. """ import shutil import tempfile from pathlib import Path import pytest from arxiv.models import ArxivEntry, ArxivSearchResult, Author from arxiv.services import ArxivService @pytest.fixture def temp_dir(): """Create a temporary directory for test downloads.""" tmp = tempfile.mkdtemp() yield tmp shutil.rmtree(tmp) @pytest.fixture def service(temp_dir): """Create an ArxivService instance with a temporary download directory.""" return ArxivService(download_dir=temp_dir, rate_limit_delay=0.5) class TestArxivServiceSearch: """Tests for the search functionality.""" def test_search_basic(self, service): """Test basic search returns results.""" result = service.search(query="ti:attention is all you need", max_results=5) assert isinstance(result, ArxivSearchResult) assert result.total_results > 0 assert len(result.entries) > 0 assert result.items_per_page == 5 def test_search_by_title(self, service): """Test searching by title.""" result = service.search(query="ti:transformer", max_results=3) assert len(result.entries) <= 3 for entry in result.entries: assert isinstance(entry, ArxivEntry) assert entry.title assert entry.summary assert len(entry.authors) > 0 def test_search_by_author(self, service): """Test searching by author.""" result = service.search(query="au:Hinton", max_results=5) assert len(result.entries) > 0 # At least one entry should have Hinton as an author found_hinton = False for entry in result.entries: for author in entry.authors: if "Hinton" in author.name: found_hinton = True break assert found_hinton, "Expected to find Hinton in author list" def test_search_by_category(self, service): """Test searching by category.""" result = service.search(query="cat:cs.AI", max_results=5) assert len(result.entries) > 0 for entry in result.entries: assert "cs.AI" in entry.categories or entry.primary_category == 
"cs.AI" def test_search_combined_query(self, service): """Test combined search query.""" result = service.search( query="ti:neural AND cat:cs.LG", max_results=3 ) assert len(result.entries) <= 3 for entry in result.entries: assert "cs.LG" in entry.categories or entry.primary_category == "cs.LG" def test_search_pagination(self, service): """Test search pagination.""" result1 = service.search(query="ti:neural", max_results=2, start=0) result2 = service.search(query="ti:neural", max_results=2, start=2) assert result1.start_index == 0 assert result2.start_index == 2 assert len(result1.entries) == 2 assert len(result2.entries) == 2 # Entries should be different ids1 = {e.arxiv_id for e in result1.entries} ids2 = {e.arxiv_id for e in result2.entries} assert ids1 != ids2, "Pagination should return different results" def test_search_sort_by_date(self, service): """Test sorting by submission date.""" result = service.search( query="cat:cs.AI", max_results=5, sort_by="submittedDate", sort_order="descending", ) assert len(result.entries) > 0 # Verify dates are in descending order dates = [entry.published for entry in result.entries] assert dates == sorted(dates, reverse=True) def test_search_no_results(self, service): """Test search with no results.""" # Search for something very unlikely to exist result = service.search( query='ti:"xyzabcdefghijklmnopqrstuvwxyz123456789"', max_results=5, ) assert result.total_results == 0 assert len(result.entries) == 0 class TestArxivServiceGet: """Tests for the get functionality.""" def test_get_by_id(self, service): """Test getting a specific paper by ID.""" # Use a well-known paper: "Attention Is All You Need" arxiv_id = "1706.03762" entry = service.get(arxiv_id, download_pdf=False) assert isinstance(entry, ArxivEntry) assert arxiv_id in entry.arxiv_id assert "Attention" in entry.title or "attention" in entry.title.lower() assert len(entry.authors) > 0 assert entry.pdf_url is not None def test_get_with_arxiv_prefix(self, service): 
"""Test getting a paper with arXiv: prefix.""" entry = service.get("arXiv:1706.03762", download_pdf=False) assert isinstance(entry, ArxivEntry) assert "1706.03762" in entry.arxiv_id def test_get_with_version(self, service): """Test getting a paper with version number.""" entry = service.get("1706.03762v1", download_pdf=False) assert isinstance(entry, ArxivEntry) assert "1706.03762" in entry.arxiv_id def test_get_nonexistent_id(self, service): """Test getting a non-existent paper ID.""" with pytest.raises(ValueError, match="No paper found"): service.get("9999.99999", download_pdf=False) def test_get_with_download(self, service, temp_dir): """Test getting a paper with PDF download.""" arxiv_id = "1706.03762" entry = service.get(arxiv_id, download_pdf=True) # Check entry is returned assert isinstance(entry, ArxivEntry) # Check PDF was downloaded pdf_path = Path(temp_dir) / f"{arxiv_id}.pdf" assert pdf_path.exists() assert pdf_path.stat().st_size > 0 def test_get_download_exists_no_force(self, service, temp_dir): """Test that existing PDFs are not re-downloaded without force.""" arxiv_id = "1706.03762" # First download entry1 = service.get(arxiv_id, download_pdf=True, force_download=False) pdf_path = Path(temp_dir) / f"{arxiv_id}.pdf" first_size = pdf_path.stat().st_size first_mtime = pdf_path.stat().st_mtime # Second download without force - should not re-download entry2 = service.get(arxiv_id, download_pdf=True, force_download=False) # File should be the same assert pdf_path.stat().st_size == first_size assert pdf_path.stat().st_mtime == first_mtime def test_get_download_exists_with_force(self, service, temp_dir): """Test that force flag causes re-download.""" arxiv_id = "1706.03762" # First download entry1 = service.get(arxiv_id, download_pdf=True, force_download=False) pdf_path = Path(temp_dir) / f"{arxiv_id}.pdf" first_mtime = pdf_path.stat().st_mtime # Wait a moment to ensure mtime would differ import time time.sleep(0.1) # Second download with force - should 
re-download entry2 = service.get(arxiv_id, download_pdf=True, force_download=True) # File should have been updated assert pdf_path.stat().st_mtime > first_mtime class TestArxivServiceDownloadPDF: """Tests for the download_pdf functionality.""" def test_download_pdf_by_id(self, service, temp_dir): """Test downloading a PDF by arXiv ID.""" arxiv_id = "1706.03762" pdf_path = service.download_pdf(arxiv_id) assert pdf_path is not None assert pdf_path.exists() assert pdf_path.name == f"{arxiv_id}.pdf" assert pdf_path.stat().st_size > 0 def test_download_pdf_force(self, service, temp_dir): """Test force downloading a PDF.""" arxiv_id = "1706.03762" # First download pdf_path1 = service.download_pdf(arxiv_id, force_download=False) first_mtime = pdf_path1.stat().st_mtime # Wait a moment import time time.sleep(0.1) # Force download pdf_path2 = service.download_pdf(arxiv_id, force_download=True) assert pdf_path2.stat().st_mtime > first_mtime def test_download_pdf_invalid_id(self, service): """Test downloading PDF with invalid ID.""" result = service.download_pdf("9999.99999") assert result is None class TestArxivEntry: """Tests for ArxivEntry model.""" def test_entry_properties(self, service): """Test that entry properties are correctly populated.""" entry = service.get("1706.03762", download_pdf=False) # Basic fields assert entry.id assert entry.title assert entry.summary assert len(entry.summary) > 100 # Should have substantial content # Authors assert len(entry.authors) > 0 for author in entry.authors: assert isinstance(author, Author) assert author.name # Dates assert entry.published assert entry.updated # Categories assert entry.primary_category assert len(entry.categories) > 0 # Links assert len(entry.links) > 0 assert entry.pdf_url def test_entry_arxiv_id_property(self, service): """Test arxiv_id property extraction.""" entry = service.get("1706.03762", download_pdf=False) # Should extract clean ID from URL assert "1706.03762" in entry.arxiv_id assert not 
entry.arxiv_id.startswith("http") def test_entry_str_representation(self, service): """Test string representation of entry.""" entry = service.get("1706.03762", download_pdf=False) entry_str = str(entry) # Should contain key information assert "ID:" in entry_str assert "Title:" in entry_str assert "Authors:" in entry_str assert "Published:" in entry_str assert "Categories:" in entry_str assert "Abstract:" in entry_str class TestArxivSearchResult: """Tests for ArxivSearchResult model.""" def test_search_result_properties(self, service): """Test search result properties.""" result = service.search(query="ti:neural", max_results=3) assert result.total_results > 0 assert result.start_index >= 0 assert result.items_per_page == 3 assert len(result.entries) <= 3 def test_search_result_str_representation(self, service): """Test string representation of search result.""" result = service.search(query="ti:transformer", max_results=2) result_str = str(result) # Should contain summary information assert "Total results:" in result_str assert "Showing:" in result_str assert "Items per page:" in result_str assert "Result 1:" in result_str class TestRateLimiting: """Tests for rate limiting functionality.""" def test_rate_limiting_enforced(self, temp_dir): """Test that rate limiting delays are enforced.""" import time service = ArxivService(download_dir=temp_dir, rate_limit_delay=0.5) start_time = time.time() # Make two consecutive searches service.search(query="ti:neural", max_results=1) service.search(query="ti:machine", max_results=1) elapsed = time.time() - start_time # Should take at least rate_limit_delay seconds assert elapsed >= 0.5 class TestDownloadDirectory: """Tests for download directory management.""" def test_download_dir_created(self): """Test that download directory is created if it doesn't exist.""" with tempfile.TemporaryDirectory() as tmpdir: download_dir = Path(tmpdir) / "downloads" assert not download_dir.exists() service = 
ArxivService(download_dir=str(download_dir)) assert download_dir.exists() assert download_dir.is_dir() def test_download_dir_default(self): """Test default download directory.""" service = ArxivService() assert service.download_dir == Path("./.arxiv")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/LiamConnell/arxiv_for_agents'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.