Readwise MCP Server

test_server.py•24.4 KiB

#!/usr/bin/env python3
# Copyright (c) 2026 ngpestelos
# Licensed under the MIT License - see LICENSE file for details
"""
Unit and integration tests for Readwise MCP Server
"""

import json
import os
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import Mock, mock_open, patch

import pytest

# Make the sibling ``server`` module importable when the tests are run
# directly from this directory (must happen before the import below).
sys.path.insert(0, str(Path(__file__).parent))

from server import (  # noqa: E402 - import must follow the sys.path tweak
    load_state,
    write_state,
    optimize_backfill,
    scan_existing_documents,
    sanitize_filename,
    extract_id_from_url,
    format_document_markdown,
    save_document,
)


# ============================================================================
# HELPERS
# ============================================================================

def _assert_iso8601(timestamp, failure_message):
    """Fail the current test with *failure_message* unless *timestamp* parses.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing because
    ``datetime.fromisoformat`` only accepts the ``Z`` suffix on newer Python
    versions.
    """
    try:
        datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
    except ValueError:
        pytest.fail(failure_message)


def _has_alphanumeric(filename):
    """Return True if *filename* without its ".md" suffix has an alphanumeric char.

    Mirrors the qmd indexer requirement described in TestFilenameQmdValidation.
    """
    stem = filename[:-3]  # Remove .md
    return any(ch.isalnum() for ch in stem)


# ============================================================================
# UNIT TESTS
# ============================================================================

class TestTimestampFormat:
    """Test ISO 8601 timestamp format correctness"""

    def test_load_state_default_timestamp_format(self, tmp_path):
        """Test that default state has valid ISO 8601 timestamp without double timezone"""
        state_file = tmp_path / "nonexistent.json"

        with patch("server.STATE_FILE", state_file):
            state = load_state()

        timestamp = state["last_import_timestamp"]

        # Should not have both +00:00 and Z
        assert "+00:00Z" not in timestamp, f"Malformed timestamp: {timestamp}"

        # Should be parseable as ISO 8601
        _assert_iso8601(timestamp, f"Invalid ISO 8601 timestamp: {timestamp}")

    def test_timestamp_has_timezone_info(self, tmp_path):
        """Test that generated timestamps include timezone information"""
        state_file = tmp_path / "nonexistent.json"

        with patch("server.STATE_FILE", state_file):
            state = load_state()

        timestamp = state["last_import_timestamp"]

        # Should have either +00:00 or Z, but not both
        has_offset = "+00:00" in timestamp
        has_z = timestamp.endswith("Z")

        assert has_offset or has_z, f"Timestamp missing timezone info: {timestamp}"
        assert not (has_offset and has_z), f"Timestamp has both offset and Z: {timestamp}"

    def test_written_state_has_valid_timestamp(self, tmp_path):
        """Test that written state files contain valid timestamps"""
        state_file = tmp_path / "state.json"

        # Create state with current timestamp (simulating what the server does)
        test_state = {
            "last_import_timestamp": datetime.now(timezone.utc).isoformat(),
            "synced_ranges": [],
        }

        with patch("server.STATE_FILE", state_file):
            write_state(test_state)

        # Read back and validate
        loaded = json.loads(state_file.read_text(encoding="utf-8"))
        timestamp = loaded["last_import_timestamp"]

        # Should not have malformed double timezone
        assert "+00:00Z" not in timestamp, \
            f"Written state has malformed timestamp: {timestamp}"

        # Should be parseable
        _assert_iso8601(timestamp, f"Written timestamp not valid ISO 8601: {timestamp}")


class TestStateManagement:
    """Test state file reading and writing"""

    def test_load_state_existing(self, tmp_path):
        """Test loading existing state file"""
        state_file = tmp_path / "state.json"
        test_state = {
            "last_import_timestamp": "2026-01-22T00:00:00Z",
            "synced_ranges": [
                {
                    "start": "2026-01-01T00:00:00Z",
                    "end": "2026-01-21T00:00:00Z",
                    "doc_count": 614,
                }
            ],
        }
        state_file.write_text(json.dumps(test_state), encoding="utf-8")

        # Mock STATE_FILE
        with patch("server.STATE_FILE", state_file):
            state = load_state()

        assert state["last_import_timestamp"] == "2026-01-22T00:00:00Z"
        assert len(state["synced_ranges"]) == 1
        assert state["synced_ranges"][0]["doc_count"] == 614

    def test_load_state_missing(self, tmp_path):
        """Test loading when state file doesn't exist"""
        state_file = tmp_path / "nonexistent.json"

        with patch("server.STATE_FILE", state_file):
            state = load_state()

        assert "last_import_timestamp" in state
        assert "synced_ranges" in state
        assert state["synced_ranges"] == []

    def test_write_state(self, tmp_path):
        """Test writing state file"""
        state_file = tmp_path / "state.json"
        test_state = {
            "last_import_timestamp": "2026-01-22T00:00:00Z",
            "synced_ranges": [],
        }

        with patch("server.STATE_FILE", state_file):
            write_state(test_state)

        assert state_file.exists()

        loaded = json.loads(state_file.read_text(encoding="utf-8"))
        assert loaded["last_import_timestamp"] == "2026-01-22T00:00:00Z"


class TestOptimization:
    """Test synced range optimization logic"""

    def test_optimize_no_ranges(self):
        """Test optimization with no synced ranges"""
        should_proceed, optimized_after = optimize_backfill("2026-01-15", [])

        assert should_proceed is True
        assert optimized_after is None

    def test_optimize_target_within_range(self):
        """Test optimization when target is within synced range"""
        synced_ranges = [
            {
                "start": "2026-01-01T00:00:00+00:00",
                "end": "2026-01-21T00:00:00+00:00",
                "doc_count": 614,
            }
        ]

        should_proceed, optimized_after = optimize_backfill("2026-01-15", synced_ranges)

        assert should_proceed is False
        assert optimized_after is None

    def test_optimize_target_before_range(self):
        """Test optimization when target is before synced range"""
        synced_ranges = [
            {
                "start": "2026-01-15T00:00:00+00:00",
                "end": "2026-01-21T00:00:00+00:00",
                "doc_count": 100,
            }
        ]

        should_proceed, optimized_after = optimize_backfill("2026-01-10", synced_ranges)

        assert should_proceed is True
        assert optimized_after == "2026-01-21T00:00:00+00:00"

    def test_optimize_target_after_range(self):
        """Test optimization when target is after synced range"""
        synced_ranges = [
            {
                "start": "2026-01-01T00:00:00+00:00",
                "end": "2026-01-10T00:00:00+00:00",
                "doc_count": 50,
            }
        ]

        should_proceed, optimized_after = optimize_backfill("2026-01-20", synced_ranges)

        assert should_proceed is True
        assert optimized_after is None


class TestFilenameHandling:
    """Test filename sanitization and ID extraction"""

    def test_sanitize_filename_basic(self):
        """Test basic filename sanitization"""
        result = sanitize_filename("Simple Title")
        assert result == "Simple Title.md"

    def test_sanitize_filename_special_chars(self):
        """Test sanitization with special characters"""
        result = sanitize_filename("Title / With : Special <Chars>")

        for forbidden in ("/", ":", "<", ">"):
            assert forbidden not in result
        assert result.endswith(".md")

    def test_sanitize_filename_long(self):
        """Test truncation of long filenames"""
        long_title = "A" * 150
        result = sanitize_filename(long_title)

        # 100-char title budget plus the ".md" suffix; the bound of 104 is the
        # original (slightly loose) limit and is kept unchanged.
        assert len(result) <= 104

    def test_extract_id_from_url(self):
        """Test ID extraction from Readwise URL"""
        url = "https://readwise.io/reader/document/123456"
        assert extract_id_from_url(url) == "123456"

        url_with_slash = "https://readwise.io/reader/document/789012/"
        assert extract_id_from_url(url_with_slash) == "789012"

    def test_extract_id_none(self):
        """Test ID extraction with None URL"""
        assert extract_id_from_url(None) is None

    def test_extract_id_empty(self):
        """Test ID extraction with empty URL"""
        assert extract_id_from_url("") is None


class TestFilenameQmdValidation:
    r"""
    Test filename sanitization fixes for qmd indexer validation.

    The qmd indexer requires filenames to contain at least one alphanumeric
    character (/[\p{L}\p{N}]/u regex). These tests verify the fix for documents
    with titles containing only special characters (emoji, ellipsis, etc.).

    Regression prevention for issue: handelize: path has no valid filename content
    """

    def test_ellipsis_only_title(self):
        """Test that ellipsis-only title generates valid filename"""
        doc = {
            "title": "…",
            "author": "Take Action!",
            "saved_at": "2025-12-08T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename("…", doc)

        # Should use fallback with author and date
        assert result == "Tweet by Take Action! - 2025-12-08.md"

        # Should pass qmd validation (has alphanumeric characters)
        assert _has_alphanumeric(result), \
            f"Filename '{result}' has no alphanumeric characters"

    def test_emoji_only_title(self):
        """Test that emoji-only title generates valid filename"""
        doc = {
            "title": "🍿🍿",
            "author": "Elon Musk",
            "saved_at": "2025-12-06T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename("🍿🍿", doc)

        assert result == "Tweet by Elon Musk - 2025-12-06.md"

        # Verify qmd validation
        assert _has_alphanumeric(result), \
            f"Filename '{result}' has no alphanumeric characters"

    def test_empty_title(self):
        """Test that empty title generates valid filename"""
        doc = {
            "title": "",
            "author": "x.com",
            "saved_at": "2025-12-05T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename("", doc)

        assert result == "Tweet by x.com - 2025-12-05.md"

        # Verify qmd validation
        assert _has_alphanumeric(result), \
            f"Filename '{result}' has no alphanumeric characters"

    def test_whitespace_only_title(self):
        """Test that whitespace-only title generates valid filename"""
        doc = {
            "title": " ",
            "author": "TestUser",
            "saved_at": "2025-12-01T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename(" ", doc)

        # Should use fallback
        assert result == "Tweet by TestUser - 2025-12-01.md"

        # Verify qmd validation
        assert _has_alphanumeric(result)

    def test_special_chars_only_title(self):
        """Test title with only special characters"""
        doc = {
            "title": "!@#$%^&*()",
            "author": "SpecialUser",
            "saved_at": "2025-12-02T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename("!@#$%^&*()", doc)

        # Should use fallback
        assert result == "Tweet by SpecialUser - 2025-12-02.md"

        # Verify qmd validation
        assert _has_alphanumeric(result)

    def test_article_category_fallback(self):
        """Test fallback uses category-specific label for articles"""
        doc = {
            "title": "…",
            "author": "Blog Author",
            "saved_at": "2025-12-03T00:00:00Z",
            "category": "article",
        }

        result = sanitize_filename("…", doc)

        assert result == "Article by Blog Author - 2025-12-03.md"
        assert _has_alphanumeric(result)

    def test_pdf_category_fallback(self):
        """Test fallback uses category-specific label for PDFs"""
        doc = {
            "title": "🔥",
            "author": "PDF Author",
            "saved_at": "2025-12-04T00:00:00Z",
            "category": "pdf",
        }

        result = sanitize_filename("🔥", doc)

        assert result == "Pdf by PDF Author - 2025-12-04.md"
        assert _has_alphanumeric(result)

    def test_no_doc_fallback(self):
        """Test fallback when doc parameter is not provided"""
        result = sanitize_filename("…", None)

        # Should use generic fallback with timestamp
        assert result.startswith("Untitled - ")
        assert result.endswith(".md")

        # Should have date in format YYYY-MM-DD
        assert _has_alphanumeric(result)

    def test_author_with_special_chars(self):
        """Test that author names with special chars are sanitized"""
        doc = {
            "title": "…",
            "author": "User/Name:Test",
            "saved_at": "2025-12-05T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename("…", doc)

        # Author special chars should be removed
        assert "/" not in result
        assert ":" not in result
        assert result == "Tweet by UserNameTest - 2025-12-05.md"

    def test_very_long_author_name(self):
        """Test that author names are truncated to 30 chars"""
        doc = {
            "title": "…",
            "author": "A" * 50,  # 50 character author name
            "saved_at": "2025-12-06T00:00:00Z",
            "category": "tweet",
        }

        result = sanitize_filename("…", doc)

        # Author should be truncated to 30 chars
        expected = f"Tweet by {'A' * 30} - 2025-12-06.md"
        assert result == expected

    def test_mixed_valid_invalid_chars(self):
        """Test title with mix of valid and invalid chars still uses original"""
        # This should NOT use fallback because it has some alphanumeric
        result = sanitize_filename("Hello 🍿 World", None)

        # Should use original title (cleaned up)
        assert result == "Hello 🍿 World.md"
        assert _has_alphanumeric(result)

    def test_qmd_validation_regression(self):
        """
        Regression test: Verify all problematic cases that caused qmd errors
        now produce valid filenames.

        This prevents regression of the bug where qmd indexer failed with:
        "handelize: path has no valid filename content"
        """
        problematic_cases = [
            ("…", {"author": "User1", "saved_at": "2025-12-08", "category": "tweet"}),
            ("🍿🍿", {"author": "User2", "saved_at": "2025-12-06", "category": "tweet"}),
            ("", {"author": "User3", "saved_at": "2025-12-05", "category": "tweet"}),
            (" ", {"author": "User4", "saved_at": "2025-12-04", "category": "tweet"}),
            ("...", {"author": "User5", "saved_at": "2025-12-03", "category": "tweet"}),
            ("---", {"author": "User6", "saved_at": "2025-12-02", "category": "tweet"}),
        ]

        for title, doc in problematic_cases:
            result = sanitize_filename(title, doc)

            # Critical: Must have at least one alphanumeric character
            assert _has_alphanumeric(result), \
                f"REGRESSION: Title '{title}' produced invalid filename '{result}' " \
                f"with no alphanumeric characters"

    def test_save_document_with_invalid_title(self, tmp_path):
        """Integration test: Verify save_document works with invalid titles"""
        doc = {
            "title": "…",
            "author": "Test Author",
            "saved_at": "2025-12-08T00:00:00Z",
            "category": "tweet",
            "content": "Test content",
        }

        filepath = save_document(doc, tmp_path)

        # Should create file with fallback name
        assert filepath.exists()
        assert filepath.name == "Tweet by Test Author - 2025-12-08.md"

        # Verify qmd would accept this filename
        assert _has_alphanumeric(filepath.name), \
            f"Saved file '{filepath.name}' would fail qmd validation"


class TestDocumentScanning:
    """Test filesystem scanning for deduplication"""

    def test_scan_empty_directory(self, tmp_path):
        """Test scanning empty directory"""
        with patch("server.DOCUMENTS_DIR", tmp_path), \
             patch("server.ARCHIVES_DIR", tmp_path / "archives"), \
             patch("server.DAILY_REVIEWS_DIR", tmp_path / "reviews"):
            known_ids, known_filenames = scan_existing_documents()

        assert len(known_ids) == 0
        assert len(known_filenames) == 0

    def test_scan_with_documents(self, tmp_path):
        """Test scanning directory with documents"""
        # Create test document
        doc_content = (
            '---\n'
            'title: "Test Document"\n'
            'readwise_url: "https://readwise.io/reader/document/test123"\n'
            '---\n'
            '\n'
            'Content here\n'
        )
        doc_file = tmp_path / "Test Document.md"
        doc_file.write_text(doc_content, encoding="utf-8")

        # DAILY_REVIEWS_DIR is patched as well (as in test_scan_empty_directory)
        # so the scan never touches a real directory outside tmp_path.
        with patch("server.DOCUMENTS_DIR", tmp_path), \
             patch("server.ARCHIVES_DIR", tmp_path / "archives"), \
             patch("server.DAILY_REVIEWS_DIR", tmp_path / "reviews"):
            known_ids, known_filenames = scan_existing_documents()

        assert "test123" in known_ids
        assert "Test Document.md" in known_filenames


class TestMarkdownFormatting:
    """Test markdown document formatting"""

    def test_format_document_basic(self):
        """Test basic document formatting"""
        doc = {
            "title": "Test Title",
            "author": "Test Author",
            "category": "tweet",
            "saved_at": "2026-01-22T00:00:00Z",
            "readwise_url": "https://readwise.io/reader/document/123",
            "content": "This is the content",
        }

        markdown = format_document_markdown(doc)

        assert "---" in markdown
        assert "title: Test Title" in markdown
        assert "author: Test Author" in markdown
        assert "## Content" in markdown
        assert "This is the content" in markdown

    def test_format_document_with_summary(self):
        """Test document formatting with summary"""
        doc = {
            "title": "Test Title",
            "content": "Content here",
            "summary": "This is a summary",
        }

        markdown = format_document_markdown(doc)

        assert "## Summary" in markdown
        assert "This is a summary" in markdown

    def test_format_document_with_notes(self):
        """Test document formatting with notes"""
        doc = {
            "title": "Test Title",
            "content": "Content here",
            "notes": "My personal notes",
        }

        markdown = format_document_markdown(doc)

        assert "## Notes" in markdown
        assert "My personal notes" in markdown


class TestDocumentSaving:
    """Test document saving to filesystem"""

    def test_save_document(self, tmp_path):
        """Test saving document to file"""
        doc = {
            "title": "Test Document",
            "content": "Test content",
        }

        filepath = save_document(doc, tmp_path)

        assert filepath.exists()
        assert filepath.name == "Test Document.md"

        # Verify content
        content = filepath.read_text(encoding="utf-8")
        assert "Test Document" in content
        assert "Test content" in content

    def test_save_document_collision(self, tmp_path):
        """Test handling filename collisions"""
        doc = {
            "title": "Test Document",
            "content": "First version",
        }

        # Save first document
        filepath1 = save_document(doc, tmp_path)
        assert filepath1.name == "Test Document.md"

        # Save second document with same title
        doc2 = {
            "title": "Test Document",
            "content": "Second version",
        }
        filepath2 = save_document(doc2, tmp_path)

        assert filepath2.name == "Test Document (1).md"
        assert filepath1 != filepath2


# ============================================================================
# INTEGRATION TESTS (require mocked API)
# ============================================================================

class TestAPIIntegration:
    """Test API integration with mocked responses"""

    @pytest.mark.asyncio
    @patch("server.fetch_api")
    async def test_import_recent_with_dedup(self, mock_fetch, tmp_path):
        """Test importing recent documents with deduplication"""
        # Mock API response
        mock_fetch.return_value = {
            "results": [
                {
                    "title": "New Document",
                    "content": "New content",
                    "readwise_url": "https://readwise.io/reader/document/new123",
                    "saved_at": "2026-01-22T00:00:00Z",
                },
                {
                    "title": "Existing Document",
                    "content": "Existing content",
                    "readwise_url": "https://readwise.io/reader/document/existing456",
                    "saved_at": "2026-01-21T00:00:00Z",
                },
            ]
        }

        # Create existing document to test deduplication
        existing_doc = tmp_path / "Existing Document.md"
        existing_doc.write_text(
            '---\nreadwise_url: "https://readwise.io/reader/document/existing456"\n---\n',
            encoding="utf-8",
        )

        # Note: Full integration test would require mocking the async tool
        # This is a simplified test to verify the mock setup
        assert mock_fetch.return_value["results"][0]["title"] == "New Document"

    def test_state_timestamp_regression(self, tmp_path):
        """Regression test: Verify timestamps never have both +00:00 and Z"""
        # This test prevents regression of the bug where we appended Z to .isoformat()
        # The bug was: datetime.now(timezone.utc).isoformat() + "Z"
        # This produced: 2026-01-23T02:16:59.102761+00:00Z (malformed)

        # Correct implementation (after fix):
        timestamp = datetime.now(timezone.utc).isoformat()

        # Verify fix: timestamp should NOT have both +00:00 and Z
        assert not timestamp.endswith("+00:00Z"), \
            f"REGRESSION: Timestamp has both +00:00 and Z: {timestamp}"

        # Verify correct format has timezone info
        assert "+00:00" in timestamp or timestamp.endswith("Z"), \
            f"Timestamp missing timezone info: {timestamp}"

        # Verify Readwise API would accept this format
        # (It rejected +00:00Z with 400 Bad Request)
        _assert_iso8601(timestamp, f"Timestamp format invalid for API: {timestamp}")

        # Also test the old buggy way would produce malformed format
        buggy_timestamp = datetime.now(timezone.utc).isoformat() + "Z"
        assert buggy_timestamp.endswith("+00:00Z"), \
            "Test verification: buggy implementation should produce +00:00Z"


# ============================================================================
# FIXTURES
# ============================================================================

@pytest.fixture
def sample_state():
    """Sample state file data"""
    return {
        "last_import_timestamp": "2026-01-22T00:00:00Z",
        "oldest_imported_date": "2026-01-01",
        "synced_ranges": [
            {
                "start": "2026-01-01T00:00:00+00:00",
                "end": "2026-01-21T00:00:00+00:00",
                "doc_count": 614,
                "verified_at": "2026-01-21T00:00:00Z",
            }
        ],
        "backfill_in_progress": False,
    }


@pytest.fixture
def sample_document():
    """Sample Readwise document"""
    return {
        "title": "Sample Tweet Thread",
        "author": "@testuser",
        "source": "Twitter",
        "category": "tweet",
        "saved_at": "2026-01-22T00:00:00Z",
        "updated_at": "2026-01-22T00:00:00Z",
        "readwise_url": "https://readwise.io/reader/document/sample123",
        "source_url": "https://twitter.com/testuser/status/123456",
        "content": "This is a sample tweet thread content.",
        "summary": "A summary of the tweet thread",
        "tags": ["testing", "sample"],
    }


if __name__ == "__main__":
    pytest.main([__file__, "-v"])

Loading blob content...

Latest Blog Posts

MCP isn't dead–it's maturing
By punkpeye on January 20, 2026.
mcp
Google's AI Overview Has Been Sending Me the Wrong Customers for 6 Months
By punkpeye on January 20, 2026.
google
ai
startups
Expose Your Local MCP Server to the Internet
By punkpeye on January 19, 2026.
MCP Inspector
mcp
tutorial

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ngpestelos/readwise-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_server.py•24.4 KiB