Skip to main content
Glama
juanqui
by juanqui
test_intelligent_cache.py (12.5 kB)
"""Tests for the intelligent cache management system.""" import json import tempfile from pathlib import Path from unittest.mock import patch import pytest from src.pdfkb.config import ServerConfig from src.pdfkb.intelligent_cache import IntelligentCacheManager class TestIntelligentCacheManager: """Test cases for IntelligentCacheManager.""" def setup_method(self): """Set up test fixtures.""" self.temp_dir = Path(tempfile.mkdtemp()) self.cache_dir = self.temp_dir / "cache" # Mock environment variables for ServerConfig with patch.dict( "os.environ", { "OPENAI_API_KEY": "sk-test-key-123", }, ): self.config = ServerConfig.from_env() self.config.cache_dir = self.cache_dir self.cache_manager = IntelligentCacheManager(self.config, self.cache_dir) def teardown_method(self): """Clean up test fixtures.""" import shutil if self.temp_dir.exists(): shutil.rmtree(self.temp_dir) def test_step_specific_fingerprints(self): """Test that step-specific fingerprints are generated correctly.""" # Test parsing fingerprint parsing_fp = self.cache_manager.get_parsing_fingerprint() assert isinstance(parsing_fp, str) assert len(parsing_fp) == 64 # SHA-256 hex length # Test chunking fingerprint chunking_fp = self.cache_manager.get_chunking_fingerprint() assert isinstance(chunking_fp, str) assert len(chunking_fp) == 64 # Test embedding fingerprint embedding_fp = self.cache_manager.get_embedding_fingerprint() assert isinstance(embedding_fp, str) assert len(embedding_fp) == 64 # Fingerprints should be different for different config aspects assert parsing_fp != chunking_fp assert chunking_fp != embedding_fp assert parsing_fp != embedding_fp def test_fingerprint_consistency(self): """Test that fingerprints are consistent for same configuration.""" # Get fingerprints multiple times parsing_fp1 = self.cache_manager.get_parsing_fingerprint() parsing_fp2 = self.cache_manager.get_parsing_fingerprint() chunking_fp1 = self.cache_manager.get_chunking_fingerprint() chunking_fp2 = 
self.cache_manager.get_chunking_fingerprint() embedding_fp1 = self.cache_manager.get_embedding_fingerprint() embedding_fp2 = self.cache_manager.get_embedding_fingerprint() # Should be identical assert parsing_fp1 == parsing_fp2 assert chunking_fp1 == chunking_fp2 assert embedding_fp1 == embedding_fp2 def test_fingerprint_changes_with_config(self): """Test that fingerprints change when configuration changes.""" # Get initial fingerprints initial_parsing = self.cache_manager.get_parsing_fingerprint() initial_chunking = self.cache_manager.get_chunking_fingerprint() initial_embedding = self.cache_manager.get_embedding_fingerprint() # Change parsing config - use a different parser than the default self.config.pdf_parser = "unstructured" # Changed from "pymupdf4llm" to "unstructured" new_parsing = self.cache_manager.get_parsing_fingerprint() assert new_parsing != initial_parsing # Change chunking config self.config.chunk_size = 2000 new_chunking = self.cache_manager.get_chunking_fingerprint() assert new_chunking != initial_chunking # Change embedding config self.config.embedding_model = "text-embedding-ada-002" # Changed to a clearly different model new_embedding = self.cache_manager.get_embedding_fingerprint() assert new_embedding != initial_embedding def test_detect_config_changes_first_run(self): """Test change detection on first run (no saved fingerprints).""" changes = self.cache_manager.detect_config_changes() # All should be True on first run assert changes["parsing"] is True assert changes["chunking"] is True assert changes["embedding"] is True def test_detect_config_changes_after_save(self): """Test change detection after saving fingerprints.""" # Save fingerprints self.cache_manager.update_fingerprints() # Check changes - should be False now changes = self.cache_manager.detect_config_changes() assert changes["parsing"] is False assert changes["chunking"] is False assert changes["embedding"] is False def test_detect_config_changes_after_modification(self): 
"""Test change detection after modifying configuration.""" # Save initial fingerprints self.cache_manager.update_fingerprints() # Modify parsing config - use a different parser than the default self.config.pdf_parser = "unstructured" # Changed from "pymupdf4llm" to "unstructured" changes = self.cache_manager.detect_config_changes() assert changes["parsing"] is True assert changes["chunking"] is False # Should be unchanged assert changes["embedding"] is False # Should be unchanged def test_fingerprint_file_structure(self): """Test that fingerprint files are created with correct structure.""" self.cache_manager.update_fingerprints() # Check that files exist parsing_file = self.cache_manager._get_fingerprint_path("parsing") chunking_file = self.cache_manager._get_fingerprint_path("chunking") embedding_file = self.cache_manager._get_fingerprint_path("embedding") assert parsing_file.exists() assert chunking_file.exists() assert embedding_file.exists() # Check file contents with open(parsing_file, "r") as f: parsing_data = json.load(f) assert "fingerprint" in parsing_data assert "timestamp" in parsing_data assert "config_version" in parsing_data assert "config" in parsing_data assert parsing_data["config_version"] == "1.0.0" assert "pdf_parser" in parsing_data["config"] assert "unstructured_pdf_processing_strategy" in parsing_data["config"] def test_corrupted_fingerprint_handling(self): """Test handling of corrupted fingerprint files.""" # Create corrupted fingerprint file fingerprint_path = self.cache_manager._get_fingerprint_path("parsing") fingerprint_path.parent.mkdir(parents=True, exist_ok=True) with open(fingerprint_path, "w") as f: f.write("invalid json content") # Should handle gracefully changes = self.cache_manager.detect_config_changes() assert changes["parsing"] is True # Should treat as changed def test_cache_validation_methods(self): """Test cache validation methods.""" # Initially should be invalid (no fingerprints saved) assert not 
self.cache_manager.is_parsing_cache_valid("test_doc") assert not self.cache_manager.is_chunking_cache_valid("test_doc") assert not self.cache_manager.is_embedding_cache_valid("test_doc") # After saving fingerprints, should be valid self.cache_manager.update_fingerprints() assert self.cache_manager.is_parsing_cache_valid("test_doc") assert self.cache_manager.is_chunking_cache_valid("test_doc") assert self.cache_manager.is_embedding_cache_valid("test_doc") def test_clear_fingerprints(self): """Test clearing fingerprint files.""" # Save fingerprints self.cache_manager.update_fingerprints() # Clear one stage self.cache_manager.clear_stage_fingerprint("parsing") changes = self.cache_manager.detect_config_changes() assert changes["parsing"] is True assert changes["chunking"] is False assert changes["embedding"] is False # Clear all self.cache_manager.clear_all_fingerprints() changes = self.cache_manager.detect_config_changes() assert changes["parsing"] is True assert changes["chunking"] is True assert changes["embedding"] is True class TestServerConfigIntegration: """Test ServerConfig integration with IntelligentCacheManager.""" def setup_method(self): """Set up test fixtures.""" self.temp_dir = Path(tempfile.mkdtemp()) with patch.dict( "os.environ", { "OPENAI_API_KEY": "sk-test-key-123", }, ): self.config = ServerConfig.from_env() self.config.cache_dir = self.temp_dir / "cache" def teardown_method(self): """Clean up test fixtures.""" import shutil if self.temp_dir.exists(): shutil.rmtree(self.temp_dir) def test_config_step_specific_methods(self): """Test that ServerConfig step-specific methods work.""" parsing_fp = self.config.get_parsing_fingerprint() chunking_fp = self.config.get_chunking_fingerprint() embedding_fp = self.config.get_embedding_fingerprint() assert isinstance(parsing_fp, str) assert isinstance(chunking_fp, str) assert isinstance(embedding_fp, str) assert len(parsing_fp) == 64 assert len(chunking_fp) == 64 assert len(embedding_fp) == 64 def 
test_config_change_detection_methods(self): """Test ServerConfig change detection methods.""" changes = self.config.detect_config_changes() assert isinstance(changes, dict) assert "parsing" in changes assert "chunking" in changes assert "embedding" in changes # Test individual change methods assert self.config.has_parsing_config_changed() assert self.config.has_chunking_config_changed() assert self.config.has_embedding_config_changed() def test_backward_compatibility(self): """Test that current methods work correctly.""" # Test that all new methods work parsing_fp = self.config.get_parsing_fingerprint() chunking_fp = self.config.get_chunking_fingerprint() embedding_fp = self.config.get_embedding_fingerprint() assert isinstance(parsing_fp, str) assert isinstance(chunking_fp, str) assert isinstance(embedding_fp, str) assert len(parsing_fp) == 64 assert len(chunking_fp) == 64 assert len(embedding_fp) == 64 def test_intelligent_fingerprint_update(self): """Test the new intelligent fingerprint update method.""" # Should not raise any errors self.config.update_intelligent_fingerprints() # After updating, changes should be False changes = self.config.detect_config_changes() assert not changes["parsing"] assert not changes["chunking"] assert not changes["embedding"] def test_integration_with_real_config(): """Test integration with a real configuration scenario.""" with tempfile.TemporaryDirectory() as temp_dir: cache_dir = Path(temp_dir) / "cache" with patch.dict( "os.environ", { "OPENAI_API_KEY": "sk-test-key-123", "CHUNK_SIZE": "1500", "PDF_PARSER": "mineru", "EMBEDDING_MODEL": "text-embedding-3-small", }, ): config = ServerConfig.from_env() config.cache_dir = cache_dir # Test that all methods work with real config cache_manager = config.get_intelligent_cache_manager() # Get fingerprints cache_manager.get_parsing_fingerprint() cache_manager.get_chunking_fingerprint() cache_manager.get_embedding_fingerprint() # Save fingerprints cache_manager.update_fingerprints() # 
Verify files were created fingerprints_dir = cache_dir / "metadata" / "fingerprints" assert (fingerprints_dir / "parsing.json").exists() assert (fingerprints_dir / "chunking.json").exists() assert (fingerprints_dir / "embedding.json").exists() # Test change detection changes = cache_manager.detect_config_changes() assert not any(changes.values()) # No changes after saving if __name__ == "__main__": pytest.main([__file__])

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/juanqui/pdfkb-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.