# tests/db/test_payload_integration.py
# Copyright © 2025 Dr.-Ing. Paul Wilhelm <paul@wilhelm.dev>
# This file is part of Archive Agent. See LICENSE for details.

import logging
from unittest.mock import Mock

from qdrant_client.models import ScoredPoint

from archive_agent.db.QdrantSchema import QdrantPayload, parse_payload
from archive_agent.util.format import get_point_page_line_info, get_point_reference_info
from archive_agent.ai.query.AiQuery import AiQuery

logger = logging.getLogger(__name__)


class TestPayloadIntegrationFormatUtils:
    """Test payload integration with format utilities."""

    @staticmethod
    def create_mock_point(payload_dict: dict) -> Mock:
        """Helper to create a mock ScoredPoint with payload."""
        point = Mock(spec=ScoredPoint)
        point.payload = payload_dict
        point.score = 0.85
        return point

    def test_get_point_page_line_info_with_page_range(self):
        """Test get_point_page_line_info with page_range payload."""
        payload_dict = {
            "file_path": "/home/user/document.pdf",
            "file_mtime": 1640995200.0,
            "chunk_index": 0,
            "chunks_total": 5,
            "chunk_text": "PDF content from pages 2-4.",
            "version": "v5.1.0",
            "page_range": [2, 4],
            "line_range": None
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_page_line_info(point)

        assert result == "pages 2–4"

    def test_get_point_page_line_info_with_single_page(self):
        """Test get_point_page_line_info with single page."""
        payload_dict = {
            "file_path": "/home/user/document.pdf",
            "file_mtime": 1640995200.0,
            "chunk_index": 0,
            "chunks_total": 5,
            "chunk_text": "PDF content from page 7.",
            "version": "v5.1.0",
            "page_range": [7],
            "line_range": None
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_page_line_info(point)

        assert result == "page 7"

    def test_get_point_page_line_info_with_line_range(self):
        """Test get_point_page_line_info with line_range payload."""
        payload_dict = {
            "file_path": "/home/user/document.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 2,
            "chunks_total": 8,
            "chunk_text": "Text content from lines 15-20.",
            "version": "v5.1.0",
            "page_range": None,
            "line_range": [15, 20]
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_page_line_info(point)

        assert result == "lines 15–20"

    def test_get_point_page_line_info_with_single_line(self):
        """Test get_point_page_line_info with single line."""
        payload_dict = {
            "file_path": "/home/user/document.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 2,
            "chunks_total": 8,
            "chunk_text": "Text content from line 42.",
            "version": "v5.1.0",
            "page_range": None,
            "line_range": [42]
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_page_line_info(point)

        assert result == "line 42"

    def test_get_point_page_line_info_no_ranges(self):
        """Test get_point_page_line_info with no ranges (legacy payload)."""
        legacy_payload_dict = {
            "file_path": "/home/user/document.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 2,
            "chunks_total": 8,
            "chunk_text": "Legacy chunk content.",
            # No version, page_range, line_range fields
        }
        point = self.create_mock_point(legacy_payload_dict)

        result = get_point_page_line_info(point)

        assert result is None

    def test_get_point_page_line_info_empty_ranges(self):
        """Test get_point_page_line_info with empty ranges."""
        payload_dict = {
            "file_path": "/home/user/document.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 2,
            "chunks_total": 8,
            "chunk_text": "Chunk with empty ranges.",
            "version": "v5.1.0",
            "page_range": [],  # Empty
            "line_range": None
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_page_line_info(point)

        assert result is None

    def test_get_point_reference_info_with_pages(self):
        """Test get_point_reference_info with page-based document."""
        payload_dict = {
            "file_path": "/home/user/My Document.pdf",
            "file_mtime": 1640995200.0,
            "chunk_index": 3,
            "chunks_total": 10,
            "chunk_text": "Chunk from PDF pages.",
            "version": "v5.1.0",
            "page_range": [5, 7],
            "line_range": None
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_reference_info(logger=logger, point=point, verbose=True)

        # Should contain escaped file path and page info
        assert "file:///home/user/My%20Document.pdf" in result
        assert "pages 5–7" in result

    def test_get_point_reference_info_with_lines(self):
        """Test get_point_reference_info with line-based document."""
        payload_dict = {
            "file_path": "/home/user/notes.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 1,
            "chunks_total": 4,
            "chunk_text": "Chunk from text lines.",
            "version": "v5.1.0",
            "page_range": None,
            "line_range": [10, 15]
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_reference_info(logger=logger, point=point, verbose=True)

        assert "file:///home/user/notes.txt" in result
        assert "lines 10–15" in result

    def test_get_point_reference_info_verbose_mode(self):
        """Test get_point_reference_info with verbose=True."""
        payload_dict = {
            "file_path": "/home/user/document.pdf",
            "file_mtime": 1640995200.0,
            "chunk_index": 2,
            "chunks_total": 10,
            "chunk_text": "Test chunk content.",
            "version": "v5.1.0",
            "page_range": [3, 5],
            "line_range": None
        }
        point = self.create_mock_point(payload_dict)

        result = get_point_reference_info(logger=logger, point=point, verbose=True)

        # Verbose mode should include chunk info and timestamp
        assert "chunk 3/10" in result
        assert "pages 3–5" in result
        assert "2022-01-01" in result  # Formatted timestamp

    def test_get_point_reference_info_legacy_payload(self):
        """Test get_point_reference_info with legacy payload (no ranges)."""
        legacy_payload_dict = {
            "file_path": "/home/user/old_document.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 0,
            "chunks_total": 3,
            "chunk_text": "Legacy chunk without ranges.",
        }
        point = self.create_mock_point(legacy_payload_dict)

        result = get_point_reference_info(logger=logger, point=point, verbose=True)

        # Should fall back to chunk info only
        assert "file:///home/user/old_document.txt" in result
        assert "chunk 1/3" in result


class TestPayloadIntegrationAiQuery:
    """Test payload integration with AiQuery utilities."""

    @staticmethod
    def create_mock_point(payload_dict: dict) -> Mock:
        """Helper to create a mock ScoredPoint with payload."""
        point = Mock(spec=ScoredPoint)
        point.payload = payload_dict
        point.score = 0.85
        return point

    def test_get_point_hash_with_all_fields(self):
        """Test AiQuery.get_point_hash with complete payload."""
        payload_dict = {
            "file_path": "/home/user/test.pdf",
            "file_mtime": 1640995200.0,
            "chunk_index": 5,
            "chunks_total": 10,
            "chunk_text": "Test chunk content for hashing.",
            "version": "v5.1.0",
            "page_range": [2, 4],
            "line_range": None
        }
        point = self.create_mock_point(payload_dict)

        hash_result = AiQuery.get_point_hash(point)

        # Should be 16-character hex string
        assert isinstance(hash_result, str)
        assert len(hash_result) == 16
        assert all(c in '0123456789abcdef' for c in hash_result)

    def test_get_point_hash_with_legacy_payload(self):
        """Test AiQuery.get_point_hash with legacy payload (no optional fields)."""
        legacy_payload_dict = {
            "file_path": "/home/user/legacy.txt",
            "file_mtime": 1609459200.0,  # Different timestamp
            "chunk_index": 2,
            "chunks_total": 6,
            "chunk_text": "Legacy chunk content.",
            # No version, page_range, line_range
        }
        point = self.create_mock_point(legacy_payload_dict)

        hash_result = AiQuery.get_point_hash(point)

        # Should still work and produce valid hash
        assert isinstance(hash_result, str)
        assert len(hash_result) == 16
        assert all(c in '0123456789abcdef' for c in hash_result)

    def test_get_point_hash_consistency(self):
        """Test that identical payloads produce identical hashes."""
        payload_dict = {
            "file_path": "/home/user/consistent.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 3,
            "chunks_total": 7,
            "chunk_text": "Consistent content.",
            "version": "v1.0.0",
            "page_range": None,
            "line_range": [10, 15]
        }
        point1 = self.create_mock_point(payload_dict.copy())
        point2 = self.create_mock_point(payload_dict.copy())

        hash1 = AiQuery.get_point_hash(point1)
        hash2 = AiQuery.get_point_hash(point2)

        assert hash1 == hash2

    def test_get_point_hash_different_for_different_payloads(self):
        """Test that different payloads produce different hashes."""
        payload_dict1 = {
            "file_path": "/home/user/file1.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 1,
            "chunks_total": 5,
            "chunk_text": "Content 1.",
            "version": "v1.0.0",
            "page_range": None,
            "line_range": [5, 10]
        }
        payload_dict2 = {
            "file_path": "/home/user/file2.txt",  # Different file
            "file_mtime": 1640995200.0,
            "chunk_index": 1,
            "chunks_total": 5,
            "chunk_text": "Content 1.",
            "version": "v1.0.0",
            "page_range": None,
            "line_range": [5, 10]
        }
        point1 = self.create_mock_point(payload_dict1)
        point2 = self.create_mock_point(payload_dict2)

        hash1 = AiQuery.get_point_hash(point1)
        hash2 = AiQuery.get_point_hash(point2)

        assert hash1 != hash2

    def test_get_context_from_points(self):
        """Test AiQuery.get_context_from_points with multiple points."""
        payload1 = {
            "file_path": "/home/user/doc1.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 0,
            "chunks_total": 2,
            "chunk_text": "First chunk content.",
            "version": "v1.0.0",
            "page_range": None,
            "line_range": [1, 5]
        }
        payload2 = {
            "file_path": "/home/user/doc2.txt",
            "file_mtime": 1640995200.0,
            "chunk_index": 1,
            "chunks_total": 3,
            "chunk_text": "Second chunk content.",
            "version": "v1.0.0",
            "page_range": [2, 3],
            "line_range": None
        }
        point1 = self.create_mock_point(payload1)
        point2 = self.create_mock_point(payload2)
        points = [point1, point2]  # type: ignore[list-item]  # Mock objects for testing

        context = AiQuery.get_context_from_points(points)  # type: ignore[arg-type]  # Mock objects for testing

        # Should contain both chunks with hash separators
        assert "First chunk content." in context
        assert "Second chunk content." in context
        assert "<<<" in context  # Hash separator format
        assert ">>>" in context


class TestPayloadIntegrationBackwardCompatibility:
    """Test backward compatibility with legacy payloads across the system."""

    def test_legacy_payload_missing_version_field(self):
        """Test that legacy payloads without version field work correctly."""
        legacy_payload = {
            "file_path": "/home/user/legacy.txt",
            "file_mtime": 1609459200.0,
            "chunk_index": 0,
            "chunks_total": 1,
            "chunk_text": "Legacy content without version."
            # Missing: version, page_range, line_range
        }

        # Should parse successfully
        parsed = parse_payload(legacy_payload)
        assert parsed.version is None
        assert parsed.page_range is None
        assert parsed.line_range is None

        # Should work with format utilities
        mock_point = Mock(spec=ScoredPoint)
        mock_point.payload = legacy_payload
        mock_point.score = 0.9

        reference_info = get_point_reference_info(logger=logger, point=mock_point, verbose=True)
        assert "chunk 1/1" in reference_info
        assert "file:///home/user/legacy.txt" in reference_info

    def test_legacy_payload_missing_range_fields(self):
        """Test legacy payloads without range fields (pre-v5.0.0)."""
        pre_v5_payload = {
            "file_path": "/home/user/pre_v5.txt",
            "file_mtime": 1577836800.0,  # 2020-01-01
            "chunk_index": 2,
            "chunks_total": 5,
            "chunk_text": "Pre-v5.0.0 chunk content.",
            "version": "v4.9.0"
            # Missing: page_range, line_range (added in v5.0.0)
        }

        # Should parse successfully with defaults
        parsed = parse_payload(pre_v5_payload)
        assert parsed.version == "v4.9.0"
        assert parsed.page_range is None
        assert parsed.line_range is None

        # Should work with all utilities
        mock_point = Mock(spec=ScoredPoint)
        mock_point.payload = pre_v5_payload
        mock_point.score = 0.8

        # Format utilities should handle gracefully
        page_line_info = get_point_page_line_info(mock_point)
        assert page_line_info is None  # No ranges available

        reference_info = get_point_reference_info(logger=logger, point=mock_point, verbose=True)
        assert "chunk 3/5" in reference_info  # Falls back to chunk info

        # AiQuery should work
        hash_result = AiQuery.get_point_hash(mock_point)
        assert isinstance(hash_result, str)
        assert len(hash_result) == 16

    def test_payload_evolution_compatibility(self):
        """Test that payloads can evolve while maintaining compatibility."""
        # Simulate different payload versions
        payloads = [
            # Very old payload (minimal fields)
            {
                "file_path": "/home/user/v1.txt",
                "file_mtime": 1546300800.0,  # 2019-01-01
                "chunk_index": 0,
                "chunks_total": 1,
                "chunk_text": "Very old chunk."
            },
            # Pre-v5.0.0 payload (has version but no ranges)
            {
                "file_path": "/home/user/v4.txt",
                "file_mtime": 1577836800.0,  # 2020-01-01
                "chunk_index": 0,
                "chunks_total": 1,
                "chunk_text": "Pre-v5 chunk.",
                "version": "v4.9.0"
            },
            # v5.0.0+ payload (has ranges)
            {
                "file_path": "/home/user/v5.txt",
                "file_mtime": 1609459200.0,  # 2021-01-01
                "chunk_index": 0,
                "chunks_total": 1,
                "chunk_text": "v5+ chunk with ranges.",
                "version": "v5.1.0",
                "page_range": None,
                "line_range": [1, 10]
            },
            # Current payload (all fields)
            {
                "file_path": "/home/user/current.txt",
                "file_mtime": 1640995200.0,  # 2022-01-01
                "chunk_index": 0,
                "chunks_total": 1,
                "chunk_text": "Current chunk with all fields.",
                "version": "v7.4.0",
                "page_range": None,
                "line_range": [1, 15]
            }
        ]

        # All payloads should parse successfully
        for i, payload_dict in enumerate(payloads):
            parsed = parse_payload(payload_dict)
            assert isinstance(parsed, QdrantPayload)

            # Verify the payload has the expected file path pattern
            if i == 0:
                assert "v1" in parsed.file_path
            elif i == 1:
                assert "v4" in parsed.file_path  # This is the v4.txt file
            elif i == 2:
                assert "v5" in parsed.file_path
            else:
                assert "current" in parsed.file_path

            # All should work with format utilities
            mock_point = Mock(spec=ScoredPoint)
            mock_point.payload = payload_dict
            mock_point.score = 0.7

            reference_info = get_point_reference_info(logger=logger, point=mock_point, verbose=True)
            assert isinstance(reference_info, str)
            assert len(reference_info) > 0

            hash_result = AiQuery.get_point_hash(mock_point)
            assert isinstance(hash_result, str)
            assert len(hash_result) == 16
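

# --- Illustrative sketch only (not part of the Archive Agent codebase) ---
# A minimal, standard-library model of the payload shape exercised by the tests above:
# five required fields, plus optional version/page_range/line_range that default to None
# so that legacy (pre-v5.0.0) payloads still parse. The real schema is QdrantPayload /
# parse_payload in archive_agent.db.QdrantSchema; the names _PayloadSketch and
# _parse_payload_sketch below are hypothetical placeholders for illustration.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class _PayloadSketch:
    file_path: str
    file_mtime: float
    chunk_index: int
    chunks_total: int
    chunk_text: str
    version: Optional[str] = None
    page_range: Optional[List[int]] = None
    line_range: Optional[List[int]] = None


def _parse_payload_sketch(payload: dict) -> _PayloadSketch:
    """Build the sketch model from a raw payload dict, tolerating missing optional fields."""
    return _PayloadSketch(**payload)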
