MCPunk

  • tests
"""Tests for ``Chunk``: ID generation, filter matching, and splitting."""

from pathlib import Path

from mcpunk.file_chunk import Chunk, ChunkCategory


def test_chunk_id_consistency() -> None:
    """Test that identical chunks produce the same ID."""
    chunk1 = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=10,
        content="def test_func():\n return True",
    )
    chunk2 = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=10,
        content="def test_func():\n return True",
    )
    assert chunk1.id_(None) == chunk2.id_(None)


def test_chunk_id_uniqueness() -> None:
    """Test that different chunks produce different IDs."""
    chunk1 = Chunk(
        category=ChunkCategory.callable,
        name="func1",
        line=10,
        content="def func1():\n return True",
    )
    chunk2 = Chunk(
        category=ChunkCategory.callable,
        name="func2",  # Different name
        line=10,
        content="def func1():\n return True",
    )
    chunk3 = Chunk(
        category=ChunkCategory.callable,
        name="func1",
        line=20,  # Different line
        content="def func1():\n return True",
    )
    chunk4 = Chunk(
        category=ChunkCategory.callable,
        name="func1",
        line=10,
        content="def func1():\n return False",  # Different content
    )
    assert chunk1.id_(None) != chunk2.id_(None)
    assert chunk1.id_(None) != chunk3.id_(None)
    assert chunk1.id_(None) != chunk4.id_(None)


def test_chunk_id_format() -> None:
    """Test that the ID follows the expected format: name_hash."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=10,
        content="def test_func():\n return True",
    )
    chunk_id = chunk.id_(None)
    # ID should start with the chunk name followed by underscore
    assert chunk_id.startswith("test_func_")
    # The rest should be a non-empty decimal/hex hash. The emptiness check matters:
    # all() over an empty string is True, so without it a bare "test_func_" would pass.
    hash_part = chunk_id[len("test_func_") :]
    assert hash_part, "ID has no hash part after the name prefix"
    assert all(c in "0123456789abcdefABCDEF-" for c in hash_part)


def test_chunk_id_with_path() -> None:
    """Test that including a path affects the ID."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=10,
        content="def test_func():\n return True",
    )
    # Get ID without path
    id_without_path = chunk.id_(None)
    # Get ID with different paths
    path1 = Path("/path/to/file1.py")
    path2 = Path("/path/to/file2.py")
    id_with_path1 = chunk.id_(path1)
    id_with_path1_again = chunk.id_(path1)
    id_with_path2 = chunk.id_(path2)
    # Same path should give same ID
    assert id_with_path1 == id_with_path1_again
    # Different paths should give different IDs
    assert id_with_path1 != id_with_path2
    # Path vs no path should give different IDs
    assert id_without_path != id_with_path1


def test_chunk_id_path_consistency() -> None:
    """Test that identical chunks with identical paths produce the same ID."""
    path = Path("/some/path/file.py")
    chunk1 = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=10,
        content="def test_func():\n return True",
    )
    chunk2 = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=10,
        content="def test_func():\n return True",
    )
    assert chunk1.id_(path) == chunk2.id_(path)


def test_chunk_matches_filter_string_on_name() -> None:
    """Test that a string filter matches when present in chunk name."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_function",
        line=1,
        content="def test_function():\n pass",
    )
    assert chunk.matches_filter("function", "name") is True
    assert chunk.matches_filter("test", "name") is True
    assert chunk.matches_filter("test_function", "name") is True


def test_chunk_matches_filter_list_on_name() -> None:
    """Test that a list filter matches when any element is in chunk name."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_function",
        line=1,
        content="def test_function():\n pass",
    )
    assert chunk.matches_filter(["func", "other"], "name") is True
    assert chunk.matches_filter(["test", "xyz"], "name") is True
    assert chunk.matches_filter(["unrelated", "test_function"], "name") is True


def test_chunk_matches_filter_none() -> None:
    """Test that None filter matches all chunks."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_function",
        line=1,
        content="def test_function():\n pass",
    )
    assert chunk.matches_filter(None, "name") is True
    assert chunk.matches_filter(None, "content") is True
    assert chunk.matches_filter(None, "name_or_content") is True


def test_chunk_matches_filter_on_content() -> None:
    """Test filtering on content."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_function",
        line=1,
        content="def test_function():\n return 'hello world'",
    )
    assert chunk.matches_filter("hello", "content") is True
    assert chunk.matches_filter("return", "content") is True
    assert chunk.matches_filter(["world", "planet"], "content") is True
    assert chunk.matches_filter("not_present", "content") is False


def test_chunk_matches_filter_on_name_or_content() -> None:
    """Test filtering on both name and content combined."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="example_function",
        line=1,
        content="def test_function():\n return 'hello'",
    )
    assert chunk.matches_filter("example", "name_or_content") is True
    assert chunk.matches_filter("hello", "name_or_content") is True
    assert chunk.matches_filter("function", "name_or_content") is True


def test_chunk_matches_filter_non_matching() -> None:
    """Test various non-matching cases."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_function",
        line=1,
        content="def test_function():\n pass",
    )
    assert chunk.matches_filter("missing", "name") is False
    assert chunk.matches_filter(["absent", "not_here"], "name") is False
    assert chunk.matches_filter("return", "content") is False
    assert chunk.matches_filter("missing", "name_or_content") is False


def test_chunk_split_small_chunk_not_split() -> None:
    """Test that small chunks (below max_size) aren't split."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="small_func",
        line=1,
        content="Small content that is definitely below default max_size",
    )
    result = chunk.split()
    assert len(result) == 1
    assert result[0] is chunk  # It should return the original object, not a copy


def test_chunk_split_at_line_boundaries() -> None:
    """Test that chunks are split at line boundaries when possible."""
    # Create multi-line content where each line is below max_line_size
    lines = [f"Line {i}" + "x" * 50 for i in range(20)]
    content = "\n".join(lines)
    chunk = Chunk(category=ChunkCategory.callable, name="multi_line_func", line=1, content=content)

    # Choose a max_size that will require splitting but allow multiple lines per chunk
    max_size = 300
    result = chunk.split(max_size=max_size, split_chunk_prefix="blah")
    assert len(result) > 1
    for chunk_idx, r in enumerate(result):
        assert len(r.content) <= max_size, f"Chunk {chunk_idx} exceeds max_size"

    # Strip the prefix once so every check below looks at the original text only.
    prefix_len = len("blah")
    bodies = [r.content[prefix_len:] for r in result]

    # Verify splits occur at line boundaries: each non-final chunk should end with a newline
    for i, body in enumerate(bodies[:-1]):
        assert body.endswith("\n"), f"Chunk {i} doesn't end at a line boundary"

    # Verify no line is split across chunks (by checking each original line is fully in one chunk)
    for line in lines:
        # Count how many chunks contain this exact line (should be exactly 1)
        found_in_chunks = sum((line + "\n") in body for body in bodies)
        # The last line doesn't have a newline, so it is matched at the end of a chunk instead
        if line == lines[-1]:
            found_in_chunks += sum(body.endswith(line) for body in bodies)
        assert found_in_chunks == 1

    # The combined content (without prefixes) should match original content
    assert "".join(bodies) == content


def test_chunk_split_very_long_single_line() -> None:
    """Test that very long single lines are split correctly."""
    long_line = "x" * 5000  # Single line, no newlines
    chunk = Chunk(category=ChunkCategory.callable, name="long_line_func", line=1, content=long_line)
    max_size = 1000
    result = chunk.split(max_size=max_size, split_chunk_prefix="blah")
    assert len(result) > 1, "Long line should be split into multiple chunks"
    for chunk_idx, r in enumerate(result):
        assert len(r.content) <= max_size, f"Chunk {chunk_idx} exceeds max_size"

    # The prefix length for first and subsequent chunks
    prefix_len = len("blah")
    # Reconstruct original content without prefixes
    reconstructed = "".join(r.content[prefix_len:] for r in result)
    assert reconstructed == long_line, "Reconstructed content doesn't match original"


def test_chunk_split_naming_convention() -> None:
    """Test that split chunks follow the expected naming convention."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="original_name",
        line=1,
        content="\n".join(["x" * 100 for _ in range(10)]),  # Content that will be split
    )
    result = chunk.split(max_size=200)
    assert len(result) > 1, "Content should be split into multiple chunks"

    # Check naming pattern: original_name_part1, original_name_part2, etc.
    for i, r in enumerate(result, 1):
        assert r.name == f"original_name_part{i}", f"Incorrect name for chunk {i}"

    # Verify other properties are maintained or adjusted as expected
    for r in result:
        assert r.category == chunk.category, "Category should be preserved"
        assert r.line is None, "Line number should be None for split chunks"


def test_chunk_split_custom_prefix() -> None:
    """Test that custom prefixes are applied correctly to split chunks."""
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="test_func",
        line=1,
        content="\n".join(["x" * 100 for _ in range(10)]),
    )

    # Test with custom prefix
    custom_prefix = "CUSTOM PREFIX: "
    result = chunk.split(max_size=200, split_chunk_prefix=custom_prefix)
    assert len(result) > 1, "Content should be split into multiple chunks"
    # Each chunk should start with the custom prefix
    for chunk_idx, r in enumerate(result):
        assert r.content.startswith(custom_prefix), f"Chunk {chunk_idx} doesn't have custom prefix"

    # Test with empty prefix
    empty_result = chunk.split(max_size=200, split_chunk_prefix="")
    # Guard against the per-chunk checks below passing without a split having happened.
    assert len(empty_result) > 1, "Content should be split into multiple chunks"
    # Chunks should start with content, not the default prefix
    for chunk_idx, r in enumerate(empty_result):
        assert r.content.startswith("x"), f"Chunk {chunk_idx} doesn't start with expected content"
        assert not r.content.startswith(
            "[This is",
        ), "Default prefix was used despite empty custom prefix"


def test_chunk_split_content_preservation() -> None:
    """Test that splitting preserves all original content."""
    # Create content with distinct lines for easier verification
    lines = [f"Line {i} with unique content" for i in range(20)]
    content = "\n".join(lines)
    chunk = Chunk(category=ChunkCategory.callable, name="test_func", line=1, content=content)

    # Use empty prefix to simplify content reconstruction
    result = chunk.split(max_size=300, split_chunk_prefix="")
    combined = "".join(r.content for r in result)

    # Should exactly match original content
    assert combined == content, "Content was not preserved during splitting"
    # Verify specific lines are preserved
    for i, line in enumerate(lines):
        assert line in combined, f"Line {i} is missing from reconstructed content"


def test_chunk_split_empty_content() -> None:
    """Test that empty content is handled correctly."""
    chunk = Chunk(category=ChunkCategory.callable, name="empty_func", line=1, content="")
    result = chunk.split()
    assert len(result) == 1, "Empty content should result in a single chunk"
    assert result[0] is chunk, "Should return the original chunk for empty content"


def test_chunk_split_exactly_at_max_size() -> None:
    """Test content that is exactly at the max_size limit."""
    exact_content = "x" * 1000
    chunk = Chunk(
        category=ChunkCategory.callable,
        name="exact_size_func",
        line=1,
        content=exact_content,
    )
    result = chunk.split(max_size=1000)
    assert len(result) == 1, "Content exactly at max_size should not be split"
    assert result[0] is chunk, "Should return the original chunk when exactly at max_size"