diffchunk

Overview Schema Related Servers Score Discussions

diffchunk
tests

test_parser_edge_cases.py•13.4 KiB

"""Parser edge case integration tests.""" import tempfile import os import pytest from src.tools import DiffChunkTools class TestParserEdgeCases: """Test diff parser edge cases and error scenarios.""" @pytest.fixture def tools(self): """Return DiffChunkTools instance.""" return DiffChunkTools() def test_parse_malformed_diff_file(self, tools): """Test parsing malformed diff content.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: # Write malformed diff content f.write("This is not a valid diff file\n") f.write("Random content without diff headers\n") f.write("More random text\n") temp_file = f.name try: # Should handle malformed diff - may raise error or return empty result try: result = tools.load_diff(temp_file) # If it succeeds, should have no chunks assert result["chunks"] == 0 assert result["files"] == 0 except ValueError: # If it raises an error for malformed diff, that's acceptable pass finally: os.unlink(temp_file) def test_parse_diff_with_binary_files(self, tools): """Test parsing diff with binary file indicators.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: f.write("diff --git a/image.png b/image.png\n") f.write("index 1234567..abcdefg 100644\n") f.write("Binary files a/image.png and b/image.png differ\n") f.write("\n") f.write("diff --git a/text.txt b/text.txt\n") f.write("index 2345678..bcdefgh 100644\n") f.write("--- a/text.txt\n") f.write("+++ b/text.txt\n") f.write("@@ -1 +1 @@\n") f.write("-old line\n") f.write("+new line\n") temp_file = f.name try: result = tools.load_diff(temp_file) # Should parse successfully and include binary file assert result["chunks"] > 0 assert result["files"] >= 1 # At least the text file, maybe binary too finally: os.unlink(temp_file) def test_parse_diff_with_unicode_content(self, tools): """Test parsing diff with unicode characters.""" with tempfile.NamedTemporaryFile( mode="w", delete=False, suffix=".diff", encoding="utf-8" ) as f: f.write("diff --git a/unicode.txt b/unicode.txt\n") f.write("index 1234567..abcdefg 100644\n") f.write("--- a/unicode.txt\n") f.write("+++ b/unicode.txt\n") f.write("@@ -1,2 +1,2 @@\n") f.write("-Hello 世界\n") f.write("+Hello 🌍\n") f.write(" Regular line\n") temp_file = f.name try: result = tools.load_diff(temp_file) # Should parse unicode content successfully assert result["chunks"] > 0 assert result["files"] >= 1 # Get chunk content and verify unicode handling chunk_content = tools.get_chunk(temp_file, 1) assert isinstance(chunk_content, str) finally: os.unlink(temp_file) def test_parse_diff_with_special_filenames(self, tools): """Test parsing diff with filenames containing spaces and special chars.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: f.write('diff --git "a/file with spaces.txt" "b/file with spaces.txt"\n') f.write("index 1234567..abcdefg 100644\n") f.write('--- "a/file with spaces.txt"\n') f.write('+++ "b/file with spaces.txt"\n') f.write("@@ -1 +1 @@\n") f.write("-old content\n") f.write("+new content\n") f.write("\n") f.write("diff --git a/file-with-dashes.js b/file-with-dashes.js\n") f.write("index 2345678..bcdefgh 100644\n") f.write("--- a/file-with-dashes.js\n") f.write("+++ b/file-with-dashes.js\n") f.write("@@ -1 +1 @@\n") f.write("-console.log('old');\n") f.write("+console.log('new');\n") temp_file = f.name try: result = tools.load_diff(temp_file) # Should parse files - exact behavior may vary assert result["chunks"] >= 0 # If we have chunks, test pattern matching if result["chunks"] > 0: js_chunks = tools.find_chunks_for_files(temp_file, "*.js") txt_chunks = tools.find_chunks_for_files(temp_file, "*.txt") assert isinstance(js_chunks, list) assert isinstance(txt_chunks, list) finally: os.unlink(temp_file) def test_parse_diff_with_complex_patterns(self, tools): """Test include/exclude pattern edge cases.""" # Simplify this test - just verify pattern functionality works with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: # Simple diff with a few file types f.write("diff --git a/main.py b/main.py\n") f.write("index 1234567..abcdefg 100644\n") f.write("--- a/main.py\n") f.write("+++ b/main.py\n") f.write("@@ -1 +1 @@\n") f.write("-old line\n") f.write("+new line\n") f.write("\n") f.write("diff --git a/config.json b/config.json\n") f.write("index 2345678..bcdefgh 100644\n") f.write("--- a/config.json\n") f.write("+++ b/config.json\n") f.write("@@ -1 +1 @@\n") f.write("-old config\n") f.write("+new config\n") temp_file = f.name try: # Test basic pattern functionality result = tools.load_diff(temp_file, include_patterns="*.py") # Should include only Python files assert result["files"] >= 0 # May be 0 if filtered, that's okay finally: os.unlink(temp_file) def test_parse_diff_with_no_changes(self, tools): """Test parsing diff with file headers but no actual changes.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: f.write("diff --git a/unchanged.txt b/unchanged.txt\n") f.write("index 1234567..1234567 100644\n") f.write("--- a/unchanged.txt\n") f.write("+++ b/unchanged.txt\n") # No hunks - file appears in diff but has no changes temp_file = f.name try: # Should handle files with no changes - may error or return empty result try: result = tools.load_diff(temp_file) assert result["chunks"] >= 0 except ValueError: # If it raises an error for no changes, that's acceptable pass finally: os.unlink(temp_file) def test_parse_very_large_diff_hunk(self, tools): """Test parsing diff with very large hunks.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: f.write("diff --git a/large.txt b/large.txt\n") f.write("index 1234567..abcdefg 100644\n") f.write("--- a/large.txt\n") f.write("+++ b/large.txt\n") f.write("@@ -1,1000 +1,1000 @@\n") # Write 1000 lines of changes for i in range(1000): if i % 2 == 0: f.write(f"-old line {i}\n") f.write(f"+new line {i}\n") else: f.write(f" unchanged line {i}\n") temp_file = f.name try: result = tools.load_diff(temp_file, max_chunk_lines=500) # Should handle large hunk and potentially split into multiple chunks assert result["chunks"] > 0 assert result["files"] >= 1 assert result["total_lines"] > 500 finally: os.unlink(temp_file) def test_parse_diff_with_generated_file_patterns(self, tools): """Test skip_generated functionality with common patterns.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: # Files that should be skipped when skip_generated=True generated_files = [ "package-lock.json", "yarn.lock", "composer.lock", "Pipfile.lock", "go.sum", "Cargo.lock", "dist/bundle.js", "build/output.css", "node_modules/dep/file.js", ".DS_Store", ] # Regular files that should not be skipped regular_files = ["src/main.py", "package.json", "README.md"] all_files = generated_files + regular_files for i, filename in enumerate(all_files): f.write(f"diff --git a/{filename} b/{filename}\n") f.write(f"index {i:07d}..{i + 1:07d} 100644\n") f.write(f"--- a/{filename}\n") f.write(f"+++ b/{filename}\n") f.write("@@ -1 +1 @@\n") f.write(f"-old content {i}\n") f.write(f"+new content {i}\n") f.write("\n") temp_file = f.name try: # Test with skip_generated=True (default) result_filtered = tools.load_diff(temp_file, skip_generated=True) # Test with skip_generated=False result_unfiltered = tools.load_diff(temp_file, skip_generated=False) # Filtered should have fewer files than unfiltered assert result_filtered["files"] <= result_unfiltered["files"] assert result_filtered["files"] >= len(regular_files) finally: os.unlink(temp_file) def test_parse_diff_with_whitespace_only_changes(self, tools): """Test skip_trivial functionality.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: # File with only whitespace changes f.write("diff --git a/whitespace.txt b/whitespace.txt\n") f.write("index 1234567..abcdefg 100644\n") f.write("--- a/whitespace.txt\n") f.write("+++ b/whitespace.txt\n") f.write("@@ -1,3 +1,3 @@\n") f.write("-line1 \n") # trailing spaces f.write("+line1\n") # no trailing spaces f.write("- line2\n") # leading spaces f.write("+\tline2\n") # tab instead f.write(" line3\n") # unchanged f.write("\n") # File with substantial changes f.write("diff --git a/substantial.txt b/substantial.txt\n") f.write("index 2345678..bcdefgh 100644\n") f.write("--- a/substantial.txt\n") f.write("+++ b/substantial.txt\n") f.write("@@ -1 +1 @@\n") f.write("-old content\n") f.write("+completely different content\n") temp_file = f.name try: # Test with skip_trivial=True (default) result_filtered = tools.load_diff(temp_file, skip_trivial=True) # Test with skip_trivial=False result_unfiltered = tools.load_diff(temp_file, skip_trivial=False) # Both should have the substantial file assert result_filtered["files"] >= 1 assert result_unfiltered["files"] >= 1 # Unfiltered might have more files (including whitespace-only) assert result_unfiltered["files"] >= result_filtered["files"] finally: os.unlink(temp_file) def test_parse_corrupted_diff_encoding(self, tools): """Test handling of files with encoding issues.""" with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".diff") as f: # Write some valid UTF-8 f.write(b"diff --git a/test.txt b/test.txt\n") f.write(b"index 1234567..abcdefg 100644\n") f.write(b"--- a/test.txt\n") f.write(b"+++ b/test.txt\n") f.write(b"@@ -1 +1 @@\n") f.write(b"-old line\n") # Insert some invalid UTF-8 bytes f.write(b"+new line with \xff\xfe invalid bytes\n") temp_file = f.name try: # Should handle encoding issues gracefully result = tools.load_diff(temp_file) # May succeed with some content or handle the error gracefully assert result["chunks"] >= 0 finally: os.unlink(temp_file) def test_parse_diff_missing_file_headers(self, tools): """Test parsing diff with missing or malformed file headers.""" with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f: # Valid diff start but missing some headers f.write("diff --git a/incomplete.txt b/incomplete.txt\n") f.write("index 1234567..abcdefg 100644\n") # Missing --- and +++ lines f.write("@@ -1 +1 @@\n") f.write("-old line\n") f.write("+new line\n") temp_file = f.name try: # Should handle incomplete headers gracefully result = tools.load_diff(temp_file) # May parse successfully or skip malformed sections assert result["chunks"] >= 0 finally: os.unlink(temp_file)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/peteretelej/diffchunk'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_parser_edge_cases.py•13.4 KiB