"""Parser edge case integration tests."""
import tempfile
import os
import pytest
from src.tools import DiffChunkTools
class TestParserEdgeCases:
"""Test diff parser edge cases and error scenarios."""
@pytest.fixture
def tools(self):
"""Return DiffChunkTools instance."""
return DiffChunkTools()
def test_parse_malformed_diff_file(self, tools):
"""Test parsing malformed diff content."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
# Write malformed diff content
f.write("This is not a valid diff file\n")
f.write("Random content without diff headers\n")
f.write("More random text\n")
temp_file = f.name
try:
# Should handle malformed diff - may raise error or return empty result
try:
result = tools.load_diff(temp_file)
# If it succeeds, should have no chunks
assert result["chunks"] == 0
assert result["files"] == 0
except ValueError:
# If it raises an error for malformed diff, that's acceptable
pass
finally:
os.unlink(temp_file)
def test_parse_diff_with_binary_files(self, tools):
"""Test parsing diff with binary file indicators."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
f.write("diff --git a/image.png b/image.png\n")
f.write("index 1234567..abcdefg 100644\n")
f.write("Binary files a/image.png and b/image.png differ\n")
f.write("\n")
f.write("diff --git a/text.txt b/text.txt\n")
f.write("index 2345678..bcdefgh 100644\n")
f.write("--- a/text.txt\n")
f.write("+++ b/text.txt\n")
f.write("@@ -1 +1 @@\n")
f.write("-old line\n")
f.write("+new line\n")
temp_file = f.name
try:
result = tools.load_diff(temp_file)
# Should parse successfully and include binary file
assert result["chunks"] > 0
assert result["files"] >= 1 # At least the text file, maybe binary too
finally:
os.unlink(temp_file)
def test_parse_diff_with_unicode_content(self, tools):
"""Test parsing diff with unicode characters."""
with tempfile.NamedTemporaryFile(
mode="w", delete=False, suffix=".diff", encoding="utf-8"
) as f:
f.write("diff --git a/unicode.txt b/unicode.txt\n")
f.write("index 1234567..abcdefg 100644\n")
f.write("--- a/unicode.txt\n")
f.write("+++ b/unicode.txt\n")
f.write("@@ -1,2 +1,2 @@\n")
f.write("-Hello δΈη\n")
f.write("+Hello π\n")
f.write(" Regular line\n")
temp_file = f.name
try:
result = tools.load_diff(temp_file)
# Should parse unicode content successfully
assert result["chunks"] > 0
assert result["files"] >= 1
# Get chunk content and verify unicode handling
chunk_content = tools.get_chunk(temp_file, 1)
assert isinstance(chunk_content, str)
finally:
os.unlink(temp_file)
def test_parse_diff_with_special_filenames(self, tools):
"""Test parsing diff with filenames containing spaces and special chars."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
f.write('diff --git "a/file with spaces.txt" "b/file with spaces.txt"\n')
f.write("index 1234567..abcdefg 100644\n")
f.write('--- "a/file with spaces.txt"\n')
f.write('+++ "b/file with spaces.txt"\n')
f.write("@@ -1 +1 @@\n")
f.write("-old content\n")
f.write("+new content\n")
f.write("\n")
f.write("diff --git a/file-with-dashes.js b/file-with-dashes.js\n")
f.write("index 2345678..bcdefgh 100644\n")
f.write("--- a/file-with-dashes.js\n")
f.write("+++ b/file-with-dashes.js\n")
f.write("@@ -1 +1 @@\n")
f.write("-console.log('old');\n")
f.write("+console.log('new');\n")
temp_file = f.name
try:
result = tools.load_diff(temp_file)
# Should parse files - exact behavior may vary
assert result["chunks"] >= 0
# If we have chunks, test pattern matching
if result["chunks"] > 0:
js_chunks = tools.find_chunks_for_files(temp_file, "*.js")
txt_chunks = tools.find_chunks_for_files(temp_file, "*.txt")
assert isinstance(js_chunks, list)
assert isinstance(txt_chunks, list)
finally:
os.unlink(temp_file)
def test_parse_diff_with_complex_patterns(self, tools):
"""Test include/exclude pattern edge cases."""
# Simplify this test - just verify pattern functionality works
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
# Simple diff with a few file types
f.write("diff --git a/main.py b/main.py\n")
f.write("index 1234567..abcdefg 100644\n")
f.write("--- a/main.py\n")
f.write("+++ b/main.py\n")
f.write("@@ -1 +1 @@\n")
f.write("-old line\n")
f.write("+new line\n")
f.write("\n")
f.write("diff --git a/config.json b/config.json\n")
f.write("index 2345678..bcdefgh 100644\n")
f.write("--- a/config.json\n")
f.write("+++ b/config.json\n")
f.write("@@ -1 +1 @@\n")
f.write("-old config\n")
f.write("+new config\n")
temp_file = f.name
try:
# Test basic pattern functionality
result = tools.load_diff(temp_file, include_patterns="*.py")
# Should include only Python files
assert result["files"] >= 0 # May be 0 if filtered, that's okay
finally:
os.unlink(temp_file)
def test_parse_diff_with_no_changes(self, tools):
"""Test parsing diff with file headers but no actual changes."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
f.write("diff --git a/unchanged.txt b/unchanged.txt\n")
f.write("index 1234567..1234567 100644\n")
f.write("--- a/unchanged.txt\n")
f.write("+++ b/unchanged.txt\n")
# No hunks - file appears in diff but has no changes
temp_file = f.name
try:
# Should handle files with no changes - may error or return empty result
try:
result = tools.load_diff(temp_file)
assert result["chunks"] >= 0
except ValueError:
# If it raises an error for no changes, that's acceptable
pass
finally:
os.unlink(temp_file)
def test_parse_very_large_diff_hunk(self, tools):
"""Test parsing diff with very large hunks."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
f.write("diff --git a/large.txt b/large.txt\n")
f.write("index 1234567..abcdefg 100644\n")
f.write("--- a/large.txt\n")
f.write("+++ b/large.txt\n")
f.write("@@ -1,1000 +1,1000 @@\n")
# Write 1000 lines of changes
for i in range(1000):
if i % 2 == 0:
f.write(f"-old line {i}\n")
f.write(f"+new line {i}\n")
else:
f.write(f" unchanged line {i}\n")
temp_file = f.name
try:
result = tools.load_diff(temp_file, max_chunk_lines=500)
# Should handle large hunk and potentially split into multiple chunks
assert result["chunks"] > 0
assert result["files"] >= 1
assert result["total_lines"] > 500
finally:
os.unlink(temp_file)
def test_parse_diff_with_generated_file_patterns(self, tools):
"""Test skip_generated functionality with common patterns."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
# Files that should be skipped when skip_generated=True
generated_files = [
"package-lock.json",
"yarn.lock",
"composer.lock",
"Pipfile.lock",
"go.sum",
"Cargo.lock",
"dist/bundle.js",
"build/output.css",
"node_modules/dep/file.js",
".DS_Store",
]
# Regular files that should not be skipped
regular_files = ["src/main.py", "package.json", "README.md"]
all_files = generated_files + regular_files
for i, filename in enumerate(all_files):
f.write(f"diff --git a/{filename} b/{filename}\n")
f.write(f"index {i:07d}..{i + 1:07d} 100644\n")
f.write(f"--- a/{filename}\n")
f.write(f"+++ b/{filename}\n")
f.write("@@ -1 +1 @@\n")
f.write(f"-old content {i}\n")
f.write(f"+new content {i}\n")
f.write("\n")
temp_file = f.name
try:
# Test with skip_generated=True (default)
result_filtered = tools.load_diff(temp_file, skip_generated=True)
# Test with skip_generated=False
result_unfiltered = tools.load_diff(temp_file, skip_generated=False)
# Filtered should have fewer files than unfiltered
assert result_filtered["files"] <= result_unfiltered["files"]
assert result_filtered["files"] >= len(regular_files)
finally:
os.unlink(temp_file)
def test_parse_diff_with_whitespace_only_changes(self, tools):
"""Test skip_trivial functionality."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
# File with only whitespace changes
f.write("diff --git a/whitespace.txt b/whitespace.txt\n")
f.write("index 1234567..abcdefg 100644\n")
f.write("--- a/whitespace.txt\n")
f.write("+++ b/whitespace.txt\n")
f.write("@@ -1,3 +1,3 @@\n")
f.write("-line1 \n") # trailing spaces
f.write("+line1\n") # no trailing spaces
f.write("- line2\n") # leading spaces
f.write("+\tline2\n") # tab instead
f.write(" line3\n") # unchanged
f.write("\n")
# File with substantial changes
f.write("diff --git a/substantial.txt b/substantial.txt\n")
f.write("index 2345678..bcdefgh 100644\n")
f.write("--- a/substantial.txt\n")
f.write("+++ b/substantial.txt\n")
f.write("@@ -1 +1 @@\n")
f.write("-old content\n")
f.write("+completely different content\n")
temp_file = f.name
try:
# Test with skip_trivial=True (default)
result_filtered = tools.load_diff(temp_file, skip_trivial=True)
# Test with skip_trivial=False
result_unfiltered = tools.load_diff(temp_file, skip_trivial=False)
# Both should have the substantial file
assert result_filtered["files"] >= 1
assert result_unfiltered["files"] >= 1
# Unfiltered might have more files (including whitespace-only)
assert result_unfiltered["files"] >= result_filtered["files"]
finally:
os.unlink(temp_file)
def test_parse_corrupted_diff_encoding(self, tools):
"""Test handling of files with encoding issues."""
with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".diff") as f:
# Write some valid UTF-8
f.write(b"diff --git a/test.txt b/test.txt\n")
f.write(b"index 1234567..abcdefg 100644\n")
f.write(b"--- a/test.txt\n")
f.write(b"+++ b/test.txt\n")
f.write(b"@@ -1 +1 @@\n")
f.write(b"-old line\n")
# Insert some invalid UTF-8 bytes
f.write(b"+new line with \xff\xfe invalid bytes\n")
temp_file = f.name
try:
# Should handle encoding issues gracefully
result = tools.load_diff(temp_file)
# May succeed with some content or handle the error gracefully
assert result["chunks"] >= 0
finally:
os.unlink(temp_file)
def test_parse_diff_missing_file_headers(self, tools):
"""Test parsing diff with missing or malformed file headers."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".diff") as f:
# Valid diff start but missing some headers
f.write("diff --git a/incomplete.txt b/incomplete.txt\n")
f.write("index 1234567..abcdefg 100644\n")
# Missing --- and +++ lines
f.write("@@ -1 +1 @@\n")
f.write("-old line\n")
f.write("+new line\n")
temp_file = f.name
try:
# Should handle incomplete headers gracefully
result = tools.load_diff(temp_file)
# May parse successfully or skip malformed sections
assert result["chunks"] >= 0
finally:
os.unlink(temp_file)