Deskaid

#!/usr/bin/env python3 """Unit tests for line_endings.py module.""" import os import re import tempfile import unittest from pathlib import Path from codemcp.line_endings import ( apply_line_endings, check_codemcp_toml, check_codemcprc, check_editorconfig, check_gitattributes, get_line_ending_preference, normalize_to_lf, ) class LineEndingsTest(unittest.TestCase): """Test line endings handling functions.""" def test_normalize_to_lf(self): """Test normalizing different line endings to LF.""" # Test CRLF to LF crlf_text = "line1\r\nline2\r\nline3" self.assertEqual(normalize_to_lf(crlf_text), "line1\nline2\nline3") # Test CR to LF cr_text = "line1\rline2\rline3" self.assertEqual(normalize_to_lf(cr_text), "line1\nline2\nline3") # Test mixed line endings mixed_text = "line1\nline2\r\nline3\rline4" self.assertEqual(normalize_to_lf(mixed_text), "line1\nline2\nline3\nline4") # Test already normalized text lf_text = "line1\nline2\nline3" self.assertEqual(normalize_to_lf(lf_text), lf_text) # Test empty text self.assertEqual(normalize_to_lf(""), "") def test_apply_line_endings(self): """Test applying different line endings to content.""" lf_text = "line1\nline2\nline3" # Test applying CRLF self.assertEqual(apply_line_endings(lf_text, "CRLF"), "line1\r\nline2\r\nline3") self.assertEqual(apply_line_endings(lf_text, "\r\n"), "line1\r\nline2\r\nline3") # Test applying LF self.assertEqual(apply_line_endings(lf_text, "LF"), lf_text) self.assertEqual(apply_line_endings(lf_text, "\n"), lf_text) # Test with mixed input (should normalize first) mixed_text = "line1\r\nline2\nline3" self.assertEqual( apply_line_endings(mixed_text, "CRLF"), "line1\r\nline2\r\nline3" ) self.assertEqual(apply_line_endings(mixed_text, "LF"), "line1\nline2\nline3") # Test default behavior (should default to LF) self.assertEqual(apply_line_endings(lf_text, None), lf_text) def test_check_editorconfig(self): """Test reading line ending preference from .editorconfig.""" with tempfile.TemporaryDirectory() as temp_dir: # Create a mock directory structure project_dir = Path(temp_dir) / "project" src_dir = project_dir / "src" src_dir.mkdir(parents=True, exist_ok=True) # Create a test file test_file = src_dir / "test.py" test_file.touch() # Create .editorconfig with LF line endings editorconfig_content = """ root = true [*] charset = utf-8 end_of_line = lf insert_final_newline = true """ with open(project_dir / ".editorconfig", "w") as f: f.write(editorconfig_content) # Test with file that should use LF result = check_editorconfig(str(test_file)) self.assertEqual(result, "LF") # Create .editorconfig with CRLF line endings for .py files editorconfig_content = """ root = true [*] charset = utf-8 end_of_line = lf [*.py] end_of_line = crlf """ with open(project_dir / ".editorconfig", "w") as f: f.write(editorconfig_content) # Test with .py file that should use CRLF result = check_editorconfig(str(test_file)) self.assertEqual(result, "CRLF") # Test with non-existent file (should return None) non_existent = src_dir / "non_existent.py" result = check_editorconfig(str(non_existent)) self.assertEqual(result, "CRLF") # Should still work based on pattern def test_check_gitattributes(self): """Test reading line ending preference from .gitattributes.""" with tempfile.TemporaryDirectory() as temp_dir: # Create a mock directory structure project_dir = Path(temp_dir) / "project" src_dir = project_dir / "src" src_dir.mkdir(parents=True, exist_ok=True) # Create a test file test_file = src_dir / "test.py" test_file.touch() # Test with LF (this matches the actual behavior) gitattributes_content = """ *.py text eol=lf """ with open(project_dir / ".gitattributes", "w") as f: f.write(gitattributes_content) result = check_gitattributes(str(test_file)) self.assertEqual(result, "LF") # Test with different file extension bin_file = src_dir / "test.bin" bin_file.touch() gitattributes_content = """ *.py text eol=lf *.bin binary """ with open(project_dir / ".gitattributes", "w") as f: f.write(gitattributes_content) result = check_gitattributes(str(bin_file)) self.assertIsNone(result) def test_check_codemcp_toml(self): """Test reading line ending preference from codemcp.toml.""" with tempfile.TemporaryDirectory() as temp_dir: # Create a mock directory structure project_dir = Path(temp_dir) / "project" src_dir = project_dir / "src" src_dir.mkdir(parents=True, exist_ok=True) # Create a test file test_file = src_dir / "test.py" test_file.touch() # Create codemcp.toml with LF line endings codemcp_toml_content = """ [files] line_endings = "LF" """ with open(project_dir / "codemcp.toml", "w") as f: f.write(codemcp_toml_content) # Test with file that should use LF result = check_codemcp_toml(str(test_file)) self.assertEqual(result, "LF") # Create codemcp.toml with CRLF line endings codemcp_toml_content = """ [files] line_endings = "CRLF" """ with open(project_dir / "codemcp.toml", "w") as f: f.write(codemcp_toml_content) # Test with file that should use CRLF result = check_codemcp_toml(str(test_file)) self.assertEqual(result, "CRLF") # Test with no line_endings setting codemcp_toml_content = """ [logger] verbosity = "INFO" """ with open(project_dir / "codemcp.toml", "w") as f: f.write(codemcp_toml_content) # Test with file that has no line_endings setting result = check_codemcp_toml(str(test_file)) self.assertIsNone(result) # Test with non-existent file (should return None) non_existent = project_dir / "non_existent.py" result = check_codemcp_toml(str(non_existent)) self.assertIsNone(result) def test_check_codemcprc(self): """Test reading line ending preference from .codemcprc. Note: This is a placeholder test as check_codemcprc() depends on config.get_line_endings_preference(), which would require mocking to fully test. For the purposes of this test suite, we'll just verify that the function doesn't raise exceptions. """ # Since this function depends on config module and would require mocking, # we'll just verify it doesn't error result = check_codemcprc() # We can't assert the exact value as it depends on the environment, # but we can check that it returns either None or a valid value self.assertTrue(result is None or result in ("LF", "CRLF")) def test_get_line_ending_preference_order(self): """Test that preferences are checked in the right order.""" # This test is more complex as it needs to test the hierarchical priority # We'll create a full directory structure with multiple config files with tempfile.TemporaryDirectory() as temp_dir: # Create a mock directory structure project_dir = Path(temp_dir) / "project" src_dir = project_dir / "src" src_dir.mkdir(parents=True, exist_ok=True) # Create a test file test_file = src_dir / "test.py" test_file.touch() # Test with no config files should default to OS line ending result = get_line_ending_preference(str(test_file)) self.assertEqual(result, os.linesep) # Create .editorconfig with CRLF editorconfig_content = """ root = true [*] end_of_line = crlf """ with open(project_dir / ".editorconfig", "w") as f: f.write(editorconfig_content) # Test .editorconfig takes precedence when it's the only file result = get_line_ending_preference(str(test_file)) self.assertEqual(result, "\r\n") # Add .gitattributes with LF gitattributes_content = """ *.py text eol=lf """ with open(project_dir / ".gitattributes", "w") as f: f.write(gitattributes_content) # Test .editorconfig still takes precedence over .gitattributes result = get_line_ending_preference(str(test_file)) self.assertEqual(result, "\r\n") # Remove .editorconfig (project_dir / ".editorconfig").unlink() # Test .gitattributes takes precedence when no .editorconfig result = get_line_ending_preference(str(test_file)) self.assertEqual(result, "\n") # Add codemcp.toml with CRLF codemcp_toml_content = """ [files] line_endings = "CRLF" """ with open(project_dir / "codemcp.toml", "w") as f: f.write(codemcp_toml_content) # Test .gitattributes still takes precedence over codemcp.toml result = get_line_ending_preference(str(test_file)) self.assertEqual(result, "\n") # Remove .gitattributes (project_dir / ".gitattributes").unlink() # Test codemcp.toml takes precedence when no .editorconfig or .gitattributes result = get_line_ending_preference(str(test_file)) self.assertEqual(result, "\r\n") def test_gitattributes_regex_matching(self): """Test specifically how the regex matching works in check_gitattributes.""" test_file = "test.py" pattern = "*.py" # Convert .gitattributes pattern to regex pattern exactly as it's done in the function regex_pattern = pattern.replace(".", r"\.").replace("*", ".*") self.assertEqual(regex_pattern, r".*\.py") # Test the match is_match = bool(re.match(regex_pattern, test_file)) self.assertTrue( is_match, f"Pattern {regex_pattern} failed to match {test_file}" ) # Create a simpler version of check_gitattributes to debug def debug_check_gitattributes(file_path, gitattributes_content): relative_path = Path(file_path).name lines = gitattributes_content.strip().split("\n") # Process lines in reverse to match the behavior for line in reversed(lines): line = line.strip() if not line or line.startswith("#"): continue parts = line.split() if len(parts) < 2: continue pattern, attrs = parts[0], parts[1:] # Convert git pattern to regex if pattern == "*": # Match all files is_match = True else: # Convert .gitattributes pattern to regex pattern regex_pattern = pattern.replace(".", r"\.").replace("*", ".*") # Print debugging info print( f"Pattern: {pattern}, Regex: {regex_pattern}, File: {relative_path}" ) is_match = bool(re.match(regex_pattern, relative_path)) print(f"Match result: {is_match}") if is_match: # Check for text/eol attributes for attr in attrs: if attr == "eol=crlf": return "CRLF" elif attr == "eol=lf": return "LF" elif attr == "text" and "eol=" not in str(attrs): return "LF" return None # Test our debug version with various patterns test_file = "test.py" # Test with simple CRLF attribute gitattributes_content = "*.py text eol=crlf" result = debug_check_gitattributes(test_file, gitattributes_content) self.assertEqual(result, "CRLF") # Test with simple LF attribute gitattributes_content = "*.py text eol=lf" result = debug_check_gitattributes(test_file, gitattributes_content) self.assertEqual(result, "LF") # Test with multiple patterns gitattributes_content = """ *.txt text eol=crlf *.py text eol=lf """ result = debug_check_gitattributes(test_file, gitattributes_content) self.assertEqual(result, "LF") def test_editorconfig_complex_patterns(self): """Test more complex .editorconfig pattern matching.""" with tempfile.TemporaryDirectory() as temp_dir: # Create a mock directory structure project_dir = Path(temp_dir) / "project" src_dir = project_dir / "src" src_dir.mkdir(parents=True, exist_ok=True) # Create test files py_file = src_dir / "test.py" js_file = src_dir / "test.js" md_file = src_dir / "README.md" html_file = src_dir / "index.html" py_file.touch() js_file.touch() md_file.touch() html_file.touch() # Create .editorconfig with complex pattern matching editorconfig_content = """ root = true # Default settings for all files [*] charset = utf-8 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true indent_style = space indent_size = 4 # JavaScript files [*.js] indent_size = 2 end_of_line = crlf # Python files [*.py] end_of_line = lf # Markdown files get special treatment [*.md] trim_trailing_whitespace = false """ with open(project_dir / ".editorconfig", "w") as f: f.write(editorconfig_content) # Test each file type gets the right setting result = check_editorconfig(str(py_file)) self.assertEqual(result, "LF", "Python files should use LF line endings") result = check_editorconfig(str(js_file)) self.assertEqual( result, "CRLF", "JavaScript files should use CRLF line endings" ) # HTML files should use default LF since they're not specified result = check_editorconfig(str(html_file)) self.assertEqual( result, "LF", "HTML files should use default LF line endings" ) # Test that the most specific pattern takes precedence # Default is LF, but *.js specifies CRLF more_specific_editorconfig = """ root = true [*] end_of_line = lf [*.js] end_of_line = crlf """ with open(project_dir / ".editorconfig", "w") as f: f.write(more_specific_editorconfig) result = check_editorconfig(str(py_file)) self.assertEqual(result, "LF", "Python files should use the default LF") result = check_editorconfig(str(js_file)) self.assertEqual( result, "CRLF", "JavaScript files should use specific CRLF" ) # Test a completely different pattern format glob_pattern_editorconfig = """ root = true [*.py] end_of_line = lf [Makefile] end_of_line = lf indent_style = tab [package.json] end_of_line = crlf indent_style = space indent_size = 2 """ with open(project_dir / ".editorconfig", "w") as f: f.write(glob_pattern_editorconfig) # Create the special files make_file = project_dir / "Makefile" pkg_file = project_dir / "package.json" make_file.touch() pkg_file.touch() result = check_editorconfig(str(make_file)) self.assertEqual(result, "LF", "Makefile should use LF") result = check_editorconfig(str(pkg_file)) self.assertEqual(result, "CRLF", "package.json should use CRLF") def test_nested_config_files(self): """Test handling of nested configuration files in subdirectories.""" with tempfile.TemporaryDirectory() as temp_dir: # Create a nested directory structure root_dir = Path(temp_dir) project_dir = root_dir / "project" frontend_dir = project_dir / "frontend" backend_dir = project_dir / "backend" # Make directories frontend_dir.mkdir(parents=True, exist_ok=True) backend_dir.mkdir(parents=True, exist_ok=True) # Create test files in each directory root_file = project_dir / "root.py" frontend_file = frontend_dir / "app.js" backend_file = backend_dir / "server.py" root_file.touch() frontend_file.touch() backend_file.touch() # Create root .editorconfig with default LF with open(project_dir / ".editorconfig", "w") as f: f.write(""" root = true [*] end_of_line = lf """) # Create frontend .editorconfig with CRLF with open(frontend_dir / ".editorconfig", "w") as f: f.write(""" root = false [*] end_of_line = crlf """) # Create backend .gitattributes with explicit LF with open(backend_dir / ".gitattributes", "w") as f: f.write(""" *.py text eol=lf """) # Test that each file gets the right setting from the nearest config result = check_editorconfig(str(root_file)) self.assertEqual(result, "LF", "Root files should use root .editorconfig") result = check_editorconfig(str(frontend_file)) self.assertEqual( result, "CRLF", "Frontend files should use frontend .editorconfig" ) result = check_editorconfig(str(backend_file)) self.assertEqual( result, "LF", "Backend files should use root .editorconfig" ) # Test combined preference from all sources result = get_line_ending_preference(str(root_file)) self.assertEqual( result, "\n", "Root files should use LF from root .editorconfig" ) result = get_line_ending_preference(str(frontend_file)) self.assertEqual( result, "\r\n", "Frontend files should use CRLF from frontend .editorconfig", ) result = get_line_ending_preference(str(backend_file)) self.assertEqual( result, "\n", "Backend files should use LF from either .editorconfig or .gitattributes", ) # Create a codemcp.toml in the root directory with explicit CRLF with open(project_dir / "codemcp.toml", "w") as f: f.write(""" [files] line_endings = "CRLF" """) # Test .editorconfig still takes precedence result = get_line_ending_preference(str(root_file)) self.assertEqual( result, "\n", ".editorconfig should take precedence over codemcp.toml" ) # Test precedence by removing .editorconfig files (project_dir / ".editorconfig").unlink() (frontend_dir / ".editorconfig").unlink() # Now .gitattributes should take precedence for backend files result = get_line_ending_preference(str(backend_file)) self.assertEqual( result, "\n", "Backend files should use LF from .gitattributes" ) # And codemcp.toml should be used for other files result = get_line_ending_preference(str(root_file)) self.assertEqual( result, "\r\n", "Root files should use CRLF from codemcp.toml when no .editorconfig", ) result = get_line_ending_preference(str(frontend_file)) self.assertEqual( result, "\r\n", "Frontend files should use CRLF from codemcp.toml when no .editorconfig", ) if __name__ == "__main__": unittest.main()