Deskaid
by ezyang
- codemcp
- tests
#!/usr/bin/env python3
"""Unit tests for line_endings.py module."""
import os
import re
import tempfile
import unittest
from pathlib import Path
from codemcp.line_endings import (
apply_line_endings,
check_codemcp_toml,
check_codemcprc,
check_editorconfig,
check_gitattributes,
get_line_ending_preference,
normalize_to_lf,
)
class LineEndingsTest(unittest.TestCase):
"""Test line endings handling functions."""
def test_normalize_to_lf(self):
"""Test normalizing different line endings to LF."""
# Test CRLF to LF
crlf_text = "line1\r\nline2\r\nline3"
self.assertEqual(normalize_to_lf(crlf_text), "line1\nline2\nline3")
# Test CR to LF
cr_text = "line1\rline2\rline3"
self.assertEqual(normalize_to_lf(cr_text), "line1\nline2\nline3")
# Test mixed line endings
mixed_text = "line1\nline2\r\nline3\rline4"
self.assertEqual(normalize_to_lf(mixed_text), "line1\nline2\nline3\nline4")
# Test already normalized text
lf_text = "line1\nline2\nline3"
self.assertEqual(normalize_to_lf(lf_text), lf_text)
# Test empty text
self.assertEqual(normalize_to_lf(""), "")
def test_apply_line_endings(self):
"""Test applying different line endings to content."""
lf_text = "line1\nline2\nline3"
# Test applying CRLF
self.assertEqual(apply_line_endings(lf_text, "CRLF"), "line1\r\nline2\r\nline3")
self.assertEqual(apply_line_endings(lf_text, "\r\n"), "line1\r\nline2\r\nline3")
# Test applying LF
self.assertEqual(apply_line_endings(lf_text, "LF"), lf_text)
self.assertEqual(apply_line_endings(lf_text, "\n"), lf_text)
# Test with mixed input (should normalize first)
mixed_text = "line1\r\nline2\nline3"
self.assertEqual(
apply_line_endings(mixed_text, "CRLF"), "line1\r\nline2\r\nline3"
)
self.assertEqual(apply_line_endings(mixed_text, "LF"), "line1\nline2\nline3")
# Test default behavior (should default to LF)
self.assertEqual(apply_line_endings(lf_text, None), lf_text)
def test_check_editorconfig(self):
"""Test reading line ending preference from .editorconfig."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create a mock directory structure
project_dir = Path(temp_dir) / "project"
src_dir = project_dir / "src"
src_dir.mkdir(parents=True, exist_ok=True)
# Create a test file
test_file = src_dir / "test.py"
test_file.touch()
# Create .editorconfig with LF line endings
editorconfig_content = """
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
"""
with open(project_dir / ".editorconfig", "w") as f:
f.write(editorconfig_content)
# Test with file that should use LF
result = check_editorconfig(str(test_file))
self.assertEqual(result, "LF")
# Create .editorconfig with CRLF line endings for .py files
editorconfig_content = """
root = true
[*]
charset = utf-8
end_of_line = lf
[*.py]
end_of_line = crlf
"""
with open(project_dir / ".editorconfig", "w") as f:
f.write(editorconfig_content)
# Test with .py file that should use CRLF
result = check_editorconfig(str(test_file))
self.assertEqual(result, "CRLF")
# Test with non-existent file (should return None)
non_existent = src_dir / "non_existent.py"
result = check_editorconfig(str(non_existent))
self.assertEqual(result, "CRLF") # Should still work based on pattern
def test_check_gitattributes(self):
"""Test reading line ending preference from .gitattributes."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create a mock directory structure
project_dir = Path(temp_dir) / "project"
src_dir = project_dir / "src"
src_dir.mkdir(parents=True, exist_ok=True)
# Create a test file
test_file = src_dir / "test.py"
test_file.touch()
# Test with LF (this matches the actual behavior)
gitattributes_content = """
*.py text eol=lf
"""
with open(project_dir / ".gitattributes", "w") as f:
f.write(gitattributes_content)
result = check_gitattributes(str(test_file))
self.assertEqual(result, "LF")
# Test with different file extension
bin_file = src_dir / "test.bin"
bin_file.touch()
gitattributes_content = """
*.py text eol=lf
*.bin binary
"""
with open(project_dir / ".gitattributes", "w") as f:
f.write(gitattributes_content)
result = check_gitattributes(str(bin_file))
self.assertIsNone(result)
def test_check_codemcp_toml(self):
"""Test reading line ending preference from codemcp.toml."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create a mock directory structure
project_dir = Path(temp_dir) / "project"
src_dir = project_dir / "src"
src_dir.mkdir(parents=True, exist_ok=True)
# Create a test file
test_file = src_dir / "test.py"
test_file.touch()
# Create codemcp.toml with LF line endings
codemcp_toml_content = """
[files]
line_endings = "LF"
"""
with open(project_dir / "codemcp.toml", "w") as f:
f.write(codemcp_toml_content)
# Test with file that should use LF
result = check_codemcp_toml(str(test_file))
self.assertEqual(result, "LF")
# Create codemcp.toml with CRLF line endings
codemcp_toml_content = """
[files]
line_endings = "CRLF"
"""
with open(project_dir / "codemcp.toml", "w") as f:
f.write(codemcp_toml_content)
# Test with file that should use CRLF
result = check_codemcp_toml(str(test_file))
self.assertEqual(result, "CRLF")
# Test with no line_endings setting
codemcp_toml_content = """
[logger]
verbosity = "INFO"
"""
with open(project_dir / "codemcp.toml", "w") as f:
f.write(codemcp_toml_content)
# Test with file that has no line_endings setting
result = check_codemcp_toml(str(test_file))
self.assertIsNone(result)
# Test with non-existent file (should return None)
non_existent = project_dir / "non_existent.py"
result = check_codemcp_toml(str(non_existent))
self.assertIsNone(result)
def test_check_codemcprc(self):
"""Test reading line ending preference from .codemcprc.
Note: This is a placeholder test as check_codemcprc() depends on config.get_line_endings_preference(),
which would require mocking to fully test. For the purposes of this test suite, we'll just verify
that the function doesn't raise exceptions.
"""
# Since this function depends on config module and would require mocking,
# we'll just verify it doesn't error
result = check_codemcprc()
# We can't assert the exact value as it depends on the environment,
# but we can check that it returns either None or a valid value
self.assertTrue(result is None or result in ("LF", "CRLF"))
def test_get_line_ending_preference_order(self):
"""Test that preferences are checked in the right order."""
# This test is more complex as it needs to test the hierarchical priority
# We'll create a full directory structure with multiple config files
with tempfile.TemporaryDirectory() as temp_dir:
# Create a mock directory structure
project_dir = Path(temp_dir) / "project"
src_dir = project_dir / "src"
src_dir.mkdir(parents=True, exist_ok=True)
# Create a test file
test_file = src_dir / "test.py"
test_file.touch()
# Test with no config files should default to OS line ending
result = get_line_ending_preference(str(test_file))
self.assertEqual(result, os.linesep)
# Create .editorconfig with CRLF
editorconfig_content = """
root = true
[*]
end_of_line = crlf
"""
with open(project_dir / ".editorconfig", "w") as f:
f.write(editorconfig_content)
# Test .editorconfig takes precedence when it's the only file
result = get_line_ending_preference(str(test_file))
self.assertEqual(result, "\r\n")
# Add .gitattributes with LF
gitattributes_content = """
*.py text eol=lf
"""
with open(project_dir / ".gitattributes", "w") as f:
f.write(gitattributes_content)
# Test .editorconfig still takes precedence over .gitattributes
result = get_line_ending_preference(str(test_file))
self.assertEqual(result, "\r\n")
# Remove .editorconfig
(project_dir / ".editorconfig").unlink()
# Test .gitattributes takes precedence when no .editorconfig
result = get_line_ending_preference(str(test_file))
self.assertEqual(result, "\n")
# Add codemcp.toml with CRLF
codemcp_toml_content = """
[files]
line_endings = "CRLF"
"""
with open(project_dir / "codemcp.toml", "w") as f:
f.write(codemcp_toml_content)
# Test .gitattributes still takes precedence over codemcp.toml
result = get_line_ending_preference(str(test_file))
self.assertEqual(result, "\n")
# Remove .gitattributes
(project_dir / ".gitattributes").unlink()
# Test codemcp.toml takes precedence when no .editorconfig or .gitattributes
result = get_line_ending_preference(str(test_file))
self.assertEqual(result, "\r\n")
def test_gitattributes_regex_matching(self):
"""Test specifically how the regex matching works in check_gitattributes."""
test_file = "test.py"
pattern = "*.py"
# Convert .gitattributes pattern to regex pattern exactly as it's done in the function
regex_pattern = pattern.replace(".", r"\.").replace("*", ".*")
self.assertEqual(regex_pattern, r".*\.py")
# Test the match
is_match = bool(re.match(regex_pattern, test_file))
self.assertTrue(
is_match, f"Pattern {regex_pattern} failed to match {test_file}"
)
# Create a simpler version of check_gitattributes to debug
def debug_check_gitattributes(file_path, gitattributes_content):
relative_path = Path(file_path).name
lines = gitattributes_content.strip().split("\n")
# Process lines in reverse to match the behavior
for line in reversed(lines):
line = line.strip()
if not line or line.startswith("#"):
continue
parts = line.split()
if len(parts) < 2:
continue
pattern, attrs = parts[0], parts[1:]
# Convert git pattern to regex
if pattern == "*": # Match all files
is_match = True
else:
# Convert .gitattributes pattern to regex pattern
regex_pattern = pattern.replace(".", r"\.").replace("*", ".*")
# Print debugging info
print(
f"Pattern: {pattern}, Regex: {regex_pattern}, File: {relative_path}"
)
is_match = bool(re.match(regex_pattern, relative_path))
print(f"Match result: {is_match}")
if is_match:
# Check for text/eol attributes
for attr in attrs:
if attr == "eol=crlf":
return "CRLF"
elif attr == "eol=lf":
return "LF"
elif attr == "text" and "eol=" not in str(attrs):
return "LF"
return None
# Test our debug version with various patterns
test_file = "test.py"
# Test with simple CRLF attribute
gitattributes_content = "*.py text eol=crlf"
result = debug_check_gitattributes(test_file, gitattributes_content)
self.assertEqual(result, "CRLF")
# Test with simple LF attribute
gitattributes_content = "*.py text eol=lf"
result = debug_check_gitattributes(test_file, gitattributes_content)
self.assertEqual(result, "LF")
# Test with multiple patterns
gitattributes_content = """
*.txt text eol=crlf
*.py text eol=lf
"""
result = debug_check_gitattributes(test_file, gitattributes_content)
self.assertEqual(result, "LF")
def test_editorconfig_complex_patterns(self):
"""Test more complex .editorconfig pattern matching."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create a mock directory structure
project_dir = Path(temp_dir) / "project"
src_dir = project_dir / "src"
src_dir.mkdir(parents=True, exist_ok=True)
# Create test files
py_file = src_dir / "test.py"
js_file = src_dir / "test.js"
md_file = src_dir / "README.md"
html_file = src_dir / "index.html"
py_file.touch()
js_file.touch()
md_file.touch()
html_file.touch()
# Create .editorconfig with complex pattern matching
editorconfig_content = """
root = true
# Default settings for all files
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
indent_style = space
indent_size = 4
# JavaScript files
[*.js]
indent_size = 2
end_of_line = crlf
# Python files
[*.py]
end_of_line = lf
# Markdown files get special treatment
[*.md]
trim_trailing_whitespace = false
"""
with open(project_dir / ".editorconfig", "w") as f:
f.write(editorconfig_content)
# Test each file type gets the right setting
result = check_editorconfig(str(py_file))
self.assertEqual(result, "LF", "Python files should use LF line endings")
result = check_editorconfig(str(js_file))
self.assertEqual(
result, "CRLF", "JavaScript files should use CRLF line endings"
)
# HTML files should use default LF since they're not specified
result = check_editorconfig(str(html_file))
self.assertEqual(
result, "LF", "HTML files should use default LF line endings"
)
# Test that the most specific pattern takes precedence
# Default is LF, but *.js specifies CRLF
more_specific_editorconfig = """
root = true
[*]
end_of_line = lf
[*.js]
end_of_line = crlf
"""
with open(project_dir / ".editorconfig", "w") as f:
f.write(more_specific_editorconfig)
result = check_editorconfig(str(py_file))
self.assertEqual(result, "LF", "Python files should use the default LF")
result = check_editorconfig(str(js_file))
self.assertEqual(
result, "CRLF", "JavaScript files should use specific CRLF"
)
# Test a completely different pattern format
glob_pattern_editorconfig = """
root = true
[*.py]
end_of_line = lf
[Makefile]
end_of_line = lf
indent_style = tab
[package.json]
end_of_line = crlf
indent_style = space
indent_size = 2
"""
with open(project_dir / ".editorconfig", "w") as f:
f.write(glob_pattern_editorconfig)
# Create the special files
make_file = project_dir / "Makefile"
pkg_file = project_dir / "package.json"
make_file.touch()
pkg_file.touch()
result = check_editorconfig(str(make_file))
self.assertEqual(result, "LF", "Makefile should use LF")
result = check_editorconfig(str(pkg_file))
self.assertEqual(result, "CRLF", "package.json should use CRLF")
def test_nested_config_files(self):
"""Test handling of nested configuration files in subdirectories."""
with tempfile.TemporaryDirectory() as temp_dir:
# Create a nested directory structure
root_dir = Path(temp_dir)
project_dir = root_dir / "project"
frontend_dir = project_dir / "frontend"
backend_dir = project_dir / "backend"
# Make directories
frontend_dir.mkdir(parents=True, exist_ok=True)
backend_dir.mkdir(parents=True, exist_ok=True)
# Create test files in each directory
root_file = project_dir / "root.py"
frontend_file = frontend_dir / "app.js"
backend_file = backend_dir / "server.py"
root_file.touch()
frontend_file.touch()
backend_file.touch()
# Create root .editorconfig with default LF
with open(project_dir / ".editorconfig", "w") as f:
f.write("""
root = true
[*]
end_of_line = lf
""")
# Create frontend .editorconfig with CRLF
with open(frontend_dir / ".editorconfig", "w") as f:
f.write("""
root = false
[*]
end_of_line = crlf
""")
# Create backend .gitattributes with explicit LF
with open(backend_dir / ".gitattributes", "w") as f:
f.write("""
*.py text eol=lf
""")
# Test that each file gets the right setting from the nearest config
result = check_editorconfig(str(root_file))
self.assertEqual(result, "LF", "Root files should use root .editorconfig")
result = check_editorconfig(str(frontend_file))
self.assertEqual(
result, "CRLF", "Frontend files should use frontend .editorconfig"
)
result = check_editorconfig(str(backend_file))
self.assertEqual(
result, "LF", "Backend files should use root .editorconfig"
)
# Test combined preference from all sources
result = get_line_ending_preference(str(root_file))
self.assertEqual(
result, "\n", "Root files should use LF from root .editorconfig"
)
result = get_line_ending_preference(str(frontend_file))
self.assertEqual(
result,
"\r\n",
"Frontend files should use CRLF from frontend .editorconfig",
)
result = get_line_ending_preference(str(backend_file))
self.assertEqual(
result,
"\n",
"Backend files should use LF from either .editorconfig or .gitattributes",
)
# Create a codemcp.toml in the root directory with explicit CRLF
with open(project_dir / "codemcp.toml", "w") as f:
f.write("""
[files]
line_endings = "CRLF"
""")
# Test .editorconfig still takes precedence
result = get_line_ending_preference(str(root_file))
self.assertEqual(
result, "\n", ".editorconfig should take precedence over codemcp.toml"
)
# Test precedence by removing .editorconfig files
(project_dir / ".editorconfig").unlink()
(frontend_dir / ".editorconfig").unlink()
# Now .gitattributes should take precedence for backend files
result = get_line_ending_preference(str(backend_file))
self.assertEqual(
result, "\n", "Backend files should use LF from .gitattributes"
)
# And codemcp.toml should be used for other files
result = get_line_ending_preference(str(root_file))
self.assertEqual(
result,
"\r\n",
"Root files should use CRLF from codemcp.toml when no .editorconfig",
)
result = get_line_ending_preference(str(frontend_file))
self.assertEqual(
result,
"\r\n",
"Frontend files should use CRLF from codemcp.toml when no .editorconfig",
)
if __name__ == "__main__":
unittest.main()