MCP Server Neurolorap

"""Unit tests for the CodeCollector class."""

import logging
import os
from pathlib import Path
from typing import Any
from unittest.mock import create_autospec, patch

import pytest

from mcp_server_neurolorap.collector import CodeCollector, LanguageMap


def test_language_map() -> None:
    """Test LanguageMap extension to language mapping.

    Covers known extensions, an unknown extension, a name without any
    extension, and an upper-case file name.
    """
    test_cases = [
        ("test.py", "python"),
        ("test.js", "javascript"),
        ("test.ts", "typescript"),
        ("test.jsx", "jsx"),
        ("test.tsx", "tsx"),
        ("test.html", "html"),
        ("test.css", "css"),
        ("test.md", "markdown"),
        ("test.json", "json"),
        ("test.yml", "yaml"),
        ("test.yaml", "yaml"),
        ("test.sh", "bash"),
        ("test.unknown", ""),  # Unknown extension
        ("test", ""),  # No extension
        ("TEST.PY", "python"),  # Case insensitive
    ]
    for filename, expected_lang in test_cases:
        assert LanguageMap.get_language(Path(filename)) == expected_lang


def test_collect_files_with_spaces(project_root: Path) -> None:
    """Test collecting files with spaces in paths."""
    # Create test files
    space_dir = project_root / "test dir"
    space_dir.mkdir(exist_ok=True)
    space_file = space_dir / "test file.py"
    space_file.write_text("Test content")

    collector = CodeCollector(project_root)
    files = collector.collect_files(str(space_dir))
    assert space_file in files

    # Cleanup
    space_file.unlink()
    space_dir.rmdir()


def test_collect_files_absolute_path(project_root: Path) -> None:
    """Test collecting files with absolute paths.

    Checks an absolute path inside the project root and a file that lives
    in a sibling directory of the project root.
    """
    test_file = project_root / "test.py"
    test_file.write_text("Test content")

    collector = CodeCollector(project_root)

    # Test with absolute path
    abs_path = test_file.absolute()
    files = collector.collect_files(str(abs_path))
    assert test_file in files

    # Test with path outside project root
    outside_dir = project_root.parent / "outside"
    outside_dir.mkdir(exist_ok=True)
    outside_file = outside_dir / "test.py"
    try:
        outside_file.write_text("Test content")
        files = collector.collect_files(str(outside_file))
        assert outside_file in files
    finally:
        # These entries live outside project_root, so remove them even
        # when an assertion fails; otherwise they leak into later tests.
        outside_file.unlink(missing_ok=True)
        outside_dir.rmdir()

    # Cleanup
    test_file.unlink()


def test_read_file_content_encodings(project_root: Path) -> None:
    """Test reading files with different encodings."""
    collector = CodeCollector(project_root)

    # UTF-8 with BOM
    utf8_bom_file = project_root / "utf8_bom.txt"
    utf8_bom_file.write_bytes(b"\xef\xbb\xbfTest content")
    assert "Test content" in collector.read_file_content(utf8_bom_file)

    # UTF-16
    utf16_file = project_root / "utf16.txt"
    utf16_file.write_text("Test content", encoding="utf-16")
    assert "[Binary file content not shown]" == collector.read_file_content(
        utf16_file
    )

    # Invalid UTF-8
    invalid_file = project_root / "invalid.txt"
    invalid_file.write_bytes(b"Test content \xff\xff")
    assert "[Binary file content not shown]" == collector.read_file_content(
        invalid_file
    )

    # Cleanup
    utf8_bom_file.unlink()
    utf16_file.unlink()
    invalid_file.unlink()


def test_read_file_content_errors(project_root: Path) -> None:
    """Test error handling when reading files."""
    collector = CodeCollector(project_root)

    # Permission error
    no_access_file = project_root / "no_access.txt"
    no_access_file.write_text("Test content")
    os.chmod(no_access_file, 0o000)
    try:
        assert "[Permission denied]" == collector.read_file_content(
            no_access_file
        )
    finally:
        # Restore permissions even on failure so the file can be removed.
        os.chmod(no_access_file, 0o666)
        no_access_file.unlink()

    # File not found
    assert "[File not found]" == collector.read_file_content(
        project_root / "nonexistent.txt"
    )


def test_collect_code_output_files(project_root: Path) -> None:
    """Test creation of output files.

    Verifies the collected markdown file and the companion analysis
    prompt file whose name swaps the ``FULL_CODE_`` prefix for
    ``PROMPT_ANALYZE_``.
    """
    collector = CodeCollector(project_root)

    # Create test file
    test_file = project_root / "test.py"
    test_file.write_text("Test content")

    # Collect code
    output_path = collector.collect_code(str(test_file))
    assert output_path is not None
    assert output_path.exists()

    # Check content
    content = output_path.read_text()
    assert "# Code Collection" in content
    assert "## Table of Contents" in content
    assert "## Files" in content
    assert "### test.py" in content
    assert "```python" in content
    assert "Test content" in content

    # Check analysis prompt file
    analysis_path = output_path.parent / output_path.name.replace(
        "FULL_CODE_", "PROMPT_ANALYZE_"
    )
    assert analysis_path.exists()

    # Cleanup
    test_file.unlink()


def test_init_with_project_root(project_root: Path) -> None:
    """Test initializing CodeCollector with project root."""
    collector = CodeCollector(project_root)
    assert collector.project_root == project_root
    assert isinstance(collector.ignore_patterns, list)


def test_init_without_project_root() -> None:
    """Test initializing CodeCollector without project root."""
    collector = CodeCollector()
    assert collector.project_root == Path.cwd()


def test_load_ignore_patterns(project_root: Path, ignore_file: Path) -> None:
    """Test loading ignore patterns from .neuroloraignore file."""
    collector = CodeCollector(project_root)
    patterns = collector.load_ignore_patterns()

    assert "*.log" in patterns
    assert "node_modules/" in patterns
    assert "__pycache__/" in patterns
    assert ".git/" in patterns


def test_should_ignore_file(project_root: Path, ignore_file: Path) -> None:
    """Test file ignore logic."""
    collector = CodeCollector(project_root)

    # Should ignore files matching patterns
    assert collector.should_ignore_file(project_root / "test.log")
    assert collector.should_ignore_file(
        project_root / "node_modules" / "test.js"
    )
    assert collector.should_ignore_file(
        project_root / "__pycache__" / "test.pyc"
    )
    assert collector.should_ignore_file(project_root / ".git" / "config")

    # Should not ignore regular files
    assert not collector.should_ignore_file(project_root / "test.py")
    assert not collector.should_ignore_file(project_root / "src" / "main.py")


def test_collect_files(project_root: Path, sample_files: list[Path]) -> None:
    """Test collecting files from input paths."""
    collector = CodeCollector(project_root)

    # Test collecting single file
    files = collector.collect_files(str(sample_files[0]))
    assert len(files) == 1
    assert files[0] == sample_files[0]

    # Test collecting directory
    files = collector.collect_files(str(project_root))
    assert len(files) == len(sample_files)
    assert all(f in files for f in sample_files)

    # Test collecting multiple paths
    paths = [str(sample_files[0]), str(project_root / "src")]
    files = collector.collect_files(paths)
    assert len(files) == 2
    assert sample_files[0] in files
    assert project_root / "src" / "main.py" in files


def test_collect_files_nonexistent(project_root: Path) -> None:
    """Test collecting files from nonexistent paths."""
    collector = CodeCollector(project_root)
    files = collector.collect_files("nonexistent")
    assert files == []


def test_read_file_content(
    project_root: Path, sample_files: list[Path]
) -> None:
    """Test reading file content."""
    collector = CodeCollector(project_root)

    # Test reading existing file
    content = collector.read_file_content(sample_files[0])
    assert content == f"Test content in {sample_files[0].name}"

    # Test reading nonexistent file
    content = collector.read_file_content(project_root / "nonexistent")
    assert content == "[File not found]"


def test_make_anchor() -> None:
    """Test markdown anchor generation."""
    collector = CodeCollector()
    test_cases = [
        ("src/test.py", "src-test-py"),
        ("test file.js", "test-file-js"),
        ("TEST.PY", "test-py"),
        ("src/sub/test.py", "src-sub-test-py"),
    ]
    for path, expected in test_cases:
        assert collector.make_anchor(Path(path)) == expected


@pytest.mark.parametrize(
    "title",
    ["Test Collection", "Project Files", "Source Code"],
    ids=["collection", "files", "source"],
)
def test_collect_code(
    project_root: Path,
    sample_files: list[Path],
    title: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test full code collection process.

    Runs once per parametrized ``title`` and checks the title heading,
    the table of contents, and the per-file sections of the output.
    """
    collector = CodeCollector(project_root)

    # Mock os.sync and os.utime
    sync_mock = create_autospec(os.sync, return_value=None)
    utime_mock = create_autospec(os.utime, return_value=None)
    monkeypatch.setattr(os, "sync", sync_mock)
    monkeypatch.setattr(os, "utime", utime_mock)

    # Test collecting all files
    output_path = collector.collect_code(str(project_root), title=title)
    assert output_path is not None
    assert output_path.exists()

    # Verify output file content
    content = output_path.read_text()

    # Check title
    assert f"# {title}" in content

    # Check table of contents
    assert "## Table of Contents" in content
    for file in sample_files:
        rel_path = file.relative_to(project_root)
        assert f"- [{rel_path}]" in content

    # Check file contents
    assert "## Files" in content
    for file in sample_files:
        rel_path = file.relative_to(project_root)
        assert f"### {rel_path}" in content
        lang = LanguageMap.get_language(file)
        assert f"```{lang}" in content
        assert f"Test content in {file.name}" in content


def test_collect_code_empty_input(project_root: Path) -> None:
    """Test code collection with empty input."""
    collector = CodeCollector(project_root)
    assert collector.collect_code("nonexistent") is None


@pytest.mark.parametrize(
    "error_type,error_msg,expected_log",
    [
        (ValueError, "Invalid input", "Invalid input"),
        (FileNotFoundError, "File not found", "File not found"),
        (PermissionError, "Permission denied", "Permission denied"),
        (Exception, "Unexpected error", "Unexpected error"),
    ],
    ids=[
        "value_error",
        "file_not_found",
        "permission_error",
        "unexpected_error",
    ],
)
def test_collect_code_error_handling(
    project_root: Path,
    caplog: pytest.LogCaptureFixture,
    error_type: type[Exception],
    error_msg: str,
    expected_log: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Test error handling during code collection.

    ``collect_files`` is replaced with a stub that raises the
    parametrized exception; ``collect_code`` is then expected to return
    ``None``, log the message, and leave no ``FULL_CODE_*`` file behind.
    """
    collector = CodeCollector(project_root)

    def mock_collect_files(*args: Any, **kwargs: Any) -> list[Path]:
        raise error_type(error_msg)

    monkeypatch.setattr(collector, "collect_files", mock_collect_files)

    with caplog.at_level(logging.ERROR):
        result = collector.collect_code("test_input")
        assert result is None
        assert expected_log in caplog.text

    # Verify no output file was created
    output_files = list(project_root.glob("FULL_CODE_*"))
    assert len(output_files) == 0


def test_large_file_handling(project_root: Path) -> None:
    """Test handling of large files."""
    collector = CodeCollector(project_root)

    # Create a large file (>1MB)
    large_file = project_root / "large.txt"
    large_file.write_bytes(b"0" * (1024 * 1024 + 1))

    assert collector.should_ignore_file(large_file)

    # Cleanup
    large_file.unlink()


def test_binary_file_handling(project_root: Path) -> None:
    """Test handling of binary files."""
    collector = CodeCollector(project_root)

    # Create a binary file
    binary_file = project_root / "test.bin"
    binary_file.write_bytes(bytes(range(256)))

    content = collector.read_file_content(binary_file)
    assert content == "[Binary file content not shown]"

    # Cleanup
    binary_file.unlink()


def test_should_ignore_file_special_cases(project_root: Path) -> None:
    """Test special cases for file ignore logic."""
    collector = CodeCollector(project_root)

    # Test FULL_CODE_ files
    assert collector.should_ignore_file(project_root / "FULL_CODE_test.md")

    # Test .neuroloraignore file
    assert collector.should_ignore_file(project_root / ".neuroloraignore")

    # Test file with permission error
    no_access_file = project_root / "no_access.txt"
    no_access_file.write_text("Test content")
    os.chmod(no_access_file, 0o000)

    # On some systems, we might still be able to stat the file
    # even without read permissions. So we'll just verify that
    # should_ignore_file handles it correctly in either case
    try:
        no_access_file.stat()
        # If we can stat, we should still be able to check size
        assert collector.should_ignore_file(no_access_file) == (
            no_access_file.stat().st_size > 1024 * 1024
        )
    except PermissionError:
        # If we can't stat, it should be ignored
        assert collector.should_ignore_file(no_access_file)
    finally:
        # Restore permissions even on failure so the file can be removed.
        os.chmod(no_access_file, 0o666)
        no_access_file.unlink()

    # Test directory patterns
    assert collector.should_ignore_file(
        project_root / "node_modules" / "deep" / "test.js"
    )
    assert collector.should_ignore_file(project_root / "dist" / "index.html")

    # Test file outside project root
    outside_file = project_root.parent / "outside.py"
    outside_file.touch()
    try:
        assert not collector.should_ignore_file(outside_file)
    finally:
        # The file lives outside project_root; always remove it.
        outside_file.unlink()


def test_collect_files_error_handling(project_root: Path) -> None:
    """Test error handling in collect_files.

    Covers an unreadable directory, a path containing a NUL character,
    and a list of paths in which only one entry is valid.
    """
    collector = CodeCollector(project_root)

    # Test permission error
    no_access_dir = project_root / "no_access"
    no_access_dir.mkdir()
    no_access_file = no_access_dir / "test.py"
    no_access_file.write_text("Test content")

    try:
        with patch(
            "pathlib.Path.stat", create_autospec(Path.stat, return_value=None)
        ):
            os.chmod(no_access_dir, 0o000)
            files = collector.collect_files(str(no_access_dir))
            assert files == []
    finally:
        # Restore access even on failure so the directory can be emptied
        # and removed.
        os.chmod(no_access_dir, 0o777)
        no_access_file.unlink()
        no_access_dir.rmdir()

    # Test invalid path
    files = collector.collect_files("\0invalid")  # Invalid path character
    assert files == []  # Should return empty list for invalid paths

    # Test collecting from multiple paths with some failing
    test_file = project_root / "test.py"
    test_file.write_text("Test content")

    files = collector.collect_files(
        [str(test_file), "nonexistent", "\0invalid"]
    )
    assert len(files) == 1
    assert test_file in files

    test_file.unlink()


def test_collect_files_sorting(project_root: Path) -> None:
    """Test file sorting in collect_files."""
    collector = CodeCollector(project_root)

    # Create test files
    files = ["b.py", "a.py", "PROJECT_SUMMARY.md", "src/test.py", "README.md"]
    for file in files:
        path = project_root / file
        path.parent.mkdir(exist_ok=True)
        path.write_text("Test content")

    collected = collector.collect_files(str(project_root))

    # PROJECT_SUMMARY.md should be first
    assert collected[0].name == "PROJECT_SUMMARY.md"

    # Other files should be sorted alphabetically
    sorted_names = [f.name for f in collected[1:]]
    assert sorted_names == ["README.md", "a.py", "b.py", "test.py"]

    # Cleanup
    for file in files:
        path = project_root / file
        path.unlink()
        if path.parent != project_root:
            path.parent.rmdir()