# MCP Server Neurolorap
# by aindreyway
"""Unit tests for the CodeCollector class."""
import logging
import os
from pathlib import Path
from typing import Any
from unittest.mock import create_autospec, patch
import pytest
from mcp_server_neurolorap.collector import CodeCollector, LanguageMap
def test_language_map() -> None:
    """Verify that LanguageMap resolves file names to language identifiers."""
    expected_by_filename = {
        "test.py": "python",
        "test.js": "javascript",
        "test.ts": "typescript",
        "test.jsx": "jsx",
        "test.tsx": "tsx",
        "test.html": "html",
        "test.css": "css",
        "test.md": "markdown",
        "test.json": "json",
        "test.yml": "yaml",
        "test.yaml": "yaml",
        "test.sh": "bash",
        "test.unknown": "",  # Unrecognised extension yields an empty string
        "test": "",  # No extension at all
        "TEST.PY": "python",  # Upper-case names resolve like lower-case ones
    }
    for name, language in expected_by_filename.items():
        assert LanguageMap.get_language(Path(name)) == language
def test_collect_files_with_spaces(project_root: Path) -> None:
    """Test collecting files whose directory and file names contain spaces.

    A directory ``test dir`` holding ``test file.py`` is created under
    ``project_root``; collecting the directory must return that file.
    """
    space_dir = project_root / "test dir"
    space_dir.mkdir(exist_ok=True)
    space_file = space_dir / "test file.py"
    try:
        space_file.write_text("Test content")

        collector = CodeCollector(project_root)
        files = collector.collect_files(str(space_dir))
        assert space_file in files
    finally:
        # Clean up even when the assertion fails so that leftover files
        # cannot influence other tests using the same project root.
        space_file.unlink(missing_ok=True)
        space_dir.rmdir()
def test_collect_files_absolute_path(project_root: Path) -> None:
    """Test collecting files addressed by absolute paths.

    Covers two cases: an absolute path to a file inside ``project_root`` and
    an absolute path to a file in a sibling directory outside of it. Both
    files are expected to be returned by ``collect_files``.
    """
    test_file = project_root / "test.py"
    outside_dir = project_root.parent / "outside"
    outside_file = outside_dir / "test.py"
    try:
        test_file.write_text("Test content")
        collector = CodeCollector(project_root)

        # Absolute path pointing inside the project root
        abs_path = test_file.absolute()
        files = collector.collect_files(str(abs_path))
        assert test_file in files

        # Absolute path pointing outside the project root
        outside_dir.mkdir(exist_ok=True)
        outside_file.write_text("Test content")
        files = collector.collect_files(str(outside_file))
        assert outside_file in files
    finally:
        # The "outside" directory lives beside the project root, so it must
        # be removed here even when an assertion above fails.
        test_file.unlink(missing_ok=True)
        outside_file.unlink(missing_ok=True)
        if outside_dir.is_dir():
            outside_dir.rmdir()
def test_read_file_content_encodings(project_root: Path) -> None:
    """Test reading files stored in different encodings.

    UTF-8 with a BOM must be readable as text, while UTF-16 and invalid
    UTF-8 byte sequences must be reported with the binary placeholder.
    """
    collector = CodeCollector(project_root)
    utf8_bom_file = project_root / "utf8_bom.txt"
    utf16_file = project_root / "utf16.txt"
    invalid_file = project_root / "invalid.txt"
    try:
        # UTF-8 with BOM
        utf8_bom_file.write_bytes(b"\xef\xbb\xbfTest content")
        assert "Test content" in collector.read_file_content(utf8_bom_file)

        # UTF-16
        utf16_file.write_text("Test content", encoding="utf-16")
        assert "[Binary file content not shown]" == collector.read_file_content(
            utf16_file
        )

        # Invalid UTF-8
        invalid_file.write_bytes(b"Test content \xff\xff")
        assert "[Binary file content not shown]" == collector.read_file_content(
            invalid_file
        )
    finally:
        # Remove whatever was created, regardless of which assertion failed.
        for created in (utf8_bom_file, utf16_file, invalid_file):
            created.unlink(missing_ok=True)
def test_read_file_content_errors(project_root: Path) -> None:
    """Test the placeholders returned when a file cannot be read.

    An unreadable file must yield ``"[Permission denied]"`` and a missing
    file must yield ``"[File not found]"``.
    """
    collector = CodeCollector(project_root)

    # Permission error
    no_access_file = project_root / "no_access.txt"
    no_access_file.write_text("Test content")
    os.chmod(no_access_file, 0o000)
    try:
        # NOTE(review): mode 0o000 does not block a privileged user (e.g.
        # root), so this check presumably requires an unprivileged run.
        assert "[Permission denied]" == collector.read_file_content(
            no_access_file
        )
    finally:
        # Always restore permissions so a failed assertion does not leave
        # an inaccessible file behind.
        os.chmod(no_access_file, 0o666)
        no_access_file.unlink()

    # File not found
    assert "[File not found]" == collector.read_file_content(
        project_root / "nonexistent.txt"
    )
def test_collect_code_output_files(project_root: Path) -> None:
    """Test that ``collect_code`` writes its output files.

    Checks that the returned markdown file exists and contains the expected
    sections, and that a sibling file whose name has ``FULL_CODE_`` replaced
    by ``PROMPT_ANALYZE_`` exists as well.
    """
    collector = CodeCollector(project_root)

    # Create test file
    test_file = project_root / "test.py"
    test_file.write_text("Test content")
    try:
        # Collect code
        output_path = collector.collect_code(str(test_file))
        assert output_path is not None
        assert output_path.exists()

        # Check content
        content = output_path.read_text()
        assert "# Code Collection" in content
        assert "## Table of Contents" in content
        assert "## Files" in content
        assert "### test.py" in content
        assert "```python" in content
        assert "Test content" in content

        # Check analysis prompt file
        analysis_path = output_path.parent / output_path.name.replace(
            "FULL_CODE_", "PROMPT_ANALYZE_"
        )
        assert analysis_path.exists()
    finally:
        # Remove the input file even if one of the checks above fails.
        test_file.unlink()
def test_init_with_project_root(project_root: Path) -> None:
    """Check the attributes set when a project root is passed explicitly."""
    instance = CodeCollector(project_root)

    # The root is stored as given and ignore patterns are held in a list.
    assert instance.project_root == project_root
    assert isinstance(instance.ignore_patterns, list)
def test_init_without_project_root() -> None:
    """Check that the project root falls back to the working directory."""
    assert CodeCollector().project_root == Path.cwd()
def test_load_ignore_patterns(project_root: Path, ignore_file: Path) -> None:
    """Check the patterns loaded from the .neuroloraignore file."""
    loaded = CodeCollector(project_root).load_ignore_patterns()

    # Each of these entries must be present in the loaded patterns.
    for expected in ("*.log", "node_modules/", "__pycache__/", ".git/"):
        assert expected in loaded
def test_should_ignore_file(project_root: Path, ignore_file: Path) -> None:
    """Check which paths ``should_ignore_file`` accepts and rejects."""
    collector = CodeCollector(project_root)

    # Paths matching an ignore pattern
    ignored_paths = (
        project_root / "test.log",
        project_root / "node_modules" / "test.js",
        project_root / "__pycache__" / "test.pyc",
        project_root / ".git" / "config",
    )
    for candidate in ignored_paths:
        assert collector.should_ignore_file(candidate)

    # Regular source files that must be kept
    kept_paths = (
        project_root / "test.py",
        project_root / "src" / "main.py",
    )
    for candidate in kept_paths:
        assert not collector.should_ignore_file(candidate)
def test_collect_files(project_root: Path, sample_files: list[Path]) -> None:
    """Check ``collect_files`` for a file, a directory and a list of paths."""
    collector = CodeCollector(project_root)
    first_sample = sample_files[0]

    # A single file path returns exactly that file
    single = collector.collect_files(str(first_sample))
    assert len(single) == 1
    assert single[0] == first_sample

    # The project root returns every sample file
    from_root = collector.collect_files(str(project_root))
    assert len(from_root) == len(sample_files)
    assert all(sample in from_root for sample in sample_files)

    # A list mixing a file and a directory
    mixed_inputs = [str(first_sample), str(project_root / "src")]
    from_mixed = collector.collect_files(mixed_inputs)
    assert len(from_mixed) == 2
    assert first_sample in from_mixed
    assert project_root / "src" / "main.py" in from_mixed
def test_collect_files_nonexistent(project_root: Path) -> None:
    """Check that a nonexistent input path yields an empty list."""
    result = CodeCollector(project_root).collect_files("nonexistent")
    assert result == []
def test_read_file_content(
    project_root: Path, sample_files: list[Path]
) -> None:
    """Check ``read_file_content`` for an existing and a missing file."""
    collector = CodeCollector(project_root)

    # An existing sample file returns its text verbatim
    existing = sample_files[0]
    assert (
        collector.read_file_content(existing)
        == f"Test content in {existing.name}"
    )

    # A missing file returns the not-found placeholder
    missing = project_root / "nonexistent"
    assert collector.read_file_content(missing) == "[File not found]"
def test_make_anchor() -> None:
    """Check the markdown anchors produced for a set of sample paths."""
    collector = CodeCollector()
    anchor_by_path = {
        "src/test.py": "src-test-py",
        "test file.js": "test-file-js",
        "TEST.PY": "test-py",
        "src/sub/test.py": "src-sub-test-py",
    }
    for raw_path, anchor in anchor_by_path.items():
        assert collector.make_anchor(Path(raw_path)) == anchor
@pytest.mark.parametrize(
    "title",
    ["Test Collection", "Project Files", "Source Code"],
    ids=["collection", "files", "source"],
)
def test_collect_code(
    project_root: Path,
    sample_files: list[Path],
    title: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run a full collection over the project root and inspect the output.

    ``os.sync`` and ``os.utime`` are replaced with autospec'd mocks for the
    duration of the test. The generated document must contain the title, a
    table-of-contents entry for every sample file, and a section with a
    fenced code block and the file content for every sample file.
    """
    collector = CodeCollector(project_root)

    # Replace os.sync and os.utime with mocks that return None
    monkeypatch.setattr(os, "sync", create_autospec(os.sync, return_value=None))
    monkeypatch.setattr(
        os, "utime", create_autospec(os.utime, return_value=None)
    )

    # Collect everything below the project root
    result_path = collector.collect_code(str(project_root), title=title)
    assert result_path is not None
    assert result_path.exists()

    document = result_path.read_text()

    # Title heading
    assert f"# {title}" in document

    # Table of contents: one entry per sample file
    assert "## Table of Contents" in document
    for sample in sample_files:
        relative = sample.relative_to(project_root)
        assert f"- [{relative}]" in document

    # File sections: heading, fenced block and content per sample file
    assert "## Files" in document
    for sample in sample_files:
        relative = sample.relative_to(project_root)
        assert f"### {relative}" in document
        language = LanguageMap.get_language(sample)
        assert f"```{language}" in document
        assert f"Test content in {sample.name}" in document
def test_collect_code_empty_input(project_root: Path) -> None:
    """Check that collecting a nonexistent input returns ``None``."""
    outcome = CodeCollector(project_root).collect_code("nonexistent")
    assert outcome is None
@pytest.mark.parametrize(
    "error_type,error_msg,expected_log",
    [
        (ValueError, "Invalid input", "Invalid input"),
        (FileNotFoundError, "File not found", "File not found"),
        (PermissionError, "Permission denied", "Permission denied"),
        (Exception, "Unexpected error", "Unexpected error"),
    ],
    ids=[
        "value_error",
        "file_not_found",
        "permission_error",
        "unexpected_error",
    ],
)
def test_collect_code_error_handling(
    project_root: Path,
    caplog: pytest.LogCaptureFixture,
    error_type: type[Exception],
    error_msg: str,
    expected_log: str,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``collect_code`` logs and returns ``None`` on errors.

    ``collect_files`` is replaced on the instance by a stub raising the
    parametrized exception; the message must appear in the captured log
    and no ``FULL_CODE_*`` file may be present in the project root.
    """
    collector = CodeCollector(project_root)

    def failing_collect_files(*args: Any, **kwargs: Any) -> list[Path]:
        # Stand-in for collect_files that always fails.
        raise error_type(error_msg)

    monkeypatch.setattr(collector, "collect_files", failing_collect_files)

    with caplog.at_level(logging.ERROR):
        outcome = collector.collect_code("test_input")
        assert outcome is None
        assert expected_log in caplog.text

    # No output file may have been written to the project root
    leftovers = list(project_root.glob("FULL_CODE_*"))
    assert not leftovers
def test_large_file_handling(project_root: Path) -> None:
    """Test that a file just over 1 MiB is reported as ignored."""
    collector = CodeCollector(project_root)

    # Create a large file (>1MB)
    large_file = project_root / "large.txt"
    large_file.write_bytes(b"0" * (1024 * 1024 + 1))
    try:
        assert collector.should_ignore_file(large_file)
    finally:
        # Do not leave a megabyte-sized file behind when the check fails.
        large_file.unlink()
def test_binary_file_handling(project_root: Path) -> None:
    """Test that binary content is replaced by the binary placeholder."""
    collector = CodeCollector(project_root)

    # Create a binary file holding every byte value from 0 to 255
    binary_file = project_root / "test.bin"
    binary_file.write_bytes(bytes(range(256)))
    try:
        content = collector.read_file_content(binary_file)
        assert content == "[Binary file content not shown]"
    finally:
        # Remove the file even if the assertion fails.
        binary_file.unlink()
def test_should_ignore_file_special_cases(project_root: Path) -> None:
    """Test special cases of the file ignore logic.

    Covers generated ``FULL_CODE_`` files, the ``.neuroloraignore`` file
    itself, a file without permissions, nested ignored directories, and a
    file located outside the project root.
    """
    collector = CodeCollector(project_root)

    # Test FULL_CODE_ files
    assert collector.should_ignore_file(project_root / "FULL_CODE_test.md")

    # Test .neuroloraignore file
    assert collector.should_ignore_file(project_root / ".neuroloraignore")

    # Test file with permission error
    no_access_file = project_root / "no_access.txt"
    no_access_file.write_text("Test content")
    os.chmod(no_access_file, 0o000)
    # On some systems, we might still be able to stat the file
    # even without read permissions. So we'll just verify that
    # should_ignore_file handles it correctly in either case
    try:
        no_access_file.stat()
        # If we can stat, we should still be able to check size
        assert collector.should_ignore_file(no_access_file) == (
            no_access_file.stat().st_size > 1024 * 1024
        )
    except PermissionError:
        # If we can't stat, it should be ignored
        assert collector.should_ignore_file(no_access_file)
    finally:
        # Restore permissions and delete the file even when an assertion
        # fails, so no inaccessible file is left behind.
        os.chmod(no_access_file, 0o666)
        no_access_file.unlink()

    # Test directory patterns
    assert collector.should_ignore_file(
        project_root / "node_modules" / "deep" / "test.js"
    )
    assert collector.should_ignore_file(project_root / "dist" / "index.html")

    # Test file outside project root
    outside_file = project_root.parent / "outside.py"
    outside_file.touch()
    try:
        assert not collector.should_ignore_file(outside_file)
    finally:
        # This file lives outside the project root; always remove it.
        outside_file.unlink()
def test_collect_files_error_handling(project_root: Path) -> None:
    """Test error handling in ``collect_files``.

    Covers an inaccessible directory, a path containing a NUL character,
    and a list of paths in which only one entry is valid.
    """
    collector = CodeCollector(project_root)

    # Test permission error
    no_access_dir = project_root / "no_access"
    no_access_dir.mkdir()
    no_access_file = no_access_dir / "test.py"
    no_access_file.write_text("Test content")
    try:
        # Path.stat is replaced by a mock returning None only while the
        # directory is made inaccessible and collected.
        with patch(
            "pathlib.Path.stat", create_autospec(Path.stat, return_value=None)
        ):
            os.chmod(no_access_dir, 0o000)
            files = collector.collect_files(str(no_access_dir))
        assert files == []
    finally:
        # Restore access before deleting; without this a failed assertion
        # would leave a directory that cannot be removed.
        os.chmod(no_access_dir, 0o777)
        no_access_file.unlink()
        no_access_dir.rmdir()

    # Test invalid path
    files = collector.collect_files("\0invalid")  # Invalid path character
    assert files == []  # Should return empty list for invalid paths

    # Test collecting from multiple paths with some failing
    test_file = project_root / "test.py"
    test_file.write_text("Test content")
    try:
        files = collector.collect_files(
            [str(test_file), "nonexistent", "\0invalid"]
        )
        assert len(files) == 1
        assert test_file in files
    finally:
        test_file.unlink()
def test_collect_files_sorting(project_root: Path) -> None:
    """Test the order in which ``collect_files`` returns files.

    ``PROJECT_SUMMARY.md`` is expected first, followed by the remaining
    files in the order ``README.md``, ``a.py``, ``b.py``, ``test.py``.
    """
    collector = CodeCollector(project_root)

    files = ["b.py", "a.py", "PROJECT_SUMMARY.md", "src/test.py", "README.md"]
    try:
        # Create test files
        for file in files:
            path = project_root / file
            path.parent.mkdir(exist_ok=True)
            path.write_text("Test content")

        collected = collector.collect_files(str(project_root))

        # PROJECT_SUMMARY.md should be first
        assert collected[0].name == "PROJECT_SUMMARY.md"

        # Other files should be sorted alphabetically
        sorted_names = [f.name for f in collected[1:]]
        assert sorted_names == ["README.md", "a.py", "b.py", "test.py"]
    finally:
        # Clean up regardless of the outcome; tolerate files that were
        # never created because setup failed part-way through.
        for file in files:
            path = project_root / file
            path.unlink(missing_ok=True)
            if path.parent != project_root and path.parent.is_dir():
                path.parent.rmdir()