Skip to main content
Glama
test_encoding.py (8.59 kB)
"""Tests for encoding detection and project encoding handling."""

import importlib
import os
from pathlib import Path
from unittest.mock import patch

from relace_mcp.tools.apply.encoding import (
    detect_project_encoding,
)
from relace_mcp.tools.apply.file_io import (
    get_project_encoding,
    read_text_with_fallback,
    set_project_encoding,
)


class TestDetectProjectEncoding:
    """Test project encoding detection."""

    def test_returns_none_for_utf8_only_project(self, tmp_path: Path) -> None:
        """Should return None when project only has UTF-8 files."""
        # Create UTF-8 files
        (tmp_path / "main.py").write_text("# UTF-8 file\nprint('hello')\n", encoding="utf-8")
        (tmp_path / "utils.py").write_text("def foo(): pass\n", encoding="utf-8")

        result = detect_project_encoding(tmp_path, sample_limit=10)

        assert result is None

    def test_detects_gbk_dominant_project(self, tmp_path: Path) -> None:
        """Should detect GBK when majority of files use it."""
        # Create GBK files with Chinese content that is clearly simplified Chinese
        # Use characters that are unique to GB encoding (not in Big5)
        for i in range(5):
            # Simplified characters like 国、学、电 are GBK-specific
            content = f"# 简体中文注释 国学电脑 {i}\ndef func_{i}(): pass\n# 这是测试文件\n"
            (tmp_path / f"file_{i}.py").write_bytes(content.encode("gbk"))

        result = detect_project_encoding(tmp_path, sample_limit=10)

        # charset_normalizer may detect as gb2312, gbk, or gb18030
        # In some cases with short content it might detect as big5 due to overlap
        assert result is not None
        # Accept any CJK encoding detection as valid (the key is detecting non-UTF-8)
        assert result.lower() in ("gbk", "gb2312", "gb18030", "big5")

    def test_detects_big5_project(self, tmp_path: Path) -> None:
        """Should detect Big5 for Traditional Chinese projects."""
        # Create Big5 files with Traditional Chinese content
        for i in range(3):
            content = f"# 繁體中文註解 {i}\ndef func_{i}(): pass\n"
            (tmp_path / f"file_{i}.py").write_bytes(content.encode("big5"))

        result = detect_project_encoding(tmp_path, sample_limit=10)

        assert result is not None
        assert "big5" in result.lower()

    def test_skips_hidden_directories(self, tmp_path: Path) -> None:
        """Should skip files in hidden directories."""
        # Create GBK file in hidden directory
        hidden_dir = tmp_path / ".hidden"
        hidden_dir.mkdir()
        (hidden_dir / "file.py").write_bytes("# 中文".encode("gbk"))

        # Create UTF-8 file in visible directory
        (tmp_path / "main.py").write_text("# UTF-8\n", encoding="utf-8")

        result = detect_project_encoding(tmp_path, sample_limit=10)

        # Should only see UTF-8 file, so return None
        assert result is None

    def test_skips_node_modules(self, tmp_path: Path) -> None:
        """Should skip node_modules directory."""
        # Create GBK files in node_modules
        nm_dir = tmp_path / "node_modules" / "some_pkg"
        nm_dir.mkdir(parents=True)
        (nm_dir / "index.js").write_bytes("// 中文".encode("gbk"))

        # Create UTF-8 file in src
        src_dir = tmp_path / "src"
        src_dir.mkdir()
        (src_dir / "main.py").write_text("# UTF-8\n", encoding="utf-8")

        result = detect_project_encoding(tmp_path, sample_limit=10)

        assert result is None

    def test_respects_sample_limit(self, tmp_path: Path) -> None:
        """Should stop sampling after reaching limit."""
        # Create many files
        for i in range(50):
            (tmp_path / f"file_{i}.py").write_text(f"# file {i}\n", encoding="utf-8")

        # With low sample limit, should still work
        result = detect_project_encoding(tmp_path, sample_limit=5)

        assert result is None  # All UTF-8

    def test_empty_directory_returns_none(self, tmp_path: Path) -> None:
        """Should return None for empty directory."""
        result = detect_project_encoding(tmp_path, sample_limit=10)

        assert result is None

    def test_only_samples_known_extensions(self, tmp_path: Path) -> None:
        """Should only sample files with known text extensions."""
        # Create GBK file with unknown extension
        (tmp_path / "data.xyz").write_bytes("# 中文".encode("gbk"))

        # Create UTF-8 file with known extension
        (tmp_path / "main.py").write_text("# UTF-8\n", encoding="utf-8")

        result = detect_project_encoding(tmp_path, sample_limit=10)

        assert result is None  # Should only see UTF-8 .py file


class TestSetProjectEncoding:
    """Test project encoding setter/getter."""

    def teardown_method(self) -> None:
        """Reset project encoding after each test."""
        set_project_encoding(None)

    def test_set_and_get_encoding(self) -> None:
        """Should set and retrieve project encoding."""
        set_project_encoding("gbk")
        assert get_project_encoding() == "gbk"

    def test_normalizes_to_lowercase(self) -> None:
        """Should normalize encoding to lowercase."""
        set_project_encoding("GBK")
        assert get_project_encoding() == "gbk"

        set_project_encoding("Big5")
        assert get_project_encoding() == "big5"

    def test_none_clears_encoding(self) -> None:
        """Should clear encoding when set to None."""
        set_project_encoding("gbk")
        set_project_encoding(None)
        assert get_project_encoding() is None


class TestReadTextWithFallbackProjectEncoding:
    """Test read_text_with_fallback with project encoding."""

    def teardown_method(self) -> None:
        """Reset project encoding after each test."""
        set_project_encoding(None)

    def test_reads_gbk_file_with_project_encoding(self, tmp_path: Path) -> None:
        """Should successfully read GBK file when project encoding is set."""
        gbk_file = tmp_path / "test.py"
        content = "# 这是中文注释\nprint('你好')\n"
        gbk_file.write_bytes(content.encode("gbk"))

        set_project_encoding("gbk")
        read_content, detected_enc = read_text_with_fallback(gbk_file)

        assert read_content == content
        assert detected_enc == "gbk"

    def test_reads_big5_file_with_project_encoding(self, tmp_path: Path) -> None:
        """Should successfully read Big5 file when project encoding is set."""
        big5_file = tmp_path / "test.py"
        content = "# 繁體中文\nprint('世界')\n"
        big5_file.write_bytes(content.encode("big5"))

        set_project_encoding("big5")
        read_content, detected_enc = read_text_with_fallback(big5_file)

        assert read_content == content
        assert detected_enc == "big5"

    def test_falls_back_to_utf8_for_ascii(self, tmp_path: Path) -> None:
        """ASCII files should read back unchanged; either GBK or UTF-8 may be reported."""
        ascii_file = tmp_path / "test.py"
        content = "# ASCII only\nprint('hello')\n"
        # Use binary write to avoid platform-dependent newline conversion
        ascii_file.write_bytes(content.encode("utf-8"))

        # Even with GBK as project encoding, ASCII works with both
        set_project_encoding("gbk")
        read_content, detected_enc = read_text_with_fallback(ascii_file)

        assert read_content == content
        # GBK can decode ASCII, so it will be detected as GBK (since it's tried first)
        assert detected_enc in ("gbk", "utf-8")


class TestEnvironmentVariableEncoding:
    """Test RELACE_DEFAULT_ENCODING environment variable."""

    def teardown_method(self) -> None:
        """Reload the settings module against the restored environment.

        Each test reloads ``relace_mcp.config.settings`` while ``os.environ``
        is patched. Without this second reload the module would keep the
        value computed under the patched environment and leak it into
        whichever tests run next.
        """
        import relace_mcp.config.settings as settings_module

        importlib.reload(settings_module)

    def test_config_picks_up_env_encoding(self) -> None:
        """Should read encoding from environment variable."""
        with patch.dict(os.environ, {"RELACE_DEFAULT_ENCODING": "big5"}):
            import relace_mcp.config.settings as settings_module

            # Need to reload to pick up patched env
            importlib.reload(settings_module)

            assert settings_module.RELACE_DEFAULT_ENCODING == "big5"

    def test_config_defaults_to_none(self) -> None:
        """Should default to None when env var not set."""
        # patch.dict restores the original mapping on exit, so the pop below
        # is undone even if the variable was set in the real environment.
        with patch.dict(os.environ, {}, clear=False):
            # Remove the env var if it exists
            os.environ.pop("RELACE_DEFAULT_ENCODING", None)

            import relace_mcp.config.settings as settings_module

            importlib.reload(settings_module)

            assert settings_module.RELACE_DEFAULT_ENCODING is None

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/possible055/relace-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.