import logging
import os
import uuid
from pathlib import Path
from charset_normalizer import from_bytes
from .exceptions import EncodingDetectionError
logger = logging.getLogger(__name__)
# Module-level state for project encoding (set by server at startup)
_project_encoding: str | None = None
# Encodings that are effectively UTF-8 for our purposes
_UTF8_COMPATIBLE = frozenset({"utf-8", "utf-8-sig", "ascii", "us-ascii"})
def set_project_encoding(encoding: str | None) -> None:
"""Set the project-level default encoding.
Called by server during initialization after encoding detection.
Args:
encoding: The detected or configured project encoding (e.g., "gbk").
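
    Example (illustrative; the stored value is lowercased):
        >>> set_project_encoding("GBK")
        >>> get_project_encoding()
        'gbk'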
"""
global _project_encoding
_project_encoding = encoding.lower() if encoding else None
if _project_encoding:
logger.info("Project encoding set to: %s", _project_encoding)
def get_project_encoding() -> str | None:
"""Get the current project-level encoding."""
return _project_encoding
def _looks_like_binary(data: bytes, sample_size: int = 8192) -> bool:
"""Check if data looks like binary (non-text) content.
Uses null byte detection and high ratio of non-printable characters.
Args:
data: Raw bytes to check.
sample_size: Number of bytes to sample.
Returns:
True if data appears to be binary.
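
    Example (illustrative; this is a heuristic, not a guarantee):
        >>> _looks_like_binary(bytes([0, 1, 2]))
        True
        >>> _looks_like_binary("plain text".encode("utf-8"))
        False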
"""
if not data:
return False
sample = data[:sample_size]
# Null bytes are a strong indicator of binary
if b"\x00" in sample:
return True
    # Check the ratio of non-text bytes. Control characters common in text are
    # allowed: tab (0x09) through carriage return (0x0d), plus ESC (0x1b) for
    # ANSI sequences; bytes >= 0x20 (including high bytes) are allowed so that
    # multibyte encodings such as GBK, Big5, and UTF-8 are not misclassified.
    non_text_count = 0
    for byte in sample:
        if byte < 0x09 or (0x0E <= byte < 0x20 and byte != 0x1B):
            non_text_count += 1
    # More than 30% non-text bytes strongly suggests binary content.
    return non_text_count / len(sample) > 0.30
def _detect_declared_encoding(path: Path, raw: bytes) -> str | None:
"""Detect a file-declared encoding (when available).
For Python files, prefer the PEP 263 coding cookie / BOM via tokenize.detect_encoding.
Returns None when no declaration is found or detection fails.
"""
if path.suffix.lower() not in {".py", ".pyi"}:
return None
# tokenize.detect_encoding is the canonical implementation for Python source encoding.
# It handles UTF-8 BOM and coding cookies on the first two lines.
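    # For example, raw bytes beginning with b"# -*- coding: gbk -*-\n" yield
    # "gbk", while a cookie-less, BOM-less file yields the default "utf-8".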
try:
import io
import tokenize
encoding, _ = tokenize.detect_encoding(io.BytesIO(raw).readline)
return encoding.lower() if encoding else None
except Exception:
return None
def decode_text_with_fallback(
raw: bytes,
*,
path: Path | None = None,
preferred_encoding: str | None = None,
min_coherence: float = 0.5,
) -> tuple[str, str]:
"""Decode bytes into text with best-effort encoding detection.
Strategy (ordered):
1) Reject likely-binary files
2) Honor declared encoding for Python source (PEP 263)
3) UTF-8 (common default)
    4) charset_normalizer detection (distinguishes GBK/Big5/etc. more reliably
       than naive try-each-codec fallbacks)
5) Preferred encoding as last resort (if set)
Returns:
(decoded_text, encoding_name_lowercase)
Raises:
EncodingDetectionError: When decoding cannot be performed confidently.
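
    Example (illustrative; assumes the input bytes really are GBK):
        >>> text, enc = decode_text_with_fallback(
        ...     "配置".encode("gbk"), preferred_encoding="gbk")
        >>> enc
        'gbk'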
"""
if _looks_like_binary(raw):
raise EncodingDetectionError(str(path) if path else "<bytes>")
preferred = preferred_encoding.lower() if preferred_encoding else None
declared = _detect_declared_encoding(path, raw) if path else None
if declared and declared not in _UTF8_COMPATIBLE:
try:
return raw.decode(declared), declared
except (UnicodeDecodeError, LookupError):
# Fall through to other strategies
pass
# Fast path: UTF-8
try:
text = raw.decode("utf-8")
        # If a project encoding is configured and the file is ASCII-only, report
        # the project encoding so newly written non-ASCII content stays consistent.
        # This assumes the project encoding is ASCII-compatible (true for GBK,
        # Big5, Shift_JIS and similar; not for UTF-16).
if preferred and preferred not in _UTF8_COMPATIBLE and text.isascii():
return text, preferred
return text, "utf-8"
except UnicodeDecodeError:
pass
    def encoding_family(enc: str) -> str:
        """Coarsely group encodings so GBK/GB2312/GB18030 and Big5/CP950 match as families."""
        e = enc.lower()
        if e.startswith("gb") or e in {"hz-gb-2312", "hz"}:
            return "gb"
        if "big5" in e or e == "cp950":
            return "big5"
        return "other"
preferred_text: str | None = None
if preferred and preferred not in _UTF8_COMPATIBLE:
try:
preferred_text = raw.decode(preferred)
except (UnicodeDecodeError, LookupError):
preferred_text = None
# Robust path: charset_normalizer (helps distinguish GBK vs Big5 without naive fallbacks)
result = from_bytes(raw)
best = result.best()
if best is not None and best.encoding:
best_enc = best.encoding.lower()
best_ok = best.coherence >= min_coherence
if preferred_text is not None:
            # Type narrowing: preferred_text is not None implies preferred is not
            # None (it is only assigned under the `preferred and preferred not in
            # _UTF8_COMPATIBLE` check above).
            assert preferred is not None  # nosec B101 - type narrowing only
            # When a project encoding is explicitly configured/detected, prefer it
            # unless charset_normalizer is confident (coherence >= max(min_coherence,
            # 0.5)) that the bytes belong to a *different* encoding family.
if encoding_family(preferred) == encoding_family(best_enc):
return preferred_text, preferred
override_ok = best.coherence >= max(min_coherence, 0.5)
if best_ok and override_ok:
return str(best), best_enc
return preferred_text, preferred
if best_ok:
return str(best), best_enc
# Last resort: preferred encoding (if configured/detected)
if preferred_text is not None:
assert preferred is not None # nosec B101 - type narrowing only
return preferred_text, preferred
raise EncodingDetectionError(str(path) if path else "<bytes>")
def decode_text_best_effort(
raw: bytes,
*,
path: Path | None = None,
preferred_encoding: str | None = None,
errors: str = "replace",
) -> str | None:
"""Decode bytes into text for display/search/sync.
Returns None for likely-binary content. Never raises due to decoding.
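
    Example (illustrative):
        >>> decode_text_best_effort(bytes([0, 0, 0]))  # binary -> None
        >>> decode_text_best_effort("café".encode("utf-8"))
        'café'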
"""
if _looks_like_binary(raw):
return None
try:
text, _ = decode_text_with_fallback(
raw,
path=path,
preferred_encoding=preferred_encoding,
min_coherence=0.2,
)
return text
except EncodingDetectionError:
# As a last resort, return a lossy UTF-8 decode for robustness.
return raw.decode("utf-8", errors=errors)
def read_text_with_fallback(path: Path) -> tuple[str, str]:
"""Read text file with automatic encoding detection.
Strategy:
1) Reject likely-binary files (fast heuristic)
2) Honor Python source declarations (PEP 263)
3) Try UTF-8
4) Use charset_normalizer detection for legacy encodings (GBK/Big5/etc)
5) Prefer configured project encoding when appropriate
Args:
path: File path.
Returns:
(content, encoding) tuple.
Raises:
EncodingDetectionError: If encoding cannot be detected or file is not text.
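
    Example (illustrative; hypothetical path):
        content, encoding = read_text_with_fallback(Path("src/app.py"))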
"""
raw = path.read_bytes()
return decode_text_with_fallback(
raw,
path=path,
preferred_encoding=_project_encoding,
# Be tolerant here: binary files are filtered earlier, and short source files
# can yield very low coherence scores even when the encoding is correct.
min_coherence=0.0,
)
def atomic_write(path: Path, content: str, encoding: str) -> None:
"""Atomically write to file (using temp file + os.replace).
Atomic write prevents file corruption if interrupted during write.
Uses unique temp file names to avoid collisions during concurrent writes.
Args:
path: Target file path.
content: Content to write.
encoding: Encoding.
Raises:
OSError: Raised when write fails.
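
    Example (illustrative; pairing with read_text_with_fallback keeps the
    file's original encoding on re-write):
        content, encoding = read_text_with_fallback(path)
        atomic_write(path, content.replace("old", "new"), encoding)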
"""
# Use uuid to generate unique temp file name, avoiding concurrent write collisions
unique_suffix = f".{uuid.uuid4().hex[:8]}.tmp"
temp_path = path.with_suffix(path.suffix + unique_suffix)
try:
# Use open with newline='' to preserve original line endings on all platforms.
# Without this, Windows would convert \n to \r\n in text mode.
with temp_path.open("w", encoding=encoding, newline="") as f:
f.write(content)
        # os.replace is atomic on POSIX; on Windows it also replaces the target
        # in a single call (unlike os.rename, which fails when dst exists)
os.replace(temp_path, path)
except Exception:
# Clean up temp file
temp_path.unlink(missing_ok=True)
raise