# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under both the MIT license found in the
# LICENSE-MIT file in the root directory of this source tree and the Apache
# License, Version 2.0 found in the LICENSE-APACHE file in the root directory
# of this source tree.
# pyre-strict
import json
import os
import shutil
import subprocess
from typing import BinaryIO, Callable, List, Optional, Set, Tuple
from apple.tools.re_compatibility_utils.writable import make_path_user_writable
from .macho import Symbol
from .macho_parser import load_commands, load_debug_symbols, load_header
from .spec import Spec
from .utils import MachOException
FAKE_PATH = b"fake/path"
# buck-out/isolation_dir/gen/project_cell/{hash}/....
NUM_OF_COMPONENTS_IN_BUCK2_OUTPUT_PATH_BEFORE_PROJECT_PATH = 5
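# For illustration, the five components counted by the constant above are
# "buck-out", the isolation dir, "gen", the project cell, and the {hash} component;
# the project-relative path starts at component index 5.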
def _always_scrub(_: str) -> bool:
return True
# Visible for testing
def load_focused_targets_output_paths(json_file_path: str) -> Set[str]:
if json_file_path is None or not os.path.exists(json_file_path):
return set()
with open(json_file_path, "r") as f:
content = f.read()
if not content:
return set()
data = json.loads(content)
output_paths = set()
for target in data["targets"]:
_, package_and_name = target.split("//")
package, name = package_and_name.split(":")
        # This assumes the output path layout created by buck2; if that
        # layout ever changes, this logic will break.
output_directory = f"{package}/__{name}__"
output_paths.add(output_directory)
return output_paths
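# Illustrative input/output for load_focused_targets_output_paths; the cell, package
# and target names are hypothetical, only the {"targets": [...]} JSON shape is assumed
# by the parsing above:
#   {"targets": ["some_cell//apps/FooApp:FooAppBinary"]}
#   -> {"apps/FooApp/__FooAppBinary__"}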
# This function converts buck-out/isolation_dir/gen/project_cell/{hash}/X/Y/__name__/libFoo.a
# into X/Y/__name__ so that it matches the focused target output paths created by
# load_focused_targets_output_paths.
# Visible for testing
def _get_target_output_path_from_debug_file_path(
debug_target_path: str,
) -> str:
    # This function assumes the debug file path created by buck2 is in one of the following formats:
# buck-out/isolation_dir/gen/project_cell/{hash}/.../__name__/libFoo.a
# buck-out/isolation_dir/gen/project_cell/{hash}/.../__name__/__objects__/bar.o
# buck-out/isolation_dir/gen/project_cell/{hash}/.../__name__/swift_object_file.o
parts = debug_target_path.split("/")
    # We traverse the components in reverse order because that way we find the
    # target directory sooner. The scrub handler can get called many times, so it's
    # important to keep it as efficient as possible.
i = 1
while i <= len(parts):
        # We are looking for the path component that matches __name__ in the examples
        # above; if we don't explicitly skip __objects__, we'll get the wrong path.
if (
parts[-i].startswith("__")
and parts[-i].endswith("__")
and parts[-i] != "__objects__"
):
break
i += 1
if i > len(parts):
raise Exception(
f"Unrecognized format for debug file path : {debug_target_path}"
)
return "/".join(
parts[NUM_OF_COMPONENTS_IN_BUCK2_OUTPUT_PATH_BEFORE_PROJECT_PATH : -i + 1]
)
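# Illustrative example for _get_target_output_path_from_debug_file_path; every path
# component below is a placeholder rather than real buck2 output:
#   _get_target_output_path_from_debug_file_path(
#       "buck-out/v2/gen/some_cell/0123abcd/apps/FooApp/__FooAppBinary__/__objects__/bar.mm.o"
#   )
#   -> "apps/FooApp/__FooAppBinary__"
# The first five buck-out components, the trailing __objects__ directory, and the
# file name are dropped.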
# Visible for testing
def should_scrub_with_focused_targets_output_paths(
focused_targets_output_paths: Set[str], debug_file_path: str
) -> bool:
    # All paths are scrubbed when no focused target is specified.
if len(focused_targets_output_paths) == 0:
return True
    # debug_file_path usually has the format x/y/z/libFoo.a(bar.m.o)
if "(" in debug_file_path:
debug_target_path = debug_file_path.split("(")[0]
else:
debug_target_path = debug_file_path
if debug_file_path.startswith("buck-out/"):
target_output_path = _get_target_output_path_from_debug_file_path(
debug_target_path
)
return target_output_path not in focused_targets_output_paths
else:
        # Occasionally the archive file can come directly from source.
(package, name) = os.path.split(debug_file_path)
while package != "":
if f"{package}/__{name}__" in focused_targets_output_paths:
return False
(package, name) = os.path.split(package)
return True
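# Illustrative decisions for should_scrub_with_focused_targets_output_paths, assuming a
# hypothetical focused set {"apps/FooApp/__FooAppBinary__"} and placeholder buck2 paths:
#   "buck-out/v2/gen/some_cell/0123abcd/apps/FooApp/__FooAppBinary__/libFoo.a(bar.m.o)"
#       -> False (focused, keep)
#   "buck-out/v2/gen/some_cell/0123abcd/apps/Other/__OtherBinary__/libOther.a"
#       -> True (not focused, scrub)
#   With an empty focused set, every path returns True (scrub everything).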
def _should_scrub_with_targets_file(
    json_file_path: str, additional_labels: Set[str]
) -> Callable[[str], bool]:
    # Compute the combined set once up front; the returned handler can be called many times.
    focused_targets_output_paths = load_focused_targets_output_paths(
        json_file_path
    ).union(additional_labels)
    return lambda debug_file_path: should_scrub_with_focused_targets_output_paths(
        focused_targets_output_paths, debug_file_path
    )
def _should_scrub_with_spec_file(
json_file_path: str, additional_labels: Set[str]
) -> Callable[[str], bool]:
spec = Spec(json_file_path)
return lambda debug_file_path: should_scrub_with_focused_targets_output_paths(
additional_labels, debug_file_path
) and spec.scrub_debug_file_path(debug_file_path)
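# Note: with a spec file, a debug path is scrubbed only when it is not covered by the
# persisted focused targets (additional_labels) and the spec also marks it for
# scrubbing, so either source can keep a path from being rewritten.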
def _scrub(
f: BinaryIO,
strtab_offset: int,
symbols: List[Symbol],
scrub_handler: Callable[[str], bool],
) -> List[Tuple[str, str]]:
"""
Return a list of tuples.
Each tuple contains a pair of the original path and the rewritten path
"""
results = []
for symbol in symbols:
f.seek(strtab_offset)
f.seek(symbol.strtab_index, 1)
        # Read one byte at a time until we hit the NUL byte (0x00) that
        # terminates the path.
start = end = f.tell()
path = b""
b = f.read(1)
while b != b"\x00":
path += b
end += 1
b = f.read(1)
str_len = end - start
path_str = path.decode()
if scrub_handler(path_str):
f.seek(start)
            # We don't want to modify the length of the string table entry, so pad the
            # replacement path with spaces (and truncate it if the original path is
            # shorter than FAKE_PATH).
            buffer = FAKE_PATH[:str_len] + b" " * max(0, str_len - len(FAKE_PATH))
f.write(buffer)
results.append((path_str, buffer.decode()))
else:
results.append((path_str, path_str))
return results
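# Illustrative shape of the list returned by _scrub (placeholder paths). A scrubbed
# entry maps the original path to FAKE_PATH padded with spaces to the original length;
# an entry that is kept maps the path to itself:
#   [
#       ("buck-out/.../apps/Other/__OtherBinary__/libOther.a(bar.m.o)",
#        "fake/path" + padding),
#       ("buck-out/.../apps/FooApp/__FooAppBinary__/libFoo.a(bar.m.o)",
#        "buck-out/.../apps/FooApp/__FooAppBinary__/libFoo.a(bar.m.o)"),
#   ]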
def scrub(
input_file: str,
output_file: str,
persisted_targets_file: str,
targets_file: Optional[str] = None,
spec_file: Optional[str] = None,
adhoc_codesign_tool: Optional[str] = None,
) -> List[Tuple[str, str]]:
additional_labels = load_focused_targets_output_paths(persisted_targets_file)
if targets_file and spec_file:
raise Exception(
"Only one of a targets file or spec file is supported, not both!"
)
elif targets_file:
scrub_handler = _should_scrub_with_targets_file(targets_file, additional_labels)
elif spec_file:
scrub_handler = _should_scrub_with_spec_file(spec_file, additional_labels)
else:
scrub_handler = _always_scrub
shutil.copy2(input_file, output_file)
# Make it RE-compatible
make_path_user_writable(output_file)
results = []
with open(output_file, "r+b") as f:
header, offset = load_header(f, 0)
if not header.is_valid:
raise MachOException("Invalid macho format!")
lc_linkedit, lc_symtab = load_commands(f, offset, header.n_cmds)
if lc_linkedit is None:
return []
if lc_symtab is None:
raise MachOException("LC_SYMTAB command not found")
if lc_symtab.strtab_size == 0 or lc_symtab.n_symbols == 0:
return []
if lc_linkedit.file_size == 0:
raise MachOException("LC_SEGMENT_64 command for string table not found")
f.seek(lc_symtab.strtab_offset)
"""
ld64 deliberately burns the first byte with the space character, so that zero is never a
valid string index and writes 0x00 at offset 1, so that it's always the empty string.
The code for this in ld64 is in LinkEditClassic.hpp (StringPoolAtom::StringPoolAtom).
"""
if f.read(1) != b"\x20":
raise MachOException("First character in the string table is not a space")
if f.read(1) != b"\x00":
raise MachOException("Second character in the string table is not a NUL")
symbols = load_debug_symbols(f, lc_symtab.symtab_offset, lc_symtab.n_symbols)
results = _scrub(f, lc_symtab.strtab_offset, symbols, scrub_handler)
if adhoc_codesign_tool:
subprocess.run(
[adhoc_codesign_tool, "--binary", output_file],
check=True,
)
return results
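# Minimal usage sketch for manual testing; the file names below are hypothetical and
# this guard is not part of the module's normal Buck integration.
if __name__ == "__main__":
    example_results = scrub(
        input_file="FooAppBinary",
        output_file="FooAppBinary.scrubbed",
        persisted_targets_file="persisted_targets.json",
        targets_file="focused_targets.json",
    )
    for original_path, rewritten_path in example_results:
        print(f"{original_path} -> {rewritten_path}")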