codebrain_scan_file
Generate or refresh a .brain summary file for a source file by comparing SHA256 hashes, validating output format, and retrying once on failure.
Instructions
Generate or refresh the `<path>.brain` summary file for a source file.
Reads the source at path, computes its SHA256, and compares to the
existing .brain file's source_hash frontmatter. If they match and
force is false, generation is skipped. Otherwise Qwen produces a new
brain file (Purpose / Key exports / Collaborators / Gotchas /
Conventions), the output is validated against the format spec, and on
validation failure one retry with a sharper instruction is attempted
before giving up. No partial or broken brain files are ever written.
Format spec: .spec/brain-file-format.md.
Args: path: Path to the source file to summarise. force: If true, regenerate even when the hash matches.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| path | Yes | Path to the source file to summarise. | |
| force | No | If true, regenerate even when the hash matches. | false |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| result | Yes | Human-readable status line starting with `skipped`, `generated`, or `[codebrain error]`. | |
Implementation Reference
- codebrain/server.py:163-181 (registration)MCP tool registration decorator for 'codebrain_scan_file'. Defines the tool's name, docstring/schema, and delegates to brain_scanner.scan_file().
@mcp.tool()
async def codebrain_scan_file(path: str, force: bool = False) -> str:
    """Generate or refresh the `<path>.brain` summary file for a source file.

    Reads the source at `path`, computes its SHA256, and compares to the
    existing `.brain` file's `source_hash` frontmatter. If they match and
    `force` is false, generation is skipped. Otherwise Qwen produces a new
    brain file (Purpose / Key exports / Collaborators / Gotchas /
    Conventions), the output is validated against the format spec, and on
    validation failure one retry with a sharper instruction is attempted
    before giving up. No partial or broken brain files are ever written.

    Format spec: `.spec/brain-file-format.md`.

    Args:
        path: Path to the source file to summarise.
        force: If true, regenerate even when the hash matches.
    """
    # Thin MCP adapter: this docstring is surfaced as the tool description;
    # all hashing/generation/validation lives in brain_scanner.scan_file().
    return await brain_scanner.scan_file(path, force=force)
- codebrain/server.py:163-181 (handler)The async handler function for the tool. Takes 'path' (str) and optional 'force' (bool), returns a status string. Delegates to brain_scanner.scan_file().
@mcp.tool()
async def codebrain_scan_file(path: str, force: bool = False) -> str:
    """Generate or refresh the `<path>.brain` summary file for a source file.

    Reads the source at `path`, computes its SHA256, and compares to the
    existing `.brain` file's `source_hash` frontmatter. If they match and
    `force` is false, generation is skipped. Otherwise Qwen produces a new
    brain file (Purpose / Key exports / Collaborators / Gotchas /
    Conventions), the output is validated against the format spec, and on
    validation failure one retry with a sharper instruction is attempted
    before giving up. No partial or broken brain files are ever written.

    Format spec: `.spec/brain-file-format.md`.

    Args:
        path: Path to the source file to summarise.
        force: If true, regenerate even when the hash matches.
    """
    # Pure delegation: the returned string is scan_file()'s status line
    # ("skipped ...", "generated: ...", or "[codebrain error] ...").
    return await brain_scanner.scan_file(path, force=force)
- codebrain/brain_scanner.py:249-323 (handler)Core scan_file() implementation: reads source file, computes SHA256 hash, checks existing .brain file (skip-gate logic), calls AI model via chat(), validates output with retry, assembles and writes the .brain frontmatter + sections.
async def scan_file( path: str, force: bool = False, model: str | None = None, ) -> str: """Generate or refresh the `.brain` file for `path`. Returns a human-readable status line starting with `skipped`, `generated`, or `[codebrain error]`. """ source = Path(path) try: source_bytes = source.read_bytes() except FileNotFoundError: return f"[codebrain error] source file not found: {path}" except OSError as exc: return f"[codebrain error] cannot read source {path}: {exc}" if len(source_bytes.strip()) < MIN_SOURCE_CHARS: return f"skipped (source too small): {path}" source_hash = compute_source_hash(source_bytes) brain_path = source.with_name(source.name + ".brain") if not force: existing = parse_existing_brain(brain_path) if existing is not None and existing.get("source_hash") == source_hash: return f"skipped (unchanged): {brain_path}" if existing is not None: existing_model = str(existing.get("model") or "") if existing_model and not existing_model.lower().startswith("qwen"): return f"skipped (foreign-model brain preserved, model={existing_model!r}): {brain_path}" try: source_content = source_bytes.decode("utf-8") except UnicodeDecodeError: return f"[codebrain error] source is not UTF-8 text: {path}" mtime_dt = dt.datetime.fromtimestamp(source.stat().st_mtime, tz=dt.timezone.utc) source_mtime = mtime_dt.strftime("%Y-%m-%dT%H:%M:%SZ") used_model = model or DEFAULT_MODEL display_path = resolve_display_path(source) few_shot = _load_few_shot() system = build_system_prompt(few_shot) user_prompt = build_user_prompt(display_path, source_content) try: output = await chat(user_prompt, system=system) except BackendError as exc: return f"[codebrain error] {exc}" output = strip_wrapper_fences(output) ok, reason = validate_sections(output) if not ok: retry_prompt = user_prompt + "\n\n" + RETRY_INSTRUCTION.format(reason=reason) try: output = await chat(retry_prompt, system=system) except BackendError as exc: return f"[codebrain error] {exc}" output = strip_wrapper_fences(output) ok, 
reason = validate_sections(output) if not ok: return f"[codebrain error] validation failed after retry: {reason}" frontmatter = { "source": display_path, "source_hash": source_hash, "source_mtime": source_mtime, "model": used_model, "generated_at": dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), } brain_content = assemble_brain_file(output, frontmatter) brain_path.write_text(brain_content, encoding="utf-8") return f"generated: {brain_path}" - codebrain/brain_scanner.py:120-165 (helper)validate_brain_output() — validates frontmatter completeness, section presence/order/non-emptiness, and optional value matching. Used to validate AI output.
def validate_brain_output( text: str, expected: dict | None = None ) -> tuple[bool, str]: """Check frontmatter completeness, section presence/order/non-emptiness. When `expected` is given, also verify that `source`, `source_hash` and `model` match — Qwen sometimes ignores the "use these values exactly" instruction and substitutes placeholders. """ m = FRONTMATTER_RE.match(text) if not m: return False, "missing or malformed frontmatter block" try: fm = yaml.safe_load(m.group(1)) except yaml.YAMLError as exc: return False, f"frontmatter YAML parse error: {exc}" if not isinstance(fm, dict): return False, "frontmatter is not a YAML mapping" missing = REQUIRED_FRONTMATTER_KEYS - set(fm.keys()) if missing: return False, f"frontmatter missing required keys: {sorted(missing)}" if expected is not None: for key in VALUE_CHECKED_FRONTMATTER_KEYS: if fm.get(key) != expected.get(key): return False, ( f"frontmatter {key!r} mismatch: got {fm.get(key)!r}, " f"expected {expected.get(key)!r}" ) positions: list[int] = [] cursor = m.end() for header in SECTION_HEADERS: idx = text.find(header, cursor) if idx == -1: return False, f"missing or out-of-order section {header!r}" positions.append(idx) cursor = idx + len(header) for i, header in enumerate(SECTION_HEADERS): start = positions[i] + len(header) end = positions[i + 1] if i + 1 < len(positions) else len(text) body = text[start:end].strip() if not body: return False, f"section {header!r} is empty" return True, "" - codebrain/brain_scanner.py:209-217 (helper)assemble_brain_file() — prepends YAML frontmatter to sections body to produce the final .brain file content.
def assemble_brain_file(sections: str, frontmatter: dict) -> str:
    """Join a YAML frontmatter block and the sections body into one brain file.

    Frontmatter keys keep their insertion order (`sort_keys=False`); the
    body is stripped of leading whitespace and guaranteed to end in a newline.
    """
    header = yaml.safe_dump(
        frontmatter, sort_keys=False, allow_unicode=True
    ).strip()
    body = sections.lstrip()
    terminator = "" if body.endswith("\n") else "\n"
    return f"---\n{header}\n---\n\n{body}{terminator}"