#!/usr/bin/env python3
"""
Generate mechanically-verifiable "gold" env var usage from Python sources.
This extractor intentionally supports multiple patterns:
- os.getenv("FOO")
- os.environ.get("FOO") / os.environ["FOO"]
- <envdict>.get("FOO") / <envdict>["FOO"] (default envdict names: env, environ)
Usage:
uv run python bench/mech/envvar_gold.py \
--globs "rlm_mcp_server/**/*.py" \
--out /tmp/gold_envvars.json
"""
from __future__ import annotations
import argparse
import ast
import glob
import json
import os
import re
from dataclasses import dataclass, asdict
from typing import Any, Iterable, List, Optional, Set, Tuple
# Accepted variable names: one uppercase ASCII letter followed by at least two
# more uppercase letters, digits, or underscores (three characters minimum).
# The end is anchored with \Z rather than $ because $ also matches just before
# a trailing newline, which would let a literal such as "FOO\n" pass .match().
ENVVAR_RE = re.compile(r"^[A-Z][A-Z0-9_]{2,}\Z")
@dataclass(frozen=True)
class Occurrence:
    """A single literal environment-variable reference found in a source file."""

    # Name of the environment variable (the string literal that was matched).
    var: str
    # Path of the file the reference was found in, as passed to the scanner.
    path: str
    # Line number of the matched AST node.
    lineno: int
    # Column offset of the matched AST node.
    col: int
    # Access pattern, one of:
    # getenv | environ_get | environ_subscript | envdict_get | envdict_subscript
    kind: str
def _iter_py_files(globs_: List[str]) -> List[str]:
    """Expand glob patterns into a sorted, de-duplicated list of ``.py`` files.

    Args:
        globs_: Glob patterns; ``**`` is expanded recursively.

    Returns:
        Sorted paths of regular files whose name ends with ``.py``.
    """
    found: Set[str] = set()
    for pattern in globs_:
        # A set absorbs files matched by more than one pattern.
        found.update(
            hit
            for hit in glob.glob(pattern, recursive=True)
            if hit.endswith(".py") and os.path.isfile(hit)
        )
    return sorted(found)
def _get_str(node: ast.AST) -> Optional[str]:
    """Return the value of *node* if it is a string constant, otherwise None."""
    if not isinstance(node, ast.Constant):
        return None
    literal = node.value
    # Constants also carry numbers, bytes, None, etc.; only str qualifies.
    return literal if isinstance(literal, str) else None
def _is_os_name(node: ast.AST) -> bool:
    """Return True when *node* is the bare identifier ``os``."""
    if not isinstance(node, ast.Name):
        return False
    return node.id == "os"
def _is_os_environ_attr(node: ast.AST) -> bool:
    """Return True when *node* is the attribute access ``os.environ``."""
    if not isinstance(node, ast.Attribute):
        return False
    if node.attr != "environ":
        return False
    # The object the attribute is read from must be the plain name ``os``.
    return _is_os_name(node.value)
def _is_attr_call(node: ast.AST, attr: str) -> bool:
    """Return True when *node* is a call of the form ``<expr>.<attr>(...)``."""
    if not isinstance(node, ast.Call):
        return False
    callee = node.func
    return isinstance(callee, ast.Attribute) and callee.attr == attr
def _match_envdict_get(call: ast.Call, envdict_names: Set[str]) -> Optional[Tuple[str, str]]:
    """Match ``env.get("FOO")`` where ``env`` is one of *envdict_names*.

    Args:
        call: The call node to inspect.
        envdict_names: Identifiers treated as environment mappings.

    Returns:
        ``("envdict_get", var)`` when the first positional argument is a
        string literal accepted by ``ENVVAR_RE``; otherwise None.
    """
    target = call.func
    if (
        isinstance(target, ast.Attribute)
        and target.attr == "get"
        and isinstance(target.value, ast.Name)
        and target.value.id in envdict_names
        and call.args
    ):
        name = _get_str(call.args[0])
        if name and ENVVAR_RE.match(name):
            return ("envdict_get", name)
    return None
def _match_os_getenv(call: ast.Call) -> Optional[Tuple[str, str]]:
    """Match ``os.getenv("FOO")`` and the keyword form ``os.getenv(key="FOO")``.

    Args:
        call: The call node to inspect.

    Returns:
        ``("getenv", var)`` when the variable name is a string literal
        accepted by ``ENVVAR_RE``; otherwise None.
    """
    func = call.func
    if not (
        isinstance(func, ast.Attribute)
        and func.attr == "getenv"
        and _is_os_name(func.value)
    ):
        return None
    if call.args:
        key_node = call.args[0]
    else:
        # os.getenv's first parameter is named ``key``, so the name may also
        # be passed by keyword; without this the keyword form is missed.
        key_node = next((kw.value for kw in call.keywords if kw.arg == "key"), None)
        if key_node is None:
            return None
    var = _get_str(key_node)
    if var and ENVVAR_RE.match(var):
        return ("getenv", var)
    return None
def _match_os_environ_get(call: ast.Call) -> Optional[Tuple[str, str]]:
    """Match ``os.environ.get("FOO")``.

    Args:
        call: The call node to inspect.

    Returns:
        ``("environ_get", var)`` when the first positional argument is a
        string literal accepted by ``ENVVAR_RE``; otherwise None.
    """
    callee = call.func
    looks_like_get = isinstance(callee, ast.Attribute) and callee.attr == "get"
    # callee.value is only read once callee is known to be an Attribute.
    if not looks_like_get or not _is_os_environ_attr(callee.value):
        return None
    if not call.args:
        return None
    name = _get_str(call.args[0])
    if name and ENVVAR_RE.match(name):
        return ("environ_get", name)
    return None
def _match_subscript(node: ast.Subscript, envdict_names: Set[str]) -> Optional[Tuple[str, str]]:
    """Match ``env["FOO"]`` or ``os.environ["FOO"]``.

    Args:
        node: The subscript node to inspect.
        envdict_names: Identifiers treated as environment mappings.

    Returns:
        ``("envdict_subscript", var)`` or ``("environ_subscript", var)`` when
        the key is a string literal accepted by ``ENVVAR_RE``; otherwise None.
    """
    # On Python 3.9+ the slice is the key expression itself.
    name = _get_str(node.slice)
    if not name or not ENVVAR_RE.match(name):
        return None
    container = node.value
    if isinstance(container, ast.Name) and container.id in envdict_names:
        kind = "envdict_subscript"
    elif _is_os_environ_attr(container):
        kind = "environ_subscript"
    else:
        return None
    return (kind, name)
def _scan_file(path: str, envdict_names: Set[str]) -> List[Occurrence]:
    """Parse one Python file and collect every recognised env var access.

    Args:
        path: File to read (decoded as UTF-8) and parse.
        envdict_names: Identifiers treated as environment mappings.

    Returns:
        One ``Occurrence`` per matching call or subscript node, in
        ``ast.walk`` order.

    Raises:
        OSError, UnicodeDecodeError, SyntaxError: Propagated unchanged from
            reading or parsing the file.
    """
    # A context manager closes the handle promptly instead of relying on
    # garbage collection.
    with open(path, "r", encoding="utf-8") as fh:
        txt = fh.read()
    tree = ast.parse(txt, filename=path)

    occ: List[Occurrence] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            # The os.* matchers are tried first; the envdict matcher is the
            # fallback. Each returns a non-empty tuple or None.
            m = (
                _match_os_getenv(node)
                or _match_os_environ_get(node)
                or _match_envdict_get(node, envdict_names)
            )
        elif isinstance(node, ast.Subscript):
            m = _match_subscript(node, envdict_names)
        else:
            continue
        if m:
            kind, var = m
            occ.append(
                Occurrence(var=var, path=path, lineno=node.lineno, col=node.col_offset, kind=kind)
            )
    return occ
def main() -> None:
    """Parse CLI options, scan the matched files, and write the JSON report.

    Options:
        --globs: One or more glob patterns selecting files to scan (required).
        --out: Path of the JSON file to write (required).
        --envdict-names: Identifiers treated as environment mappings;
            defaults to ``env`` and ``environ``.

    The report contains the sorted unique variable names, every occurrence
    ordered by (var, path, lineno, col, kind), and scan statistics.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--globs", nargs="+", required=True)
    parser.add_argument("--out", required=True)
    parser.add_argument("--envdict-names", nargs="*", default=["env", "environ"])
    opts = parser.parse_args()

    mapping_names = set(opts.envdict_names)
    sources = _iter_py_files(opts.globs)
    found = [hit for src in sources for hit in _scan_file(src, mapping_names)]

    def _order(hit):
        # Fixed ordering so the report is reproducible across runs.
        return (hit.var, hit.path, hit.lineno, hit.col, hit.kind)

    unique_vars = sorted({hit.var for hit in found})
    report: dict[str, Any] = {
        "env_vars": unique_vars,
        "occurrences": [asdict(hit) for hit in sorted(found, key=_order)],
        "stats": {"files_scanned": len(sources), "occurrences": len(found)},
    }

    # dirname is "" for a bare filename; fall back to the current directory.
    out_dir = os.path.dirname(opts.out) or "."
    os.makedirs(out_dir, exist_ok=True)
    with open(opts.out, "w", encoding="utf-8") as sink:
        json.dump(report, sink, indent=2, sort_keys=True)
        sink.write("\n")
    print(f"Wrote {len(unique_vars)} env vars to {opts.out}")
# Run the extractor only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()