#!/usr/bin/env python3
"""Deterministic fixture generator for RLM benchmark scoring.
Creates a synthetic mini-repo with known ground truth for:
- provider presets (name -> required_env_vars)
- environment variables referenced across the repo
It is designed to make scoring *mechanical* (set comparisons), not subjective.
Outputs:
<out_dir>/repo/ synthetic repository
<out_dir>/gold.json ground truth
<out_dir>/query.txt deterministic query that asks for strict JSON
<out_dir>/globs.txt suggested glob patterns for benchmark scripts
Example:
uv run python bench/fixtures/fixture_gen.py --out-dir /tmp/rlm_fixture --seed 1337
"""
from __future__ import annotations
import argparse
import json
import os
import random
import shutil
import string
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Iterable, List, Dict, Set
@dataclass(frozen=True)
class ProviderPresetGold:
    """Ground-truth record for a single provider preset."""

    # Preset identifier.
    name: str
    # Names of the environment variables the preset requires.
    required_env_vars: List[str]
@dataclass(frozen=True)
class Gold:
    """Ground truth for one generated fixture."""

    # Seed the fixture was generated with.
    seed: int
    # Environment variable names that count as ground truth.
    env_vars: List[str]
    # One gold record per provider preset.
    provider_presets: List[ProviderPresetGold]
def _rand_ident(rng: random.Random, n: int = 8) -> str:
alphabet = string.ascii_lowercase + string.digits
return "".join(rng.choice(alphabet) for _ in range(n))
def _mk_env_name(prefix: str, rng: random.Random) -> str:
    """Return ``<prefix>_<SUFFIX>`` where SUFFIX is a 10-character identifier.

    The suffix comes from ``_rand_ident`` and is upper-cased, since env vars
    are typically uppercase with underscores.
    """
    suffix = _rand_ident(rng, 10).upper()
    return f"{prefix}_{suffix}"
def _write(p: Path, s: str) -> None:
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(s, encoding="utf-8")
def _json_dump(p: Path, obj: object) -> None:
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(json.dumps(obj, indent=2, sort_keys=True) + "\n", encoding="utf-8")
def _unique(seq: Iterable[str]) -> List[str]:
seen: Set[str] = set()
out: List[str] = []
for x in seq:
if x not in seen:
seen.add(x)
out.append(x)
return out
def build_fixture(
    out_dir: Path,
    *,
    seed: int,
    num_presets: int,
    env_per_preset: int,
    extra_env_calls: int,
    decoy_env_strings: int,
) -> Gold:
    """Generate the synthetic repository and its ground-truth files.

    Everything random is drawn from a single ``random.Random(seed)``, so the
    same arguments always produce the same fixture. Any existing
    ``<out_dir>/repo`` directory is deleted first.

    Files written under ``out_dir``: ``repo/`` (JSON and Python preset
    configs, ``app/module_NN.py`` files that read env vars, a README),
    ``gold.json``, ``query.txt`` and ``globs.txt``.

    Args:
        out_dir: Directory that receives the fixture.
        seed: Seed for the random generator.
        num_presets: Number of provider presets to generate.
        env_per_preset: Env var names generated per preset.
        extra_env_calls: Env var names used in code but tied to no preset.
        decoy_env_strings: Env-var-looking names that appear only in comments
            and are excluded from the gold set. They are spread as evenly as
            possible over the generated modules; if no module is generated
            (there are no env vars at all), no decoy is written.

    Returns:
        The ``Gold`` record that was also serialized to ``gold.json``.
    """
    rng = random.Random(seed)
    repo = out_dir / "repo"
    if repo.exists():
        shutil.rmtree(repo)
    repo.mkdir(parents=True, exist_ok=True)

    # --- 1) Define provider presets and required env vars (gold)
    presets_gold: List[ProviderPresetGold] = []
    all_env_vars: List[str] = []
    for i in range(num_presets):
        preset_name = f"preset_{i:02d}_{_rand_ident(rng, 6)}"
        envs = [_mk_env_name("FIXTURE_PRESET", rng) for _ in range(env_per_preset)]
        envs = sorted(_unique(envs))
        presets_gold.append(ProviderPresetGold(name=preset_name, required_env_vars=envs))
        all_env_vars.extend(envs)

    # --- 2) Add extra env vars referenced in code (not tied to presets)
    extra_envs = [_mk_env_name("FIXTURE_MISC", rng) for _ in range(extra_env_calls)]
    all_env_vars.extend(extra_envs)

    # --- 3) Add decoy strings that look like env vars but are NOT runtime-used
    # These are intentionally *not* included in gold.env_vars.
    decoys = [_mk_env_name("FIXTURE_DECOY", rng) for _ in range(decoy_env_strings)]

    # --- 4) Materialize repository files
    # 4a) JSON config containing presets (easy for grep)
    presets_json = {
        "provider_presets": [
            {
                "name": p.name,
                "required_env_vars": p.required_env_vars,
                "base_url": f"https://api.example.com/{p.name}",
            }
            for p in presets_gold
        ]
    }
    _json_dump(repo / "config" / "provider_presets.json", presets_json)

    # 4b) Python config containing presets (more realistic)
    py_lines = [
        "# Auto-generated fixture file.\n",
        "from __future__ import annotations\n\n",
        "PROVIDER_PRESETS = {\n",
    ]
    py_lines.extend(
        f" {p.name!r}: {{'required_env_vars': {p.required_env_vars!r}, 'timeout_s': 30}},\n"
        for p in presets_gold
    )
    py_lines.append("}\n")
    _write(repo / "rlm_mcp_server" / "providers" / "presets.py", "".join(py_lines))

    # 4c) Python modules that *use* env vars via os.getenv / os.environ.get / os.environ[]
    env_call_templates = [
        "os.getenv({name!r})",
        "os.environ.get({name!r})",
        "os.environ[{name!r}] if {name!r} in os.environ else None",
    ]
    modules_dir = repo / "app"
    modules_dir.mkdir(parents=True, exist_ok=True)

    # Mix preset envs + extra envs in actual code usage
    runtime_envs = [*all_env_vars]
    rng.shuffle(runtime_envs)

    # Create a few files with actual env calls
    chunk_size = max(1, len(runtime_envs) // max(1, min(10, len(runtime_envs))))
    chunks: List[List[str]] = [
        runtime_envs[start : start + chunk_size]
        for start in range(0, len(runtime_envs), chunk_size)
    ]

    # Split the decoys once, up front. Recomputing the share from a shrinking
    # list inside the loop left part of the requested decoys unwritten.
    base_quota, remainder = divmod(len(decoys), len(chunks)) if chunks else (0, 0)
    cursor = 0
    for idx, envs_in_file in enumerate(chunks):
        lines = [
            "from __future__ import annotations\n",
            "import os\n\n",
            "def load_config() -> dict:\n",
            " out = {}\n",
        ]
        for e in envs_in_file:
            tmpl = rng.choice(env_call_templates)
            lines.append(f" out[{e!r}] = {tmpl.format(name=e)}\n")

        # Add decoy strings in a comment to tempt naive extraction.
        # The first `remainder` files take one extra decoy each.
        quota = base_quota + (1 if idx < remainder else 0)
        take = decoys[cursor : cursor + quota]
        cursor += quota
        if take:
            lines.append("\n\n# Decoys: these are strings only; NOT used via os.getenv/os.environ.get\n")
            lines.extend(f"# DO_NOT_INCLUDE {d}\n" for d in take)
        lines.append("\n return out\n")
        _write(modules_dir / f"module_{idx:02d}.py", "".join(lines))

    # 4d) A small README to make the query feel realistic
    readme = (
        "# Fixture Repo\n\n"
        "This repo is auto-generated for deterministic benchmarking.\n\n"
        "Important: some strings look like env vars but are decoys.\n"
    )
    _write(repo / "README.md", readme)

    # --- 5) Emit gold + query + globs
    gold = Gold(
        seed=seed,
        env_vars=sorted(_unique(all_env_vars)),
        provider_presets=sorted(presets_gold, key=lambda p: p.name),
    )
    _json_dump(out_dir / "gold.json", {
        "seed": gold.seed,
        "env_vars": gold.env_vars,
        "provider_presets": [asdict(p) for p in gold.provider_presets],
    })
    query = (
        "Task: Inspect the repository files and return STRICT JSON (no prose, no markdown).\n"
        "\n"
        "Find ALL provider presets and required environment variables.\n"
        "Provider presets are defined in:\n"
        "- config/provider_presets.json (field provider_presets[])\n"
        "- rlm_mcp_server/providers/presets.py (PROVIDER_PRESETS dict)\n"
        "\n"
        "Also find ALL environment variable names that are actually used at runtime in Python\n"
        "(i.e., referenced via os.getenv(...), os.environ.get(...), or os.environ[...]).\n"
        "Ignore decoys in comments/docstrings (lines containing DO_NOT_INCLUDE).\n"
        "\n"
        "Return JSON object with this schema:\n"
        "{\n"
        " \"env_vars\": [\"ENV_A\", ...],\n"
        " \"provider_presets\": [\n"
        " {\"name\": \"...\", \"required_env_vars\": [\"ENV\", ...]},\n"
        " ...\n"
        " ]\n"
        "}\n"
        "\n"
        "Constraints:\n"
        "- env_vars must be unique and sorted ascending\n"
        "- provider_presets sorted by name ascending\n"
        "- required_env_vars sorted ascending per preset\n"
    )
    _write(out_dir / "query.txt", query)
    globs = [
        str(repo / "**" / "*.py"),
        str(repo / "**" / "*.json"),
        str(repo / "README.md"),
    ]
    _write(out_dir / "globs.txt", "\n".join(globs) + "\n")
    return gold
def main() -> int:
    """Parse the command line, build the fixture, and print a summary.

    Returns:
        ``0`` once the fixture has been written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out-dir", type=Path, required=True, help="Output directory for fixture")
    # The remaining options are plain integers that differ only in their default.
    int_options = (
        ("--seed", 1337),
        ("--num-presets", 10),
        ("--env-per-preset", 3),
        ("--extra-env-calls", 10),
        ("--decoy-env-strings", 20),
    )
    for flag, default in int_options:
        parser.add_argument(flag, type=int, default=default)
    opts = parser.parse_args()

    target: Path = opts.out_dir
    target.mkdir(parents=True, exist_ok=True)
    gold = build_fixture(
        target,
        seed=opts.seed,
        num_presets=opts.num_presets,
        env_per_preset=opts.env_per_preset,
        extra_env_calls=opts.extra_env_calls,
        decoy_env_strings=opts.decoy_env_strings,
    )

    summary = (
        f"Wrote fixture to: {target}",
        f"- repo: {target / 'repo'}",
        f"- gold: {target / 'gold.json'}",
        f"- query: {target / 'query.txt'}",
        f"- globs: {target / 'globs.txt'}",
        f"Env vars: {len(gold.env_vars)} | Presets: {len(gold.provider_presets)}",
    )
    for line in summary:
        print(line)
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())