from __future__ import annotations
import argparse
import json
import os
from typing import Any, Dict, List
from common_json import load_answer_from_artifact, extract_first_json, sha256_text
def _verify_item(item: Any, repo_root: str) -> List[str]:
    """Check one evidence item and return its failure reasons.

    An item passes (empty list returned) when it is a JSON object with
    non-empty string ``path``, ``excerpt`` and hash fields, the path names a
    regular file under ``repo_root``, the excerpt occurs in that file (or
    matches the optional ``start_line``/``end_line`` range), and
    ``sha256_text(excerpt)`` equals the supplied hash.

    Args:
        item: One element of the parsed top-level JSON array.
        repo_root: Directory that relative ``path`` values are resolved against.

    Returns:
        Human-readable failure reasons; empty when the item is valid.
    """
    if not isinstance(item, dict):
        return ["item not object"]

    path = item.get("path")
    excerpt = item.get("excerpt")
    # The hash may be supplied under any of these keys; the first truthy wins.
    digest = item.get("hash") or item.get("excerpt_hash") or item.get("sha256")

    reasons: List[str] = []
    if not isinstance(path, str) or not path:
        reasons.append("missing path")
    if not isinstance(excerpt, str) or not excerpt:
        reasons.append("missing excerpt")
    if not isinstance(digest, str) or not digest:
        reasons.append("missing hash")
    if reasons:
        # Without all three fields there is nothing meaningful to check on disk.
        return reasons

    # `path` comes from the artifact being verified: an absolute path or a
    # `..` segment would make os.path.join leave the repo, so evidence could
    # be "verified" against unrelated files. Reject anything outside the root.
    root = os.path.abspath(repo_root)
    abs_path = os.path.abspath(os.path.join(root, path))
    try:
        inside_root = os.path.commonpath([root, abs_path]) == root
    except ValueError:
        # Raised e.g. for paths on different drives on Windows.
        inside_root = False
    if not inside_root:
        return [f"path escapes repo root: {path}"]

    if not os.path.isfile(abs_path):
        return [f"file not found: {path}"]

    try:
        # Undecodable bytes are replaced rather than raising, so binary-ish
        # files can still be searched.
        with open(abs_path, "r", encoding="utf-8", errors="replace") as fh:
            file_text = fh.read()
    except OSError as exc:
        # One unreadable file should fail this item, not abort the whole run.
        return [f"cannot read file: {path} ({exc})"]

    if excerpt not in file_text:
        # Optional: allow line-range reconstruction (1-based, inclusive).
        start = item.get("start_line")
        end = item.get("end_line")
        if isinstance(start, int) and isinstance(end, int) and 1 <= start <= end:
            snippet = "\n".join(file_text.splitlines()[start - 1:end])
            if snippet != excerpt:
                reasons.append("excerpt not found and line-range mismatch")
        else:
            reasons.append("excerpt not found in file")

    if sha256_text(excerpt) != digest:
        reasons.append("hash mismatch")
    return reasons


def main():
    """Verify every evidence item in a model output artifact and print a report.

    Command-line arguments:
        --repo-root: directory evidence paths are resolved against (default ".").
        --pred: artifact to load via ``load_answer_from_artifact`` (required).
        --out: optional file that also receives the JSON report.

    The report (always printed to stdout as JSON) contains ``parse_ok``,
    ``items_total``, ``items_ok``, ``items_bad`` and a ``bad`` list describing
    each failing item.
    """
    ap = argparse.ArgumentParser(description="Verify evidence items: excerpt exists in file and hash matches.")
    ap.add_argument("--repo-root", default=".", help="Repo root")
    ap.add_argument("--pred", required=True, help="Model output artifact (.txt/.json or directory)")
    ap.add_argument("--out", default=None, help="Write report JSON here")
    args = ap.parse_args()

    raw, parsed = load_answer_from_artifact(args.pred)
    if parsed is None:
        # Best-effort fallback: try to pull JSON out of the raw text. Any
        # failure simply leaves `parsed` as None and is reported via parse_ok.
        try:
            parsed = extract_first_json(raw)
        except Exception:
            parsed = None

    report = {
        "parse_ok": 1 if parsed is not None else 0,
        "items_total": 0,
        "items_ok": 0,
        "items_bad": 0,
        "bad": [],
    }

    if not isinstance(parsed, list):
        # The artifact as a whole counts as one bad entry with no items.
        report["bad"].append({"reason": "expected top-level JSON array of evidence items"})
        report["items_bad"] = 1
        report["items_total"] = 0
    else:
        report["items_total"] = len(parsed)
        for index, item in enumerate(parsed):
            reasons = _verify_item(item, args.repo_root)
            if reasons:
                report["items_bad"] += 1
                report["bad"].append({"index": index, "reasons": reasons, "item": item})
            else:
                report["items_ok"] += 1

    if args.out:
        with open(args.out, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2, ensure_ascii=False)
    print(json.dumps(report, indent=2, ensure_ascii=False))
# Run the verifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()