diff
Catch LLM cost regressions by comparing call sites between two git refs, showing added, removed, or changed sites with cost impact.
Instructions
Compare LLM costs between two git refs.
Shows which LLM call sites were added, removed, or changed between the base and head refs, along with the cost impact of those changes.
Args: base_ref: The base git ref (branch, tag, or commit) to compare from. head_ref: The head git ref to compare to. Defaults to HEAD.
Returns: JSON string with the diff results including cost changes.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| base_ref | Yes | | |
| head_ref | No | | HEAD |
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| result | Yes | | |
Implementation Reference
- src/tokentoll/mcp_server.py:71-86 (handler) MCP tool handler for 'diff'. Registered via @server.tool() as the wrapper function 'diff', which delegates to the CLI subprocess with --base, --head, and --format json arguments.
@server.tool()
def diff(base_ref: str, head_ref: str = "HEAD") -> str:
    """Compare LLM costs between two git refs.

    Shows which LLM call sites were added, removed, or changed between
    the base and head refs, along with the cost impact of those changes.

    Args:
        base_ref: The base git ref (branch, tag, or commit) to compare from.
        head_ref: The head git ref to compare to. Defaults to HEAD.

    Returns:
        JSON string with the diff results including cost changes.
    """
    # NOTE(review): the docstring wording is deliberately left untouched; it
    # appears to be what the tool listing shows as the tool's instructions.
    ref_flags = ["--base", base_ref, "--head", head_ref]
    # Delegate to the CLI `diff` subcommand, always asking for JSON output.
    return _run_cli(["diff", *ref_flags, "--format", "json"])

# - src/tokentoll/core/pipeline.py:88-172 (handler) Core orchestration function
#   run_diff_command() that handles git ref resolution, calls get_changed_files,
#   scans old/new file contents, invokes compute_diff, builds a DiffReport, and
#   formats output.
def _resolve_diff_refs(
    ref: str | None, base: str | None, head: str | None
) -> tuple[str, str]:
    """Choose the (base_ref, head_ref) pair to compare.

    Explicit ``base`` / ``head`` values always win. A side that was not given
    explicitly is taken from ``ref`` (either ``A..B`` or a single base ref),
    and finally from the defaults ``HEAD~1`` (base) and ``HEAD`` (head).
    """
    if ref and ".." in ref:
        ref_base, ref_head = ref.split("..", 1)
        # An omitted side of a range ("A.." / "..B") means HEAD, as in git,
        # rather than an empty ref string.
        ref_base = ref_base or "HEAD"
        # NOTE(review): three-dot ranges ("A...B") are not handled specially;
        # the split leaves a leading "." on the head side.
    else:
        ref_base, ref_head = ref, None
    return base or ref_base or "HEAD~1", head or ref_head or "HEAD"


def run_diff_command(
    ref: str | None,
    base: str | None,
    head: str | None,
    output_format: str,
    calls_per_month: int | None,
    config_path: str | None = None,
) -> int:
    """Diff LLM call sites between two git refs and print a cost report.

    Args:
        ref: Optional positional ref, either ``A..B`` or a single base ref.
        base: Optional explicit base ref. Honoured even when ``head`` is
            omitted (head then falls back to ``ref``'s head side or ``HEAD``).
        head: Optional explicit head ref. Honoured even when ``base`` is
            omitted.
        output_format: ``"json"``, ``"markdown"`` / ``"github-comment"``, or
            anything else for the table renderer.
        calls_per_month: Assumed monthly call volume per call site; falls back
            to the project config value and then to 1000.
        config_path: Optional path handed to the project config loader.

    Returns:
        Always 0 on the paths implemented here.
    """
    # Imported lazily, as in the original code.
    from tokentoll.diff.git import get_changed_files, get_file_at_ref

    base_ref, head_ref = _resolve_diff_refs(ref, base, head)

    changed_files = get_changed_files(base_ref, head_ref)
    # For JSON consumers keep going and emit an (empty) JSON report instead of
    # a plain-text sentence that cannot be parsed.
    if not changed_files and output_format != "json":
        print("No Python files changed.")
        return 0

    config = _load_project_config(config_path)
    effective_cpm = calls_per_month or config.calls_per_month or 1000
    engine = PricingEngine()

    old_calls_map: dict[str, list] = {}
    new_calls_map: dict[str, list] = {}
    # status letters are presumably git name-status codes ("A" added,
    # "D" deleted) -- TODO confirm against get_changed_files.
    for fpath, status in changed_files or ():
        if status != "D":
            # Not deleted: the file exists at head, scan the new version.
            source = get_file_at_ref(head_ref, fpath)
            if source:
                new_calls_map[fpath] = scan_source(source, fpath)
        if status != "A":
            # Not newly added: the file exists at base, scan the old version.
            source = get_file_at_ref(base_ref, fpath)
            if source:
                old_calls_map[fpath] = scan_source(source, fpath)

    from tokentoll.diff.engine import compute_diff

    call_diffs = compute_diff(old_calls_map, new_calls_map, engine, effective_cpm, config)

    total_delta = 0.0
    added = removed = modified = 0
    for d in call_diffs:
        # A delta of None means "no estimate"; it is left out of the total.
        if d.monthly_delta is not None:
            total_delta += d.monthly_delta
        if d.change_type == ChangeType.ADDED:
            added += 1
        elif d.change_type == ChangeType.REMOVED:
            removed += 1
        elif d.change_type == ChangeType.MODIFIED:
            modified += 1

    report = DiffReport(
        base_ref=base_ref,
        head_ref=head_ref,
        call_diffs=call_diffs,
        total_monthly_delta=total_delta,
        total_calls_added=added,
        total_calls_removed=removed,
        total_calls_modified=modified,
        warnings=engine.warnings,
        assumptions=[f"{effective_cpm} calls/month per call site"],
    )

    if output_format == "json":
        from tokentoll.output.json_output import format_diff_report_json

        print(json.dumps(format_diff_report_json(report), indent=2))
    elif output_format in ("markdown", "github-comment"):
        from tokentoll.output.markdown import format_diff_report_markdown

        print(format_diff_report_markdown(report))
    else:
        from tokentoll.output.table import print_diff_report

        print_diff_report(report)

    return 0

# - src/tokentoll/diff/engine.py:10-31 (helper) compute_diff() — compares old vs
#   new LLM call maps across files, delegating per-file diffing to _diff_file().
#   Core comparison logic.
def compute_diff(
    old_calls_map: dict[str, list[LLMCall]],
    new_calls_map: dict[str, list[LLMCall]],
    engine: PricingEngine,
    calls_per_month: int,
    config: ProjectConfig | None = None,
) -> list[CallDiff]:
    """Diff LLM call sites file by file and collect every change found.

    Files are visited in sorted path order over the union of both maps; a
    file missing from one side is treated as having no calls there. When a
    config is supplied, per-path settings (default model(s), call volume,
    skip-dynamic-models flag) are resolved for each file, and a truthy
    per-path call volume replaces ``calls_per_month``.
    """
    results: list[CallDiff] = []
    for path in sorted(old_calls_map.keys() | new_calls_map.keys()):
        settings = resolve_for_path(config, path) if config else None
        if settings:
            model = settings.default_model
            models = settings.default_models
            volume = settings.calls_per_month or calls_per_month
            skip_dynamic = settings.skip_dynamic_models
        else:
            # No usable per-path settings: fall back to the caller's volume.
            model, models, volume, skip_dynamic = None, None, calls_per_month, False
        results.extend(
            _diff_file(
                old_calls_map.get(path, []),
                new_calls_map.get(path, []),
                engine,
                volume,
                model,
                models,
                skip_dynamic,
            )
        )
    return results

# - src/tokentoll/diff/engine.py:34-126 (helper) _diff_file() — matches calls
#   between old and new versions, computes MODIFIED/ADDED/REMOVED diffs with
#   cost deltas.
def _diff_file(
    old_calls: list[LLMCall],
    new_calls: list[LLMCall],
    engine: PricingEngine,
    calls_per_month: int,
    default_model: str | None = None,
    default_models: dict[str, str] | None = None,
    skip_dynamic_models: bool = False,
) -> list[CallDiff]:
    """Diff the LLM calls of one file between its old and new version.

    Each new call is paired, in order, with the best still-unmatched old call
    reported by ``_find_best_match``. Pairs that differ become MODIFIED
    entries, unpaired new calls become ADDED, and unpaired old calls become
    REMOVED with negated cost figures.

    Returns:
        MODIFIED entries first (in new-call order), then ADDED, then REMOVED.
    """

    def estimate(call):
        # One place for the estimate arguments shared by every call site.
        return engine.estimate(
            call,
            calls_per_month,
            default_model=default_model,
            default_models=default_models,
            skip_dynamic_models=skip_dynamic_models,
        )

    def negated(value):
        # None ("no estimate") stays None; a real zero stays a plain zero
        # instead of being reported as unknown (or as -0.0).
        return -value if value else value

    matched_old: set[int] = set()
    matched_new: set[int] = set()
    diffs: list[CallDiff] = []

    for ni, nc in enumerate(new_calls):
        best_oi = _find_best_match(nc, old_calls, matched_old)
        if best_oi is None:
            continue
        matched_old.add(best_oi)
        matched_new.add(ni)
        oc = old_calls[best_oi]
        # Estimated for every pair, even unchanged ones, exactly as before:
        # estimate() may record warnings on the engine -- TODO confirm.
        old_est = estimate(oc)
        new_est = estimate(nc)
        if not _calls_differ(oc, nc):
            continue
        delta = _compute_delta(old_est.monthly_estimate, new_est.monthly_estimate)
        cost_delta = _compute_delta(
            old_est.estimated_cost_per_call, new_est.estimated_cost_per_call
        )
        diffs.append(
            CallDiff(
                change_type=ChangeType.MODIFIED,
                old_call=oc,
                new_call=nc,
                old_estimate=old_est,
                new_estimate=new_est,
                cost_delta_per_call=cost_delta,
                monthly_delta=delta,
            )
        )

    for ni, nc in enumerate(new_calls):
        if ni in matched_new:
            continue
        est = estimate(nc)
        diffs.append(
            CallDiff(
                change_type=ChangeType.ADDED,
                new_call=nc,
                new_estimate=est,
                monthly_delta=est.monthly_estimate,
                cost_delta_per_call=est.estimated_cost_per_call,
            )
        )

    for oi, oc in enumerate(old_calls):
        if oi in matched_old:
            continue
        est = estimate(oc)
        diffs.append(
            CallDiff(
                change_type=ChangeType.REMOVED,
                old_call=oc,
                old_estimate=est,
                monthly_delta=negated(est.monthly_estimate),
                cost_delta_per_call=negated(est.estimated_cost_per_call),
            )
        )

    return diffs

# - src/tokentoll/cli.py:37-58 (registration) CLI subparser registration for
#   'diff' command with arguments: ref, --base, --head, --format,
#   --calls-per-month, --config.
# diff command
diff_parser = subparsers.add_parser("diff", help="Compare LLM costs between git refs")

# (flags, keyword options) for each argument, registered in this order.
# The ref-related arguments all default to None at the argparse level.
_diff_arguments = (
    (("ref",), {"nargs": "?", "default": None, "help": "Git ref to diff against"}),
    (("--base",), {"default": None, "help": "Base git ref"}),
    (("--head",), {"default": None, "help": "Head git ref (default: HEAD)"}),
    (
        ("--format",),
        {
            "choices": ["table", "json", "markdown", "github-comment"],
            "default": "table",
            "help": "Output format (default: table)",
        },
    ),
    (
        ("--calls-per-month",),
        {
            "type": int,
            "default": None,
            "help": "Assumed monthly call volume per call site (default: 1000)",
        },
    ),
    (("--config",), {"default": None, "help": "Path to .tokentoll.yml config file"}),
)
for _diff_flags, _diff_options in _diff_arguments:
    diff_parser.add_argument(*_diff_flags, **_diff_options)