"""Benchmark comparing token usage between raw file operations and MCP tools.
This script measures and compares the token efficiency of different approaches
for reading and editing JSON configuration files, including:
- Raw file reads (full file)
- Smart raw reads (grep + context)
- MCP tool reads (specific queries)
- Raw edits (full file rewrite)
- Smart raw edits (sed)
- MCP tool edits (specific updates)
"""
import json
import subprocess
import sys
import time
from pathlib import Path
from typing import Any
import tiktoken
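# Make the local (not-yet-installed) package importable when running from a checkout.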
sys.path.append(str(Path(__file__).parent.parent / "packages"))
from mcp_json_yaml_toml.yq_wrapper import FormatType, execute_yq
# Setup
TEST_FILE = Path("benchmark_test.json")
NUM_ITEMS = 1000
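# 1000 users makes the full-file baseline large enough that the gap between
# whole-file reads and targeted queries shows up clearly in the results table.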
def setup_test_file() -> dict[str, Any]:
"""Create a test JSON file with sample user data for benchmarking."""
data = {
"users": [
{
"id": i,
"name": f"User {i}",
"email": f"user{i}@example.com",
"preferences": {
"theme": "dark" if i % 2 == 0 else "light",
"notifications": True,
},
}
for i in range(NUM_ITEMS)
],
"settings": {"version": "1.0.0", "maintenance": False},
}
    with TEST_FILE.open("w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
return data
# Resolve the tokenizer once; count_tokens() is called many times per run.
_ENCODING = tiktoken.get_encoding("cl100k_base")
def count_tokens(text: str) -> int:
    """Count the number of tokens in a text string using the cl100k_base encoding."""
    return len(_ENCODING.encode(text))
def benchmark_raw_read() -> tuple[int, float]:
"""Benchmark reading an entire file and counting its tokens."""
    start = time.perf_counter()
    content = TEST_FILE.read_text(encoding="utf-8")
    duration = time.perf_counter() - start
tokens = count_tokens(content)
return tokens, duration
def benchmark_smart_raw_read() -> tuple[int, float]:
"""Simulate 'grep -n "version" file' then 'read_file(start, end)'."""
    start = time.perf_counter()
# 1. Grep to find line number
grep_cmd = ["grep", "-n", "version", str(TEST_FILE)]
grep_result = subprocess.run(grep_cmd, check=False, capture_output=True, text=True)
if not grep_result.stdout:
        return 0, time.perf_counter() - start
# Parse line number (simplified, assumes first match)
try:
line_num = int(grep_result.stdout.split(":")[0])
except (ValueError, IndexError):
        return 0, time.perf_counter() - start
# 2. Read context (e.g., 10 lines around match)
# In a real agent scenario, this would be a tool call like read_file(start_line, end_line)
start_line = max(1, line_num - 5)
end_line = line_num + 5
    with TEST_FILE.open(encoding="utf-8") as f:
lines = f.readlines()
# Adjust for 0-indexing
context_lines = lines[start_line - 1 : end_line]
content = "".join(context_lines)
    duration = time.perf_counter() - start
# Tokens:
# 1. User asking to grep (approx) + Grep output
grep_tokens = count_tokens(f'grep -n "version" {TEST_FILE}') + count_tokens(
grep_result.stdout
)
# 2. User asking to read file range + File content
read_tokens = count_tokens(
f'read_file(path="{TEST_FILE}", start_line={start_line}, end_line={end_line})'
) + count_tokens(content)
return grep_tokens + read_tokens, duration
def benchmark_raw_edit() -> tuple[int, float]:
"""Benchmark editing a file by reading, modifying, and rewriting entirely."""
tokens_read, _ = benchmark_raw_read()
# Simulate modification
    with TEST_FILE.open(encoding="utf-8") as f:
data = json.load(f)
data["settings"]["maintenance"] = True
    start = time.perf_counter()
    new_content = json.dumps(data, indent=2)
    TEST_FILE.write_text(new_content, encoding="utf-8")
    duration = time.perf_counter() - start
tokens_write = count_tokens(new_content)
return tokens_read + tokens_write, duration
def benchmark_smart_raw_edit_sed() -> tuple[int, float]:
"""Simulate 'sed -i ...' to replace a value.
Risk: High (regex fragility). Cost: Low.
"""
    start = time.perf_counter()
# Simulate agent constructing a sed command
# sed -i 's/"maintenance": false/"maintenance": true/' file
sed_cmd = [
"sed",
"-i",
's/"maintenance": false/"maintenance": true/',
str(TEST_FILE),
]
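    # Portability note: GNU sed syntax; BSD/macOS sed needs `sed -i ''` instead.
    # Risk illustration: sed exits 0 even when the pattern matches nothing, so a
    # formatting drift (e.g. no space after the colon) silently changes nothing.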
subprocess.run(sed_cmd, check=True)
    duration = time.perf_counter() - start
# Tokens: Just the command execution
    input_str = (
        "run_command(command=\"sed -i "
        "'s/\"maintenance\": false/\"maintenance\": true/' "
        f"{TEST_FILE}\")"
    )
tokens = count_tokens(input_str)
# Output is usually empty or exit code
tokens += count_tokens("Exit code: 0")
return tokens, duration
def benchmark_mcp_read_specific() -> tuple[int, float]:
"""Benchmark reading a specific value using MCP data_query tool."""
    start = time.perf_counter()
# Simulate data_query tool logic
result = execute_yq(
".settings.version",
input_file=TEST_FILE,
input_format=FormatType.JSON,
output_format=FormatType.JSON,
)
    duration = time.perf_counter() - start
# Input tokens: Tool call arguments (approx)
input_str = f'data_query(file_path="{TEST_FILE}", expression=".settings.version")'
input_tokens = count_tokens(input_str)
# Output tokens: Result
# The tool wraps the result in a dict structure
output_data = {
"success": True,
"result": result.data,
"format": FormatType.JSON,
"file": str(TEST_FILE),
}
output_str = json.dumps(output_data)
output_tokens = count_tokens(output_str)
return input_tokens + output_tokens, duration
def benchmark_mcp_edit_specific() -> tuple[int, float]:
"""Benchmark editing a specific value using MCP data tool."""
    start = time.perf_counter()
    # Simulate the data tool's "set" operation.
    # Note: in yq, setting a value is a plain assignment expression.
execute_yq(
".settings.maintenance = true",
input_file=TEST_FILE,
input_format=FormatType.JSON,
output_format=FormatType.JSON,
in_place=True,
)
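    # in_place=True keeps the rewritten file out of the conversation entirely;
    # only the small confirmation payload below is counted.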
    duration = time.perf_counter() - start
# Input tokens
input_str = f'data(file_path="{TEST_FILE}", operation="set", key_path="settings.maintenance", value="true", in_place=True)'
input_tokens = count_tokens(input_str)
# Output tokens
output_data = {
"success": True,
"modified_in_place": True,
"result": "File modified successfully",
"file": str(TEST_FILE),
}
output_str = json.dumps(output_data)
output_tokens = count_tokens(output_str)
return input_tokens + output_tokens, duration
def main() -> None:
"""Run all benchmarks and display comparison results."""
print("Setting up test file...")
setup_test_file()
print(f"File size: {Path(TEST_FILE).stat().st_size} bytes")
print("-" * 80)
print(
f"{'Operation':<35} | {'Tokens':<10} | {'Ratio (vs Raw)':<15} | {'Risk/Notes'}"
)
print("-" * 80)
# Raw Read
raw_read_tokens, _ = benchmark_raw_read()
print(
f"{'Raw Read (Full File)':<35} | {raw_read_tokens:<10} | {'1.0x':<15} | {'Baseline'}"
)
# Smart Raw Read
smart_read_tokens, _ = benchmark_smart_raw_read()
ratio_smart_read = (
raw_read_tokens / smart_read_tokens if smart_read_tokens > 0 else 0
)
print(
f"{'Smart Raw Read (Grep + Context)':<35} | {smart_read_tokens:<10} | {f'{ratio_smart_read:.1f}x cheaper':<15} | {'Multi-step'}"
)
# MCP Read
mcp_read_tokens, _ = benchmark_mcp_read_specific()
ratio_read = raw_read_tokens / mcp_read_tokens if mcp_read_tokens > 0 else 0
print(
f"{'MCP Read (Specific)':<35} | {mcp_read_tokens:<10} | {f'{ratio_read:.1f}x cheaper':<15} | {'Safe, Structured'}"
)
print("-" * 80)
# Raw Edit
raw_edit_tokens, _ = benchmark_raw_edit()
print(
f"{'Raw Edit (Full Rewrite)':<35} | {raw_edit_tokens:<10} | {'1.0x':<15} | {'High Token Cost'}"
)
    # Smart Raw Edit (Sed)
    setup_test_file()  # Reset: the raw edit above already flipped maintenance to true
    smart_edit_tokens, _ = benchmark_smart_raw_edit_sed()
ratio_smart_edit = (
raw_edit_tokens / smart_edit_tokens if smart_edit_tokens > 0 else 0
)
print(
f"{'Smart Raw Edit (Sed)':<35} | {smart_edit_tokens:<10} | {f'{ratio_smart_edit:.1f}x cheaper':<15} | {'High Risk (Regex)'}"
)
# MCP Edit
mcp_edit_tokens, _ = benchmark_mcp_edit_specific()
ratio_edit = raw_edit_tokens / mcp_edit_tokens if mcp_edit_tokens > 0 else 0
print(
f"{'MCP Edit (Specific)':<35} | {mcp_edit_tokens:<10} | {f'{ratio_edit:.1f}x cheaper':<15} | {'Safe, Structured'}"
)
print("-" * 80)
    # Cleanup
    TEST_FILE.unlink(missing_ok=True)
if __name__ == "__main__":
main()