CompText MCP Server

baseline_measurement.py•13.9 KiB

#!/usr/bin/env python3 """ Baseline Measurement Script Measures and compares performance between: - Baseline (verbose prompts, no CompText) - CompText (optimized DSL prompts) Generates metrics report for: - Token usage - Latency - Cost estimation - Success rate Run with: python examples/mobile_agent/baseline_measurement.py """ import asyncio import json import sys import time from dataclasses import dataclass, field from datetime import datetime from pathlib import Path from typing import Optional # Add parent to path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) @dataclass class TaskMeasurement: """Measurement for a single task.""" task: str mode: str # "baseline" or "comptext" success: bool prompt_tokens: int completion_tokens: int total_tokens: int latency_ms: float steps: int error: Optional[str] = None @dataclass class BenchmarkReport: """Complete benchmark report.""" timestamp: str measurements: list[TaskMeasurement] = field(default_factory=list) @property def baseline_measurements(self) -> list[TaskMeasurement]: return [m for m in self.measurements if m.mode == "baseline"] @property def comptext_measurements(self) -> list[TaskMeasurement]: return [m for m in self.measurements if m.mode == "comptext"] def get_summary(self) -> dict: """Calculate summary statistics.""" baseline = self.baseline_measurements comptext = self.comptext_measurements def avg(lst, key): values = [getattr(m, key) for m in lst] return sum(values) / len(values) if values else 0 def success_rate(lst): if not lst: return 0 return sum(1 for m in lst if m.success) / len(lst) * 100 baseline_tokens = avg(baseline, "total_tokens") comptext_tokens = avg(comptext, "total_tokens") return { "baseline": { "count": len(baseline), "avg_tokens": baseline_tokens, "avg_latency_ms": avg(baseline, "latency_ms"), "success_rate": success_rate(baseline), }, "comptext": { "count": len(comptext), "avg_tokens": comptext_tokens, "avg_latency_ms": avg(comptext, "latency_ms"), "success_rate": success_rate(comptext), }, "improvement": { "token_reduction_percent": ( (baseline_tokens - comptext_tokens) / baseline_tokens * 100 if baseline_tokens > 0 else 0 ), "latency_reduction_percent": ( (avg(baseline, "latency_ms") - avg(comptext, "latency_ms")) / avg(baseline, "latency_ms") * 100 if avg(baseline, "latency_ms") > 0 else 0 ), }, } # Sample prompts for comparison VERBOSE_SYSTEM_PROMPT = """You are a mobile automation agent controlling an Android device. Your primary capabilities include: 1. Analyzing screen states - You can examine the current UI hierarchy to understand what elements are visible, their positions, types, and whether they are interactive. 2. Planning action sequences - Based on the user's task, you create a step-by-step plan to achieve the goal. 3. Executing actions - You can perform various actions on the device: - tap: Touch a specific element or coordinate on the screen - swipe: Perform swipe gestures in any direction (up, down, left, right) - type: Enter text into focused input fields - back: Press the Android back button - home: Press the home button to return to the launcher - launch_app: Open an application by its package name - wait: Pause execution for a specified duration When responding, please use the following JSON format: { "thought": "Your detailed reasoning about the current state and what action to take next", "action": "The action type (tap, swipe, type, back, home, launch_app, wait, or done)", "params": { "element_index": "For tap - the index of the UI element to tap", "x": "For tap with coordinates - the x position", "y": "For tap with coordinates - the y position", "direction": "For swipe - up, down, left, or right", "text": "For type - the text to enter", "package": "For launch_app - the package name", "seconds": "For wait - the duration to wait" }, "confidence": "A value between 0.0 and 1.0 indicating your confidence" } Important guidelines: - Always prefer using element_index over coordinates when possible, as it's more reliable - Verify the success of each action before proceeding to the next step - If you encounter an unexpected state, try alternative approaches - Report "done" as the action when the task is complete - Keep your reasoning concise but informative """ COMPTEXT_SYSTEM_PROMPT = """MA:Android.Acts:tap/swipe/type/back/home/launch/wait/done. JSON:{t:"thought",a:"action",p:{params},c:0.0-1.0} tap:{ei:N}|{x,y}.swipe:{d:"u/d/l/r"}.type:{txt:""}.launch:{pkg:""}.done:{} Verify.Concise.""" VERBOSE_SCREEN_STATE = """Current Application: com.android.launcher3 Current Activity: com.android.launcher3.Launcher The current screen shows the Android home screen (launcher). The following UI elements are visible: UI Elements (15 visible): -------------------------------------------------- [0] text="Chrome" description="Chrome browser application" resource_id="com.android.launcher3:id/icon" class="android.widget.TextView" clickable=true enabled=true bounds=[120,800,280,1000] center=(200, 900) [1] text="Settings" description="Settings application" resource_id="com.android.launcher3:id/icon" class="android.widget.TextView" clickable=true enabled=true bounds=[400,800,560,1000] center=(480, 900) [2] text="Messages" description="Messages application" resource_id="com.android.launcher3:id/icon" class="android.widget.TextView" clickable=true enabled=true bounds=[680,800,840,1000] center=(760, 900) [3] text="Phone" description="Phone application" resource_id="com.android.launcher3:id/icon" class="android.widget.TextView" clickable=true enabled=true bounds=[120,1100,280,1300] center=(200, 1200) [4] text="Camera" description="Camera application" resource_id="com.android.launcher3:id/icon" class="android.widget.TextView" clickable=true enabled=true bounds=[400,1100,560,1300] center=(480, 1200) """ COMPTEXT_SCREEN_STATE = """App:launcher Act:Launcher Els:15 0:K:Chrome@200,900 1:K:Settings@480,900 2:K:Messages@760,900 3:K:Phone@200,1200 4:K:Camera@480,1200""" def estimate_tokens(text: str) -> int: """Estimate token count (rough approximation: ~4 chars per token).""" return len(text) // 4 def run_token_comparison(): """Run token comparison between verbose and CompText prompts.""" print("=" * 70) print(" Token Comparison: Verbose vs CompText") print("=" * 70) print() # System prompt comparison verbose_sys_tokens = estimate_tokens(VERBOSE_SYSTEM_PROMPT) comptext_sys_tokens = estimate_tokens(COMPTEXT_SYSTEM_PROMPT) sys_reduction = (verbose_sys_tokens - comptext_sys_tokens) / verbose_sys_tokens * 100 print("System Prompt:") print(f" Verbose: ~{verbose_sys_tokens} tokens ({len(VERBOSE_SYSTEM_PROMPT)} chars)") print(f" CompText: ~{comptext_sys_tokens} tokens ({len(COMPTEXT_SYSTEM_PROMPT)} chars)") print(f" Reduction: {sys_reduction:.1f}%") print() # Screen state comparison verbose_screen_tokens = estimate_tokens(VERBOSE_SCREEN_STATE) comptext_screen_tokens = estimate_tokens(COMPTEXT_SCREEN_STATE) screen_reduction = (verbose_screen_tokens - comptext_screen_tokens) / verbose_screen_tokens * 100 print("Screen State (5 elements):") print(f" Verbose: ~{verbose_screen_tokens} tokens ({len(VERBOSE_SCREEN_STATE)} chars)") print(f" CompText: ~{comptext_screen_tokens} tokens ({len(COMPTEXT_SCREEN_STATE)} chars)") print(f" Reduction: {screen_reduction:.1f}%") print() # Total per interaction verbose_total = verbose_sys_tokens + verbose_screen_tokens comptext_total = comptext_sys_tokens + comptext_screen_tokens total_reduction = (verbose_total - comptext_total) / verbose_total * 100 print("Total per Interaction:") print(f" Verbose: ~{verbose_total} tokens") print(f" CompText: ~{comptext_total} tokens") print(f" Reduction: {total_reduction:.1f}%") print() # Cost estimation (using approximate pricing) print("-" * 70) print("Cost Estimation (1,000 tasks, 5 steps each):") print("-" * 70) interactions = 1000 * 5 verbose_monthly = verbose_total * interactions comptext_monthly = comptext_total * interactions # Pricing: ~$0.003 per 1K input tokens (Claude 3.5 Sonnet) price_per_1k = 0.003 verbose_cost = (verbose_monthly / 1000) * price_per_1k comptext_cost = (comptext_monthly / 1000) * price_per_1k savings = verbose_cost - comptext_cost print(f" Verbose: {verbose_monthly:,} tokens = ${verbose_cost:.2f}") print(f" CompText: {comptext_monthly:,} tokens = ${comptext_cost:.2f}") print(f" Monthly Savings: ${savings:.2f} ({100*savings/verbose_cost:.0f}%)") print() return { "system_prompt": { "verbose_tokens": verbose_sys_tokens, "comptext_tokens": comptext_sys_tokens, "reduction_percent": sys_reduction, }, "screen_state": { "verbose_tokens": verbose_screen_tokens, "comptext_tokens": comptext_screen_tokens, "reduction_percent": screen_reduction, }, "total": { "verbose_tokens": verbose_total, "comptext_tokens": comptext_total, "reduction_percent": total_reduction, }, "cost_estimate": { "verbose_monthly": verbose_cost, "comptext_monthly": comptext_cost, "monthly_savings": savings, }, } def generate_report(comparison: dict) -> str: """Generate markdown report.""" report = f"""# CompText Baseline Measurement Report Generated: {datetime.now().isoformat()} ## Executive Summary CompText DSL achieves **{comparison['total']['reduction_percent']:.1f}% token reduction** compared to verbose prompts, resulting in: - **{comparison['cost_estimate']['monthly_savings']:.0f}% cost savings** per month - **Faster response times** due to reduced input processing - **Same semantic meaning** preserved through DSL compression ## Token Comparison | Component | Verbose | CompText | Reduction | |-----------|---------|----------|-----------| | System Prompt | {comparison['system_prompt']['verbose_tokens']} | {comparison['system_prompt']['comptext_tokens']} | {comparison['system_prompt']['reduction_percent']:.1f}% | | Screen State | {comparison['screen_state']['verbose_tokens']} | {comparison['screen_state']['comptext_tokens']} | {comparison['screen_state']['reduction_percent']:.1f}% | | **Total** | **{comparison['total']['verbose_tokens']}** | **{comparison['total']['comptext_tokens']}** | **{comparison['total']['reduction_percent']:.1f}%** | ## Cost Projection (1,000 tasks/month) | Metric | Verbose | CompText | Savings | |--------|---------|----------|---------| | Tokens | {comparison['total']['verbose_tokens'] * 5000:,} | {comparison['total']['comptext_tokens'] * 5000:,} | {(comparison['total']['verbose_tokens'] - comparison['total']['comptext_tokens']) * 5000:,} | | Cost | ${comparison['cost_estimate']['verbose_monthly']:.2f} | ${comparison['cost_estimate']['comptext_monthly']:.2f} | ${comparison['cost_estimate']['monthly_savings']:.2f} | ## Prompt Examples ### Verbose System Prompt (~{comparison['system_prompt']['verbose_tokens']} tokens) ``` {VERBOSE_SYSTEM_PROMPT[:500]}... ``` ### CompText System Prompt (~{comparison['system_prompt']['comptext_tokens']} tokens) ``` {COMPTEXT_SYSTEM_PROMPT} ``` ### Verbose Screen State (~{comparison['screen_state']['verbose_tokens']} tokens) ``` {VERBOSE_SCREEN_STATE[:400]}... ``` ### CompText Screen State (~{comparison['screen_state']['comptext_tokens']} tokens) ``` {COMPTEXT_SCREEN_STATE} ``` ## Methodology - Token estimation: ~4 characters per token (conservative estimate) - Cost calculation: $0.003 per 1K input tokens (Claude 3.5 Sonnet pricing) - Task assumption: 5 interactions per task average - Monthly volume: 1,000 tasks ## Conclusion CompText DSL provides significant efficiency gains: 1. **~{comparison['total']['reduction_percent']:.0f}% fewer tokens** per interaction 2. **~{comparison['cost_estimate']['monthly_savings']:.0f}% lower costs** at scale 3. **Faster agent loops** due to reduced processing 4. **Maintained accuracy** through semantic preservation """ return report def main(): """Main entry point.""" import argparse parser = argparse.ArgumentParser(description="Baseline Measurement for CompText") parser.add_argument("--output", "-o", help="Output file for report (markdown)") parser.add_argument("--json", action="store_true", help="Output as JSON") args = parser.parse_args() # Run comparison comparison = run_token_comparison() # Generate report if args.json: print(json.dumps(comparison, indent=2)) elif args.output: report = generate_report(comparison) output_path = Path(args.output) output_path.write_text(report) print(f"Report saved to: {output_path}") else: # Print visual comparison print("=" * 70) print(" Visual Token Comparison") print("=" * 70) print() verbose = comparison['total']['verbose_tokens'] comptext = comparison['total']['comptext_tokens'] bar_width = 50 verbose_bar = "█" * bar_width comptext_bar = "█" * int(bar_width * comptext / verbose) print(f"Verbose: {verbose_bar} {verbose} tokens") print(f"CompText: {comptext_bar.ljust(bar_width)} {comptext} tokens") print() print(f"Reduction: {comparison['total']['reduction_percent']:.1f}%") if __name__ == "__main__": main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ProfRandom92/comptext-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

baseline_measurement.py•13.9 KiB