# server.py
import concurrent.futures
import os
import pathlib
from typing import Dict

import requests
import tqdm
from mcp.server.fastmcp import FastMCP

from benchmark_tool import run_benchmark

# Create an MCP server
mcp = FastMCP("vLLM Bencher")
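
# FastMCP exposes functions decorated with @mcp.tool() as MCP tools: the
# docstring becomes the tool's description and the type hints define its
# input schema.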
@mcp.tool()
def benchmark_vllm(
    model: str,
    base_url: str,
    num_prompts: int = 10,
) -> Dict:
"""
Run vLLM benchmarking tool to measure model performance
Args:
model: The model to benchmark (e.g., 'meta-llama/Llama-2-7b-hf')
backend: Backend server to use (vllm, tgi, openai, etc.)
dataset: Dataset to use for benchmarking (sharegpt, random, etc.)
dataset_path: Path to the dataset file
num_prompts: Number of prompts to benchmark with
request_rate: Requests per second
concurrent_requests: Number of concurrent requests
max_tokens: Maximum number of tokens to generate
vllm_dir: Directory where vLLM is installed
api_url: URL of the API to benchmark
save_result: Whether to save benchmark results
result_filename: Filename to save benchmark results
api_key: API key for the backend
trust_remote_code: Whether to trust remote code
extra_args: Additional arguments to pass to benchmark_serving.py
Returns:
Dictionary containing benchmark results including throughput, latency, and other metrics
"""
    # Define the dataset path
    dataset_filename = "ShareGPT_V3_unfiltered_cleaned_split.json"
    current_dir = pathlib.Path(__file__).parent.absolute()
    dataset_path = current_dir / dataset_filename

    # Check if the dataset exists; if not, download it
    if not dataset_path.exists():
        dataset_url = (
            "https://huggingface.co/datasets/anon8231489123/"
            "ShareGPT_Vicuna_unfiltered/resolve/main/"
            "ShareGPT_V3_unfiltered_cleaned_split.json"
        )
        try:
            response = requests.get(dataset_url, stream=True)
            response.raise_for_status()

            # Get the file size if available and stream the download with a
            # progress bar
            total_size_in_bytes = int(response.headers.get("content-length", 0))
            progress_bar = tqdm.tqdm(
                total=total_size_in_bytes,
                unit="iB",
                unit_scale=True,
                desc="Downloading dataset",
            )
            with open(dataset_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    progress_bar.update(len(chunk))
                    f.write(chunk)
            progress_bar.close()
        except Exception:
            # If the download failed and a partial file exists, remove it
            if dataset_path.exists():
                os.remove(dataset_path)
            raise
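
    # Note: concurrent tool calls could race on this download; writing to a
    # temporary file and renaming it into place would guarantee the dataset
    # is never read half-written.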

    # Run the benchmark in a separate thread to avoid conflicting with the
    # server's asyncio event loop
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(
            run_benchmark,
            model=model,
            backend="vllm",
            dataset="sharegpt",
            dataset_path=str(dataset_path),
            num_prompts=num_prompts,
            base_url=base_url,
        )
        return future.result()
if __name__ == "__main__":
mcp.run()
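
# Example (sketch): one way to exercise the tool above from an MCP client over
# stdio, assuming this file is saved as server.py. ClientSession,
# StdioServerParameters, and stdio_client come from the MCP Python SDK; the
# model name and base_url below are placeholders for your own deployment.
# Kept commented out so this file remains a plain server script.
#
#   import asyncio
#   from mcp import ClientSession, StdioServerParameters
#   from mcp.client.stdio import stdio_client
#
#   async def main():
#       params = StdioServerParameters(command="python", args=["server.py"])
#       async with stdio_client(params) as (read, write):
#           async with ClientSession(read, write) as session:
#               await session.initialize()
#               result = await session.call_tool(
#                   "benchmark_vllm",
#                   {
#                       "model": "meta-llama/Llama-2-7b-hf",
#                       "base_url": "http://localhost:8000",
#                   },
#               )
#               print(result)
#
#   asyncio.run(main())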