PubNub MCP Server

by pubnub
benchmark_mcp.sh (17.3 kB)
#!/bin/bash
# LLM Benchmarking Script - Task Completion Performance (Approach 2)
# Tests OpenAI API performance with and without PubNub context

set -e

# Configuration
MODEL="gpt-4.1-2025-04-14"
RESULTS_FILE="benchmark_results.json"
RESOURCES_DIR="resources"
TEMP_DIR="/tmp/llm_benchmark_$$"

# bc prints numbers like .50 without a leading zero, which is not valid JSON;
# pad the zero back before a value is written to the results file.
fmt_num() {
    case "$1" in
        -.*) echo "-0${1#-}" ;;
        .*)  echo "0$1" ;;
        *)   echo "$1" ;;
    esac
}

# Check for OpenAI API key
if [ -z "$OPENAI_API_KEY" ]; then
    echo "Error: OPENAI_API_KEY environment variable not set"
    exit 1
fi

# Create temp directory
mkdir -p "$TEMP_DIR"

echo "Starting LLM Benchmark - Task Completion Performance"
echo "Model: $MODEL"
echo "Scoring: OpenAI API ($MODEL) comparative evaluation"
echo "Results will be saved to: $RESULTS_FILE"
echo "=================================================="

# Test tasks with varying complexity
declare -a TASKS=(
    "Write JavaScript code to publish a message to a PubNub channel named 'chat-room' with the message 'Hello World'"
    "Create Python code that subscribes to multiple PubNub channels and handles incoming messages with error handling"
    "Write a Node.js function that implements presence detection on a PubNub channel and logs when users join/leave"
    "Create JavaScript code that uses PubNub's message persistence feature to retrieve the last 10 messages from a channel"
    "Write Python code that implements PubNub access control to grant read/write permissions to specific users"
    "Create a JavaScript function that uses PubNub Functions to filter messages based on content before delivery"
    "Write code that implements PubNub's file sharing feature to upload and share a file with other users"
    "Create a real-time location tracking system using PubNub that updates user positions on a map"
    "Create a PubNub-powered web-based social mapping app with OpenStreetMap, user markers with image uploads via PubNub Files API, user data stored in PubNub AppContext, real-time synchronization, and a global chat window"
)

# Function to call OpenAI API
call_openai_api() {
    local prompt="$1"
    local context="$2"
    local full_prompt

    # printf turns the \n escapes into real newlines before jq encodes them
    if [ -n "$context" ]; then
        # Truncate context to avoid token limits
        local truncated_context=$(echo "$context" | head -c 200000)
        full_prompt=$(printf 'Context information:\n%s\n\nTask: %s\n\nPlease provide working code that accomplishes this task.' "$truncated_context" "$prompt")
    else
        full_prompt=$(printf 'Task: %s\n\nPlease provide working code that accomplishes this task using PubNub.' "$prompt")
    fi

    # Create a temporary JSON file to avoid shell escaping issues
    local json_file="$TEMP_DIR/request_$$.json"
    cat > "$json_file" << EOF
{
  "model": "$MODEL",
  "messages": [{"role": "user", "content": $(echo "$full_prompt" | jq -Rs .)}],
  "max_tokens": 1000,
  "temperature": 0.1
}
EOF

    curl -s -X POST "https://api.openai.com/v1/chat/completions" \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer $OPENAI_API_KEY" \
        -d @"$json_file"

    rm -f "$json_file"
}

# Function to gather relevant context from resources
get_context_for_task() {
    local task="$1"
    local context=""

    # Helper function to safely read files
    read_resource() {
        local file="$RESOURCES_DIR/$1"
        if [ -f "$file" ]; then
            cat "$file"
            echo -e "\n\n---\n\n"
        fi
    }

    # Determine which resource files are most relevant based on task keywords
    if [[ $task == *"publish"* || $task == *"subscribe"* ]]; then
        context+=$(read_resource "how_to_send_receive_json.md")
        context+=$(read_resource "pubnub_concepts.md")
    fi
    if [[ $task == *"presence"* ]]; then
        context+=$(read_resource "how_to_track_moving_objects.md")
    fi
    if [[ $task == *"persistence"* || $task == *"history"* || $task == *"messages"* ]]; then
        context+=$(read_resource "how_to_use_message_persistence.md")
    fi
    if [[ $task == *"access control"* || $task == *"permissions"* ]]; then
        context+=$(read_resource "how_to_implement_access_control.md")
        context+=$(read_resource "pubnub_security.md")
    fi
    if [[ $task == *"Functions"* || $task == *"filter"* ]]; then
        context+=$(read_resource "pubnub_functions.md")
        context+=$(read_resource "how_to_develop_pubnub_functions.md")
    fi
    if [[ $task == *"file"* || $task == *"sharing"* ]]; then
        context+=$(read_resource "how_to_use_file_sharing.md")
    fi
    if [[ $task == *"location"* || $task == *"tracking"* || $task == *"map"* ]]; then
        context+=$(read_resource "how_to_track_moving_objects.md")
    fi
    if [[ $task == *"social mapping"* || $task == *"AppContext"* || $task == *"Files API"* || $task == *"chat window"* ]]; then
        context+=$(read_resource "pubnub_objects.md")
        context+=$(read_resource "how_to_use_file_sharing.md")
        context+=$(read_resource "how_to_send_receive_json.md")
        context+=$(read_resource "how_to_add_chat_room_notifications.md")
        context+=$(read_resource "pubnub_concepts.md")
        context+=$(read_resource "how_to_track_moving_objects.md")
    fi

    # Always include basic concepts if no specific files were found.
    # Note: read_resource runs inside $(...) command substitutions (subshells),
    # so a counter incremented there would never reach this scope; testing the
    # accumulated context directly is reliable.
    if [ -z "$context" ]; then
        context+=$(read_resource "pubnub_concepts.md")
        context+=$(read_resource "how_to_send_receive_json.md")
    fi

    echo "$context"
}

# Function to score both code versions comparatively using OpenAI API
compare_and_score_code() {
    local code_with_context="$1"
    local code_without_context="$2"
    local task="$3"

    # Handle empty or error responses
    if [[ -z "$code_with_context" || "$code_with_context" == "ERROR:"* ]]; then
        echo "0:0"
        return
    fi
    if [[ -z "$code_without_context" || "$code_without_context" == "ERROR:"* ]]; then
        echo "0:0"
        return
    fi

    local scoring_prompt="You are a code quality evaluator comparing two implementations of the same task.

Please score each implementation on a scale of 1-100 based on these criteria:
1. **Correctness** (30 points): Does the code correctly accomplish the given task?
2. **PubNub Implementation** (40 points): Is PubNub properly used with correct methods and patterns?
3. **Code Quality** (20 points): Is the code well-structured, readable, and consistent with best practices?
4. **Error Handling** (10 points): Does the code include appropriate error handling?

**Note**: Implementation A received context information, while Implementation B did not. Deduct significant points from Implementation B for any missing features, incorrect API usage, or omissions that context would have resolved, and reward Implementation A for effectively leveraging the provided context.

**Task**: ${task}

**Implementation A (With Context)**:
\`\`\`
${code_with_context}
\`\`\`

**Implementation B (Without Context)**:
\`\`\`
${code_without_context}
\`\`\`

Compare these implementations and provide scores. Pay special attention to:
- Effective use of context (penalize omissions in B, reward improvements in A)
- Correct PubNub API usage and best practices
- Completeness and functionality
- Robust error handling

Respond with ONLY two numbers separated by a colon (A_score:B_score), for example: 85:65
No explanation, just the scores."

    local json_file="$TEMP_DIR/comparison_$$.json"
    cat > "$json_file" << EOF
{
  "model": "$MODEL",
  "messages": [{"role": "user", "content": $(echo "$scoring_prompt" | jq -Rs .)}],
  "max_tokens": 20,
  "temperature": 0.1
}
EOF

    local response=$(curl -s -X POST "https://api.openai.com/v1/chat/completions" \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer $OPENAI_API_KEY" \
        -d @"$json_file")

    rm -f "$json_file"

    # Extract scores and validate
    if echo "$response" | jq -e '.error' > /dev/null 2>&1; then
        echo "0:0"  # Return 0:0 if API call failed
    else
        local scores=$(echo "$response" | jq -r '.choices[0].message.content // "0:0"' | tr -d ' \n\r')
        # Validate format is number:number
        if [[ "$scores" =~ ^[0-9]+:[0-9]+$ ]]; then
            local score_a=$(echo "$scores" | cut -d':' -f1)
            local score_b=$(echo "$scores" | cut -d':' -f2)
            # Validate both scores are between 1-100
            if [[ "$score_a" -ge 1 ]] && [[ "$score_a" -le 100 ]] && [[ "$score_b" -ge 1 ]] && [[ "$score_b" -le 100 ]]; then
                echo "$scores"
            else
                echo "1:1"  # Default if invalid scores
            fi
        else
            echo "1:1"  # Default if invalid format
        fi
    fi
}

# Bundle a precomputed request duration with the token usage reported in an
# API response as a small JSON object. (Currently unused: the main loop
# records these fields directly.)
measure_performance() {
    local response="$1"
    local duration="$2"

    # Extract token usage from response
    local prompt_tokens=$(echo "$response" | jq -r '.usage.prompt_tokens // 0')
    local completion_tokens=$(echo "$response" | jq -r '.usage.completion_tokens // 0')
    local total_tokens=$(echo "$response" | jq -r '.usage.total_tokens // 0')

    echo "{\"duration\": $duration, \"prompt_tokens\": $prompt_tokens, \"completion_tokens\": $completion_tokens, \"total_tokens\": $total_tokens}"
}

# Initialize results JSON
echo "{" > "$RESULTS_FILE"
echo "  \"model\": \"$MODEL\"," >> "$RESULTS_FILE"
echo "  \"timestamp\": \"$(date -Iseconds)\"," >> "$RESULTS_FILE"
echo "  \"tasks\": [" >> "$RESULTS_FILE"

task_count=0
total_with_context_score=0
total_without_context_score=0
total_with_context_time=0
total_without_context_time=0
total_with_context_tokens=0
total_without_context_tokens=0

# Run tests for each task
for task in "${TASKS[@]}"; do
    echo "Testing task $((task_count + 1))/${#TASKS[@]}: ${task:0:50}..."

    if [ $task_count -gt 0 ]; then
        echo "," >> "$RESULTS_FILE"
    fi

    echo "    {" >> "$RESULTS_FILE"
    echo "      \"task\": \"$task\"," >> "$RESULTS_FILE"

    # Test WITH context
    echo "  Testing with context..."
    context=$(get_context_for_task "$task")
    context_length=${#context}
    echo "  Context length: $context_length characters"

    start_time=$(date +%s.%N)
    response_with_context=$(call_openai_api "$task" "$context")
    end_time=$(date +%s.%N)
    with_context_time=$(fmt_num "$(echo "$end_time - $start_time" | bc -l)")

    # Check for API errors
    if echo "$response_with_context" | jq -e '.error' > /dev/null 2>&1; then
        echo "  API Error: $(echo "$response_with_context" | jq -r '.error.message')"
        code_with_context="ERROR: API call failed"
        tokens_with_context=0
    else
        code_with_context=$(echo "$response_with_context" | jq -r '.choices[0].message.content // ""')
        tokens_with_context=$(echo "$response_with_context" | jq -r '.usage.total_tokens // 0')
    fi

    # Test WITHOUT context
    echo "  Testing without context..."
    start_time=$(date +%s.%N)
    response_without_context=$(call_openai_api "$task" "")
    end_time=$(date +%s.%N)
    without_context_time=$(fmt_num "$(echo "$end_time - $start_time" | bc -l)")

    # Check for API errors
    if echo "$response_without_context" | jq -e '.error' > /dev/null 2>&1; then
        echo "  API Error: $(echo "$response_without_context" | jq -r '.error.message')"
        code_without_context="ERROR: API call failed"
        tokens_without_context=0
    else
        code_without_context=$(echo "$response_without_context" | jq -r '.choices[0].message.content // ""')
        tokens_without_context=$(echo "$response_without_context" | jq -r '.usage.total_tokens // 0')
    fi

    # Comparative scoring using OpenAI API; a failed call on either side
    # invalidates the comparison, so both scores are zeroed.
    echo "  Performing comparative scoring..."
    if [[ "$code_with_context" != "ERROR:"* && "$code_without_context" != "ERROR:"* ]]; then
        scores=$(compare_and_score_code "$code_with_context" "$code_without_context" "$task")
        score_with_context=$(echo "$scores" | cut -d':' -f1)
        score_without_context=$(echo "$scores" | cut -d':' -f2)
    else
        score_with_context=0
        score_without_context=0
    fi

    # Write results for this task
    echo "      \"with_context\": {" >> "$RESULTS_FILE"
    echo "        \"score\": $score_with_context," >> "$RESULTS_FILE"
    echo "        \"response_time\": $with_context_time," >> "$RESULTS_FILE"
    echo "        \"tokens_used\": $tokens_with_context," >> "$RESULTS_FILE"
    echo "        \"context_length\": ${#context}" >> "$RESULTS_FILE"
    echo "      }," >> "$RESULTS_FILE"
    echo "      \"without_context\": {" >> "$RESULTS_FILE"
    echo "        \"score\": $score_without_context," >> "$RESULTS_FILE"
    echo "        \"response_time\": $without_context_time," >> "$RESULTS_FILE"
    echo "        \"tokens_used\": $tokens_without_context" >> "$RESULTS_FILE"
    echo "      }" >> "$RESULTS_FILE"
    echo "    }" >> "$RESULTS_FILE"

    # Update totals
    total_with_context_score=$((total_with_context_score + score_with_context))
    total_without_context_score=$((total_without_context_score + score_without_context))
    total_with_context_time=$(echo "$total_with_context_time + $with_context_time" | bc -l)
    total_without_context_time=$(echo "$total_without_context_time + $without_context_time" | bc -l)
    total_with_context_tokens=$((total_with_context_tokens + tokens_with_context))
    total_without_context_tokens=$((total_without_context_tokens + tokens_without_context))

    echo "  With context: Score=$score_with_context, Time=${with_context_time}s, Tokens=$tokens_with_context"
    echo "  Without context: Score=$score_without_context, Time=${without_context_time}s, Tokens=$tokens_without_context"
    echo ""

    task_count=$((task_count + 1))

    # Rate limiting - wait 2 seconds between tasks (each task makes 3 API
    # calls: with context, without context, comparative scoring)
    sleep 2
done

# Calculate averages
avg_with_context_score=$(fmt_num "$(echo "scale=2; $total_with_context_score / $task_count" | bc -l)")
avg_without_context_score=$(fmt_num "$(echo "scale=2; $total_without_context_score / $task_count" | bc -l)")
avg_with_context_time=$(fmt_num "$(echo "scale=3; $total_with_context_time / $task_count" | bc -l)")
avg_without_context_time=$(fmt_num "$(echo "scale=3; $total_without_context_time / $task_count" | bc -l)")
avg_with_context_tokens=$(fmt_num "$(echo "scale=2; $total_with_context_tokens / $task_count" | bc -l)")
avg_without_context_tokens=$(fmt_num "$(echo "scale=2; $total_without_context_tokens / $task_count" | bc -l)")

# Calculate improvement percentages
score_improvement=$(fmt_num "$(echo "scale=2; (($avg_with_context_score - $avg_without_context_score) / $avg_without_context_score) * 100" | bc -l)")
time_difference=$(fmt_num "$(echo "scale=2; (($avg_with_context_time - $avg_without_context_time) / $avg_without_context_time) * 100" | bc -l)")
token_difference=$(fmt_num "$(echo "scale=2; (($avg_with_context_tokens - $avg_without_context_tokens) / $avg_without_context_tokens) * 100" | bc -l)")

# Write summary
echo "  ]," >> "$RESULTS_FILE"
echo "  \"summary\": {" >> "$RESULTS_FILE"
echo "    \"total_tasks\": $task_count," >> "$RESULTS_FILE"
echo "    \"averages\": {" >> "$RESULTS_FILE"
echo "      \"with_context\": {" >> "$RESULTS_FILE"
echo "        \"score\": $avg_with_context_score," >> "$RESULTS_FILE"
echo "        \"response_time\": $avg_with_context_time," >> "$RESULTS_FILE"
echo "        \"tokens_used\": $avg_with_context_tokens" >> "$RESULTS_FILE"
echo "      }," >> "$RESULTS_FILE"
echo "      \"without_context\": {" >> "$RESULTS_FILE"
echo "        \"score\": $avg_without_context_score," >> "$RESULTS_FILE"
echo "        \"response_time\": $avg_without_context_time," >> "$RESULTS_FILE"
echo "        \"tokens_used\": $avg_without_context_tokens" >> "$RESULTS_FILE"
echo "      }" >> "$RESULTS_FILE"
echo "    }," >> "$RESULTS_FILE"
echo "    \"improvements\": {" >> "$RESULTS_FILE"
echo "      \"score_improvement_percent\": $score_improvement," >> "$RESULTS_FILE"
echo "      \"time_difference_percent\": $time_difference," >> "$RESULTS_FILE"
echo "      \"token_usage_difference_percent\": $token_difference" >> "$RESULTS_FILE"
echo "    }" >> "$RESULTS_FILE"
echo "  }" >> "$RESULTS_FILE"
echo "}" >> "$RESULTS_FILE"

# Cleanup
rm -rf "$TEMP_DIR"

# Display final results
echo "=================================================="
echo "BENCHMARK RESULTS SUMMARY"
echo "=================================================="
echo "Total tasks tested: $task_count"
echo "Scoring method: OpenAI $MODEL comparative evaluation (1-100)"
echo ""
echo "AVERAGE SCORES (1-100):"
echo "  With context:    $avg_with_context_score"
echo "  Without context: $avg_without_context_score"
echo "  Improvement:     $score_improvement%"
echo ""
echo "AVERAGE RESPONSE TIME (seconds):"
echo "  With context:    $avg_with_context_time"
echo "  Without context: $avg_without_context_time"
echo "  Difference:      $time_difference%"
echo ""
echo "AVERAGE TOKEN USAGE:"
echo "  With context:    $avg_with_context_tokens"
echo "  Without context: $avg_without_context_tokens"
echo "  Difference:      $token_difference%"
echo ""
echo "Detailed results saved to: $RESULTS_FILE"
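
A minimal end-to-end run looks like the sketch below (assuming curl, jq, and bc are installed, the resources/ directory is present, and the script is run from the repository root):

export OPENAI_API_KEY="sk-..."   # your OpenAI key
./benchmark_mcp.sh

# Headline improvement percentages from the summary block
jq '.summary.improvements' benchmark_results.json

# Per-task with/without-context scores, one line per task
jq -r '.tasks[] | "\(.task[0:60])...  with=\(.with_context.score)  without=\(.without_context.score)"' benchmark_results.json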

MCP directory API

We provide all the information about MCP servers via our MCP directory API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/pubnub/pubnub-mcp-server'
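
For example, the response can be pretty-printed with jq (this assumes the endpoint returns JSON; the exact response fields are not documented here):

curl -s 'https://glama.ai/api/mcp/v1/servers/pubnub/pubnub-mcp-server' | jq .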

If you have feedback or need assistance with the MCP directory API, please join our Discord server.