# High-Performance Nginx Caching Proxy for MCP Gateway
# Optimized for static assets, API responses, and CDN-like behavior
# Tuned for 10,000 concurrent users on 64 vCPU system
user nginx;
worker_processes auto; # one worker per CPU core (64 on the target host)
worker_rlimit_nofile 131072; # raise per-worker FD limit; each proxied request can hold 2 FDs
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
# Pin each worker process to a CPU automatically — reduces scheduler
# migration and cross-core cache thrashing on large multi-core hosts.
worker_cpu_affinity auto;
events {
worker_connections 32768; # per-worker cap; total capacity = workers x 32768
use epoll; # scalable Linux event notification
multi_accept on; # drain the accept queue fully on each wakeup
# Accept as many connections as possible after getting notification
accept_mutex off; # let the kernel distribute accepts (works with reuseport below)
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
# Logging format with cache status and upstream timing breakdown
# (request_time, upstream connect/header/response times for latency triage)
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'cache_status=$upstream_cache_status '
'rt=$request_time uct="$upstream_connect_time" '
'uht="$upstream_header_time" urt="$upstream_response_time"';
# ============================================================
# Access Logging - DISABLE FOR LOAD TESTING (major I/O bottleneck)
# ============================================================
# Uncomment for debugging, comment out for max performance
# access_log /var/log/nginx/access.log main;
access_log off;
# ============================================================
# Core Performance Tuning
# ============================================================
sendfile on; # zero-copy file transmission for cached/static responses
tcp_nopush on; # Combine headers with data (reduces packets)
tcp_nodelay on; # Disable Nagle's algorithm (reduces latency)
keepalive_timeout 65;
keepalive_requests 10000; # Requests per keepalive connection (default 100)
types_hash_max_size 2048;
client_max_body_size 100M;
client_body_buffer_size 128k; # bodies up to 128k stay in memory, larger spill to temp files
reset_timedout_connection on; # Free memory from timed-out connections
# Hide nginx version
server_tokens off;
# ============================================================
# Open File Cache - Reduces file descriptor overhead
# ============================================================
# Cache file descriptors, sizes, and modification times
open_file_cache max=10000 inactive=60s;
open_file_cache_valid 30s; # Revalidate cached info every 30s
open_file_cache_min_uses 2; # Cache after 2 accesses
open_file_cache_errors on; # Cache lookup errors too
# ============================================================
# Proxy Buffer Tuning - Optimize memory for 10,000 users
# ============================================================
# Larger buffers reduce disk I/O for upstream responses
proxy_buffer_size 16k; # First response buffer (headers)
proxy_buffers 8 32k; # Number and size of buffers per connection
proxy_busy_buffers_size 64k; # Max size while sending to client
proxy_temp_file_write_size 64k; # Chunk size for temp files
# ============================================================
# Gzip Compression
# ============================================================
gzip on;
gzip_vary on; # emit Vary: Accept-Encoding so caches keep both variants
gzip_proxied any; # compress responses to proxied requests too
gzip_comp_level 6; # 1-9, 6 is good balance for production
gzip_types text/plain text/css text/xml text/javascript
application/json application/javascript application/xml+rss
application/rss+xml font/truetype font/opentype
application/vnd.ms-fontobject image/svg+xml;
gzip_min_length 256; # skip tiny payloads where gzip overhead outweighs savings
# ============================================================
# Rate Limiting - Tuned for 10,000+ concurrent users (64 vCPU system)
# ============================================================
#
# PROTECTION STRATEGY:
# - Nginx caps raised to 50000 (headroom for 10000 users)
# - Backend (PgBouncer/PostgreSQL) tuned for 10000+ connections
# - Excess requests get 429 Too Many Requests
#
# HOW NGINX RATE LIMITING WORKS:
# - rate=50000r/s means 50000 tokens added per second (leaky bucket algorithm)
# - burst=50000 is the bucket size (max requests that can queue/proceed)
# - nodelay means burst requests are processed immediately, not queued
#
# EFFECTIVE LIMIT:
# - Sustained: 50000 requests/second continuously
# - Burst: Up to 50000 additional requests can proceed instantly
# - Peak: 100000 requests in first second, then 50000/s sustained
#
# Zone: 32MB shared memory (~500,000 unique IPs can be tracked; nginx
# stores ~64 bytes of state per key on 64-bit platforms)
# $binary_remote_addr uses 4 bytes (IPv4) vs ~15 bytes for string
limit_req_zone $binary_remote_addr zone=api_limit:32m rate=50000r/s;
# Connection limiting - max concurrent connections per IP.
# The zone only tracks state; the actual cap is set by the per-location
# "limit_conn conn_limit 50000" directives below.
limit_conn_zone $binary_remote_addr zone=conn_limit:32m;
# Return 429 (Too Many Requests) instead of default 503
# 429 is semantically correct for rate limiting per RFC 6585
limit_req_status 429;
limit_conn_status 429;
# Cache zones
# Zone 1: Static assets (CSS, JS, images) - 1GB, 30 day max age
# use_temp_path=off writes cache files directly into the cache dir,
# avoiding an extra copy from the temp path.
proxy_cache_path /var/cache/nginx/static
levels=1:2
keys_zone=static_cache:100m
max_size=1g
inactive=30d
use_temp_path=off;
# Zone 2: API responses - 512MB, 1 hour max age
proxy_cache_path /var/cache/nginx/api
levels=1:2
keys_zone=api_cache:50m
max_size=512m
inactive=1h
use_temp_path=off;
# Zone 3: OpenAPI/Schema responses - 256MB, 24 hour max age
proxy_cache_path /var/cache/nginx/schema
levels=1:2
keys_zone=schema_cache:20m
max_size=256m
inactive=24h
use_temp_path=off;
# Docker DNS resolver.
# NOTE(review): "resolver" only affects names resolved at request time
# (e.g. proxy_pass with variables). The static "server gateway:4444" in
# the upstream block below is resolved ONCE at startup/reload; open-source
# nginx does not re-resolve it every 5s unless the server's "resolve"
# parameter is available (NGINX Plus, or nginx >= 1.27.3). Confirm the
# "re-resolution for load balancing across replicas" intent actually holds.
resolver 127.0.0.11 valid=5s ipv6=off;
# ============================================================
# Upstream Definition - Tuned for 10,000 users on 64 vCPU system
# ============================================================
# IMPORTANT: max_fails must be high enough to survive load spikes
# Without this, nginx marks backends dead after just 3 timeouts
upstream gateway_backend {
# Load balancing: least_conn distributes to backend with fewest active connections
# Fixes imbalance caused by keepalive connections sticking to one backend
least_conn;
server gateway:4444 max_fails=0; # Disable failure tracking (always retry)
# Keepalive pool sizing for 10,000 capacity:
# - Each nginx worker maintains its own pool of idle upstream connections
# - With worker_processes auto on 64 vCPUs there are 64 workers, so the
#   theoretical idle-pool ceiling is 1024 x 64; actual usage is demand-driven
# - Connections beyond the pool fall back to short-lived TCP
# NOTE: keepalive_requests/keepalive_timeout inside "upstream" need nginx >= 1.15.3
keepalive 1024; # Idle connections kept per worker
keepalive_requests 100000; # Requests per connection before recycling
keepalive_timeout 60s; # Connection idle timeout
}
# Cache bypass conditions: $skip_cache is 1 for mutating HTTP methods so
# the per-location proxy_cache_bypass/proxy_no_cache directives skip the
# cache for writes (GET/HEAD fall through to the default 0 = cacheable).
map $request_method $skip_cache {
default 0;
POST 1;
PUT 1;
PATCH 1;
DELETE 1;
}
# NOTE: a "map $request_uri $cache_key" block used to live here but has been
# removed — the $cache_key variable was never referenced anywhere in this
# file; every caching location sets proxy_cache_key inline instead.
server {
# Listen with performance optimizations:
# - backlog=65535: Kernel queue for pending connections (max for 10k users)
# - reuseport: Distribute connections across workers (reduces lock contention)
listen 80 backlog=65535 reuseport;
listen [::]:80 backlog=65535 reuseport;
server_name localhost;
# Security headers.
# WARNING: nginx "add_header" is NOT additive across config levels — any
# location that declares its own add_header discards ALL of these
# server-level headers. They therefore apply only to locations below that
# have no add_header of their own.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Cache status header (for debugging)
add_header X-Cache-Status $upstream_cache_status always;
# ============================================================
# Nginx Status - For Prometheus nginx-exporter metrics
# ============================================================
location /nginx_status {
stub_status on; # exposes active/accepted/handled/requests counters
allow 127.0.0.1;
allow 172.16.0.0/12; # Docker networks
allow 10.0.0.0/8; # Docker networks
deny all; # everything else gets 403
}
# ============================================================
# Health Check - No rate limiting (for monitoring during load tests)
# ============================================================
location = /health {
proxy_pass http://gateway_backend;
proxy_set_header Host $http_host;
proxy_set_header Connection ""; # clear Connection so upstream keepalive is reused
proxy_http_version 1.1; # required for upstream keepalive
# No rate limiting - health checks must always succeed
proxy_connect_timeout 5s; # fail fast so monitors detect outages quickly
proxy_read_timeout 5s;
}
# ============================================================
# Static Assets - Aggressive Caching (30 days)
# ============================================================
location ~* \.(css|js|jpg|jpeg|png|gif|ico|svg|woff|woff2|ttf|eot|otf|webp|avif)$ {
proxy_pass http://gateway_backend;
proxy_cache static_cache;
proxy_cache_valid 200 30d;
proxy_cache_valid 404 10m;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on; # only one request populates a missing entry
proxy_cache_revalidate on; # conditional GET (If-Modified-Since) on refresh
# Bypass cache for mutating methods (see $skip_cache map)
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Cache control headers.
# FIX: the previous "expires 30d" + add_header Cache-Control combination
# emitted TWO Cache-Control headers (expires injects its own max-age);
# everything is folded into a single explicit header instead.
add_header Cache-Control "public, immutable, max-age=2592000" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header, so the security headers must be repeated.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
}
# ============================================================
# OpenAPI/Schema - Moderate Caching (24 hours)
# ============================================================
location ~ ^/(openapi\.json|docs|redoc)$ {
proxy_pass http://gateway_backend;
proxy_cache schema_cache;
proxy_cache_valid 200 24h;
proxy_cache_valid 404 1h;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on; # only one request populates a missing entry
proxy_cache_revalidate on; # conditional GET on refresh
# Cache key includes query string for filtering
proxy_cache_key "$scheme$request_method$host$request_uri";
# Bypass cache for mutating methods (see $skip_cache map)
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Cache control headers.
# FIX: the previous "expires 24h" + add_header Cache-Control combination
# emitted TWO Cache-Control headers; folded into a single explicit header.
add_header Cache-Control "public, must-revalidate, max-age=86400" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header, so the security headers must be repeated.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
}
# ============================================================
# Read-Only API Endpoints - Short Caching (5 minutes)
# ============================================================
location ~ ^/(tools|servers|gateways|resources|prompts|tags|a2a|health|version|metrics)$ {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
proxy_cache api_cache;
proxy_cache_valid 200 5m;
proxy_cache_valid 404 1m;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on;
proxy_cache_revalidate on;
# Only cache GET requests
proxy_cache_methods GET HEAD;
proxy_cache_key "$scheme$request_method$host$request_uri$is_args$args";
# Bypass cache for mutations
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Upstream retry for transient failures
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Cache control headers.
# FIX: the previous "expires 5m" + add_header Cache-Control combination
# emitted TWO Cache-Control headers; folded into a single explicit header.
add_header Cache-Control "public, must-revalidate, max-age=300" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header, so the security headers must be repeated.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
# Timeouts
proxy_connect_timeout 30s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# ============================================================
# Admin UI - Short-TTL Caching with Multi-Tenant Isolation
# ============================================================
location = /admin/events {
# SSE stream: disable caching/buffering and allow long-lived connections
proxy_pass http://gateway_backend;
proxy_cache off;
proxy_buffering off; # flush each event to the client immediately
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection ""; # clear Connection for upstream keepalive
proxy_http_version 1.1;
proxy_connect_timeout 30s;
proxy_send_timeout 3600s; # allow hour-long event streams
proxy_read_timeout 3600s;
}
# Admin pages are CPU-intensive (Jinja2 template rendering) and can take
# 5+ seconds under load. Short caching dramatically reduces server load
# while keeping data reasonably fresh for admin use cases.
#
# SECURITY: Cache key includes all auth credentials to prevent data leakage
# between users. See issue #1946 for full security analysis.
location /admin {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
# Short-TTL caching for admin pages (reduces CPU load from template rendering)
proxy_cache api_cache;
proxy_cache_valid 200 5s;
proxy_cache_valid 404 1s;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on;
proxy_cache_revalidate on;
# CRITICAL: Ignore Set-Cookie header for caching decisions
# Backend sends Set-Cookie with JWT refresh on every request, which prevents caching.
# This is safe because: (1) 5s TTL is short, (2) admin pages are read-only GETs,
# (3) JWT cookie refresh is not session-critical for cached responses.
proxy_ignore_headers Set-Cookie;
# Only cache GET/HEAD requests
proxy_cache_methods GET HEAD;
# SECURITY: Include ALL auth credentials in cache key for multi-tenant isolation
# Each user gets their own cached copy of admin pages (prevents data leakage)
# - $http_authorization: For API clients using Bearer tokens
# - $cookie_jwt_token: Primary browser session cookie
# - $cookie_access_token: Alternative auth cookie (also accepted by RBAC middleware)
proxy_cache_key "$scheme$request_method$host$request_uri$is_args$args$http_authorization$cookie_jwt_token$cookie_access_token";
# Bypass cache for mutations (POST, PUT, DELETE handled by $skip_cache)
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Upstream retry for transient failures
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Browser cache: private prevents shared caches (CDNs, proxies) from storing
# user-specific admin pages. max-age=5 allows same-user browser caching.
add_header Cache-Control "private, max-age=5" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header — without these repeats, admin pages were
# served with NO security headers at all.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
# Timeouts (increased for slow template rendering under load)
proxy_connect_timeout 30s;
proxy_send_timeout 120s;
proxy_read_timeout 120s;
}
# ============================================================
# SSE/WebSocket - No Caching
# ============================================================
location ~ ^/servers/.*/sse$ {
proxy_pass http://gateway_backend;
# SSE-specific headers
proxy_set_header Connection '';
proxy_http_version 1.1;
chunked_transfer_encoding off;
proxy_buffering off; # stream each event as it arrives, no response buffering
proxy_cache off;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Extended timeouts for SSE.
# NOTE(review): per nginx docs proxy_connect_timeout usually cannot exceed
# ~75s (OS connect limit), so "1h" there is effectively capped; the long
# send/read timeouts are what actually keep the stream open.
proxy_connect_timeout 1h;
proxy_send_timeout 1h;
proxy_read_timeout 1h;
}
location ~ ^/servers/.*/ws$ {
proxy_pass http://gateway_backend;
# WebSocket upgrade: HTTP/1.1 plus Upgrade/Connection headers forwarded
# so nginx switches into tunnel mode for the lifetime of the socket.
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_cache off;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Extended timeouts for WebSocket.
# NOTE(review): proxy_connect_timeout is OS-capped around 75s; the long
# send/read timeouts are what keep idle sockets from being closed.
proxy_connect_timeout 1h;
proxy_send_timeout 1h;
proxy_read_timeout 1h;
}
# ============================================================
# JSON-RPC Endpoint - No Caching
# ============================================================
# Exact match: only "/" itself — RPC calls are method-dependent POSTs and
# must never be served from cache.
location = / {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
proxy_cache off;
# Upstream retry for transient failures (reduces 502 errors)
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection ""; # clear Connection for upstream keepalive
proxy_http_version 1.1;
# Timeouts
proxy_connect_timeout 30s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# ============================================================
# All Other Endpoints - No Caching (default)
# ============================================================
# Prefix-match fallback for any URI not captured by the more specific
# locations above; plain uncached pass-through to the gateway.
location / {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
proxy_cache off;
# Upstream retry for transient failures (reduces 502 errors)
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection ""; # clear Connection for upstream keepalive
proxy_http_version 1.1;
# Timeouts
proxy_connect_timeout 30s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# ============================================================
# Cache Purge Endpoint (optional, requires ngx_cache_purge)
# ============================================================
# location ~ /purge(/.*) {
# allow 127.0.0.1;
# allow 172.16.0.0/12; # Docker networks
# deny all;
# proxy_cache_purge static_cache $scheme$request_method$host$1;
# }
}
}