# High-Performance Nginx Caching Proxy for MCP Gateway
# Optimized for static assets, API responses, and CDN-like behavior
# Tuned for 10,000 concurrent users on 64 vCPU system
user nginx;
worker_processes auto; # one worker per CPU core (64 on the target host)
worker_rlimit_nofile 131072; # raise per-worker FD limit; each proxied request can hold 2 FDs
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
# Pin each worker process to a CPU automatically — reduces scheduler
# migration and cross-core cache thrashing on large multi-core hosts.
worker_cpu_affinity auto;
events {
worker_connections 32768; # per-worker cap; total capacity = workers x 32768
use epoll; # scalable Linux event notification
multi_accept on; # drain the accept queue fully on each wakeup
# Accept as many connections as possible after getting notification
accept_mutex off; # let the kernel distribute accepts (works with reuseport below)
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
# Logging format with cache status and upstream timing breakdown
# (request_time, upstream connect/header/response times for latency triage)
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'cache_status=$upstream_cache_status '
'rt=$request_time uct="$upstream_connect_time" '
'uht="$upstream_header_time" urt="$upstream_response_time"';
# ============================================================
# Access Logging - DISABLE FOR LOAD TESTING (major I/O bottleneck)
# ============================================================
# Uncomment for debugging, comment out for max performance
# access_log /var/log/nginx/access.log main;
access_log off;
# ============================================================
# Core Performance Tuning
# ============================================================
sendfile on; # zero-copy file transmission for cached/static responses
tcp_nopush on; # Combine headers with data (reduces packets)
tcp_nodelay on; # Disable Nagle's algorithm (reduces latency)
keepalive_timeout 65;
keepalive_requests 10000; # Requests per keepalive connection (default 100)
types_hash_max_size 2048;
client_max_body_size 100M;
client_body_buffer_size 128k; # bodies up to 128k stay in memory, larger spill to temp files
reset_timedout_connection on; # Free memory from timed-out connections
# Hide nginx version
server_tokens off;
# ============================================================
# Open File Cache - Reduces file descriptor overhead
# ============================================================
# Cache file descriptors, sizes, and modification times
open_file_cache max=10000 inactive=60s;
open_file_cache_valid 30s; # Revalidate cached info every 30s
open_file_cache_min_uses 2; # Cache after 2 accesses
open_file_cache_errors on; # Cache lookup errors too
# ============================================================
# Proxy Buffer Tuning - Optimize memory for 10,000 users
# ============================================================
# Larger buffers reduce disk I/O for upstream responses
proxy_buffer_size 16k; # First response buffer (headers)
proxy_buffers 8 32k; # Number and size of buffers per connection
proxy_busy_buffers_size 64k; # Max size while sending to client
proxy_temp_file_write_size 64k; # Chunk size for temp files
# ============================================================
# Gzip Compression
# ============================================================
gzip on;
gzip_vary on; # emit Vary: Accept-Encoding so caches keep both variants
gzip_proxied any; # compress responses to proxied requests too
gzip_comp_level 6; # 1-9, 6 is good balance for production
gzip_types text/plain text/css text/xml text/javascript
application/json application/javascript application/xml+rss
application/rss+xml font/truetype font/opentype
application/vnd.ms-fontobject image/svg+xml;
gzip_min_length 256; # skip tiny payloads where gzip overhead outweighs savings
# ============================================================
# Rate Limiting - Tuned for 10,000+ concurrent users (64 vCPU system)
# ============================================================
#
# PROTECTION STRATEGY:
# - Nginx caps raised to 50000 (headroom for 10000 users)
# - Backend (PgBouncer/PostgreSQL) tuned for 10000+ connections
# - Excess requests get 429 Too Many Requests
#
# HOW NGINX RATE LIMITING WORKS:
# - rate=50000r/s means 50000 tokens added per second (leaky bucket algorithm)
# - burst=50000 is the bucket size (max requests that can queue/proceed)
# - nodelay means burst requests are processed immediately, not queued
#
# EFFECTIVE LIMIT:
# - Sustained: 50000 requests/second continuously
# - Burst: Up to 50000 additional requests can proceed instantly
# - Peak: 100000 requests in first second, then 50000/s sustained
#
# Zone: 32MB shared memory (~500,000 unique IPs can be tracked; nginx
# stores ~64 bytes of state per key on 64-bit platforms)
# $binary_remote_addr uses 4 bytes (IPv4) vs ~15 bytes for string
limit_req_zone $binary_remote_addr zone=api_limit:32m rate=50000r/s;
# Connection limiting - max concurrent connections per IP.
# The zone only tracks state; the actual cap is set by the per-location
# "limit_conn conn_limit 50000" directives below.
limit_conn_zone $binary_remote_addr zone=conn_limit:32m;
# Return 429 (Too Many Requests) instead of default 503
# 429 is semantically correct for rate limiting per RFC 6585
limit_req_status 429;
limit_conn_status 429;
# Cache zones
# Zone 1: Static assets (CSS, JS, images) - 1GB, 30 day max age
# use_temp_path=off writes cache files directly into the cache dir,
# avoiding an extra copy from the temp path.
proxy_cache_path /var/cache/nginx/static
levels=1:2
keys_zone=static_cache:100m
max_size=1g
inactive=30d
use_temp_path=off;
# Zone 2: API responses - 512MB, 1 hour max age
proxy_cache_path /var/cache/nginx/api
levels=1:2
keys_zone=api_cache:50m
max_size=512m
inactive=1h
use_temp_path=off;
# Zone 3: OpenAPI/Schema responses - 256MB, 24 hour max age
proxy_cache_path /var/cache/nginx/schema
levels=1:2
keys_zone=schema_cache:20m
max_size=256m
inactive=24h
use_temp_path=off;
# Docker DNS resolver.
# NOTE(review): "resolver" only affects names resolved at request time
# (e.g. proxy_pass with variables). The static "server gateway:4444" in
# the upstream block below is resolved ONCE at startup/reload; open-source
# nginx does not re-resolve it every 5s unless the server's "resolve"
# parameter is available (NGINX Plus, or nginx >= 1.27.3). Confirm the
# "re-resolution for load balancing across replicas" intent actually holds.
resolver 127.0.0.11 valid=5s ipv6=off;
# ============================================================
# Upstream Definition - Tuned for 10,000 users on 64 vCPU system
# ============================================================
# IMPORTANT: max_fails must be high enough to survive load spikes
# Without this, nginx marks backends dead after just 3 timeouts
upstream gateway_backend {
# Load balancing: least_conn distributes to backend with fewest active connections
# Fixes imbalance caused by keepalive connections sticking to one backend
least_conn;
server gateway:4444 max_fails=0; # Disable failure tracking (always retry)
# Keepalive pool sizing for 10,000 capacity:
# - Each nginx worker maintains its own pool of idle upstream connections
# - With worker_processes auto on 64 vCPUs there are 64 workers, so the
#   theoretical idle-pool ceiling is 1024 x 64; actual usage is demand-driven
# - Connections beyond the pool fall back to short-lived TCP
# NOTE: keepalive_requests/keepalive_timeout inside "upstream" need nginx >= 1.15.3
keepalive 1024; # Idle connections kept per worker
keepalive_requests 100000; # Requests per connection before recycling
keepalive_timeout 60s; # Connection idle timeout
}
# Cache bypass conditions: $skip_cache is 1 for mutating HTTP methods so
# the per-location proxy_cache_bypass/proxy_no_cache directives skip the
# cache for writes (GET/HEAD fall through to the default 0 = cacheable).
map $request_method $skip_cache {
default 0;
POST 1;
PUT 1;
PATCH 1;
DELETE 1;
}
# NOTE: a "map $request_uri $cache_key" block used to live here but has been
# removed — the $cache_key variable was never referenced anywhere in this
# file; every caching location sets proxy_cache_key inline instead.
server {
# Listen with performance optimizations:
# - backlog=65535: Kernel queue for pending connections (max for 10k users)
# - reuseport: Distribute connections across workers (reduces lock contention)
listen 80 backlog=65535 reuseport;
listen [::]:80 backlog=65535 reuseport;
server_name localhost;
# Security headers.
# WARNING: nginx "add_header" is NOT additive across config levels — any
# location that declares its own add_header discards ALL of these
# server-level headers. They therefore apply only to locations below that
# have no add_header of their own.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Cache status header (for debugging)
add_header X-Cache-Status $upstream_cache_status always;
# ============================================================
# Nginx Status - For Prometheus nginx-exporter metrics
# ============================================================
location /nginx_status {
stub_status on; # exposes active/accepted/handled/requests counters
allow 127.0.0.1;
allow 172.16.0.0/12; # Docker networks
allow 10.0.0.0/8; # Docker networks
deny all; # everything else gets 403
}
# ============================================================
# Health Check - No rate limiting (for monitoring during load tests)
# ============================================================
location = /health {
proxy_pass http://gateway_backend;
proxy_set_header Host $http_host;
proxy_set_header Connection ""; # clear Connection so upstream keepalive is reused
proxy_http_version 1.1; # required for upstream keepalive
# No rate limiting - health checks must always succeed
proxy_connect_timeout 5s; # fail fast so monitors detect outages quickly
proxy_read_timeout 5s;
}
# ============================================================
# Static Assets - Aggressive Caching (30 days)
# ============================================================
location ~* \.(css|js|jpg|jpeg|png|gif|ico|svg|woff|woff2|ttf|eot|otf|webp|avif)$ {
proxy_pass http://gateway_backend;
proxy_cache static_cache;
proxy_cache_valid 200 30d;
proxy_cache_valid 404 10m;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on; # only one request populates a missing entry
proxy_cache_revalidate on; # conditional GET (If-Modified-Since) on refresh
# Bypass cache for mutating methods (see $skip_cache map)
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Cache control headers.
# FIX: the previous "expires 30d" + add_header Cache-Control combination
# emitted TWO Cache-Control headers (expires injects its own max-age);
# everything is folded into a single explicit header instead.
add_header Cache-Control "public, immutable, max-age=2592000" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header, so the security headers must be repeated.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
}
# ============================================================
# OpenAPI/Schema - Moderate Caching (24 hours)
# ============================================================
location ~ ^/(openapi\.json|docs|redoc)$ {
proxy_pass http://gateway_backend;
proxy_cache schema_cache;
proxy_cache_valid 200 24h;
proxy_cache_valid 404 1h;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on; # only one request populates a missing entry
proxy_cache_revalidate on; # conditional GET on refresh
# Cache key includes query string for filtering
proxy_cache_key "$scheme$request_method$host$request_uri";
# Bypass cache for mutating methods (see $skip_cache map)
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Cache control headers.
# FIX: the previous "expires 24h" + add_header Cache-Control combination
# emitted TWO Cache-Control headers; folded into a single explicit header.
add_header Cache-Control "public, must-revalidate, max-age=86400" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header, so the security headers must be repeated.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
}
# ============================================================
# Read-Only API Endpoints - Short Caching (5 minutes)
# ============================================================
location ~ ^/(tools|servers|gateways|resources|prompts|tags|a2a|health|version|metrics)$ {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
proxy_cache api_cache;
proxy_cache_valid 200 5m;
proxy_cache_valid 404 1m;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on;
proxy_cache_revalidate on;
# Only cache GET requests
proxy_cache_methods GET HEAD;
proxy_cache_key "$scheme$request_method$host$request_uri$is_args$args";
# Bypass cache for mutations
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Upstream retry for transient failures
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Cache control headers.
# FIX: the previous "expires 5m" + add_header Cache-Control combination
# emitted TWO Cache-Control headers; folded into a single explicit header.
add_header Cache-Control "public, must-revalidate, max-age=300" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header, so the security headers must be repeated.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
# Timeouts
proxy_connect_timeout 30s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# ============================================================
# Admin UI - Short-TTL Caching with Multi-Tenant Isolation
# ============================================================
location = /admin/events {
# SSE stream: disable caching/buffering and allow long-lived connections
proxy_pass http://gateway_backend;
proxy_cache off;
proxy_buffering off; # flush each event to the client immediately
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection ""; # clear Connection for upstream keepalive
proxy_http_version 1.1;
proxy_connect_timeout 30s;
proxy_send_timeout 3600s; # allow hour-long event streams
proxy_read_timeout 3600s;
}
# Admin pages are CPU-intensive (Jinja2 template rendering) and can take
# 5+ seconds under load. Short caching dramatically reduces server load
# while keeping data reasonably fresh for admin use cases.
#
# SECURITY: Cache key includes all auth credentials to prevent data leakage
# between users. See issue #1946 for full security analysis.
location /admin {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
# Short-TTL caching for admin pages (reduces CPU load from template rendering)
proxy_cache api_cache;
proxy_cache_valid 200 5s;
proxy_cache_valid 404 1s;
proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
proxy_cache_background_update on;
proxy_cache_lock on;
proxy_cache_revalidate on;
# CRITICAL: Ignore Set-Cookie header for caching decisions
# Backend sends Set-Cookie with JWT refresh on every request, which prevents caching.
# This is safe because: (1) 5s TTL is short, (2) admin pages are read-only GETs,
# (3) JWT cookie refresh is not session-critical for cached responses.
proxy_ignore_headers Set-Cookie;
# Only cache GET/HEAD requests
proxy_cache_methods GET HEAD;
# SECURITY: Include ALL auth credentials in cache key for multi-tenant isolation
# Each user gets their own cached copy of admin pages (prevents data leakage)
# - $http_authorization: For API clients using Bearer tokens
# - $cookie_jwt_token: Primary browser session cookie
# - $cookie_access_token: Alternative auth cookie (also accepted by RBAC middleware)
proxy_cache_key "$scheme$request_method$host$request_uri$is_args$args$http_authorization$cookie_jwt_token$cookie_access_token";
# Bypass cache for mutations (POST, PUT, DELETE handled by $skip_cache)
proxy_cache_bypass $skip_cache;
proxy_no_cache $skip_cache;
# Upstream retry for transient failures
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Browser cache: private prevents shared caches (CDNs, proxies) from storing
# user-specific admin pages. max-age=5 allows same-user browser caching.
add_header Cache-Control "private, max-age=5" always;
add_header X-Cache-Status $upstream_cache_status always;
# FIX: add_header is not inherited from the server block once a location
# declares its own add_header — without these repeats, admin pages were
# served with NO security headers at all.
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection "";
proxy_http_version 1.1;
# Timeouts (increased for slow template rendering under load)
proxy_connect_timeout 30s;
proxy_send_timeout 120s;
proxy_read_timeout 120s;
}
# ============================================================
# SSE/WebSocket - No Caching
# ============================================================
location ~ ^/servers/.*/sse$ {
proxy_pass http://gateway_backend;
# SSE-specific headers
proxy_set_header Connection '';
proxy_http_version 1.1;
chunked_transfer_encoding off;
proxy_buffering off; # stream each event as it arrives, no response buffering
proxy_cache off;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Extended timeouts for SSE.
# NOTE(review): per nginx docs proxy_connect_timeout usually cannot exceed
# ~75s (OS connect limit), so "1h" there is effectively capped; the long
# send/read timeouts are what actually keep the stream open.
proxy_connect_timeout 1h;
proxy_send_timeout 1h;
proxy_read_timeout 1h;
}
location ~ ^/servers/.*/ws$ {
proxy_pass http://gateway_backend;
# WebSocket upgrade: HTTP/1.1 plus Upgrade/Connection headers forwarded
# so nginx switches into tunnel mode for the lifetime of the socket.
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_cache off;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Extended timeouts for WebSocket.
# NOTE(review): proxy_connect_timeout is OS-capped around 75s; the long
# send/read timeouts are what keep idle sockets from being closed.
proxy_connect_timeout 1h;
proxy_send_timeout 1h;
proxy_read_timeout 1h;
}
# ============================================================
# JSON-RPC Endpoint - No Caching
# ============================================================
# Exact match: only "/" itself — RPC calls are method-dependent POSTs and
# must never be served from cache.
location = / {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
proxy_cache off;
# Upstream retry for transient failures (reduces 502 errors)
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection ""; # clear Connection for upstream keepalive
proxy_http_version 1.1;
# Timeouts
proxy_connect_timeout 30s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# ============================================================
# All Other Endpoints - No Caching (default)
# ============================================================
# Prefix-match fallback for any URI not captured by the more specific
# locations above; plain uncached pass-through to the gateway.
location / {
# Rate limiting: 50000 r/s sustained, burst=50000 for 10,000 users
# limit_conn=50000 caps concurrent connections per client IP
limit_req zone=api_limit burst=50000 nodelay;
limit_conn conn_limit 50000;
proxy_pass http://gateway_backend;
proxy_cache off;
# Upstream retry for transient failures (reduces 502 errors)
proxy_next_upstream error timeout http_502 http_503 http_504;
proxy_next_upstream_tries 2;
proxy_next_upstream_timeout 10s;
# Proxy headers
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Connection ""; # clear Connection for upstream keepalive
proxy_http_version 1.1;
# Timeouts
proxy_connect_timeout 30s;
proxy_send_timeout 60s;
proxy_read_timeout 60s;
}
# ============================================================
# Cache Purge Endpoint (optional, requires ngx_cache_purge)
# ============================================================
# location ~ /purge(/.*) {
# allow 127.0.0.1;
# allow 172.16.0.0/12; # Docker networks
# deny all;
# proxy_cache_purge static_cache $scheme$request_method$host$1;
# }
}
}