Skip to main content
Glama
cbcoutinho

Nextcloud MCP Server

by cbcoutinho
metrics.py (14.7 kB)
"""
Prometheus metrics for the Nextcloud MCP Server.

This module defines all Prometheus metrics for monitoring server health,
performance, and resource usage. Metrics are organized by category:

- HTTP Server Metrics (RED: Rate, Errors, Duration)
- MCP Tool Metrics (per-tool invocation tracking)
- MCP Resource Metrics
- Nextcloud API Client Metrics
- OAuth Flow Metrics
- Vector Sync Metrics (conditional on feature flag)
- Database Operation Metrics
- External Dependency Health Metrics

All metric objects are created at import time and registered with the default
``prometheus_client`` registry. The ``record_*`` / ``set_*`` / ``update_*``
helpers below are thin wrappers that keep label names in one place.
"""

import errno
import functools
import logging
import time

from prometheus_client import (
    Counter,
    Gauge,
    Histogram,
    start_http_server,
)

logger = logging.getLogger(__name__)

# =============================================================================
# HTTP Server Metrics (RED + System)
# =============================================================================

http_requests_total = Counter(
    "mcp_http_requests_total",
    "Total HTTP requests received",
    ["method", "endpoint", "status_code"],
)

http_request_duration_seconds = Histogram(
    "mcp_http_request_duration_seconds",
    "HTTP request latency in seconds",
    ["method", "endpoint"],
    buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
)

http_requests_in_progress = Gauge(
    "mcp_http_requests_in_progress",
    "Number of HTTP requests currently being processed",
    ["method", "endpoint"],
)

# =============================================================================
# MCP Tool Metrics
# =============================================================================

mcp_tool_calls_total = Counter(
    "mcp_tool_calls_total",
    "Total MCP tool invocations",
    ["tool_name", "status"],  # status: success | error
)

mcp_tool_duration_seconds = Histogram(
    "mcp_tool_duration_seconds",
    "MCP tool execution duration in seconds",
    ["tool_name"],
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0),
)

mcp_tool_errors_total = Counter(
    "mcp_tool_errors_total",
    "Total MCP tool errors by type",
    ["tool_name", "error_type"],
)

# =============================================================================
# MCP Resource Metrics
# =============================================================================

mcp_resource_requests_total = Counter(
    "mcp_resource_requests_total",
    "Total MCP resource requests",
    ["resource_uri", "status"],
)

mcp_resource_duration_seconds = Histogram(
    "mcp_resource_duration_seconds",
    "MCP resource request duration in seconds",
    ["resource_uri"],
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5),
)

# =============================================================================
# Nextcloud API Client Metrics
# =============================================================================

nextcloud_api_requests_total = Counter(
    "mcp_nextcloud_api_requests_total",
    "Total Nextcloud API requests",
    ["app", "method", "status_code"],  # app: notes, calendar, contacts, etc.
)

nextcloud_api_duration_seconds = Histogram(
    "mcp_nextcloud_api_duration_seconds",
    "Nextcloud API request duration in seconds",
    ["app", "method"],
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0),
)

nextcloud_api_retries_total = Counter(
    "mcp_nextcloud_api_retries_total",
    "Total Nextcloud API retries",
    ["app", "reason"],  # reason: 429 | timeout | connection_error
)

# =============================================================================
# OAuth Flow Metrics
# =============================================================================

oauth_token_validations_total = Counter(
    "mcp_oauth_token_validations_total",
    "Total OAuth token validation attempts",
    ["method", "result"],  # method: introspect | jwt; result: valid | invalid | error
)

oauth_token_exchange_total = Counter(
    "mcp_oauth_token_exchange_total",
    "Total OAuth token exchange operations (RFC 8693)",
    ["status"],  # status: success | error
)

oauth_token_cache_hits_total = Counter(
    "mcp_oauth_token_cache_hits_total",
    "Total OAuth token cache lookups",
    ["hit"],  # hit: true | false
)

oauth_refresh_token_operations_total = Counter(
    "mcp_oauth_refresh_token_operations_total",
    "Total refresh token storage operations",
    [
        "operation",
        "status",
    ],  # operation: store | retrieve | delete; status: success | error
)

# =============================================================================
# Vector Sync Metrics (optional feature)
# =============================================================================

vector_sync_documents_scanned_total = Counter(
    "mcp_vector_sync_documents_scanned_total",
    "Total documents scanned for vector sync",
)

vector_sync_documents_processed_total = Counter(
    "mcp_vector_sync_documents_processed_total",
    "Total documents processed for vector sync",
    ["status"],  # status: success | error
)

vector_sync_processing_duration_seconds = Histogram(
    "mcp_vector_sync_processing_duration_seconds",
    "Document processing duration in seconds",
    buckets=(0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0),
)

vector_sync_queue_size = Gauge(
    "mcp_vector_sync_queue_size",
    "Current number of documents in processing queue",
)

qdrant_operations_total = Counter(
    "mcp_qdrant_operations_total",
    "Total Qdrant vector database operations",
    [
        "operation",
        "status",
    ],  # operation: upsert | search | delete; status: success | error
)

# =============================================================================
# Database Metrics
# =============================================================================

db_operations_total = Counter(
    "mcp_db_operations_total",
    "Total database operations",
    ["db", "operation", "status"],  # db: sqlite | qdrant; operation varies
)

db_operation_duration_seconds = Histogram(
    "mcp_db_operation_duration_seconds",
    "Database operation duration in seconds",
    ["db", "operation"],
    buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0),
)

# =============================================================================
# External Dependency Health Metrics
# =============================================================================

dependency_health = Gauge(
    "mcp_dependency_health",
    "External dependency health status (1=up, 0=down)",
    ["dependency"],  # dependency: nextcloud | keycloak | qdrant | unstructured
)

dependency_check_duration_seconds = Histogram(
    "mcp_dependency_check_duration_seconds",
    "Dependency health check duration in seconds",
    ["dependency"],
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5),
)


# =============================================================================
# Metrics Setup and HTTP Handler
# =============================================================================


def setup_metrics(port: int = 9090) -> None:
    """
    Initialize Prometheus metrics collection and start HTTP server.

    Starts a dedicated HTTP server on the specified port to serve metrics.
    This server runs in a separate thread and is isolated from the main
    application.

    Args:
        port: Port to serve metrics on (default: 9090)

    Raises:
        OSError: If the server cannot be started for any reason other than
            the port already being bound. A port conflict is logged as a
            warning and swallowed.

    Note:
        Metrics endpoint (/metrics) is ONLY accessible on this dedicated port,
        not on the main application HTTP port. This is a security best practice
        to prevent external exposure of metrics.
    """
    try:
        start_http_server(port)
    except OSError as e:
        # Compare the errno rather than only the message: the message text is
        # platform-dependent. The substring test is kept as a fallback for
        # OSError instances raised without an errno.
        port_in_use = (
            e.errno == errno.EADDRINUSE or "Address already in use" in str(e)
        )
        if not port_in_use:
            logger.error("Failed to start metrics server on port %s: %s", port, e)
            raise
        logger.warning(
            "Metrics port %s already in use (metrics server likely already running)",
            port,
        )
    else:
        logger.info("Prometheus metrics server started on port %s", port)


# =============================================================================
# Convenience Functions for Common Metric Updates
# =============================================================================


def record_tool_call(tool_name: str, duration: float, status: str = "success") -> None:
    """
    Record metrics for an MCP tool call.

    Increments the call counter and observes the duration histogram.

    Args:
        tool_name: Name of the MCP tool
        duration: Execution duration in seconds
        status: "success" or "error"
    """
    mcp_tool_calls_total.labels(tool_name=tool_name, status=status).inc()
    mcp_tool_duration_seconds.labels(tool_name=tool_name).observe(duration)


def record_tool_error(tool_name: str, error_type: str) -> None:
    """
    Record an MCP tool error.

    Args:
        tool_name: Name of the MCP tool
        error_type: Type of error (e.g., "HTTPStatusError", "ValueError")
    """
    mcp_tool_errors_total.labels(tool_name=tool_name, error_type=error_type).inc()


def record_nextcloud_api_call(
    app: str,
    method: str,
    status_code: int,
    duration: float,
) -> None:
    """
    Record metrics for a Nextcloud API call.

    Args:
        app: Nextcloud app name (notes, calendar, contacts, etc.)
        method: HTTP method (GET, POST, PUT, DELETE, PROPFIND, etc.)
        status_code: HTTP status code (stringified before use as a label)
        duration: Request duration in seconds
    """
    nextcloud_api_requests_total.labels(
        app=app, method=method, status_code=str(status_code)
    ).inc()
    nextcloud_api_duration_seconds.labels(app=app, method=method).observe(duration)


def record_nextcloud_api_retry(app: str, reason: str) -> None:
    """
    Record a Nextcloud API retry.

    Args:
        app: Nextcloud app name
        reason: Retry reason (429, timeout, connection_error)
    """
    nextcloud_api_retries_total.labels(app=app, reason=reason).inc()


def record_oauth_token_validation(method: str, result: str) -> None:
    """
    Record an OAuth token validation.

    Args:
        method: Validation method ("introspect" or "jwt")
        result: Validation result ("valid", "invalid", or "error")
    """
    oauth_token_validations_total.labels(method=method, result=result).inc()


def record_db_operation(
    db: str, operation: str, duration: float, status: str = "success"
) -> None:
    """
    Record a database operation.

    Args:
        db: Database type ("sqlite" or "qdrant")
        operation: Operation type (e.g., "insert", "select", "upsert", "search")
        duration: Operation duration in seconds
        status: "success" or "error"
    """
    db_operations_total.labels(db=db, operation=operation, status=status).inc()
    db_operation_duration_seconds.labels(db=db, operation=operation).observe(duration)


def set_dependency_health(dependency: str, is_healthy: bool) -> None:
    """
    Update external dependency health status.

    Args:
        dependency: Dependency name (nextcloud, keycloak, qdrant, unstructured)
        is_healthy: True if dependency is healthy, False otherwise
    """
    dependency_health.labels(dependency=dependency).set(1 if is_healthy else 0)


def record_dependency_check(dependency: str, duration: float) -> None:
    """
    Record a dependency health check duration.

    Args:
        dependency: Dependency name
        duration: Check duration in seconds
    """
    dependency_check_duration_seconds.labels(dependency=dependency).observe(duration)


def record_vector_sync_scan(documents_found: int) -> None:
    """
    Record documents scanned during vector sync.

    Args:
        documents_found: Number of documents discovered in scan; added to the
            running counter in a single increment
    """
    vector_sync_documents_scanned_total.inc(documents_found)


def record_vector_sync_processing(duration: float, status: str = "success") -> None:
    """
    Record document processing with duration and status.

    Args:
        duration: Processing duration in seconds
        status: "success" or "error"
    """
    vector_sync_documents_processed_total.labels(status=status).inc()
    vector_sync_processing_duration_seconds.observe(duration)


def record_qdrant_operation(operation: str, status: str = "success") -> None:
    """
    Record Qdrant vector database operation.

    Args:
        operation: Operation type ("upsert", "search", "delete")
        status: "success" or "error"
    """
    qdrant_operations_total.labels(operation=operation, status=status).inc()


def update_vector_sync_queue_size(size: int) -> None:
    """
    Update vector sync queue size gauge.

    Args:
        size: Current queue size
    """
    vector_sync_queue_size.set(size)


# =============================================================================
# Decorator for Automatic Tool Instrumentation
# =============================================================================

# Keyword-argument names that must never be copied into trace attributes.
# Compared case-insensitively so that e.g. "Password" or "API_KEY" is also
# dropped.
_UNTRACED_TOOL_ARG_NAMES = frozenset(
    {"password", "token", "secret", "api_key", "etag", "ctx"}
)


def instrument_tool(func):
    """
    Decorator to automatically instrument MCP tool functions with metrics and tracing.

    Wraps async tool functions to record execution time, success/error status,
    and create OpenTelemetry trace spans. Compatible with @mcp.tool() and
    @require_scopes() decorators.

    Usage:
        @mcp.tool()
        @require_scopes("notes:write")
        @instrument_tool
        async def nc_notes_create_note(...):
            ...

    Args:
        func: The async function to instrument

    Returns:
        Wrapped function with metrics and tracing instrumentation
    """
    # Imported at decoration time rather than at module import, as in the
    # original implementation.
    from nextcloud_mcp_server.observability.tracing import trace_operation

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        tool_name = func.__name__
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system clock is adjusted.
        start_time = time.perf_counter()

        # Extract tool arguments for tracing (sanitize sensitive fields).
        # Only keyword arguments are captured; positional ones are not traced.
        tool_args = {
            key: value
            for key, value in kwargs.items()
            if key.lower() not in _UNTRACED_TOOL_ARG_NAMES
        }

        # Create trace span with metrics collection
        with trace_operation(
            f"mcp.tool.{tool_name}",
            attributes={
                "mcp.tool.name": tool_name,
                # Limit to 500 chars
                "mcp.tool.args": str(tool_args)[:500] if tool_args else None,
            },
            record_exception=True,
        ):
            try:
                result = await func(*args, **kwargs)
            except Exception as e:
                # BaseException subclasses (e.g. asyncio.CancelledError) are
                # intentionally not counted here and propagate untouched.
                record_tool_call(tool_name, time.perf_counter() - start_time, "error")
                record_tool_error(tool_name, type(e).__name__)
                raise

            # Recorded outside the try so that a failure while recording the
            # success metric is not misreported as a tool error.
            record_tool_call(tool_name, time.perf_counter() - start_time, "success")
            return result

    return wrapper

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cbcoutinho/nextcloud-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.