#!/usr/bin/env node
/**
* MCP Datadog Server
*
* Runs as an MCP (Model Context Protocol) server that can be used by Claude Desktop,
* VS Code, and other compatible clients.
*
* This file provides tools for querying Datadog metrics through MCP.
*/
import dotenv from 'dotenv';
import { execFileSync } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';
import axios from 'axios';
// Load environment variables
dotenv.config();
const API_KEY = process.env.DATADOG_API_KEY;
const APP_KEY = process.env.DATADOG_APP_KEY;
const SITE = process.env.DATADOG_SITE || 'datadoghq.com';
if (!API_KEY || !APP_KEY) {
console.error('Error: DATADOG_API_KEY and DATADOG_APP_KEY must be set in .env file');
process.exit(1);
}
// Datadog API client
const datadogClient = axios.create({
baseURL: `https://api.${SITE}`,
headers: {
'DD-API-KEY': API_KEY,
'DD-APPLICATION-KEY': APP_KEY,
'Content-Type': 'application/json'
}
});
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Helper that shells out to this package's "query" npm script
function executeCliQuery(query, from, to) {
try {
// Coerce and validate timestamps: Unix seconds (not milliseconds), and ordered
const fromTs = Number(from);
const toTs = Number(to);
const now = Math.floor(Date.now() / 1000);
if (!Number.isFinite(fromTs) || !Number.isFinite(toTs) || fromTs <= 0 || toTs <= 0 || fromTs > now || toTs > now || fromTs >= toTs) {
throw new Error(`Invalid timestamp range: from=${from}, to=${to}. Must be Unix timestamps in seconds.`);
}
// Pass arguments as an array so the query string is never interpreted by a shell
const output = execFileSync('npm', ['run', 'query', '--', query, String(fromTs), String(toTs)], {
cwd: __dirname,
encoding: 'utf-8'
});
return output;
} catch (error) {
throw new Error(`Query execution failed: ${error.message}`);
}
}
/**
* MCP Server Implementation
*
* This implements the Model Context Protocol server interface.
* It communicates via JSON-RPC over stdio.
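*
* Example exchange (one JSON-RPC message per line over stdio), mirroring the
* initialize handler below:
*   → {"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
*   ← {"jsonrpc":"2.0","id":1,"result":{"protocolVersion":"2024-11-05","capabilities":{"tools":{}},"serverInfo":{"name":"datadog-mcp","version":"1.0.0"}}}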
*/
class MCPServer {
constructor() {
this.requestId = 0;
this.tools = this.defineTools();
}
defineTools() {
return {
query_metrics: {
description: "General purpose query for any Datadog metric. Use this when other specific tools don't match your needs.",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Datadog metric query string (e.g., avg:system.cpu.user{*}, sum:kubernetes.pods.running{kube_namespace:default})"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
}
},
required: ["query", "from", "to"]
}
},
query_kubernetes_metrics: {
description: "Query Kubernetes metrics from Datadog",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Datadog metric query (e.g., avg:kubernetes.cpu.usage.total{kube_namespace:pijarsekolah-prod})"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
}
},
required: ["query", "from", "to"]
}
},
query_deployment_pods: {
description: "Query pod count for a specific Kubernetes deployment",
inputSchema: {
type: "object",
properties: {
namespace: {
type: "string",
description: "Kubernetes namespace (e.g., pijarsekolah-prod)"
},
deployment: {
type: "string",
description: "Deployment name (e.g., pijarsekolah-cbt-exam-api)"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
},
apm_service_name: {
type: "string",
description: "APM service name for response time metric (optional, defaults to deployment name)"
}
},
required: ["namespace", "deployment", "from", "to"]
}
},
query_apm_metrics: {
description: "Query APM metrics for a specific service",
inputSchema: {
type: "object",
properties: {
service: {
type: "string",
description: "Service name"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
metric: {
type: "string",
description: "Metric name (e.g., trace.web.request.duration)"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["service", "from", "to"]
}
},
query_host_metrics: {
description: "Query host/infrastructure metrics",
inputSchema: {
type: "object",
properties: {
hostname: {
type: "string",
description: "Hostname to query"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
metric: {
type: "string",
description: "Metric name (e.g., system.cpu.user)"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["hostname", "from", "to"]
}
},
query_database_metrics: {
description: "Query database monitoring (DBM) metrics",
inputSchema: {
type: "object",
properties: {
db_host: {
type: "string",
description: "Database host name"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
db_type: {
type: "string",
enum: ["postgresql", "mysql"],
description: "Database type"
},
metric: {
type: "string",
description: "Metric name"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["db_host", "from", "to"]
}
},
list_hosts: {
description: "List all monitored hosts",
inputSchema: {
type: "object",
properties: {
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
}
},
required: ["from", "to"]
}
},
list_databases: {
description: "List all monitored databases",
inputSchema: {
type: "object",
properties: {
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
db_type: {
type: "string",
enum: ["postgresql", "mysql"],
description: "Database type"
}
},
required: ["from", "to"]
}
},
list_all_hosts: {
description: "List all monitored hosts with complete system info (hostname, CPU, memory, disk, OS, kernel, applications, etc)",
inputSchema: {
type: "object",
properties: {},
required: []
}
},
get_host_detail: {
description: "Get detailed information for a specific host by hostname",
inputSchema: {
type: "object",
properties: {
hostname: {
type: "string",
description: "Hostname to get details for"
}
},
required: ["hostname"]
}
},
get_namespace_metrics: {
description: "Get total resource usage and limits for a Kubernetes namespace",
inputSchema: {
type: "object",
properties: {
namespace: {
type: "string",
description: "Kubernetes namespace (e.g., pijarsekolah-prod)"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
}
},
required: ["namespace", "from", "to"]
}
}
};
}
/**
* Handle MCP requests
*/
async handleRequest(request) {
// Destructure outside the try block so `id` is still in scope in the catch
const { method, params, id } = request || {};
try {
// JSON-RPC notifications (e.g. "notifications/initialized") carry no id
// and must not be answered with a response
if (id === undefined) {
return;
}
if (method === "initialize") {
return this.sendResponse(id, {
protocolVersion: "2024-11-05",
capabilities: {
tools: {}
},
serverInfo: {
name: "datadog-mcp",
version: "1.0.0"
}
});
}
if (method === "tools/list") {
return this.sendResponse(id, {
tools: Object.entries(this.tools).map(([name, tool]) => ({
name,
description: tool.description,
inputSchema: tool.inputSchema
}))
});
}
if (method === "tools/call") {
const { name, arguments: args } = params;
const result = await this.executeToolCall(name, args);
return this.sendResponse(id, result);
}
// -32601 is the JSON-RPC "method not found" error code
return this.sendError(id, `Unknown method: ${method}`, -32601);
} catch (error) {
return this.sendError(id, error.message);
}
}
/**
* Execute tool calls
*/
async executeToolCall(toolName, args) {
try {
switch (toolName) {
case 'query_metrics':
return await this.queryMetrics(args);
case 'query_kubernetes_metrics':
return await this.queryKubernetesMetrics(args);
case 'query_deployment_pods':
return await this.queryDeploymentPods(args);
case 'query_apm_metrics':
return await this.queryApmMetrics(args);
case 'query_host_metrics':
return await this.queryHostMetrics(args);
case 'query_database_metrics':
return await this.queryDatabaseMetrics(args);
case 'list_hosts':
return await this.listHosts(args);
case 'list_databases':
return await this.listDatabases(args);
case 'list_all_hosts':
return await this.listAllHosts(args);
case 'get_host_detail':
return await this.getHostDetail(args);
case 'get_namespace_metrics':
return await this.getNamespaceMetrics(args);
default:
throw new Error(`Unknown tool: ${toolName}`);
}
} catch (error) {
return {
content: [
{
type: "text",
text: `Error executing tool: ${error.message}`
}
],
isError: true
};
}
}
async queryMetrics(args) {
// Validate timestamps (Number.isFinite also rejects the NaN that parseInt
// returns for non-numeric input)
const from = parseInt(args.from, 10);
const to = parseInt(args.to, 10);
const now = Math.floor(Date.now() / 1000);
if (!Number.isFinite(from) || !Number.isFinite(to) || from > now || to > now || from <= 0 || to <= 0) {
return {
content: [
{
type: "text",
text: `Error: Invalid timestamp. from=${from}, to=${to}. Current time=${now}. Make sure to use Unix timestamps in seconds (not milliseconds).`
}
],
isError: true
};
}
const query = args.query;
const output = executeCliQuery(query, from, to);
return {
content: [
{
type: "text",
text: output
}
]
};
}
async queryKubernetesMetrics(args) {
// Validate timestamps (Number.isFinite also rejects the NaN that parseInt
// returns for non-numeric input)
const from = parseInt(args.from, 10);
const to = parseInt(args.to, 10);
const now = Math.floor(Date.now() / 1000);
if (!Number.isFinite(from) || !Number.isFinite(to) || from > now || to > now || from <= 0 || to <= 0) {
return {
content: [
{
type: "text",
text: `Error: Invalid timestamp. from=${from}, to=${to}. Current time=${now}. Make sure to use Unix timestamps in seconds (not milliseconds).`
}
],
isError: true
};
}
let query = args.query;
// Add cluster filter if provided
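// e.g. "avg:kubernetes.cpu.usage.total{kube_namespace:prod}" becomes
// "avg:kubernetes.cpu.usage.total{kube_cluster_name:my-cluster,kube_namespace:prod}"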
if (args.cluster) {
// Check if query already has filters
if (query.includes('{')) {
// Insert cluster filter into existing filters
query = query.replace('{', `{kube_cluster_name:${args.cluster},`);
} else {
// Add cluster filter as new filter
query = query + `{kube_cluster_name:${args.cluster}}`;
}
}
const output = executeCliQuery(query, from, to);
return {
content: [
{
type: "text",
text: output
}
]
};
}
async queryDeploymentPods(args) {
const namespace = args.namespace;
const deployment = args.deployment;
const cluster = args.cluster || null;
const apmServiceName = args.apm_service_name || null;
const from = args.from;
const to = args.to;
// Build filter string with cluster if provided
const clusterFilter = cluster ? `, kube_cluster_name:${cluster}` : '';
// Query for pod count
const podQuery = `sum:kubernetes.pods.running{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for CPU usage
const cpuQuery = `avg:kubernetes.cpu.usage.total{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for Memory usage
const memoryQuery = `avg:kubernetes.memory.usage{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for CPU limit
const cpuLimitQuery = `avg:kubernetes.cpu.limits{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for Memory limit
const memoryLimitQuery = `avg:kubernetes.memory.limits{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for network traffic in (received)
const networkInQuery = `sum:kubernetes.network.rx_bytes{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for network traffic out (transmitted)
const networkOutQuery = `sum:kubernetes.network.tx_bytes{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for response time (from APM/trace data)
// Map namespace suffix to correct env name
let envName = 'production'; // Default
if (namespace.includes('-prod')) {
envName = 'production';
} else if (namespace.includes('-stage')) {
envName = 'staging';
} else if (namespace.includes('-dev')) {
envName = 'dev';
}
// Use apm_service_name if provided, otherwise use deployment name
const serviceName = apmServiceName || deployment;
// Use trace.http.request metric as per Datadog example
const responseTimeMetric = 'trace.http.request';
let result = {
cluster: cluster,
namespace: namespace,
deployment: deployment,
pods: 'N/A',
cpu_usage: 'N/A',
cpu_limit: 'N/A',
memory_usage: 'N/A',
memory_limit: 'N/A',
network_in: 'N/A',
network_out: 'N/A',
response_time: 'N/A'
};
// Run each query independently so one failure doesn't mask the others
const safeQuery = (q) => {
try {
return executeCliQuery(q, from, to).trim();
} catch (e) {
return `Error: ${e.message}`;
}
};
result.pods = safeQuery(podQuery);
result.cpu_usage = safeQuery(cpuQuery);
result.cpu_limit = safeQuery(cpuLimitQuery);
result.memory_usage = safeQuery(memoryQuery);
result.memory_limit = safeQuery(memoryLimitQuery);
result.network_in = safeQuery(networkInQuery);
result.network_out = safeQuery(networkOutQuery);
// Get Response Time - use exact query format from Datadog
try {
const query = `max:${responseTimeMetric}{service:${serviceName}, env:${envName}${cluster ? `, kube_cluster_name:${cluster}` : ''}}`;
const responseTimeOutput = executeCliQuery(query, from, to);
const trimmed = responseTimeOutput.trim();
// Check if we got data
if (trimmed && !trimmed.includes('No data found')) {
result.response_time = trimmed;
} else {
result.response_time = 'No data found';
}
} catch (e) {
result.response_time = `Error: ${e.message}`;
}
const output = JSON.stringify(result, null, 2);
return {
content: [
{
type: "text",
text: output
}
]
};
}
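// Handler for the query_apm_metrics tool declared in defineTools(). A minimal
// sketch: the default metric name (trace.http.request) mirrors the one used in
// queryDeploymentPods and is an assumption about how APM metrics are named in
// this account.
async queryApmMetrics(args) {
const aggregator = args.aggregator || 'avg';
const metric = args.metric || 'trace.http.request';
const query = `${aggregator}:${metric}{service:${args.service}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}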
async queryHostMetrics(args) {
// The schema documents `metric` without an aggregator prefix, so apply the
// declared aggregator here instead of ignoring it
const aggregator = args.aggregator || 'avg';
const metric = args.metric || 'system.cpu.user';
const hostname = args.hostname || '*';
const query = `${aggregator}:${metric}{host:${hostname}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async queryDatabaseMetrics(args) {
const dbType = args.db_type || 'postgresql';
// The schema documents `metric` without an aggregator prefix, so apply the
// declared aggregator here instead of ignoring it
const aggregator = args.aggregator || 'avg';
const metric = args.metric || `${dbType}.queries.active`;
const dbHost = args.db_host || '*';
const query = `${aggregator}:${metric}{dbhost:${dbHost}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async listHosts(args) {
const query = `avg:system.cpu.user{*} by {host}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async listDatabases(args) {
const dbType = args.db_type || 'postgresql';
const query = `avg:${dbType}.queries.active{*} by {dbhost}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async listAllHosts(args) {
// The hosts endpoint is paginated; request up to the documented maximum of
// 1000 hosts per call rather than just the first page
const response = await datadogClient.get('/api/v1/hosts', { params: { count: 1000 } });
const hosts = response.data.host_list || [];
// Format host info with basic details
const hostInfo = hosts.map(host => ({
hostname: host.name,
status: host.up ? 'up' : 'down',
last_reported: host.last_reported_time ? new Date(host.last_reported_time * 1000).toISOString() : 'unknown',
os: host.meta?.platform || 'unknown',
processor: host.meta?.processor || 'unknown',
kernel: host.meta?.kernel_release || 'unknown',
agent_version: host.meta?.agent_version || 'unknown',
applications: (host.apps || []).slice(0, 8).join(', '),
tags: (host.tags || []).join(', ') || 'none',
cpu_cores: host.meta?.cpuCores || 'unknown',
cpu_host: host.meta?.processor || 'unknown',
total_memory: host.meta?.memory?.total ? (parseInt(host.meta.memory.total.replace(/[^0-9]/g, ''), 10) / 1024).toFixed(2) + ' GB' : 'unknown',
memory_host: host.meta?.memory?.total || 'unknown',
// Compare against null so a legitimate 0 value is not reported as N/A
cpu_usage: host.metrics?.cpu != null ? host.metrics.cpu.toFixed(2) + '%' : 'N/A',
// The hosts API reports iowait (not memory usage), so label it as such
iowait: host.metrics?.iowait != null ? host.metrics.iowait.toFixed(2) + '%' : 'N/A',
host_id: host.meta?.host_id || 'unknown'
}));
// Sort by hostname
hostInfo.sort((a, b) => a.hostname.localeCompare(b.hostname));
const output = JSON.stringify(hostInfo, null, 2);
return {
content: [{ type: "text", text: output }]
};
}
async getHostDetail(args) {
const response = await datadogClient.get('/api/v1/hosts', { params: { count: 1000 } });
const hosts = response.data.host_list || [];
// Find the specific host (the case-insensitive match also covers exact matches)
const host = hosts.find(h => h.name.toLowerCase() === args.hostname.toLowerCase());
if (!host) {
return {
content: [{ type: "text", text: `Host '${args.hostname}' not found` }],
isError: true
};
}
// Format detailed host info
const hostDetail = {
hostname: host.name,
status: host.up ? 'up' : 'down',
last_reported: host.last_reported_time ? new Date(host.last_reported_time * 1000).toISOString() : 'unknown',
os: host.meta?.platform || 'unknown',
processor: host.meta?.processor || 'unknown',
kernel: host.meta?.kernel_release || 'unknown',
agent_version: host.meta?.agent_version || 'unknown',
applications: (host.apps || []).join(', ') || 'none',
tags: (host.tags || []).join(', ') || 'none',
cpu_cores: host.meta?.cpuCores || 'unknown',
cpu_host: host.meta?.processor || 'unknown',
total_memory: host.meta?.memory?.total ? (parseInt(host.meta.memory.total.replace(/[^0-9]/g, ''), 10) / 1024).toFixed(2) + ' GB' : 'unknown',
memory_host: host.meta?.memory?.total || 'unknown',
// Compare against null so a legitimate 0 value is not reported as N/A
cpu_usage: host.metrics?.cpu != null ? host.metrics.cpu.toFixed(2) + '%' : 'N/A',
// The hosts API reports iowait (not memory usage), so label it as such
iowait: host.metrics?.iowait != null ? host.metrics.iowait.toFixed(2) + '%' : 'N/A',
host_id: host.meta?.host_id || 'unknown',
// Additional details
socket_fqdn: host.meta?.socket_fqdn || 'unknown',
socket_hostname: host.meta?.socket_hostname || 'unknown',
gohai: host.meta?.gohai || 'unknown'
};
const output = JSON.stringify(hostDetail, null, 2);
return {
content: [{ type: "text", text: output }]
};
}
async getNamespaceMetrics(args) {
const namespace = args.namespace;
const cluster = args.cluster || null;
const from = args.from;
const to = args.to;
// Build filter string with cluster if provided
const clusterFilter = cluster ? `, kube_cluster_name:${cluster}` : '';
// Query for total pods in namespace
const podQuery = `sum:kubernetes.pods.running{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total CPU usage in namespace
const cpuUsageQuery = `sum:kubernetes.cpu.usage.total{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total CPU limits in namespace
const cpuLimitQuery = `sum:kubernetes.cpu.limits{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total Memory usage in namespace
const memoryUsageQuery = `sum:kubernetes.memory.usage{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total Memory limits in namespace
const memoryLimitQuery = `sum:kubernetes.memory.limits{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total network traffic in (received)
const networkInQuery = `sum:kubernetes.network.rx_bytes{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total network traffic out (transmitted)
const networkOutQuery = `sum:kubernetes.network.tx_bytes{kube_namespace:${namespace}${clusterFilter}}`;
let result = {
cluster: cluster,
namespace: namespace,
total_pods: 'N/A',
cpu_usage_total: 'N/A',
cpu_limit_total: 'N/A',
memory_usage_total: 'N/A',
memory_limit_total: 'N/A',
network_in_total: 'N/A',
network_out_total: 'N/A'
};
// Run each query independently so one failure doesn't mask the others
const safeQuery = (q) => {
try {
return executeCliQuery(q, from, to).trim();
} catch (e) {
return `Error: ${e.message}`;
}
};
result.total_pods = safeQuery(podQuery);
result.cpu_usage_total = safeQuery(cpuUsageQuery);
result.cpu_limit_total = safeQuery(cpuLimitQuery);
result.memory_usage_total = safeQuery(memoryUsageQuery);
result.memory_limit_total = safeQuery(memoryLimitQuery);
result.network_in_total = safeQuery(networkInQuery);
result.network_out_total = safeQuery(networkOutQuery);
const output = JSON.stringify(result, null, 2);
return {
content: [
{
type: "text",
text: output
}
]
};
}
formatMetricsOutput(data, label = '') {
// This method is deprecated - metrics are now formatted by CLI
return 'Metrics formatted by CLI';
}
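// Note: in a stdio MCP server, stdout is reserved for JSON-RPC traffic, so
// responses go through console.log while diagnostics use console.error.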
sendResponse(id, result) {
const response = {
jsonrpc: "2.0",
id,
result
};
console.log(JSON.stringify(response));
}
sendError(id, message, code = -32603) {
const response = {
jsonrpc: "2.0",
id,
error: {
code,
message
}
};
console.log(JSON.stringify(response));
}
}
/**
* Main MCP Server Loop
*/
async function main() {
const server = new MCPServer();
// Read requests from stdin
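// Messages are newline-delimited JSON: one complete JSON-RPC request per line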
let buffer = '';
process.stdin.on('data', async (chunk) => {
buffer += chunk.toString();
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim()) {
try {
const request = JSON.parse(line);
await server.handleRequest(request);
} catch (error) {
console.error(`Error processing request: ${error.message}`);
}
}
}
});
process.stdin.on('end', () => {
process.exit(0);
});
}
main().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});
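/**
* Example client registration (a sketch; the path and key values below are
* placeholders). For Claude Desktop, add an entry to claude_desktop_config.json:
*
* {
*   "mcpServers": {
*     "datadog": {
*       "command": "node",
*       "args": ["/absolute/path/to/mcp-server.js"],
*       "env": {
*         "DATADOG_API_KEY": "...",
*         "DATADOG_APP_KEY": "...",
*         "DATADOG_SITE": "datadoghq.com"
*       }
*     }
*   }
* }
*/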