#!/usr/bin/env node
/**
* MCP Datadog Server
*
* Runs as an MCP (Model Context Protocol) server that can be used by Claude Desktop,
* VS Code, and other compatible clients.
*
* This file provides tools for querying Datadog metrics through MCP.
*/
import dotenv from 'dotenv';
import { execFileSync } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';
import axios from 'axios';
// Load environment variables
dotenv.config();
const API_KEY = process.env.DATADOG_API_KEY;
const APP_KEY = process.env.DATADOG_APP_KEY;
const SITE = process.env.DATADOG_SITE || 'datadoghq.com';
if (!API_KEY || !APP_KEY) {
console.error('Error: DATADOG_API_KEY and DATADOG_APP_KEY must be set in .env file');
process.exit(1);
}
// Datadog API client
const datadogClient = axios.create({
baseURL: `https://api.${SITE}`,
headers: {
'DD-API-KEY': API_KEY,
'DD-APPLICATION-KEY': APP_KEY,
'Content-Type': 'application/json'
}
});
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Helper that shells out to this package's "query" npm script
function executeCliQuery(query, from, to) {
try {
// Coerce and validate timestamps: Unix seconds (not milliseconds), and ordered
const fromTs = Number(from);
const toTs = Number(to);
const now = Math.floor(Date.now() / 1000);
if (!Number.isFinite(fromTs) || !Number.isFinite(toTs) || fromTs <= 0 || toTs <= 0 || fromTs > now || toTs > now || fromTs >= toTs) {
throw new Error(`Invalid timestamp range: from=${from}, to=${to}. Must be Unix timestamps in seconds.`);
}
// Pass arguments as an array so the query string is never interpreted by a shell
const output = execFileSync('npm', ['run', 'query', '--', query, String(fromTs), String(toTs)], {
cwd: __dirname,
encoding: 'utf-8'
});
return output;
} catch (error) {
throw new Error(`Query execution failed: ${error.message}`);
}
}
/**
* MCP Server Implementation
*
* This implements the Model Context Protocol server interface.
* It communicates via JSON-RPC over stdio.
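*
* Example exchange (one JSON-RPC message per line over stdio), mirroring the
* initialize handler below:
*   → {"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
*   ← {"jsonrpc":"2.0","id":1,"result":{"protocolVersion":"2024-11-05","capabilities":{"tools":{}},"serverInfo":{"name":"datadog-mcp","version":"1.0.0"}}}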
*/
class MCPServer {
constructor() {
this.requestId = 0;
this.tools = this.defineTools();
}
defineTools() {
return {
query_metrics: {
description: "General purpose query for any Datadog metric. Use this when other specific tools don't match your needs.",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Datadog metric query string (e.g., avg:system.cpu.user{*}, sum:kubernetes.pods.running{kube_namespace:default})"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
}
},
required: ["query", "from", "to"]
}
},
query_kubernetes_metrics: {
description: "Query Kubernetes metrics from Datadog",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Datadog metric query (e.g., avg:kubernetes.cpu.usage.total{kube_namespace:pijarsekolah-prod})"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
}
},
required: ["query", "from", "to"]
}
},
query_deployment_pods: {
description: "Query pod count for a specific Kubernetes deployment",
inputSchema: {
type: "object",
properties: {
namespace: {
type: "string",
description: "Kubernetes namespace (e.g., pijarsekolah-prod)"
},
deployment: {
type: "string",
description: "Deployment name (e.g., pijarsekolah-cbt-exam-api)"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
},
apm_service_name: {
type: "string",
description: "APM service name for response time metric (optional, defaults to deployment name)"
}
},
required: ["namespace", "deployment", "from", "to"]
}
},
query_apm_metrics: {
description: "Query APM metrics for a specific service",
inputSchema: {
type: "object",
properties: {
service: {
type: "string",
description: "Service name"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
metric: {
type: "string",
description: "Metric name (e.g., trace.web.request.duration)"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["service", "from", "to"]
}
},
query_host_metrics: {
description: "Query host/infrastructure metrics",
inputSchema: {
type: "object",
properties: {
hostname: {
type: "string",
description: "Hostname to query"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
metric: {
type: "string",
description: "Metric name (e.g., system.cpu.user)"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["hostname", "from", "to"]
}
},
query_database_metrics: {
description: "Query database monitoring (DBM) metrics",
inputSchema: {
type: "object",
properties: {
db_host: {
type: "string",
description: "Database host name"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
db_type: {
type: "string",
enum: ["postgresql", "mysql"],
description: "Database type"
},
metric: {
type: "string",
description: "Metric name"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["db_host", "from", "to"]
}
},
list_hosts: {
description: "List all monitored hosts",
inputSchema: {
type: "object",
properties: {
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
}
},
required: ["from", "to"]
}
},
list_databases: {
description: "List all monitored databases",
inputSchema: {
type: "object",
properties: {
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
db_type: {
type: "string",
enum: ["postgresql", "mysql"],
description: "Database type"
}
},
required: ["from", "to"]
}
},
list_all_hosts: {
description: "List all monitored hosts with complete system info (hostname, CPU, memory, disk, OS, kernel, applications, etc)",
inputSchema: {
type: "object",
properties: {},
required: []
}
},
get_host_detail: {
description: "Get detailed information for a specific host by hostname",
inputSchema: {
type: "object",
properties: {
hostname: {
type: "string",
description: "Hostname to get details for"
}
},
required: ["hostname"]
}
},
get_namespace_metrics: {
description: "Get total resource usage and limits for a Kubernetes namespace",
inputSchema: {
type: "object",
properties: {
namespace: {
type: "string",
description: "Kubernetes namespace (e.g., pijarsekolah-prod)"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
}
},
required: ["namespace", "from", "to"]
}
}
};
}
/**
* Handle MCP requests
*/
async handleRequest(request) {
// Destructure outside the try block so `id` is still in scope in the catch
const { method, params, id } = request || {};
try {
// JSON-RPC notifications (e.g. "notifications/initialized") carry no id
// and must not be answered with a response
if (id === undefined) {
return;
}
if (method === "initialize") {
return this.sendResponse(id, {
protocolVersion: "2024-11-05",
capabilities: {
tools: {}
},
serverInfo: {
name: "datadog-mcp",
version: "1.0.0"
}
});
}
if (method === "tools/list") {
return this.sendResponse(id, {
tools: Object.entries(this.tools).map(([name, tool]) => ({
name,
description: tool.description,
inputSchema: tool.inputSchema
}))
});
}
if (method === "tools/call") {
const { name, arguments: args } = params;
const result = await this.executeToolCall(name, args);
return this.sendResponse(id, result);
}
// -32601 is the JSON-RPC "method not found" error code
return this.sendError(id, `Unknown method: ${method}`, -32601);
} catch (error) {
return this.sendError(id, error.message);
}
}
/**
* Execute tool calls
*/
async executeToolCall(toolName, args) {
try {
switch (toolName) {
case 'query_metrics':
return await this.queryMetrics(args);
case 'query_kubernetes_metrics':
return await this.queryKubernetesMetrics(args);
case 'query_deployment_pods':
return await this.queryDeploymentPods(args);
case 'query_apm_metrics':
return await this.queryApmMetrics(args);
case 'query_host_metrics':
return await this.queryHostMetrics(args);
case 'query_database_metrics':
return await this.queryDatabaseMetrics(args);
case 'list_hosts':
return await this.listHosts(args);
case 'list_databases':
return await this.listDatabases(args);
case 'list_all_hosts':
return await this.listAllHosts(args);
case 'get_host_detail':
return await this.getHostDetail(args);
case 'get_namespace_metrics':
return await this.getNamespaceMetrics(args);
default:
throw new Error(`Unknown tool: ${toolName}`);
}
} catch (error) {
return {
content: [
{
type: "text",
text: `Error executing tool: ${error.message}`
}
],
isError: true
};
}
}
async queryMetrics(args) {
// Validate timestamps (Number.isFinite also rejects the NaN that parseInt
// returns for non-numeric input)
const from = parseInt(args.from, 10);
const to = parseInt(args.to, 10);
const now = Math.floor(Date.now() / 1000);
if (!Number.isFinite(from) || !Number.isFinite(to) || from > now || to > now || from <= 0 || to <= 0) {
return {
content: [
{
type: "text",
text: `Error: Invalid timestamp. from=${from}, to=${to}. Current time=${now}. Make sure to use Unix timestamps in seconds (not milliseconds).`
}
],
isError: true
};
}
const query = args.query;
const output = executeCliQuery(query, from, to);
return {
content: [
{
type: "text",
text: output
}
]
};
}
async queryKubernetesMetrics(args) {
// Validate timestamps (Number.isFinite also rejects the NaN that parseInt
// returns for non-numeric input)
const from = parseInt(args.from, 10);
const to = parseInt(args.to, 10);
const now = Math.floor(Date.now() / 1000);
if (!Number.isFinite(from) || !Number.isFinite(to) || from > now || to > now || from <= 0 || to <= 0) {
return {
content: [
{
type: "text",
text: `Error: Invalid timestamp. from=${from}, to=${to}. Current time=${now}. Make sure to use Unix timestamps in seconds (not milliseconds).`
}
],
isError: true
};
}
let query = args.query;
// Add cluster filter if provided
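// e.g. "avg:kubernetes.cpu.usage.total{kube_namespace:prod}" becomes
// "avg:kubernetes.cpu.usage.total{kube_cluster_name:my-cluster,kube_namespace:prod}"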
if (args.cluster) {
// Check if query already has filters
if (query.includes('{')) {
// Insert cluster filter into existing filters
query = query.replace('{', `{kube_cluster_name:${args.cluster},`);
} else {
// Add cluster filter as new filter
query = query + `{kube_cluster_name:${args.cluster}}`;
}
}
const output = executeCliQuery(query, from, to);
return {
content: [
{
type: "text",
text: output
}
]
};
}
async queryDeploymentPods(args) {
const namespace = args.namespace;
const deployment = args.deployment;
const cluster = args.cluster || null;
const apmServiceName = args.apm_service_name || null;
const from = args.from;
const to = args.to;
// Build filter string with cluster if provided
const clusterFilter = cluster ? `, kube_cluster_name:${cluster}` : '';
// Query for pod count
const podQuery = `sum:kubernetes.pods.running{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for CPU usage
const cpuQuery = `avg:kubernetes.cpu.usage.total{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for Memory usage
const memoryQuery = `avg:kubernetes.memory.usage{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for CPU limit
const cpuLimitQuery = `avg:kubernetes.cpu.limits{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for Memory limit
const memoryLimitQuery = `avg:kubernetes.memory.limits{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for network traffic in (received)
const networkInQuery = `sum:kubernetes.network.rx_bytes{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for network traffic out (transmitted)
const networkOutQuery = `sum:kubernetes.network.tx_bytes{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
// Query for response time (from APM/trace data)
// Map namespace suffix to correct env name
let envName = 'production'; // Default
if (namespace.includes('-prod')) {
envName = 'production';
} else if (namespace.includes('-stage')) {
envName = 'staging';
} else if (namespace.includes('-dev')) {
envName = 'dev';
}
// Use apm_service_name if provided, otherwise use deployment name
const serviceName = apmServiceName || deployment;
// Use trace.http.request metric as per Datadog example
const responseTimeMetric = 'trace.http.request';
let result = {
cluster: cluster,
namespace: namespace,
deployment: deployment,
pods: 'N/A',
cpu_usage: 'N/A',
cpu_limit: 'N/A',
memory_usage: 'N/A',
memory_limit: 'N/A',
network_in: 'N/A',
network_out: 'N/A',
response_time: 'N/A'
};
// Run each query independently so one failure doesn't mask the others
const safeQuery = (q) => {
try {
return executeCliQuery(q, from, to).trim();
} catch (e) {
return `Error: ${e.message}`;
}
};
result.pods = safeQuery(podQuery);
result.cpu_usage = safeQuery(cpuQuery);
result.cpu_limit = safeQuery(cpuLimitQuery);
result.memory_usage = safeQuery(memoryQuery);
result.memory_limit = safeQuery(memoryLimitQuery);
result.network_in = safeQuery(networkInQuery);
result.network_out = safeQuery(networkOutQuery);
// Get Response Time - use exact query format from Datadog
try {
const query = `max:${responseTimeMetric}{service:${serviceName}, env:${envName}${cluster ? `, kube_cluster_name:${cluster}` : ''}}`;
const responseTimeOutput = executeCliQuery(query, from, to);
const trimmed = responseTimeOutput.trim();
// Check if we got data
if (trimmed && !trimmed.includes('No data found')) {
result.response_time = trimmed;
} else {
result.response_time = 'No data found';
}
} catch (e) {
result.response_time = `Error: ${e.message}`;
}
const output = JSON.stringify(result, null, 2);
return {
content: [
{
type: "text",
text: output
}
]
};
}
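// Handler for the query_apm_metrics tool declared in defineTools(). A minimal
// sketch: the default metric name (trace.http.request) mirrors the one used in
// queryDeploymentPods and is an assumption about how APM metrics are named in
// this account.
async queryApmMetrics(args) {
const aggregator = args.aggregator || 'avg';
const metric = args.metric || 'trace.http.request';
const query = `${aggregator}:${metric}{service:${args.service}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}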
async queryHostMetrics(args) {
// The schema documents `metric` without an aggregator prefix, so apply the
// declared aggregator here instead of ignoring it
const aggregator = args.aggregator || 'avg';
const metric = args.metric || 'system.cpu.user';
const hostname = args.hostname || '*';
const query = `${aggregator}:${metric}{host:${hostname}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async queryDatabaseMetrics(args) {
const dbType = args.db_type || 'postgresql';
// The schema documents `metric` without an aggregator prefix, so apply the
// declared aggregator here instead of ignoring it
const aggregator = args.aggregator || 'avg';
const metric = args.metric || `${dbType}.queries.active`;
const dbHost = args.db_host || '*';
const query = `${aggregator}:${metric}{dbhost:${dbHost}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async listHosts(args) {
const query = `avg:system.cpu.user{*} by {host}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async listDatabases(args) {
const dbType = args.db_type || 'postgresql';
const query = `avg:${dbType}.queries.active{*} by {dbhost}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async listAllHosts(args) {
// The hosts endpoint is paginated; request up to the documented maximum of
// 1000 hosts per call rather than just the first page
const response = await datadogClient.get('/api/v1/hosts', { params: { count: 1000 } });
const hosts = response.data.host_list || [];
// Format host info with basic details
const hostInfo = hosts.map(host => ({
hostname: host.name,
status: host.up ? 'up' : 'down',
last_reported: host.last_reported_time ? new Date(host.last_reported_time * 1000).toISOString() : 'unknown',
os: host.meta?.platform || 'unknown',
processor: host.meta?.processor || 'unknown',
kernel: host.meta?.kernel_release || 'unknown',
agent_version: host.meta?.agent_version || 'unknown',
applications: (host.apps || []).slice(0, 8).join(', '),
tags: (host.tags || []).join(', ') || 'none',
cpu_cores: host.meta?.cpuCores || 'unknown',
cpu_host: host.meta?.processor || 'unknown',
total_memory: host.meta?.memory?.total ? (parseInt(host.meta.memory.total.replace(/[^0-9]/g, ''), 10) / 1024).toFixed(2) + ' GB' : 'unknown',
memory_host: host.meta?.memory?.total || 'unknown',
// Compare against null so a legitimate 0 value is not reported as N/A
cpu_usage: host.metrics?.cpu != null ? host.metrics.cpu.toFixed(2) + '%' : 'N/A',
// The hosts API reports iowait (not memory usage), so label it as such
iowait: host.metrics?.iowait != null ? host.metrics.iowait.toFixed(2) + '%' : 'N/A',
host_id: host.meta?.host_id || 'unknown'
}));
// Sort by hostname
hostInfo.sort((a, b) => a.hostname.localeCompare(b.hostname));
const output = JSON.stringify(hostInfo, null, 2);
return {
content: [{ type: "text", text: output }]
};
}
async getHostDetail(args) {
const response = await datadogClient.get('/api/v1/hosts', { params: { count: 1000 } });
const hosts = response.data.host_list || [];
// Find the specific host (the case-insensitive match also covers exact matches)
const host = hosts.find(h => h.name.toLowerCase() === args.hostname.toLowerCase());
if (!host) {
return {
content: [{ type: "text", text: `Host '${args.hostname}' not found` }],
isError: true
};
}
// Format detailed host info
const hostDetail = {
hostname: host.name,
status: host.up ? 'up' : 'down',
last_reported: host.last_reported_time ? new Date(host.last_reported_time * 1000).toISOString() : 'unknown',
os: host.meta?.platform || 'unknown',
processor: host.meta?.processor || 'unknown',
kernel: host.meta?.kernel_release || 'unknown',
agent_version: host.meta?.agent_version || 'unknown',
applications: (host.apps || []).join(', ') || 'none',
tags: (host.tags || []).join(', ') || 'none',
cpu_cores: host.meta?.cpuCores || 'unknown',
cpu_host: host.meta?.processor || 'unknown',
total_memory: host.meta?.memory?.total ? (parseInt(host.meta.memory.total.replace(/[^0-9]/g, ''), 10) / 1024).toFixed(2) + ' GB' : 'unknown',
memory_host: host.meta?.memory?.total || 'unknown',
// Compare against null so a legitimate 0 value is not reported as N/A
cpu_usage: host.metrics?.cpu != null ? host.metrics.cpu.toFixed(2) + '%' : 'N/A',
// The hosts API reports iowait (not memory usage), so label it as such
iowait: host.metrics?.iowait != null ? host.metrics.iowait.toFixed(2) + '%' : 'N/A',
host_id: host.meta?.host_id || 'unknown',
// Additional details
socket_fqdn: host.meta?.socket_fqdn || 'unknown',
socket_hostname: host.meta?.socket_hostname || 'unknown',
gohai: host.meta?.gohai || 'unknown'
};
const output = JSON.stringify(hostDetail, null, 2);
return {
content: [{ type: "text", text: output }]
};
}
async getNamespaceMetrics(args) {
const namespace = args.namespace;
const cluster = args.cluster || null;
const from = args.from;
const to = args.to;
// Build filter string with cluster if provided
const clusterFilter = cluster ? `, kube_cluster_name:${cluster}` : '';
// Query for total pods in namespace
const podQuery = `sum:kubernetes.pods.running{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total CPU usage in namespace
const cpuUsageQuery = `sum:kubernetes.cpu.usage.total{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total CPU limits in namespace
const cpuLimitQuery = `sum:kubernetes.cpu.limits{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total Memory usage in namespace
const memoryUsageQuery = `sum:kubernetes.memory.usage{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total Memory limits in namespace
const memoryLimitQuery = `sum:kubernetes.memory.limits{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total network traffic in (received)
const networkInQuery = `sum:kubernetes.network.rx_bytes{kube_namespace:${namespace}${clusterFilter}}`;
// Query for total network traffic out (transmitted)
const networkOutQuery = `sum:kubernetes.network.tx_bytes{kube_namespace:${namespace}${clusterFilter}}`;
let result = {
cluster: cluster,
namespace: namespace,
total_pods: 'N/A',
cpu_usage_total: 'N/A',
cpu_limit_total: 'N/A',
memory_usage_total: 'N/A',
memory_limit_total: 'N/A',
network_in_total: 'N/A',
network_out_total: 'N/A'
};
// Run each query independently so one failure doesn't mask the others
const safeQuery = (q) => {
try {
return executeCliQuery(q, from, to).trim();
} catch (e) {
return `Error: ${e.message}`;
}
};
result.total_pods = safeQuery(podQuery);
result.cpu_usage_total = safeQuery(cpuUsageQuery);
result.cpu_limit_total = safeQuery(cpuLimitQuery);
result.memory_usage_total = safeQuery(memoryUsageQuery);
result.memory_limit_total = safeQuery(memoryLimitQuery);
result.network_in_total = safeQuery(networkInQuery);
result.network_out_total = safeQuery(networkOutQuery);
const output = JSON.stringify(result, null, 2);
return {
content: [
{
type: "text",
text: output
}
]
};
}
formatMetricsOutput(data, label = '') {
// This method is deprecated - metrics are now formatted by CLI
return 'Metrics formatted by CLI';
}
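// Note: in a stdio MCP server, stdout is reserved for JSON-RPC traffic, so
// responses go through console.log while diagnostics use console.error.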
sendResponse(id, result) {
const response = {
jsonrpc: "2.0",
id,
result
};
console.log(JSON.stringify(response));
}
sendError(id, message, code = -32603) {
const response = {
jsonrpc: "2.0",
id,
error: {
code,
message
}
};
console.log(JSON.stringify(response));
}
}
/**
* Main MCP Server Loop
*/
async function main() {
const server = new MCPServer();
// Read requests from stdin
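// Messages are newline-delimited JSON: one complete JSON-RPC request per line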
let buffer = '';
process.stdin.on('data', async (chunk) => {
buffer += chunk.toString();
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim()) {
try {
const request = JSON.parse(line);
await server.handleRequest(request);
} catch (error) {
console.error(`Error processing request: ${error.message}`);
}
}
}
});
process.stdin.on('end', () => {
process.exit(0);
});
}
main().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});
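/**
* Example client registration (a sketch; the path and key values below are
* placeholders). For Claude Desktop, add an entry to claude_desktop_config.json:
*
* {
*   "mcpServers": {
*     "datadog": {
*       "command": "node",
*       "args": ["/absolute/path/to/mcp-server.js"],
*       "env": {
*         "DATADOG_API_KEY": "...",
*         "DATADOG_APP_KEY": "...",
*         "DATADOG_SITE": "datadoghq.com"
*       }
*     }
*   }
* }
*/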