#!/usr/bin/env node
/**
* MCP Datadog Server (using @modelcontextprotocol/sdk)
*
* Runs as an MCP (Model Context Protocol) server using the official SDK.
* Compatible with Claude Desktop, VS Code, and other MCP clients.
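*
* Example client registration (claude_desktop_config.json); the server name
* and install path below are illustrative:
*
*   {
*     "mcpServers": {
*       "datadog": {
*         "command": "node",
*         "args": ["/absolute/path/to/mcp-server.js"]
*       }
*     }
*   }
*
* Credentials are read from .env (see below); an "env" block in the client
* config works as well.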
*/
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import dotenv from 'dotenv';
import axios from 'axios';
import { execFileSync } from 'child_process';
import { fileURLToPath } from 'url';
import path from 'path';
// Load environment variables
dotenv.config();
const API_KEY = process.env.DATADOG_API_KEY;
const APP_KEY = process.env.DATADOG_APP_KEY;
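// Datadog is multi-site (e.g. datadoghq.com, datadoghq.eu, us5.datadoghq.com);
// the REST host is always api.<site>.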
const SITE = process.env.DATADOG_SITE || 'datadoghq.com';
if (!API_KEY || !APP_KEY) {
console.error('Error: DATADOG_API_KEY and DATADOG_APP_KEY must be set (in the environment or a .env file)');
process.exit(1);
}
// Datadog API client
const datadogClient = axios.create({
baseURL: `https://api.${SITE}`,
headers: {
'DD-API-KEY': API_KEY,
'DD-APPLICATION-KEY': APP_KEY,
'Content-Type': 'application/json'
}
});
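// ESM has no __dirname; reconstruct it from import.meta.url so the query
// script resolves relative to this file rather than the caller's cwd.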
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Run a metric query through the project's "query" npm script and return its stdout
function executeCliQuery(query, from, to) {
try {
// Validate timestamps are in seconds (not milliseconds)
const now = Math.floor(Date.now() / 1000);
const maxFuture = now + (24 * 3600); // Allow up to 24 hours in future
if (from <= 0 || to <= 0) {
throw new Error(`Invalid timestamp range: from=${from}, to=${to}. Timestamps must be positive.`);
}
if (from >= to) {
throw new Error(`Invalid timestamp range: from=${from}, to=${to}. 'from' must be earlier than 'to'.`);
}
// Heuristic: 2,000,000,000 s is mid-2033, far below any millisecond epoch
// value, so anything larger almost certainly means milliseconds were passed.
if (from > 2000000000 || to > 2000000000) {
throw new Error(`Invalid timestamp: from=${from}, to=${to}. Looks like milliseconds. Use seconds instead.`);
}
if (from > maxFuture || to > maxFuture) {
const fromDate = new Date(from * 1000).toISOString();
const toDate = new Date(to * 1000).toISOString();
throw new Error(`Timestamp too far in future: from=${fromDate}, to=${toDate}. Current time=${new Date(now * 1000).toISOString()}`);
}
// Pass arguments as an array (no shell string), so quotes, backticks, and $
// in the query cannot break the command or inject into the shell.
const output = execFileSync('npm', ['run', 'query', '--', query, String(from), String(to)], {
cwd: __dirname,
encoding: 'utf-8'
});
return output;
} catch (error) {
throw new Error(`Query execution failed: ${error.message}`);
}
}
// Create MCP Server
const server = new Server(
{
name: "datadog-mcp",
version: "2.0.0"
},
{
capabilities: {
tools: {}
}
}
);
// Advertise available tools
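// Each inputSchema below is plain JSON Schema; MCP clients use it to
// validate and construct tool arguments.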
server.setRequestHandler(ListToolsRequestSchema, async () => {
return {
tools: [
{
name: "list_services",
description: "List all available tools/services in this MCP server",
inputSchema: {
type: "object",
properties: {},
required: []
}
},
{
name: "query_metrics",
description: "General purpose query for any Datadog metric. Use this when other specific tools don't match your needs.",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Datadog metric query string (e.g., avg:system.cpu.user{*}, sum:kubernetes.pods.running{kube_namespace:default})"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
}
},
required: ["query", "from", "to"]
}
},
{
name: "query_kubernetes_metrics",
description: "Query Kubernetes metrics from Datadog",
inputSchema: {
type: "object",
properties: {
query: {
type: "string",
description: "Datadog metric query (e.g., avg:kubernetes.cpu.usage.total{kube_namespace:pijarsekolah-prod})"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
}
},
required: ["query", "from", "to"]
}
},
{
name: "query_deployment_pods",
description: "Query pod count for a specific Kubernetes deployment",
inputSchema: {
type: "object",
properties: {
namespace: {
type: "string",
description: "Kubernetes namespace (e.g., pijarsekolah-prod)"
},
deployment: {
type: "string",
description: "Deployment name (e.g., pijarsekolah-cbt-exam-api)"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
},
apm_service_name: {
type: "string",
description: "APM service name for response time metric (optional, defaults to deployment name)"
}
},
required: ["namespace", "deployment", "from", "to"]
}
},
{
name: "query_apm_metrics",
description: "Query APM metrics for a specific service",
inputSchema: {
type: "object",
properties: {
service: {
type: "string",
description: "Service name"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
metric: {
type: "string",
description: "Metric name (e.g., trace.web.request.duration)"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["service", "from", "to"]
}
},
{
name: "query_host_metrics",
description: "Query host/infrastructure metrics",
inputSchema: {
type: "object",
properties: {
hostname: {
type: "string",
description: "Hostname to query"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
metric: {
type: "string",
description: "Metric name (e.g., system.cpu.user)"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["hostname", "from", "to"]
}
},
{
name: "query_database_metrics",
description: "Query database monitoring (DBM) metrics",
inputSchema: {
type: "object",
properties: {
db_host: {
type: "string",
description: "Database host name"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
db_type: {
type: "string",
enum: ["postgresql", "mysql"],
description: "Database type"
},
metric: {
type: "string",
description: "Metric name"
},
aggregator: {
type: "string",
enum: ["avg", "max", "min", "sum"],
description: "Aggregation function"
}
},
required: ["db_host", "from", "to"]
}
},
{
name: "list_hosts",
description: "List all monitored hosts",
inputSchema: {
type: "object",
properties: {
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
}
},
required: ["from", "to"]
}
},
{
name: "list_databases",
description: "List all monitored databases",
inputSchema: {
type: "object",
properties: {
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
db_type: {
type: "string",
enum: ["postgresql", "mysql"],
description: "Database type"
}
},
required: ["from", "to"]
}
},
{
name: "list_all_hosts",
description: "List all monitored hosts with complete system info (hostname, CPU, memory, disk, OS, kernel, applications, etc)",
inputSchema: {
type: "object",
properties: {},
required: []
}
},
{
name: "get_host_detail",
description: "Get detailed information for a specific host by hostname",
inputSchema: {
type: "object",
properties: {
hostname: {
type: "string",
description: "Hostname to get details for"
}
},
required: ["hostname"]
}
},
{
name: "get_namespace_metrics",
description: "Get total resource usage and limits for a Kubernetes namespace",
inputSchema: {
type: "object",
properties: {
namespace: {
type: "string",
description: "Kubernetes namespace (e.g., pijarsekolah-prod)"
},
from: {
type: "integer",
description: "Start time in Unix timestamp (seconds)"
},
to: {
type: "integer",
description: "End time in Unix timestamp (seconds)"
},
cluster: {
type: "string",
description: "Kubernetes cluster name (optional)"
}
},
required: ["namespace", "from", "to"]
}
}
]
};
});
// Handle tool calls
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const tool = request.params.name;
const args = (request.params.arguments ?? {});
console.error(`[DEBUG] Tool called: ${tool}`);
console.error(`[DEBUG] Arguments:`, JSON.stringify(args, null, 2));
try {
switch (tool) {
case 'list_services':
return await listServices();
case 'query_metrics':
return await queryMetrics(args);
case 'query_kubernetes_metrics':
return await queryKubernetesMetrics(args);
case 'query_deployment_pods':
return await queryDeploymentPods(args);
case 'query_apm_metrics':
return await queryAPMMetrics(args);
case 'query_host_metrics':
return await queryHostMetrics(args);
case 'query_database_metrics':
return await queryDatabaseMetrics(args);
case 'list_hosts':
return await listHosts(args);
case 'list_databases':
return await listDatabases(args);
case 'list_all_hosts':
return await listAllHosts();
case 'get_host_detail':
return await getHostDetail(args);
case 'get_namespace_metrics':
return await getNamespaceMetrics(args);
default:
throw new Error(`Unknown tool: ${tool}`);
}
} catch (error) {
console.error(`[ERROR] Tool execution failed:`, error.message);
console.error(`[ERROR] Stack:`, error.stack);
return {
content: [
{
type: "text",
text: `Error executing tool: ${error.message}`
}
],
isError: true
};
}
});
// Tool implementations
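// Each implementation returns an MCP tool result: a content array of
// { type: "text", text } blocks, with isError: true on failure.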
async function listServices() {
const services = [
{ name: "list_services", description: "List all available tools/services" },
{ name: "query_metrics", description: "General purpose query for any Datadog metric" },
{ name: "query_kubernetes_metrics", description: "Query Kubernetes metrics" },
{ name: "query_deployment_pods", description: "Query pod count for a deployment" },
{ name: "query_apm_metrics", description: "Query APM metrics" },
{ name: "query_host_metrics", description: "Query host/infrastructure metrics" },
{ name: "query_database_metrics", description: "Query database monitoring metrics" },
{ name: "list_hosts", description: "List all monitored hosts" },
{ name: "list_databases", description: "List all monitored databases" },
{ name: "list_all_hosts", description: "List all hosts with complete system info" },
{ name: "get_host_detail", description: "Get detailed host information" },
{ name: "get_namespace_metrics", description: "Get namespace resource usage" }
];
return {
content: [
{
type: "text",
text: JSON.stringify({ total_services: services.length, services }, null, 2)
}
]
};
}
async function queryMetrics(args) {
const from = parseInt(args.from);
const to = parseInt(args.to);
const now = Math.floor(Date.now() / 1000);
const maxFuture = now + (24 * 3600);
console.error(`[DEBUG] queryMetrics - from: ${from}, to: ${to}, now: ${now}`);
console.error(`[DEBUG] queryMetrics - query: ${args.query}`);
if (from <= 0 || to <= 0 || from > 2000000000 || to > 2000000000) {
console.error(`[ERROR] Invalid timestamp detected`);
return {
content: [
{
type: "text",
text: `Error: Invalid timestamp. from=${from}, to=${to}. Use Unix timestamp in seconds (not milliseconds).`
}
],
isError: true
};
}
if (from > maxFuture || to > maxFuture) {
console.error(`[ERROR] Timestamp too far in future`);
return {
content: [
{
type: "text",
text: `Error: Timestamp in future. from=${new Date(from * 1000).toISOString()}, to=${new Date(to * 1000).toISOString()}. Current time=${new Date(now * 1000).toISOString()}`
}
],
isError: true
};
}
const query = args.query;
const output = executeCliQuery(query, from, to);
console.error(`[DEBUG] Query result length: ${output.length} chars`);
return {
content: [{ type: "text", text: output }]
};
}
async function queryKubernetesMetrics(args) {
const from = parseInt(args.from);
const to = parseInt(args.to);
const now = Math.floor(Date.now() / 1000);
const maxFuture = now + (24 * 3600);
console.error(`[DEBUG] queryKubernetesMetrics - from: ${from}, to: ${to}, now: ${now}`);
console.error(`[DEBUG] queryKubernetesMetrics - original query: ${args.query}`);
if (from <= 0 || to <= 0 || from > 2000000000 || to > 2000000000) {
console.error(`[ERROR] Invalid timestamp detected`);
return {
content: [
{
type: "text",
text: `Error: Invalid timestamp. from=${from}, to=${to}. Use Unix timestamp in seconds (not milliseconds).`
}
],
isError: true
};
}
if (from > maxFuture || to > maxFuture) {
console.error(`[ERROR] Timestamp too far in future`);
return {
content: [
{
type: "text",
text: `Error: Timestamp in future. from=${new Date(from * 1000).toISOString()}, to=${new Date(to * 1000).toISOString()}. Current=${new Date(now * 1000).toISOString()}`
}
],
isError: true
};
}
let query = args.query;
// Add cluster filter if provided
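// Note: only the first {...} scope is rewritten. That covers single-metric
// queries; compound queries (e.g. a/b arithmetic) keep later scopes unfiltered.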
if (args.cluster) {
if (query.includes('{')) {
query = query.replace('{', `{kube_cluster_name:${args.cluster},`);
} else {
query += `{kube_cluster_name:${args.cluster}}`;
}
}
console.error(`[DEBUG] queryKubernetesMetrics - final query: ${query}`);
const output = executeCliQuery(query, from, to);
console.error(`[DEBUG] Query result length: ${output.length} chars`);
return {
content: [{ type: "text", text: output }]
};
}
async function queryDeploymentPods(args) {
const namespace = args.namespace;
const deployment = args.deployment;
const cluster = args.cluster || null;
const apmServiceName = args.apm_service_name || null;
const from = args.from;
const to = args.to;
console.error(`[DEBUG] queryDeploymentPods - namespace: ${namespace}, deployment: ${deployment}`);
console.error(`[DEBUG] queryDeploymentPods - from: ${from}, to: ${to}`);
console.error(`[DEBUG] queryDeploymentPods - cluster: ${cluster}, apmServiceName: ${apmServiceName}`);
const clusterFilter = cluster ? `, kube_cluster_name:${cluster}` : '';
const podQuery = `sum:kubernetes.pods.running{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
const cpuQuery = `avg:kubernetes.cpu.usage.total{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
const memoryQuery = `avg:kubernetes.memory.usage{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
const cpuLimitQuery = `avg:kubernetes.cpu.limits{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
const memoryLimitQuery = `avg:kubernetes.memory.limits{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
const networkInQuery = `sum:kubernetes.network.rx_bytes{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
const networkOutQuery = `sum:kubernetes.network.tx_bytes{kube_namespace:${namespace}, kube_deployment:${deployment}${clusterFilter}}`;
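// Infer the APM env tag from the namespace suffix (-prod -> production,
// -stage -> staging); anything else defaults to production.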
let envName = 'production';
if (namespace.includes('-prod')) {
envName = 'production';
} else if (namespace.includes('-stage')) {
envName = 'staging';
}
const serviceName = apmServiceName || deployment;
// APM latency is exposed as trace.<operation>.duration (cf. the
// query_apm_metrics default); the bare trace.http.request name is not a
// duration metric.
const responseTimeMetric = 'trace.http.request.duration';
let result = {
cluster: cluster,
namespace: namespace,
deployment: deployment,
pods: 'N/A',
cpu_usage: 'N/A',
cpu_limit: 'N/A',
memory_usage: 'N/A',
memory_limit: 'N/A',
network_in: 'N/A',
network_out: 'N/A',
response_time: 'N/A'
};
// Run each query independently so one failure doesn't mask the others.
const queries = {
pods: podQuery,
cpu_usage: cpuQuery,
cpu_limit: cpuLimitQuery,
memory_usage: memoryQuery,
memory_limit: memoryLimitQuery,
network_in: networkInQuery,
network_out: networkOutQuery
};
for (const [key, q] of Object.entries(queries)) {
try {
result[key] = executeCliQuery(q, from, to).trim();
} catch (e) {
result[key] = `Error: ${e.message}`;
}
}
try {
const query = `max:${responseTimeMetric}{service:${serviceName}, env:${envName}${cluster ? `, kube_cluster_name:${cluster}` : ''}}`;
const responseTimeOutput = executeCliQuery(query, from, to);
const trimmed = responseTimeOutput.trim();
if (trimmed && !trimmed.includes('No data found')) {
result.response_time = trimmed;
} else {
result.response_time = 'No data';
}
} catch (e) {
result.response_time = `Error: ${e.message}`;
}
return {
content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
};
}
async function queryAPMMetrics(args) {
// Honor the aggregator parameter advertised in the tool schema (defaults to avg).
const aggregator = args.aggregator || 'avg';
const metric = args.metric || 'trace.web.request.duration';
const service = args.service || '*';
const query = `${aggregator}:${metric}{service:${service}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async function queryHostMetrics(args) {
// Honor the aggregator parameter advertised in the tool schema (defaults to avg).
const aggregator = args.aggregator || 'avg';
const metric = args.metric || 'system.cpu.user';
const hostname = args.hostname || '*';
const query = `${aggregator}:${metric}{host:${hostname}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async function queryDatabaseMetrics(args) {
const dbType = args.db_type || 'postgresql';
// Honor the aggregator parameter advertised in the tool schema (defaults to avg).
const aggregator = args.aggregator || 'avg';
const metric = args.metric || `${dbType}.queries.active`;
const dbHost = args.db_host || '*';
const query = `${aggregator}:${metric}{dbhost:${dbHost}}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async function listHosts(args) {
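// Lightweight inventory: group a metric every agent reports by host.
// list_all_hosts uses the hosts API for full metadata instead.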
const query = `avg:system.cpu.user{*} by {host}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
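// Same grouping trick as list_hosts, keyed by the dbhost tag this setup
// assumes is present on database metrics.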
async function listDatabases(args) {
const dbType = args.db_type || 'postgresql';
const query = `avg:${dbType}.queries.active{*} by {dbhost}`;
const output = executeCliQuery(query, args.from, args.to);
return {
content: [{ type: "text", text: output }]
};
}
async function listAllHosts() {
// The hosts endpoint paginates (100 hosts per page by default); request the
// documented maximum of 1000. Larger fleets need start/count paging.
const response = await datadogClient.get('/api/v1/hosts', { params: { count: 1000 } });
const hosts = response.data.host_list || [];
const hostInfo = hosts.map(host => ({
hostname: host.name,
status: host.up ? 'up' : 'down',
last_reported: host.last_reported_time ? new Date(host.last_reported_time * 1000).toISOString() : 'unknown',
os: host.meta?.platform || 'unknown',
processor: host.meta?.processor || 'unknown',
kernel: host.meta?.kernel_release || 'unknown',
agent_version: host.meta?.agent_version || 'unknown',
applications: (host.apps || []).slice(0, 8).join(', '),
tags: (host.tags || []).join(', ') || 'none',
cpu_cores: host.meta?.cpuCores || 'unknown',
// Assumes meta.memory.total is reported in MB; adjust the divisor if the
// agent reports kB instead.
total_memory: host.meta?.memory?.total ? (parseInt(host.meta.memory.total.replace(/[^0-9]/g, '')) / 1024).toFixed(2) + ' GB' : 'unknown',
cpu_usage: host.metrics?.cpu ? host.metrics.cpu.toFixed(2) + '%' : 'N/A',
// The hosts API metrics payload contains cpu, iowait, and load only; there
// is no memory usage metric here.
iowait: host.metrics?.iowait ? host.metrics.iowait.toFixed(2) + '%' : 'N/A',
host_id: host.meta?.host_id || 'unknown'
}));
hostInfo.sort((a, b) => a.hostname.localeCompare(b.hostname));
return {
content: [{ type: "text", text: JSON.stringify(hostInfo, null, 2) }]
};
}
async function getHostDetail(args) {
// The hosts endpoint paginates; a server-side filter keeps the target host
// within the first page of results.
const response = await datadogClient.get('/api/v1/hosts', { params: { filter: args.hostname } });
const hosts = response.data.host_list || [];
const host = hosts.find(h => h.name.toLowerCase() === args.hostname.toLowerCase());
if (!host) {
return {
content: [{ type: "text", text: `Host '${args.hostname}' not found` }]
};
}
const hostDetail = {
hostname: host.name,
status: host.up ? 'up' : 'down',
last_reported: host.last_reported_time ? new Date(host.last_reported_time * 1000).toISOString() : 'unknown',
os: host.meta?.platform || 'unknown',
processor: host.meta?.processor || 'unknown',
kernel: host.meta?.kernel_release || 'unknown',
agent_version: host.meta?.agent_version || 'unknown',
applications: (host.apps || []).join(', ') || 'none',
tags: (host.tags || []).join(', ') || 'none',
cpu_cores: host.meta?.cpuCores || 'unknown',
// Assumes meta.memory.total is reported in MB; adjust the divisor if the
// agent reports kB instead.
total_memory: host.meta?.memory?.total ? (parseInt(host.meta.memory.total.replace(/[^0-9]/g, '')) / 1024).toFixed(2) + ' GB' : 'unknown',
cpu_usage: host.metrics?.cpu ? host.metrics.cpu.toFixed(2) + '%' : 'N/A',
// The hosts API metrics payload contains cpu, iowait, and load only; there
// is no memory usage metric here.
iowait: host.metrics?.iowait ? host.metrics.iowait.toFixed(2) + '%' : 'N/A',
host_id: host.meta?.host_id || 'unknown',
socket_fqdn: host.meta?.socket_fqdn || 'unknown',
socket_hostname: host.meta?.socket_hostname || 'unknown'
};
return {
content: [{ type: "text", text: JSON.stringify(hostDetail, null, 2) }]
};
}
async function getNamespaceMetrics(args) {
const namespace = args.namespace;
const cluster = args.cluster || null;
const from = args.from;
const to = args.to;
const clusterFilter = cluster ? `, kube_cluster_name:${cluster}` : '';
const podQuery = `sum:kubernetes.pods.running{kube_namespace:${namespace}${clusterFilter}}`;
const cpuUsageQuery = `sum:kubernetes.cpu.usage.total{kube_namespace:${namespace}${clusterFilter}}`;
const cpuLimitQuery = `sum:kubernetes.cpu.limits{kube_namespace:${namespace}${clusterFilter}}`;
const memoryUsageQuery = `sum:kubernetes.memory.usage{kube_namespace:${namespace}${clusterFilter}}`;
const memoryLimitQuery = `sum:kubernetes.memory.limits{kube_namespace:${namespace}${clusterFilter}}`;
const networkInQuery = `sum:kubernetes.network.rx_bytes{kube_namespace:${namespace}${clusterFilter}}`;
const networkOutQuery = `sum:kubernetes.network.tx_bytes{kube_namespace:${namespace}${clusterFilter}}`;
let result = {
cluster: cluster,
namespace: namespace,
total_pods: 'N/A',
cpu_usage_total: 'N/A',
cpu_limit_total: 'N/A',
memory_usage_total: 'N/A',
memory_limit_total: 'N/A',
network_in_total: 'N/A',
network_out_total: 'N/A'
};
// Run each query independently so one failure doesn't mask the others.
const queries = {
total_pods: podQuery,
cpu_usage_total: cpuUsageQuery,
cpu_limit_total: cpuLimitQuery,
memory_usage_total: memoryUsageQuery,
memory_limit_total: memoryLimitQuery,
network_in_total: networkInQuery,
network_out_total: networkOutQuery
};
for (const [key, q] of Object.entries(queries)) {
try {
result[key] = executeCliQuery(q, from, to).trim();
} catch (e) {
result[key] = `Error: ${e.message}`;
}
}
return {
content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
};
}
// Run the server
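// Note: stdout carries the MCP protocol stream, which is why all logging in
// this file goes to stderr via console.error.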
async function run() {
const transport = new StdioServerTransport();
await server.connect(transport);
console.error("MCP Datadog Server running on stdio");
}
run().catch((err) => {
console.error("Fatal error:", err);
process.exit(1);
});