# =============================================================================
# PROMETHEUS-MCP INTEGRATION
# =============================================================================
# Add this file to your EKS Terraform to deploy prometheus-mcp
#
# QUICK START:
# Just run: terraform apply
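# - Auto-discovers your AMP workspace when amp_workspace_id is not set (the first
#   workspace returned for the account/region is used; set amp_workspace_id
#   explicitly if you have more than one)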
# - Uses sensible defaults for everything else
#
# CUSTOMIZATION:
# Override any variable via:
# 1. Command line: terraform apply -var="amp_workspace_id=ws-xyz"
# 2. Environment: export TF_VAR_amp_workspace_id="ws-xyz" && terraform apply
# 3. tfvars file: Copy terraform.tfvars.example to terraform.tfvars
#
# This file creates:
# - IAM role for prometheus-mcp pods (IRSA)
# - IAM policy for AMP query access
# - (Template only, not created by this file) a dedicated node group for
#   prometheus-mcp; copy the commented example below into your EKS module
# - Helm release to deploy the chart
#
# =============================================================================
# -----------------------------------------------------------------------------
# VARIABLES - All have sensible defaults!
# -----------------------------------------------------------------------------
# ID of the Amazon Managed Service for Prometheus (AMP) workspace to query.
# When left empty, the ID is resolved from the aws_prometheus_workspaces data
# source, which takes the first workspace it returns. With several workspaces
# in the account/region, set this explicitly so the choice is deterministic.
variable "amp_workspace_id" {
  description = "AMP workspace ID. Leave empty to auto-discover (uses the first workspace found in the account/region; set explicitly if you have more than one)"
  type        = string
  default     = "" # Empty = auto-discover
}
# Full image reference for the prometheus-mcp container. The Helm release in
# this file splits it into the chart's image.repository and image.tag values.
variable "prometheus_mcp_image" {
  type        = string
  default     = "ghcr.io/deeptrace/prometheus-mcp:0.1.0"
  description = "Container image for prometheus-mcp"
}
# Pod replica count, passed to the chart as replicaCount.
variable "prometheus_mcp_replicas" {
  type        = number
  default     = 2
  description = "Number of prometheus-mcp pod replicas"
}
# Label map forwarded to the chart as nodeSelector.<key> = <value>.
# The default expects nodes labelled workload=prometheus-mcp (the label used by
# the node group template in this file); set to {} to schedule on any node.
variable "prometheus_mcp_node_selector" {
  type = map(string)
  default = {
    workload = "prometheus-mcp" # Dedicated node group by default
  }
  description = "Node selector for prometheus-mcp pods. Set to {} to run on any node."
}
# Toggle for the dedicated prometheus-mcp node group.
# NOTE(review): no resource visible in this file reads this variable; it is
# presumably consumed where the EKS module's node groups are defined - confirm.
variable "create_dedicated_node_group" {
  type        = bool
  default     = true
  description = "Create a dedicated node group for prometheus-mcp"
}
# EC2 instance type for the dedicated node group.
# NOTE(review): in this file it is only referenced from the commented-out node
# group template; confirm it is wired into the EKS module configuration.
variable "prometheus_mcp_node_instance_type" {
  type        = string
  default     = "t3.medium"
  description = "Instance type for prometheus-mcp node group"
}
# -----------------------------------------------------------------------------
# AUTO-DISCOVER AMP WORKSPACE
# -----------------------------------------------------------------------------
# Find all AMP workspaces in the account/region
# Lists the AMP workspaces visible to the configured AWS provider.
data "aws_prometheus_workspaces" "all" {}

locals {
  # First workspace returned by the data source, or "" when there are none.
  # With more than one workspace the first entry is used, so set
  # var.amp_workspace_id explicitly in that case.
  discovered_workspace_id = length(data.aws_prometheus_workspaces.all.ids) > 0 ? data.aws_prometheus_workspaces.all.ids[0] : ""

  # Resolution order: explicit variable, then the discovered workspace.
  # coalesce() raises an error when every argument is null or empty, which
  # would abort evaluation before null_resource.validate_amp_workspace could
  # report the problem. try() turns that case into "" so the validation
  # resource's `== ""` check can actually trigger.
  amp_workspace_id = try(
    coalesce(
      var.amp_workspace_id,
      local.discovered_workspace_id
    ),
    ""
  )

  # Namespace and service account the pods run as; both are embedded in the
  # IRSA trust policy's `sub` condition.
  prometheus_mcp_namespace      = "prometheus-mcp"
  prometheus_mcp_serviceaccount = "prometheus-mcp"

  # OIDC issuer without the URL scheme, used as the prefix of the
  # `<issuer>:sub` / `<issuer>:aud` condition keys in the trust policy.
  prometheus_mcp_oidc_url = replace(module.eks.cluster_oidc_issuer_url, "https://", "")
}
# Validate that we have a workspace ID
# Guard resource: one instance is created only when the resolved workspace ID
# is the empty string, and its local-exec command exits non-zero so the apply
# stops with an explanatory message.
resource "null_resource" "validate_amp_workspace" {
  count = local.amp_workspace_id != "" ? 0 : 1

  provisioner "local-exec" {
    command = "echo 'ERROR: No AMP workspace found. Either create one or set amp_workspace_id variable.' && exit 1"
  }
}
# -----------------------------------------------------------------------------
# NODE GROUP (add this to your eks_managed_node_groups in the EKS module)
# -----------------------------------------------------------------------------
#
# Add this node group to your EKS module's eks_managed_node_groups:
#
# prometheus-mcp = {
# name = "prometheus-mcp"
# instance_types = ["t3.medium"]
#
# min_size = 1
# max_size = 3
# desired_size = 1
#
# ami_type = "AL2_x86_64"
#
# labels = {
# workload = "prometheus-mcp"
# }
#
# # NOTE: EKS managed node groups AUTOMATICALLY get these IAM policies:
# # - AmazonEKSWorkerNodePolicy (register with EKS)
# # - AmazonEC2ContainerRegistryReadOnly (pull images from ECR) <-- Handles ECR access!
# # - AmazonEKS_CNI_Policy (pod networking)
# #
# # Optional: Add SSM for debugging
# # iam_role_additional_policies = {
# # ssm = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
# # }
#
# # Optional: Taint to ensure only prometheus-mcp pods run here
# # taints = [{
# # key = "dedicated"
# # value = "prometheus-mcp"
# # effect = "NO_SCHEDULE"
# # }]
# }
#
# -----------------------------------------------------------------------------
# IAM ROLE FOR PROMETHEUS-MCP PODS (IRSA)
# -----------------------------------------------------------------------------
# IAM role assumed by the prometheus-mcp pods through IRSA.
# The trust policy allows sts:AssumeRoleWithWebIdentity via the cluster's OIDC
# provider, restricted to the prometheus-mcp service account (`sub`) and to
# tokens issued for STS (`aud`).
resource "aws_iam_role" "prometheus_mcp_pod" {
  name = "${var.cluster_name}-prometheus-mcp-pod"

  tags = {
    Component = "prometheus-mcp"
  }

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect = "Allow"
      Action = "sts:AssumeRoleWithWebIdentity"
      Principal = {
        Federated = module.eks.oidc_provider_arn
      }
      Condition = {
        StringEquals = {
          # Token audience must be STS.
          "${local.prometheus_mcp_oidc_url}:aud" = "sts.amazonaws.com"
          # Token subject must be exactly this namespace/service account.
          "${local.prometheus_mcp_oidc_url}:sub" = "system:serviceaccount:${local.prometheus_mcp_namespace}:${local.prometheus_mcp_serviceaccount}"
        }
      }
    }]
  })
}
# -----------------------------------------------------------------------------
# IAM POLICY - PERMISSIONS TO QUERY AMP
# -----------------------------------------------------------------------------
# Managed policy granting the four AMP query actions below, scoped to the
# single resolved workspace ARN.
resource "aws_iam_policy" "prometheus_mcp_query" {
  name        = "${var.cluster_name}-prometheus-mcp-query"
  description = "Allows prometheus-mcp pods to query AWS Managed Prometheus"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect = "Allow"
      # Workspace ARN assembled from region, caller account and workspace ID.
      Resource = "arn:aws:aps:${var.aws_region}:${data.aws_caller_identity.current.account_id}:workspace/${local.amp_workspace_id}"
      Action = [
        "aps:QueryMetrics",
        "aps:GetLabels",
        "aps:GetSeries",
        "aps:GetMetricMetadata",
      ]
    }]
  })
}
# Attaches the AMP query policy to the pod role.
resource "aws_iam_role_policy_attachment" "prometheus_mcp_pod" {
  policy_arn = aws_iam_policy.prometheus_mcp_query.arn
  role       = aws_iam_role.prometheus_mcp_pod.name
}
# -----------------------------------------------------------------------------
# DEDICATED NODE GROUP (optional template)
# -----------------------------------------------------------------------------
# A separate node group just for prometheus-mcp workloads.
# NOTE: This file does NOT create the node group. The block below is a TEMPLATE
# to merge into the eks_managed_node_groups map of your existing EKS module.
# If you want to use existing nodes instead, set prometheus_mcp_node_selector
# to {} (or to labels your nodes already carry).
#
# Example to add to existing EKS module:
#
# eks_managed_node_groups = {
# # ... existing node groups ...
#
# prometheus-mcp = {
# name = "prometheus-mcp"
# instance_types = [var.prometheus_mcp_node_instance_type]
# min_size = var.prometheus_mcp_replicas
# max_size = var.prometheus_mcp_replicas + 2
# desired_size = var.prometheus_mcp_replicas
#
# labels = var.prometheus_mcp_node_selector
#
# # Optional: SSM for debugging
# iam_role_additional_policies = {
# ssm = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
# }
# }
# }
# -----------------------------------------------------------------------------
# HELM RELEASE - DEPLOY PROMETHEUS-MCP
# -----------------------------------------------------------------------------
# Split var.prometheus_mcp_image into the repository and tag values the chart
# expects. Only the last "/"-separated path segment is inspected for a ":" so
# that a registry host with a port (e.g. "localhost:5000/team/app:1.2.3") is
# not mistaken for "repository:tag".
locals {
  prometheus_mcp_image_segments   = split("/", var.prometheus_mcp_image)
  prometheus_mcp_image_last       = element(local.prometheus_mcp_image_segments, length(local.prometheus_mcp_image_segments) - 1)
  prometheus_mcp_image_last_parts = split(":", local.prometheus_mcp_image_last)
  prometheus_mcp_image_has_tag    = length(local.prometheus_mcp_image_last_parts) > 1

  # Tag is whatever follows the final ":" of the last segment; "latest" when
  # the reference carries no tag.
  prometheus_mcp_image_tag = local.prometheus_mcp_image_has_tag ? element(local.prometheus_mcp_image_last_parts, length(local.prometheus_mcp_image_last_parts) - 1) : "latest"

  # Repository is the full reference minus the trailing ":<tag>".
  prometheus_mcp_image_repository = local.prometheus_mcp_image_has_tag ? trimsuffix(var.prometheus_mcp_image, ":${local.prometheus_mcp_image_tag}") : var.prometheus_mcp_image
}

# Installs the prometheus-mcp chart from the local charts directory and wires
# in the region, AMP workspace, IRSA role, image, replica count and node
# selector.
resource "helm_release" "prometheus_mcp" {
  name             = "prometheus-mcp"
  namespace        = local.prometheus_mcp_namespace
  create_namespace = true

  # Path to the Helm chart (relative to this module's directory).
  # Adjust this path based on your repo structure.
  chart = "${path.module}/../charts/prometheus-mcp"

  # AWS configuration
  set {
    name  = "aws.region"
    value = var.aws_region
  }

  # AMP configuration (uses auto-discovered or provided workspace ID)
  set {
    name  = "amp.workspaceId"
    value = local.amp_workspace_id
  }

  # IRSA configuration
  set {
    name  = "irsa.enabled"
    value = "true"
  }

  set {
    name  = "irsa.roleArn"
    value = aws_iam_role.prometheus_mcp_pod.arn
  }

  # Container image
  set {
    name  = "image.repository"
    value = local.prometheus_mcp_image_repository
  }

  set {
    name  = "image.tag"
    value = local.prometheus_mcp_image_tag
  }

  # Replicas
  set {
    name  = "replicaCount"
    value = var.prometheus_mcp_replicas
  }

  # Node selector (for dedicated node group)
  dynamic "set" {
    for_each = var.prometheus_mcp_node_selector
    content {
      # Dots inside a label key (e.g. "kubernetes.io/os") must be escaped,
      # otherwise Helm interprets them as nested-key separators.
      name  = "nodeSelector.${replace(set.key, ".", "\\.")}"
      value = set.value
      # nodeSelector values must be strings; without this Helm would coerce
      # values such as "true" or "1" to bool/int.
      type = "string"
    }
  }

  # Wait for deployment to be ready
  wait    = true
  timeout = 300

  # The cluster must exist and the role must already carry its policy before
  # the pods start.
  depends_on = [
    module.eks,
    aws_iam_role_policy_attachment.prometheus_mcp_pod
  ]
}
# -----------------------------------------------------------------------------
# OUTPUTS
# -----------------------------------------------------------------------------
# ARN of the IRSA role created for the prometheus-mcp pods.
output "prometheus_mcp_role_arn" {
  value       = aws_iam_role.prometheus_mcp_pod.arn
  description = "IAM role ARN for prometheus-mcp pods"
}
# Namespace the Helm release is installed into.
output "prometheus_mcp_namespace" {
  value       = local.prometheus_mcp_namespace
  description = "Kubernetes namespace for prometheus-mcp"
}
# Service name exposed to consumers of this configuration.
# NOTE(review): hard-coded literal; presumably matches the Service name the
# chart renders for a release called "prometheus-mcp" - confirm against the chart.
output "prometheus_mcp_service" {
  value       = "prometheus-mcp"
  description = "Kubernetes service name for prometheus-mcp"
}