# CI/CD & Infrastructure as Code 2025
**Updated**: 2025-11-23 | **Stack**: GitHub Actions, Terraform, Docker, Kubernetes
---
## CI/CD Pipeline
### GitHub Actions Workflow
```yaml
# .github/workflows/ci-cd.yml
name: CI/CD Pipeline
on:
push:
branches: [main, develop]
pull_request:
branches: [main]
env:
NODE_VERSION: '20.x'
DOCKER_REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
test:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:15
env:
POSTGRES_PASSWORD: postgres
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 5432:5432
steps:
- uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
cache: 'npm'
- name: Install dependencies
run: npm ci
- name: Run linter
run: npm run lint
- name: Run tests
run: npm test
env:
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test
- name: Upload coverage
uses: codecov/codecov-action@v3
with:
files: ./coverage/lcov.info
build:
needs: test
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.DOCKER_REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=ref,event=branch
type=sha,prefix={{branch}}-
type=semver,pattern={{version}}
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
deploy:
needs: build
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Update kubeconfig
run: |
aws eks update-kubeconfig --name production-cluster --region us-east-1
- name: Deploy to Kubernetes
run: |
kubectl set image deployment/api api=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:main-${{ github.sha }}
kubectl rollout status deployment/api --timeout=5m
- name: Notify Slack
if: always()
uses: 8398a7/action-slack@v3
with:
status: ${{ job.status }}
text: 'Deployment ${{ job.status }}'
webhook_url: ${{ secrets.SLACK_WEBHOOK }}
```
---
## Docker & Containers
### Multi-stage Dockerfile
```dockerfile
# Dockerfile (optimized for production)
# Build stage
FROM node:20-alpine AS builder
WORKDIR /app
# Copy package files
COPY package*.json ./
# Install dependencies
RUN npm ci --only=production && npm cache clean --force
# Copy source code
COPY . .
# Build application
RUN npm run build
# Production stage
FROM node:20-alpine AS production
WORKDIR /app
# Create non-root user
RUN addgroup -g 1001 -S nodejs && \
adduser -S nodejs -u 1001
# Copy built application from builder
COPY --from=builder --chown=nodejs:nodejs /app/dist ./dist
COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nodejs:nodejs /app/package*.json ./
# Switch to non-root user
USER nodejs
# Expose port
EXPOSE 3000
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD node healthcheck.js
# Start application
CMD ["node", "dist/index.js"]
# RESULT:
# Builder stage: 800MB
# Production image: 150MB (only runtime dependencies)
```
### Docker Compose (Local Development)
```yaml
# docker-compose.yml
version: '3.8'
services:
api:
build:
context: .
target: development
ports:
- "3000:3000"
volumes:
- .:/app
- /app/node_modules # Anonymous volume for node_modules
environment:
- NODE_ENV=development
- DATABASE_URL=postgresql://postgres:postgres@db:5432/myapp
- REDIS_URL=redis://redis:6379
depends_on:
db:
condition: service_healthy
redis:
condition: service_started
command: npm run dev # Hot reload
db:
image: postgres:15-alpine
ports:
- "5432:5432"
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
- POSTGRES_DB=myapp
volumes:
- postgres_data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
timeout: 5s
retries: 5
redis:
image: redis:7-alpine
ports:
- "6379:6379"
volumes:
- redis_data:/data
nginx:
image: nginx:alpine
ports:
- "80:80"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
depends_on:
- api
volumes:
postgres_data:
redis_data:
# Usage:
# docker-compose up -d
# docker-compose logs -f api
# docker-compose exec api npm run migrate
# docker-compose down -v
```
---
## Infrastructure as Code
### Terraform (AWS EKS Cluster)
```hcl
# main.tf
terraform {
required_version = ">= 1.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
}
backend "s3" {
bucket = "my-terraform-state"
key = "eks/terraform.tfstate"
region = "us-east-1"
encrypt = true
}
}
provider "aws" {
region = var.aws_region
}
# VPC
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "5.0.0"
name = "${var.cluster_name}-vpc"
cidr = "10.0.0.0/16"
azs = ["us-east-1a", "us-east-1b", "us-east-1c"]
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]
enable_nat_gateway = true
single_nat_gateway = false # High availability
enable_dns_hostnames = true
public_subnet_tags = {
"kubernetes.io/role/elb" = "1"
}
private_subnet_tags = {
"kubernetes.io/role/internal-elb" = "1"
}
}
# EKS Cluster
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "19.0.0"
cluster_name = var.cluster_name
cluster_version = "1.28"
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
cluster_endpoint_public_access = true
eks_managed_node_groups = {
general = {
desired_size = 2
min_size = 1
max_size = 4
instance_types = ["t3.medium"]
capacity_type = "ON_DEMAND"
}
}
tags = {
Environment = var.environment
Terraform = "true"
}
}
# RDS Database
resource "aws_db_instance" "postgres" {
identifier = "${var.cluster_name}-db"
engine = "postgres"
engine_version = "15.3"
instance_class = "db.t3.micro"
allocated_storage = 20
storage_encrypted = true
db_name = var.db_name
username = var.db_username
password = var.db_password
vpc_security_group_ids = [aws_security_group.rds.id]
db_subnet_group_name = aws_db_subnet_group.main.name
backup_retention_period = 7
skip_final_snapshot = false
final_snapshot_identifier = "${var.cluster_name}-final-snapshot"
tags = {
Name = "${var.cluster_name}-db"
}
}
# Outputs
output "cluster_endpoint" {
value = module.eks.cluster_endpoint
}
output "cluster_name" {
value = module.eks.cluster_name
}
output "db_endpoint" {
value = aws_db_instance.postgres.endpoint
}
# variables.tf
variable "aws_region" {
default = "us-east-1"
}
variable "cluster_name" {
default = "production"
}
variable "environment" {
default = "prod"
}
variable "db_name" {
sensitive = true
}
variable "db_username" {
sensitive = true
}
variable "db_password" {
sensitive = true
}
# Usage:
# terraform init
# terraform plan -var-file="prod.tfvars"
# terraform apply -var-file="prod.tfvars"
# terraform destroy -var-file="prod.tfvars"
```
---
## Kubernetes Deployment
```yaml
# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: api
namespace: production
labels:
app: api
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0 # Zero downtime
selector:
matchLabels:
app: api
template:
metadata:
labels:
app: api
version: v1
spec:
containers:
- name: api
image: ghcr.io/myorg/api:latest
ports:
- containerPort: 3000
env:
- name: NODE_ENV
value: "production"
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: api-secrets
key: database-url
- name: REDIS_URL
valueFrom:
secretKeyRef:
name: api-secrets
key: redis-url
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 3000
initialDelaySeconds: 10
periodSeconds: 5
failureThreshold: 3
imagePullSecrets:
- name: ghcr-secret
---
apiVersion: v1
kind: Service
metadata:
name: api
namespace: production
spec:
selector:
app: api
ports:
- protocol: TCP
port: 80
targetPort: 3000
type: ClusterIP
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: api
namespace: production
annotations:
kubernetes.io/ingress.class: nginx
cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
tls:
- hosts:
- api.example.com
secretName: api-tls
rules:
- host: api.example.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: api
port:
number: 80
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: api
namespace: production
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: api
minReplicas: 3
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
# Apply:
# kubectl apply -f k8s/
# kubectl get pods -n production
# kubectl logs -f deployment/api -n production
# kubectl describe deployment api -n production
```
---
## Monitoring & Observability
### Prometheus + Grafana
```yaml
# prometheus.yml
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
- job_name: 'kubernetes-nodes'
kubernetes_sd_configs:
- role: node
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
# Application metrics (Node.js with prom-client)
import express from 'express';
import promClient from 'prom-client';
const app = express();
// Collect default metrics
const register = promClient.register;
promClient.collectDefaultMetrics({ register });
// Custom metrics
const httpRequestDuration = new promClient.Histogram({
name: 'http_request_duration_seconds',
help: 'Duration of HTTP requests in seconds',
labelNames: ['method', 'route', 'status_code'],
buckets: [0.1, 0.5, 1, 2, 5]
});
const httpRequestTotal = new promClient.Counter({
name: 'http_requests_total',
help: 'Total number of HTTP requests',
labelNames: ['method', 'route', 'status_code']
});
// Middleware to track metrics
app.use((req, res, next) => {
const start = Date.now();
res.on('finish', () => {
const duration = (Date.now() - start) / 1000;
httpRequestDuration.labels(req.method, req.route?.path || req.path, res.statusCode).observe(duration);
httpRequestTotal.labels(req.method, req.route?.path || req.path, res.statusCode).inc();
});
next();
});
// Expose /metrics endpoint
app.get('/metrics', async (req, res) => {
res.set('Content-Type', register.contentType);
res.end(await register.metrics());
});
app.listen(3000);
```
---
## Key Takeaways
1. **Automate everything** - CI/CD pipeline, infrastructure, deployments
2. **Immutable infrastructure** - Containers, never SSH to prod
3. **Monitor proactively** - Metrics, logs, alerts
4. **Zero-downtime deployments** - Rolling updates, health checks
5. **Infrastructure as Code** - Terraform, version controlled
---
## References
- "The Phoenix Project" - Gene Kim
- "Kubernetes in Action" - Marko Lukša
- "Terraform: Up & Running" - Yevgeniy Brikman
**Related**: `kubernetes-deep-dive.md`, `terraform-best-practices.md`, `monitoring-alerting.md`