Skip to main content
Glama
cicd-infrastructure-2025.md (14.2 kB)
# CI/CD & Infrastructure as Code 2025 **Updated**: 2025-11-23 | **Stack**: GitHub Actions, Terraform, Docker, Kubernetes --- ## CI/CD Pipeline ### GitHub Actions Workflow ```yaml # .github/workflows/ci-cd.yml name: CI/CD Pipeline on: push: branches: [main, develop] pull_request: branches: [main] env: NODE_VERSION: '20.x' DOCKER_REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} jobs: test: runs-on: ubuntu-latest services: postgres: image: postgres:15 env: POSTGRES_PASSWORD: postgres POSTGRES_DB: test options: >- --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 ports: - 5432:5432 steps: - uses: actions/checkout@v4 - name: Setup Node.js uses: actions/setup-node@v4 with: node-version: ${{ env.NODE_VERSION }} cache: 'npm' - name: Install dependencies run: npm ci - name: Run linter run: npm run lint - name: Run tests run: npm test env: DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test - name: Upload coverage uses: codecov/codecov-action@v3 with: files: ./coverage/lcov.info build: needs: test runs-on: ubuntu-latest if: github.ref == 'refs/heads/main' permissions: contents: read packages: write steps: - uses: actions/checkout@v4 - name: Login to GitHub Container Registry uses: docker/login-action@v3 with: registry: ${{ env.DOCKER_REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata id: meta uses: docker/metadata-action@v5 with: images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }} tags: | type=ref,event=branch type=sha,prefix={{branch}}-,format=long type=semver,pattern={{version}} - name: Build and push Docker image uses: docker/build-push-action@v5 with: context: . 
push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max deploy: needs: build runs-on: ubuntu-latest if: github.ref == 'refs/heads/main' steps: - uses: actions/checkout@v4 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-east-1 - name: Update kubeconfig run: | aws eks update-kubeconfig --name production-cluster --region us-east-1 - name: Deploy to Kubernetes run: | kubectl set image deployment/api api=${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:main-${{ github.sha }} kubectl rollout status deployment/api --timeout=5m - name: Notify Slack if: always() uses: 8398a7/action-slack@v3 with: status: ${{ job.status }} text: 'Deployment ${{ job.status }}' webhook_url: ${{ secrets.SLACK_WEBHOOK }} ``` --- ## Docker & Containers ### Multi-stage Dockerfile ```dockerfile # Dockerfile (optimized for production) # Build stage FROM node:20-alpine AS builder WORKDIR /app # Copy package files COPY package*.json ./ # Install all dependencies (devDependencies are required by the build step below) RUN npm ci # Copy source code COPY . . 
# Build application RUN npm run build # Remove devDependencies so only runtime packages are copied into the final image RUN npm prune --production # Production stage FROM node:20-alpine AS production WORKDIR /app # Create non-root user RUN addgroup -g 1001 -S nodejs && \ adduser -S nodejs -u 1001 # Copy built application from builder COPY --from=builder --chown=nodejs:nodejs /app/dist ./dist COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules COPY --from=builder --chown=nodejs:nodejs /app/package*.json ./ COPY --from=builder --chown=nodejs:nodejs /app/healthcheck.js ./ # Switch to non-root user USER nodejs # Expose port EXPOSE 3000 # Health check HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ CMD node healthcheck.js # Start application CMD ["node", "dist/index.js"] # RESULT: # Builder stage: 800MB # Production image: 150MB (only runtime dependencies) ``` ### Docker Compose (Local Development) ```yaml # docker-compose.yml version: '3.8' services: api: build: context: . target: builder ports: - "3000:3000" volumes: - .:/app - /app/node_modules # Anonymous volume for node_modules environment: - NODE_ENV=development - DATABASE_URL=postgresql://postgres:postgres@db:5432/myapp - REDIS_URL=redis://redis:6379 depends_on: db: condition: service_healthy redis: condition: service_started command: npm run dev # Hot reload db: image: postgres:15-alpine ports: - "5432:5432" environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=postgres - POSTGRES_DB=myapp volumes: - postgres_data:/var/lib/postgresql/data healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 5s timeout: 5s retries: 5 redis: image: redis:7-alpine ports: - "6379:6379" volumes: - redis_data:/data nginx: image: nginx:alpine ports: - "80:80" volumes: - ./nginx.conf:/etc/nginx/nginx.conf:ro depends_on: - api volumes: postgres_data: redis_data: # Usage: # docker-compose up -d # docker-compose logs -f api # docker-compose exec api npm run migrate # docker-compose down -v ``` --- ## Infrastructure as Code ### Terraform (AWS EKS Cluster) ```hcl # main.tf terraform { required_version = ">= 1.0" required_providers { aws = { 
source = "hashicorp/aws" version = "~> 5.0" } } backend "s3" { bucket = "my-terraform-state" key = "eks/terraform.tfstate" region = "us-east-1" encrypt = true } } provider "aws" { region = var.aws_region } # VPC module "vpc" { source = "terraform-aws-modules/vpc/aws" version = "5.0.0" name = "${var.cluster_name}-vpc" cidr = "10.0.0.0/16" azs = ["us-east-1a", "us-east-1b", "us-east-1c"] private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] public_subnets = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"] enable_nat_gateway = true single_nat_gateway = false # High availability enable_dns_hostnames = true public_subnet_tags = { "kubernetes.io/role/elb" = "1" } private_subnet_tags = { "kubernetes.io/role/internal-elb" = "1" } } # EKS Cluster module "eks" { source = "terraform-aws-modules/eks/aws" version = "19.0.0" cluster_name = var.cluster_name cluster_version = "1.28" vpc_id = module.vpc.vpc_id subnet_ids = module.vpc.private_subnets cluster_endpoint_public_access = true eks_managed_node_groups = { general = { desired_size = 2 min_size = 1 max_size = 4 instance_types = ["t3.medium"] capacity_type = "ON_DEMAND" } } tags = { Environment = var.environment Terraform = "true" } } # RDS Database resource "aws_db_instance" "postgres" { identifier = "${var.cluster_name}-db" engine = "postgres" engine_version = "15.3" instance_class = "db.t3.micro" allocated_storage = 20 storage_encrypted = true db_name = var.db_name username = var.db_username password = var.db_password vpc_security_group_ids = [aws_security_group.rds.id] db_subnet_group_name = aws_db_subnet_group.main.name backup_retention_period = 7 skip_final_snapshot = false final_snapshot_identifier = "${var.cluster_name}-final-snapshot" tags = { Name = "${var.cluster_name}-db" } } # Outputs output "cluster_endpoint" { value = module.eks.cluster_endpoint } output "cluster_name" { value = module.eks.cluster_name } output "db_endpoint" { value = aws_db_instance.postgres.endpoint } # variables.tf variable 
"aws_region" { default = "us-east-1" } variable "cluster_name" { default = "production" } variable "environment" { default = "prod" } variable "db_name" { sensitive = true } variable "db_username" { sensitive = true } variable "db_password" { sensitive = true } # Usage: # terraform init # terraform plan -var-file="prod.tfvars" # terraform apply -var-file="prod.tfvars" # terraform destroy -var-file="prod.tfvars" ``` --- ## Kubernetes Deployment ```yaml # k8s/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: name: api namespace: production labels: app: api spec: replicas: 3 strategy: type: RollingUpdate rollingUpdate: maxSurge: 1 maxUnavailable: 0 # Zero downtime selector: matchLabels: app: api template: metadata: labels: app: api version: v1 spec: containers: - name: api image: ghcr.io/myorg/api:latest ports: - containerPort: 3000 env: - name: NODE_ENV value: "production" - name: DATABASE_URL valueFrom: secretKeyRef: name: api-secrets key: database-url - name: REDIS_URL valueFrom: secretKeyRef: name: api-secrets key: redis-url resources: requests: memory: "256Mi" cpu: "250m" limits: memory: "512Mi" cpu: "500m" livenessProbe: httpGet: path: /health port: 3000 initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 failureThreshold: 3 readinessProbe: httpGet: path: /ready port: 3000 initialDelaySeconds: 10 periodSeconds: 5 failureThreshold: 3 imagePullSecrets: - name: ghcr-secret --- apiVersion: v1 kind: Service metadata: name: api namespace: production spec: selector: app: api ports: - protocol: TCP port: 80 targetPort: 3000 type: ClusterIP --- apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: api namespace: production annotations: kubernetes.io/ingress.class: nginx cert-manager.io/cluster-issuer: letsencrypt-prod spec: tls: - hosts: - api.example.com secretName: api-tls rules: - host: api.example.com http: paths: - path: / pathType: Prefix backend: service: name: api port: number: 80 --- apiVersion: autoscaling/v2 kind: 
HorizontalPodAutoscaler metadata: name: api namespace: production spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment name: api minReplicas: 3 maxReplicas: 10 metrics: - type: Resource resource: name: cpu target: type: Utilization averageUtilization: 70 - type: Resource resource: name: memory target: type: Utilization averageUtilization: 80 # Apply: # kubectl apply -f k8s/ # kubectl get pods -n production # kubectl logs -f deployment/api -n production # kubectl describe deployment api -n production ``` --- ## Monitoring & Observability ### Prometheus + Grafana ```yaml # prometheus.yml global: scrape_interval: 15s evaluation_interval: 15s scrape_configs: - job_name: 'kubernetes-apiservers' kubernetes_sd_configs: - role: endpoints scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: 'kubernetes-nodes' kubernetes_sd_configs: - role: node - job_name: 'kubernetes-pods' kubernetes_sd_configs: - role: pod relabel_configs: - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep regex: true ``` ### Application metrics (Node.js with prom-client) ```javascript import express from 'express'; import promClient from 'prom-client'; const app = express(); // Collect default metrics const register = promClient.register; promClient.collectDefaultMetrics({ register }); // Custom metrics const httpRequestDuration = new promClient.Histogram({ name: 'http_request_duration_seconds', help: 'Duration of HTTP requests in seconds', labelNames: ['method', 'route', 'status_code'], buckets: [0.1, 0.5, 1, 2, 5] }); const httpRequestTotal = new promClient.Counter({ name: 'http_requests_total', help: 'Total number of HTTP requests', labelNames: ['method', 'route', 'status_code'] }); // Middleware to track metrics app.use((req, res, next) => { const start = Date.now(); res.on('finish', () => { const duration = (Date.now() - start) / 1000; 
httpRequestDuration.labels(req.method, req.route?.path || req.path, res.statusCode).observe(duration); httpRequestTotal.labels(req.method, req.route?.path || req.path, res.statusCode).inc(); }); next(); }); // Expose /metrics endpoint app.get('/metrics', async (req, res) => { res.set('Content-Type', register.contentType); res.end(await register.metrics()); }); app.listen(3000); ``` --- ## Key Takeaways 1. **Automate everything** - CI/CD pipeline, infrastructure, deployments 2. **Immutable infrastructure** - Containers, never SSH to prod 3. **Monitor proactively** - Metrics, logs, alerts 4. **Zero-downtime deployments** - Rolling updates, health checks 5. **Infrastructure as Code** - Terraform, version controlled --- ## References - "The Phoenix Project" - Gene Kim - "Kubernetes in Action" - Marko Lukša - "Terraform: Up & Running" - Yevgeniy Brikman **Related**: `kubernetes-deep-dive.md`, `terraform-best-practices.md`, `monitoring-alerting.md`

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/seanshin0214/persona-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server