System Initiative

Official

Apache 2.0

1,784

si
.github
workflows

e2e-validation.yml•18.5 kB

# Cypress end-to-end validation workflow.
#
# Jobs:
#   define-test-matrix  Lists the sub-directories of app/web/<test_dir>; each
#                       one becomes a matrix entry for cypress-tests.
#   launch-ec2-node     (environment == 'ec2-node' only) Deploys a node via the
#                       .ci/ scripts, then waits over SSH tunnels until Bedrock
#                       (3020) and the web app (8080) respond.
#   cypress-tests       Runs Cypress once per matrix entry, up to 3 attempts.
#   cleanup             (ec2-node only) Deletes the deployed stacks.
#   on-failure          Sends FireHydrant / Slack notifications.
#
# Secrets, inputs and step outputs are passed to shell scripts through `env:`
# rather than `${{ }}` interpolation inside `run:`, so their contents can never
# be parsed as shell syntax.
name: Cypress E2E Tests

on:
  workflow_call:
    inputs:
      environment:
        type: string
        required: true
        description: "where to test"
      test_dir:
        type: string
        required: true
        description: "which test directory to execute"
  workflow_dispatch:
    inputs:
      environment:
        type: choice
        required: true
        description: "where to test"
        default: "tools"
        options:
          - tools
          - production
          - perf
          - ec2-node
      test_dir:
        type: string
        required: true
        description: "which test directory to execute"
        default: "cypress/e2e"

jobs:
  define-test-matrix:
    runs-on: ubuntu-latest
    outputs:
      tests: ${{ steps.tests.outputs.tests }}
    steps:
      - uses: actions/checkout@v4
      - id: tests
        working-directory: app/web/${{ inputs.test_dir }}
        run: |
          # Emit the immediate sub-directory names as a compact JSON array.
          # jq -R reads each line as a raw string, so names are never
          # word-split by the shell; an empty listing yields [].
          test_array=$(
            find . -mindepth 1 -maxdepth 1 -type d \
              | sed 's|^\./||' \
              | jq -Rnc '[inputs | select(length > 0)]'
          )
          echo "$test_array"
          echo "tests=$test_array" >> "$GITHUB_OUTPUT"

  launch-ec2-node:
    environment: ${{ inputs.environment }}
    runs-on: ubuntu-latest
    if: ${{ inputs.environment == 'ec2-node' }}
    outputs:
      remote-ip: ${{ steps.get-ip.outputs.remote_ip }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Delete any lingering nodes
        working-directory: .ci/
        env:
          SI_API_TOKEN: ${{ secrets.SI_API_TOKEN }}
          SI_WORKSPACE_ID: ${{ vars.MANAGEMENT_WORKSPACE_ID }}
        run: python3 ./delete-stacks.py

      - name: Deploy EC2 node
        working-directory: .ci/
        env:
          SI_API_TOKEN: ${{ secrets.SI_API_TOKEN }}
          SI_WORKSPACE_ID: ${{ vars.MANAGEMENT_WORKSPACE_ID }}
        run: python3 ./deploy-stack.py

      - name: Upload deployment error (if any)
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: deployment-error
          path: .ci/error
          if-no-files-found: ignore
          retention-days: 1

      - name: Save IP
        id: get-ip
        working-directory: .ci/
        run: |
          remote_ip=$(grep -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}' "./ip")
          echo "Remote IP set to ${remote_ip}"
          # Job output for downstream jobs, env var for later steps in this job.
          echo "remote_ip=$remote_ip" >> "$GITHUB_OUTPUT"
          echo "remote_ip=$remote_ip" >> "$GITHUB_ENV"

      # The job-level `if` already restricts this job to ec2-node, so no
      # step-level condition is needed here.
      - name: Validate services are healthy
        working-directory: .ci/
        env:
          SSH_KEY: ${{ secrets.SSH_KEY }}
        run: |
          echo "$SSH_KEY" > ssh-key.pem
          chmod 600 ssh-key.pem

          echo "Tunneling EC2 node @ $remote_ip"

          # Start SSH tunnel in background for 3020 (Bedrock) with retry logic
          tunnel_retries=0
          max_tunnel_retries=5

          while [ $tunnel_retries -lt $max_tunnel_retries ]; do
            echo "Attempting to establish SSH tunnel for port 3020 (attempt $((tunnel_retries + 1))/$max_tunnel_retries)..."

            # Kill any existing SSH processes to this host
            pkill -f "ssh.*arch@$remote_ip.*3020" || true
            sleep 2

            # Start SSH tunnel
            nohup ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -o ServerAliveInterval=60 -L 3020:localhost:3020 "arch@$remote_ip" -i ssh-key.pem -N &
            ssh_pid=$!

            # Give SSH time to establish connection
            sleep 5

            # Verify tunnel is working
            if nc -z localhost 3020; then
              echo "✅ SSH tunnel for port 3020 established successfully"
              break
            else
              echo "⚠️ SSH tunnel attempt $((tunnel_retries + 1)) failed, retrying..."
              kill $ssh_pid 2>/dev/null || true
              tunnel_retries=$((tunnel_retries + 1))
              sleep 3
            fi
          done

          if [ $tunnel_retries -eq $max_tunnel_retries ]; then
            echo "❌ Failed to establish SSH tunnel for port 3020 after $max_tunnel_retries attempts"
            exit 1
          fi

          # Wait for Bedrock (EC2 localhost:3020) to be ready
          echo "Waiting for Bedrock to be ready..."
          bedrock_ready=false
          for i in {1..180}; do
            if curl --fail --silent --max-time 2 http://localhost:3020/; then
              echo "✅ Bedrock service is up and returned a valid response, preparing db"
              # NOTE(review): 'Content;' makes curl send an empty "Content:"
              # header; kept as-is, confirm whether it is actually required.
              curl --location 'http://localhost:3020/prepare' \
                --header 'Content;' \
                --header 'Content-Type: application/json' \
                --data '{
                  "recording_id": "W=01JYPR32SD5RKR3AMG298J7263-CS=01JZ3W5XX6QHQZ6PYSBHK4SB3K (39 components)",
                  "parameters": {},
                  "executionParameters": {}
                }'
              bedrock_ready=true
              break
            fi
            echo "⏳ Attempt $i/180: Bedrock not responding yet. Retrying in 10s..."
            sleep 10
          done

          # Fail if still not up after 30 min. A port probe (nc -z) is not
          # enough here: the local end of the SSH forward accepts connections
          # even when Bedrock itself never answered, so track the HTTP result.
          if [ "$bedrock_ready" != true ]; then
            echo "❌ Timed out waiting for bedrock service on port 3020"
            exit 1
          fi

          # Start SSH tunnel in background for 8080 (Web App) with retry logic
          tunnel_retries=0
          max_tunnel_retries=5

          while [ $tunnel_retries -lt $max_tunnel_retries ]; do
            echo "Attempting to establish SSH tunnel for port 8080 (attempt $((tunnel_retries + 1))/$max_tunnel_retries)..."

            # Kill any existing SSH processes to this host
            pkill -f "ssh.*arch@$remote_ip.*8080" || true
            sleep 2

            # Start SSH tunnel
            nohup ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -o ServerAliveInterval=60 -L 8080:localhost:8080 "arch@$remote_ip" -i ssh-key.pem -N &
            ssh_pid=$!

            # Give SSH time to establish connection
            sleep 5

            # Verify tunnel is working
            if nc -z localhost 8080; then
              echo "✅ SSH tunnel for port 8080 established successfully"
              break
            else
              echo "⚠️ SSH tunnel attempt $((tunnel_retries + 1)) failed, retrying..."
              kill $ssh_pid 2>/dev/null || true
              tunnel_retries=$((tunnel_retries + 1))
              sleep 3
            fi
          done

          if [ $tunnel_retries -eq $max_tunnel_retries ]; then
            echo "❌ Failed to establish SSH tunnel for port 8080 after $max_tunnel_retries attempts"
            exit 1
          fi

          # Wait for tunnel Web App (EC2 localhost:8080) to be ready
          echo "Waiting up to 30 minutes for remote web app to be ready..."
          for i in {1..180}; do
            if curl --fail --silent --max-time 2 http://localhost:8080/health; then
              echo "✅ Remote service is up and returned a valid response!"
              break
            fi
            echo "⏳ Attempt $i/180: Service not responding yet. Retrying in 10s..."
            sleep 10
          done

          # Fail if still not up after 30 min
          if ! curl --fail --silent --max-time 2 http://localhost:8080/health; then
            echo "❌ Timed out waiting for web app health endpoint to respond"
            echo "📋 Checking cloud-init logs for debugging..."
            ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -i ssh-key.pem "arch@$remote_ip" "tail -50 /var/log/cloud-init-output.log" || echo "⚠️ Could not retrieve cloud-init logs"
            exit 1
          fi

  cypress-tests:
    environment: ${{ inputs.environment }}
    runs-on: ubuntu-latest
    needs: [define-test-matrix, launch-ec2-node]
    # always() lets this job run when launch-ec2-node was skipped (non-ec2
    # environments); the remaining terms still require the matrix job, and the
    # node launch when applicable, to have succeeded.
    if: >-
      always()
      && (needs.define-test-matrix.result == 'success')
      && (inputs.environment != 'ec2-node' || needs.launch-ec2-node.result == 'success')
    strategy:
      fail-fast: true
      matrix:
        tests: ${{ fromJSON(needs.define-test-matrix.outputs.tests) }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18.18.2'

      - name: Setup pnpm
        uses: pnpm/action-setup@v4

      - name: Install Deps
        working-directory: app/web
        run: |
          pnpm i
          npx cypress install

      - name: install uuid
        run: |
          sudo apt update
          sudo apt install uuid -y

      - name: Setup SSH tunnel if ec2-node for web access
        if: ${{ inputs.environment == 'ec2-node' }}
        working-directory: .ci/
        env:
          SSH_KEY: ${{ secrets.SSH_KEY }}
          REMOTE_IP: ${{ needs.launch-ec2-node.outputs.remote-ip }}
        run: |
          echo "$SSH_KEY" > ssh-key.pem
          chmod 600 ssh-key.pem

          remote_ip="$REMOTE_IP"
          echo "Tunneling EC2 node @ $remote_ip"

          # Start SSH tunnel in background for 8080 (Web App) with retry logic
          tunnel_retries=0
          max_tunnel_retries=5

          while [ $tunnel_retries -lt $max_tunnel_retries ]; do
            echo "Attempting to establish SSH tunnel for port 8080 (attempt $((tunnel_retries + 1))/$max_tunnel_retries)..."

            # Kill any existing SSH processes to this host
            pkill -f "ssh.*arch@$remote_ip.*8080" || true
            sleep 2

            # Start SSH tunnel
            nohup ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -o ServerAliveInterval=60 -L 8080:localhost:8080 "arch@$remote_ip" -i ssh-key.pem -N &
            ssh_pid=$!

            # Give SSH time to establish connection
            sleep 5

            # Verify tunnel is working
            if nc -z localhost 8080; then
              echo "✅ SSH tunnel for port 8080 established successfully"
              break
            else
              echo "⚠️ SSH tunnel attempt $((tunnel_retries + 1)) failed, retrying..."
              kill $ssh_pid 2>/dev/null || true
              tunnel_retries=$((tunnel_retries + 1))
              sleep 3
            fi
          done

          if [ $tunnel_retries -eq $max_tunnel_retries ]; then
            echo "❌ Failed to establish SSH tunnel for port 8080 after $max_tunnel_retries attempts"
            exit 1
          fi

          # Wait for tunnel Web App (EC2 localhost:8080) to be ready
          echo "Waiting up to 30 minutes for remote web app to be ready..."
          for i in {1..180}; do
            if curl --fail --silent --max-time 2 http://localhost:8080/health; then
              echo "✅ Remote service is up and returned a valid response!"
              break
            fi
            echo "⏳ Attempt $i/180: Service not responding yet. Retrying in 10s..."
            sleep 10
          done

          # Fail if still not up after 30 min
          if ! curl --fail --silent --max-time 2 http://localhost:8080/health; then
            echo "❌ Timed out waiting for web app health endpoint to respond"
            echo "📋 Checking cloud-init logs for debugging..."
            ssh -o StrictHostKeyChecking=no -o ConnectTimeout=30 -i ssh-key.pem "arch@$remote_ip" "tail -50 /var/log/cloud-init-output.log" || echo "⚠️ Could not retrieve cloud-init logs"
            exit 1
          fi

      - name: Run Cypress Tests
        working-directory: app/web
        env:
          VITE_AUTH0_USERNAME: ${{ secrets.VITE_AUTH0_USERNAME }}
          VITE_AUTH0_PASSWORD: ${{ secrets.VITE_AUTH0_PASSWORD }}
          VITE_SI_CYPRESS_MULTIPLIER: ${{ vars.VITE_SI_CYPRESS_MULTIPLIER }}
          VITE_SI_WORKSPACE_URL: ${{ vars.VITE_SI_WORKSPACE_URL }}
          VITE_HOST_URL: ${{ vars.VITE_SI_WORKSPACE_URL }}
          VITE_SI_WORKSPACE_ID: ${{ vars.VITE_SI_WORKSPACE_ID }}
          VITE_AUTH_API_URL: "https://auth-api.systeminit.com"
          VITE_AUTH_PORTAL_URL: "https://auth.systeminit.com"
          TEST_DIR: ${{ inputs.test_dir }}
          MATRIX_TEST: ${{ matrix.tests }}
        run: |
          VITE_UUID="$(uuid)"
          export VITE_UUID

          n=0
          max_retries=3

          until [ $n -ge $max_retries ]; do
            # Reset per attempt; stays empty only when cypress exits 0.
            exit_code=""
            npx cypress run --spec "$TEST_DIR/$MATRIX_TEST/**" || exit_code=$?
            if [ -z "$exit_code" ]; then
              echo "Cypress Test task succeeded!"
              break
            fi
            n=$((n+1))
            echo "Attempt $n/$max_retries failed with exit code $exit_code! Retrying..."
          done

          if [ $n -ge $max_retries ]; then
            echo "All $max_retries attempts failed."
            exit 1
          fi

      - name: 'Upload Cypress Recordings to Github'
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: cypress-recordings-run-${{ matrix.tests }}
          path: app/web/cypress/videos/**/*.mp4
          retention-days: 5

      - name: Check Test Results
        if: failure()
        run: exit 1

  cleanup:
    name: Cleanup EC2 Nodes
    runs-on: ubuntu-latest
    needs: cypress-tests
    environment: ${{ inputs.environment }}
    # always() so the node is torn down even when the tests failed.
    if: inputs.environment == 'ec2-node' && always()
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Delete EC2 nodes
        working-directory: .ci/
        env:
          SI_API_TOKEN: ${{ secrets.SI_API_TOKEN }}
          SI_WORKSPACE_ID: ${{ vars.MANAGEMENT_WORKSPACE_ID }}
        run: python3 ./delete-stacks.py

  on-failure:
    runs-on: ubuntu-latest
    needs: [cypress-tests, launch-ec2-node]
    environment: ${{ inputs.environment }}
    if: failure()
    steps:
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts

      - name: Check for deployment error
        if: ${{ inputs.environment == 'ec2-node' && needs.launch-ec2-node.result == 'failure' }}
        id: deployment-error
        run: |
          if [ -f "./artifacts/deployment-error/error" ]; then
            error_message=$(cat ./artifacts/deployment-error/error)
            {
              echo "deployment_error<<EOF"
              echo "$error_message"
              echo "EOF"
              echo "has_deployment_error=true"
            } >> "$GITHUB_OUTPUT"
          else
            echo "has_deployment_error=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Check for failed Cypress tests
        id: failed-tests
        run: |
          failed_tests=""
          has_test_failures=false

          # Check for video artifacts which indicate test failures
          for artifact_dir in artifacts/cypress-recordings-run-*; do
            if [ -d "$artifact_dir" ]; then
              # Extract test name from artifact directory name
              test_name=$(basename "$artifact_dir" | sed 's/cypress-recordings-run-//')
              if [ -n "$failed_tests" ]; then
                failed_tests="$failed_tests, $test_name"
              else
                failed_tests="$test_name"
              fi
              has_test_failures=true
            fi
          done

          {
            echo "failed_tests=$failed_tests"
            echo "has_test_failures=$has_test_failures"
          } >> "$GITHUB_OUTPUT"

          echo "Failed tests: $failed_tests"

      - name: Raise FireHydrant alert for failures on main
        env:
          FIREHYDRANT_WEBHOOK_URL: ${{ secrets.FIREHYDRANT_WEBHOOK_URL }}
          TARGET_ENV: ${{ inputs.environment }}
        run: |
          has_artifacts=false
          for marker in artifacts/*/*.mp4; do
            if [ -f "$marker" ]; then
              echo "Artifact detected for failed test: $marker"
              has_artifacts=true
              break
            fi
          done

          # GITHUB_REF_NAME is the runner-provided equivalent of
          # github.ref_name, read as data instead of spliced into the script.
          if [ "$has_artifacts" = true ] && [ "$GITHUB_REF_NAME" = "main" ]; then
            # jq performs the JSON string escaping.
            payload=$(
              jq -cn \
                --arg env "$TARGET_ENV" \
                --arg run_url "https://github.com/systeminit/si/actions/runs/$GITHUB_RUN_ID" \
                '{
                  summary: "E2E \($env) Tests Fail",
                  body: "E2E Tests have failed for \($env).",
                  links: [{href: $run_url, text: "E2E Test Run \($env)"}],
                  tags: ["service:github"]
                }'
            )
            curl --location "$FIREHYDRANT_WEBHOOK_URL" \
              --header "Content-Type: application/json" \
              --data "$payload"
          fi

      - name: Send Slack notification with deployment error
        if: ${{ inputs.environment == 'ec2-node' && steps.deployment-error.outputs.has_deployment_error == 'true' }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          DEPLOYMENT_ERROR: ${{ steps.deployment-error.outputs.deployment_error }}
        run: |
          # Collapse the error onto one line; jq handles quoting/escaping so
          # arbitrary error text cannot break the JSON body.
          flattened_error=$(printf '%s' "$DEPLOYMENT_ERROR" | tr '\n' ' ')
          message=$(printf ':si: Failed EC2 Deployment for E2E Test: <https://github.com/systeminit/si/actions/runs/%s|:test_tube: Link>\n```%s```' "$GITHUB_RUN_ID" "$flattened_error")
          payload=$(jq -cn --arg text "$message" '{text: $text}')
          curl -X POST \
            --header 'Content-type: application/json' \
            --data "$payload" \
            "$SLACK_WEBHOOK_URL"

      # Runs whenever the deployment-error notification did not: that covers
      # non-ec2 environments, ec2 runs whose node launched fine, and ec2 runs
      # whose launch failed without leaving an error file (previously silent).
      - name: Send regular Slack notification
        if: ${{ steps.deployment-error.outputs.has_deployment_error != 'true' }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          TARGET_ENV: ${{ inputs.environment }}
          FAILED_TESTS: ${{ steps.failed-tests.outputs.failed_tests }}
        run: |
          message=":si: Failed Cypress E2E Test for ${TARGET_ENV}: <https://github.com/systeminit/si/actions/runs/${GITHUB_RUN_ID}|:test_tube: Link>"
          if [ -n "$FAILED_TESTS" ]; then
            message=$(printf '%s\n```Failed tests: %s```' "$message" "$FAILED_TESTS")
          fi
          payload=$(jq -cn --arg text "$message" '{text: $text}')
          curl -X POST \
            --header 'Content-type: application/json' \
            --data "$payload" \
            "$SLACK_WEBHOOK_URL"

Latest Blog Posts

The 50MB Markdown Files That Broke Our Server
By punkpeye on December 3, 2025.
react
react-router
node-js
OpenTelemetry for Model Context Protocol (MCP) Analytics and Agent Observability
By Om-Shree-0709 on November 29, 2025.
observability
mcp
opentelemetry
Securing Enterprise AI Agents with Unique Identities in the Model Context Protocol (MCP)
By Om-Shree-0709 on November 27, 2025.

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/systeminit/si'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.