hny-mcp

by honeycombio
MIT License
  • Linux
  • Apple
name: Tests & Evaluation

on:
  push:
    branches: [ main ]
  pull_request: # Run on all pull requests, regardless of target branch

# Add permissions to allow PR comments
permissions:
  contents: read
  pull-requests: write
  actions: read

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    strategy:
      matrix:
        node-version: [18.x, 20.x]
    steps:
      - uses: actions/checkout@v4

      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}

      # Setup PNPM - must be before setting up Node.js cache
      - name: Setup PNPM
        uses: pnpm/action-setup@v2

      # Setup Node.js cache after PNPM is installed
      - name: Setup Node.js with cache
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'pnpm'

      - name: Install dependencies
        run: pnpm install

      - name: Typecheck (entire codebase)
        run: pnpm typecheck

      - name: Run tests
        run: pnpm test

      - name: Run tests with coverage
        run: pnpm test:coverage

      - name: Build
        run: pnpm build

  # New job that runs after all test matrix jobs complete
  evaluate:
    name: Run Evaluations
    # This job will only run if all test jobs succeed
    needs: test
    runs-on: ubuntu-latest
    # Special handling for main branch
    if: success()
    steps:
      - uses: actions/checkout@v4

      - name: Setup PNPM
        uses: pnpm/action-setup@v2

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'pnpm'

      - name: Install dependencies
        run: pnpm install

      - name: Build project for evaluation
        run: pnpm run build

      - name: Configure MCP environment
        run: echo "Using environment variable-based configuration"

      # Verify the build file exists before running evals
      - name: Verify build file exists
        run: |
          mkdir -p eval/reports
          if [ ! -f "build/index.mjs" ]; then
            echo "ERROR: build/index.mjs does not exist after build step!"
            echo '<!DOCTYPE html>' > eval/reports/build-failed.html
            echo '<html><head><title>Build Failed</title></head>' >> eval/reports/build-failed.html
            echo '<body><h1>Evaluation Failed</h1>' >> eval/reports/build-failed.html
            echo '<p>The MCP build output file does not exist. Check the build step for errors.</p>' >> eval/reports/build-failed.html
            echo '</body></html>' >> eval/reports/build-failed.html
            exit 1
          else
            echo "Build file found, proceeding with evaluation"
          fi

      - name: Run evaluations
        id: run_evals
        run: |
          echo "Running evaluations..."
          if ! pnpm run eval; then
            echo "::error::Evaluation failed during execution"
            echo "EVAL_OUTCOME=failed" >> $GITHUB_ENV
            # Create a failure report but don't exit yet - we want to collect all artifacts
            mkdir -p eval/reports
            echo '<!DOCTYPE html>' > eval/reports/eval-failed.html
            echo '<html><head><title>Evaluation Failed</title></head>' >> eval/reports/eval-failed.html
            echo '<body><h1>Evaluation Failed</h1>' >> eval/reports/eval-failed.html
            echo '<p>The evaluation process encountered an error. Check the logs for details.</p>' >> eval/reports/eval-failed.html
            echo '<h2>Configuration Information</h2>' >> eval/reports/eval-failed.html
            echo '<pre>' >> eval/reports/eval-failed.html
            if [ -n "$HONEYCOMB_API_KEY" ]; then
              echo "Honeycomb API key is set (length: ${#HONEYCOMB_API_KEY})" >> eval/reports/eval-failed.html
            else
              echo "Honeycomb API key is not set!" >> eval/reports/eval-failed.html
              echo "Make sure HONEYCOMB_API_KEY is set in GitHub secrets and passed to the workflow" >> eval/reports/eval-failed.html
            fi
            echo '</pre>' >> eval/reports/eval-failed.html
            echo '</body></html>' >> eval/reports/eval-failed.html
            # Print environment variables (excluding secrets) for debugging
            echo "Environment variables for debugging:"
            env | grep -v -E "HONEYCOMB_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY" | sort
          else
            echo "EVAL_OUTCOME=success" >> $GITHUB_ENV
          fi
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # Use Honeycomb API key for environment variable-based config
          HONEYCOMB_API_KEY: ${{ secrets.HONEYCOMB_API_KEY }}
          # Use only limited models for CI to save costs
          EVAL_MODELS: '{"openai":"gpt-4o-mini","anthropic":"claude-3-5-haiku-latest"}'
          EVAL_CONCURRENCY: 2
          EVAL_JUDGE_PROVIDER: "anthropic"
          EVAL_JUDGE_MODEL: "claude-3-5-haiku-latest"
          MCP_SERVER_COMMAND: "node build/index.mjs"

      - name: Ensure reports directory exists
        run: mkdir -p eval/reports

      - name: Create index file if no reports are generated
        run: |
          # Check if any HTML reports exist
          if [ -z "$(find eval/reports -name '*.html' 2>/dev/null)" ]; then
            echo "No reports were generated, creating a placeholder"
            echo '<!DOCTYPE html>' > eval/reports/no-reports.html
            echo '<html><head><title>No Reports</title></head>' >> eval/reports/no-reports.html
            echo '<body><h1>No evaluation reports generated</h1>' >> eval/reports/no-reports.html
            echo '<p>This could be due to missing API keys or configuration.</p>' >> eval/reports/no-reports.html
            echo '</body></html>' >> eval/reports/no-reports.html
          fi

      - name: Find latest report
        id: find-report
        run: |
          LATEST_REPORT=$(ls -t eval/reports/*.html 2>/dev/null | head -1 || echo "eval/reports/no-reports.html")
          echo "latest_report=$LATEST_REPORT" >> $GITHUB_OUTPUT

      - name: Post report summary
        run: |
          if [ "$EVAL_OUTCOME" == "failed" ]; then
            echo "## ❌ Evaluation Failed" > $GITHUB_STEP_SUMMARY
            echo "The evaluation process encountered errors. See logs for details." >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "Error report: $(basename ${{ steps.find-report.outputs.latest_report }})" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "The error report is available as a workflow artifact." >> $GITHUB_STEP_SUMMARY
          else
            echo "## ✅ Evaluation Results" > $GITHUB_STEP_SUMMARY
            echo "Ran evaluations with OpenAI and Anthropic models." >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### Summary" >> $GITHUB_STEP_SUMMARY
            echo "Latest report: $(basename ${{ steps.find-report.outputs.latest_report }})" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "The full report is available as a workflow artifact." >> $GITHUB_STEP_SUMMARY
          fi

          # Add PR comment if we're on a PR
          if [ "${{ github.event_name }}" == "pull_request" ]; then
            # Start with basic PR comment header
            echo "## Honeycomb MCP Evaluation Results" > pr_comment.txt
            echo "" >> pr_comment.txt

            if [ "$EVAL_OUTCOME" == "failed" ]; then
              echo "❌ Evaluation process failed" >> pr_comment.txt
              echo "" >> pr_comment.txt
              echo "The evaluation process encountered errors. See workflow logs for details." >> pr_comment.txt
            else
              # Find the latest summary file
              LATEST_SUMMARY=$(find eval/results -name "summary-*.json" -type f | sort -r | head -1)

              if [ -n "$LATEST_SUMMARY" ] && [ -f "$LATEST_SUMMARY" ]; then
                echo "Found summary file: $LATEST_SUMMARY"

                # Extract key metrics
                RATE=$(jq -r '.successRate' "$LATEST_SUMMARY" 2>/dev/null || echo "0")
                # Calculate percentage with bc (more reliable than jq for math)
                SUCCESS_RATE=$(echo "$RATE * 100" | bc -l | awk '{printf "%.1f", $0}')
                PASSED=$(jq -r '.passed' "$LATEST_SUMMARY" 2>/dev/null || echo "N/A")
                TOTAL=$(jq -r '.totalTests' "$LATEST_SUMMARY" 2>/dev/null || echo "N/A")

                # Use bc for reliable floating point comparison
                if (( $(echo "$RATE >= 0.75" | bc -l) )); then
                  echo "✅ Evaluations completed successfully: **${SUCCESS_RATE}%** pass rate (${PASSED}/${TOTAL} tests)" >> pr_comment.txt
                elif (( $(echo "$RATE >= 0.5" | bc -l) )); then
                  echo "⚠️ Evaluations completed with mixed results: **${SUCCESS_RATE}%** pass rate (${PASSED}/${TOTAL} tests)" >> pr_comment.txt
                else
                  echo "❌ Evaluations completed with poor results: **${SUCCESS_RATE}%** pass rate (${PASSED}/${TOTAL} tests)" >> pr_comment.txt
                fi
                echo "" >> pr_comment.txt

                # Basic metrics table
                echo "### Evaluation Summary" >> pr_comment.txt
                echo "" >> pr_comment.txt
                echo "| Metric | Value |" >> pr_comment.txt
                echo "|--------|-------|" >> pr_comment.txt
                echo "| Success Rate | ${SUCCESS_RATE}% |" >> pr_comment.txt
                echo "| Tests Passed | $PASSED / $TOTAL |" >> pr_comment.txt

                # Add latency if available
                AVG_LATENCY=$(jq -r '.averageLatency' "$LATEST_SUMMARY" 2>/dev/null || echo "N/A")
                if [ "$AVG_LATENCY" != "N/A" ] && [ "$AVG_LATENCY" != "null" ]; then
                  AVG_LATENCY_INT=$(echo "$AVG_LATENCY" | awk '{printf "%.0f", $0}')
                  echo "| Avg Latency | ${AVG_LATENCY_INT}ms |" >> pr_comment.txt
                fi

                # Add basic model information
                echo "" >> pr_comment.txt
                echo "### Models Tested" >> pr_comment.txt
                echo "" >> pr_comment.txt

                # Extract providers directly
                echo "| Provider | Model |" >> pr_comment.txt
                echo "|----------|-------|" >> pr_comment.txt

                # OpenAI models
                OPENAI_MODELS=$(jq -r '.results[] | select(.provider == "openai") | .model' "$LATEST_SUMMARY" 2>/dev/null | sort -u)
                if [ -n "$OPENAI_MODELS" ]; then
                  while read -r model; do
                    if [ -n "$model" ]; then
                      echo "| OpenAI | $model |" >> pr_comment.txt
                    fi
                  done <<< "$OPENAI_MODELS"
                fi

                # Anthropic models
                ANTHROPIC_MODELS=$(jq -r '.results[] | select(.provider == "anthropic") | .model' "$LATEST_SUMMARY" 2>/dev/null | sort -u)
                if [ -n "$ANTHROPIC_MODELS" ]; then
                  while read -r model; do
                    if [ -n "$model" ]; then
                      echo "| Anthropic | $model |" >> pr_comment.txt
                    fi
                  done <<< "$ANTHROPIC_MODELS"
                fi
              else
                echo "✅ Evaluations completed successfully" >> pr_comment.txt
                echo "" >> pr_comment.txt
                echo "No detailed metrics available" >> pr_comment.txt
              fi

              # Always add a link to the artifacts
              echo "" >> pr_comment.txt
              echo "📊 [View full report in workflow artifacts](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})" >> pr_comment.txt
            fi

            # Post the comment to the PR
            gh pr comment ${{ github.event.pull_request.number }} --body-file pr_comment.txt
          fi
        env:
          GH_TOKEN: ${{ github.token }}

      # Create report index if it doesn't exist
      - name: Generate report index if needed
        run: |
          if [ ! -f "eval/reports/index.html" ]; then
            echo "Generating index.html for reports using the update-index script"
            pnpm run eval:update-index
          fi

      # Upload evaluation reports as artifacts
      - name: Upload evaluation reports
        uses: actions/upload-artifact@v4
        with:
          name: evaluation-reports
          path: eval/reports/
          retention-days: 30

      # Final step to fail the job if evaluations failed
      - name: Check final evaluation status
        if: env.EVAL_OUTCOME == 'failed'
        run: |
          echo "::error::Evaluation failed - see artifacts for error report"
          exit 1

MCP directory API

We provide all of the information about MCP servers through our MCP directory API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/honeycombio/honeycomb-mcp'
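
The same lookup from Node 18+ (or any runtime with a global fetch) might look like the sketch below. Only the URL comes from the curl example above; the response is assumed to be JSON, and its schema is not documented here, so it is treated as opaque.

// Hedged sketch: fetch this server's directory entry (ESM, Node 18+).
const res = await fetch(
  "https://glama.ai/api/mcp/v1/servers/honeycombio/honeycomb-mcp",
);
if (!res.ok) throw new Error(`Directory API returned HTTP ${res.status}`);
// Schema unknown from this page alone, so keep it as unknown.
const server: unknown = await res.json();
console.log(JSON.stringify(server, null, 2));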

If you have feedback or need assistance with the MCP directory API, please join our Discord server.