name: MCP Evaluations
on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- "simplenote_mcp/**"
- "simplenote_mcp_server.py"
- "evals/**"
- "pyproject.toml"
workflow_dispatch: # Allow manual triggering
schedule:
    # Run weekly to catch regressions that land between pull requests
- cron: "0 2 * * 1" # Mondays at 2 AM UTC
permissions:
contents: read
pull-requests: write
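# Cancel superseded runs for the same ref so only the latest commit is evaluated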
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
evaluate-mcp-server:
runs-on: ubuntu-latest
timeout-minutes: 20
permissions:
contents: read
pull-requests: write
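    # Surfaced so the evaluate-pr-changes job can report key availability in its PR comment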
outputs:
has-openai-key: ${{ steps.check-openai.outputs.has-openai-key }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Setup Python
uses: actions/setup-python@v6
with:
python-version: "3.12"
cache: "pip"
- name: Setup Node.js for mcp-evals
uses: actions/setup-node@v6
with:
node-version: "20"
# Note: npm cache disabled - package-lock.json may not exist on scheduled runs
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
          pip install -e ".[dev,test]" || pip install -e . || echo "Package installation failed"
- name: Install mcp-evals
continue-on-error: true
run: |
npm init -y
npm install mcp-evals || echo "mcp-evals installation failed"
- name: Prepare test environment
run: |
# Create a simple test server configuration
echo "SIMPLENOTE_EMAIL=test@example.com" >> .env.test
echo "SIMPLENOTE_PASSWORD=test_password" >> .env.test
echo "SIMPLENOTE_OFFLINE_MODE=true" >> .env.test
# Verify server can start
python -c "import simplenote_mcp.server; print('✅ Server module import successful')" || echo "⚠️ Server import failed"
      - name: Check OpenAI API key availability
        id: check-openai
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          if [[ -n "$OPENAI_API_KEY" ]]; then
            echo "has-openai-key=true" >> "$GITHUB_OUTPUT"
            echo "✅ OpenAI API key is available"
          else
            echo "has-openai-key=false" >> "$GITHUB_OUTPUT"
            echo "⚠️ OpenAI API key is not available - evaluations will be skipped"
          fi
- name: Run smoke tests
if: steps.check-openai.outputs.has-openai-key == 'true'
uses: mclenhard/mcp-evals@v1.0.14
timeout-minutes: 10
with:
evals_path: "evals/smoke-tests.yaml"
server_path: "simplenote_mcp_server.py"
openai_api_key: ${{ secrets.OPENAI_API_KEY }}
model: "gpt-4o-mini"
continue-on-error: true
- name: Run manual smoke tests (fallback)
if: steps.check-openai.outputs.has-openai-key == 'false'
run: |
echo "🧪 Running manual smoke tests without OpenAI..."
# Test server startup
timeout 30 python simplenote_mcp_server.py --help || echo "Server help command failed"
# Test basic functionality
python -c "
import sys
sys.path.insert(0, '.')
try:
from simplenote_mcp.server import SimplenoteServer
print('✅ Server class import successful')
except Exception as e:
print(f'❌ Server import failed: {e}')
try:
from simplenote_mcp.tools import note_tools
print('✅ Tools import successful')
except Exception as e:
print(f'❌ Tools import failed: {e}')
" || echo "Manual tests completed with errors"
- name: Run comprehensive evaluations
if: (github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'comprehensive-eval')) && steps.check-openai.outputs.has-openai-key == 'true'
uses: mclenhard/mcp-evals@v1.0.14
with:
evals_path: "evals/comprehensive-evals.yaml"
server_path: "simplenote_mcp_server.py"
openai_api_key: ${{ secrets.OPENAI_API_KEY }}
model: "gpt-4o"
continue-on-error: true
- name: Run basic evaluations
if: github.event_name != 'workflow_dispatch' && steps.check-openai.outputs.has-openai-key == 'true'
uses: mclenhard/mcp-evals@v1.0.14
with:
evals_path: "evals/simplenote-evals.yaml"
server_path: "simplenote_mcp_server.py"
openai_api_key: ${{ secrets.OPENAI_API_KEY }}
model: "gpt-4o-mini"
continue-on-error: true
- name: Generate evaluation summary
if: always()
run: |
echo "# MCP Evaluation Summary" > evaluation-summary.md
echo "" >> evaluation-summary.md
echo "## Configuration" >> evaluation-summary.md
echo "- OpenAI API Key Available: ${{ steps.check-openai.outputs.has-openai-key }}" >> evaluation-summary.md
echo "- Trigger: ${{ github.event_name }}" >> evaluation-summary.md
echo "- Repository: ${{ github.repository }}" >> evaluation-summary.md
echo "- Commit: ${{ github.sha }}" >> evaluation-summary.md
echo "" >> evaluation-summary.md
if [[ "${{ steps.check-openai.outputs.has-openai-key }}" == "false" ]]; then
echo "## Notice" >> evaluation-summary.md
echo "OpenAI API key was not available, so only manual tests were run." >> evaluation-summary.md
echo "To run full evaluations, please configure the OPENAI_API_KEY secret." >> evaluation-summary.md
else
echo "## Evaluation Status" >> evaluation-summary.md
echo "Full MCP evaluations were attempted with OpenAI integration." >> evaluation-summary.md
fi
- name: Upload evaluation summary
if: always()
uses: actions/upload-artifact@v6
with:
name: evaluation-summary
path: evaluation-summary.md
retention-days: 30
evaluate-pr-changes:
runs-on: ubuntu-latest
timeout-minutes: 10
if: github.event_name == 'pull_request'
needs: evaluate-mcp-server
permissions:
contents: read
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 0 # Need full history for diff
- name: Setup Node.js for mcp-evals
uses: actions/setup-node@v6
with:
node-version: "20"
# Note: npm cache disabled - package-lock.json may not exist
- name: Install mcp-evals
run: |
npm init -y
npm install mcp-evals
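      # The validation steps below import yaml; set up Python and install PyYAML
      # explicitly rather than assuming it is present on the runner's system interpreter.
      - name: Setup Python for validation
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install PyYAML
        run: python -m pip install pyyaml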
      - name: Check for evaluation file changes
        id: eval-changes
        run: |
          # Record which eval files changed so the PR comment step can list them
          changed_files=$(git diff --name-only origin/main...HEAD | grep -E "^evals/.*\.yaml$" || true)
          if [[ -n "$changed_files" ]]; then
            echo "eval_files_changed=true" >> "$GITHUB_OUTPUT"
            printf 'changed_files<<EOF\n%s\nEOF\n' "$changed_files" >> "$GITHUB_OUTPUT"
          else
            echo "eval_files_changed=false" >> "$GITHUB_OUTPUT"
          fi
- name: Validate evaluation files
if: steps.eval-changes.outputs.eval_files_changed == 'true'
run: |
# Basic YAML validation
for file in evals/*.yaml; do
if [ -f "$file" ]; then
echo "Validating $file"
python -c "
import yaml
import sys
try:
with open('$file', 'r') as f:
yaml.safe_load(f)
print('✅ $file is valid YAML')
except Exception as e:
print(f'❌ Invalid YAML in $file: {e}')
sys.exit(1)
" || {
echo "❌ YAML validation failed for $file"
exit 1
}
fi
done
# Check for required fields in evaluation files
echo "Checking evaluation file structure..."
python -c "
import yaml
import os
required_fields = ['model', 'evals']
for yaml_file in [f for f in os.listdir('evals') if f.endswith('.yaml')]:
filepath = os.path.join('evals', yaml_file)
try:
with open(filepath, 'r') as f:
data = yaml.safe_load(f)
if not isinstance(data, dict):
print(f'⚠️ {yaml_file}: Root should be a dictionary')
continue
missing_fields = [field for field in required_fields if field not in data]
if missing_fields:
print(f'⚠️ {yaml_file}: Missing required fields: {missing_fields}')
else:
print(f'✅ {yaml_file}: Structure looks good')
except Exception as e:
print(f'❌ {yaml_file}: Error checking structure: {e}')
"
- name: Comment on PR with evaluation info
if: steps.eval-changes.outputs.eval_files_changed == 'true' && github.event_name == 'pull_request'
        uses: actions/github-script@v8
        continue-on-error: true
        env:
          CHANGED_FILES: ${{ steps.eval-changes.outputs.changed_files }}
        with:
          script: |
            const changedFiles = process.env.CHANGED_FILES || 'See file diff above';
const hasOpenAI = '${{ needs.evaluate-mcp-server.outputs.has-openai-key || 'false' }}' === 'true';
const body = `## 🧪 Evaluation Files Updated
This PR includes changes to evaluation files. The MCP evaluations will run automatically.
**OpenAI Integration**: ${hasOpenAI ? '✅ Available' : '⚠️ Not configured - manual tests only'}
**Tip**: Add the \`comprehensive-eval\` label to run the full evaluation suite.
### Modified evaluation files:
${changedFiles}
${hasOpenAI ? '' : '\n**Note**: To run full AI-powered evaluations, configure the `OPENAI_API_KEY` repository secret.'}
`;
            await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: body
});