#!/usr/bin/env python3
"""
Comprehensive test execution script for the Grants MCP Server.
This script simulates the GitHub Actions pipeline locally.
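
Example invocations (the scripts/ location is assumed from how main()
resolves project_root; adjust the path to wherever this file lives):

    python3 scripts/run_tests.py --quick --skip-docker
    python3 scripts/run_tests.py --api-key "$SIMPLER_GRANTS_API_KEY"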
"""
import argparse
import json
import os
import subprocess
import sys
import time
from pathlib import Path
from typing import List, Optional, Tuple
class Colors:
"""ANSI color codes for terminal output."""
GREEN = '\033[92m'
RED = '\033[91m'
YELLOW = '\033[93m'
BLUE = '\033[94m'
MAGENTA = '\033[95m'
CYAN = '\033[96m'
RESET = '\033[0m'
BOLD = '\033[1m'
class TestRunner:
"""Main test execution orchestrator."""
def __init__(self, project_root: Path):
self.project_root = project_root
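        # Maps a test category name to {'success': bool, 'output': str, 'errors': str}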
self.results = {}
self.start_time = time.time()
def print_header(self, title: str):
"""Print a formatted header."""
print(f"\n{Colors.BOLD}{Colors.BLUE}{'='*60}")
print(f" {title}")
print(f"{'='*60}{Colors.RESET}\n")
def print_step(self, step: str):
"""Print a test step."""
print(f"{Colors.CYAN}▶ {step}...{Colors.RESET}")
def print_success(self, message: str):
"""Print a success message."""
print(f"{Colors.GREEN}✅ {message}{Colors.RESET}")
def print_warning(self, message: str):
"""Print a warning message."""
print(f"{Colors.YELLOW}⚠️ {message}{Colors.RESET}")
def print_error(self, message: str):
"""Print an error message."""
print(f"{Colors.RED}❌ {message}{Colors.RESET}")
def run_command(self, cmd: List[str], description: str, timeout: int = 300) -> Tuple[bool, str, str]:
"""Run a shell command and return success status and output."""
        self.print_step(description)
start = time.time()
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout,
cwd=self.project_root
)
duration = time.time() - start
if result.returncode == 0:
self.print_success(f"{description} completed in {duration:.1f}s")
return True, result.stdout, result.stderr
else:
self.print_error(f"{description} failed (exit code {result.returncode})")
print(f"{Colors.RED}STDOUT: {result.stdout}{Colors.RESET}")
print(f"{Colors.RED}STDERR: {result.stderr}{Colors.RESET}")
return False, result.stdout, result.stderr
except subprocess.TimeoutExpired:
self.print_error(f"{description} timed out after {timeout}s")
return False, "", "Timeout expired"
except Exception as e:
self.print_error(f"{description} failed with exception: {e}")
return False, "", str(e)
def check_dependencies(self) -> bool:
"""Check if required dependencies are available."""
self.print_header("Checking Dependencies")
dependencies = [
(["python3", "--version"], "Python 3"),
(["pip", "--version"], "pip"),
(["node", "--version"], "Node.js"),
(["npm", "--version"], "npm"),
(["docker", "--version"], "Docker"),
]
all_ok = True
for cmd, name in dependencies:
success, stdout, stderr = self.run_command(cmd, f"Checking {name}")
if success:
version = stdout.strip().split('\n')[0]
print(f" {name}: {version}")
else:
all_ok = False
return all_ok
def install_dependencies(self) -> bool:
"""Install Python and Node.js dependencies."""
self.print_header("Installing Dependencies")
# Install Python dependencies
python_success, _, _ = self.run_command(
["python3", "-m", "pip", "install", "-r", "requirements.txt", "-r", "requirements-dev.txt"],
"Installing Python dependencies",
timeout=300
)
# Install Node.js dependencies
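        # npm ci requires a package-lock.json and installs the exact locked dependency versions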
node_success, _, _ = self.run_command(
["npm", "ci"],
"Installing Node.js dependencies",
timeout=180
)
return python_success and node_success
def run_linting(self) -> bool:
"""Run code linting and type checking."""
self.print_header("Code Quality Checks")
# Python linting
flake8_success, _, _ = self.run_command(
["flake8", "src/", "tests/", "--max-line-length=100", "--extend-ignore=E203,W503"],
"Running flake8 linting"
)
# Type checking
mypy_success, _, _ = self.run_command(
["mypy", "src/", "--ignore-missing-imports"],
"Running mypy type checking"
)
# TypeScript build (also checks TypeScript syntax)
ts_success, _, _ = self.run_command(
["npm", "run", "build"],
"Building TypeScript"
)
return flake8_success and mypy_success and ts_success
def run_security_scans(self) -> bool:
"""Run security scanning tools."""
self.print_header("Security Scans")
# Bandit security scan
bandit_success, _, _ = self.run_command(
["bandit", "-r", "src/", "-f", "json", "-o", "security-report.json"],
"Running Bandit security scan"
)
# Safety dependency check
safety_success, _, _ = self.run_command(
["safety", "check", "--json", "--output", "security-deps.json"],
"Running Safety dependency check"
)
        # Treat scanner findings as warnings rather than hard failures
if not bandit_success:
self.print_warning("Bandit security scan had issues (non-critical)")
if not safety_success:
self.print_warning("Safety dependency check had issues (non-critical)")
return True # Don't fail the entire pipeline on security warnings
def run_unit_tests(self) -> bool:
"""Run unit tests."""
self.print_header("Unit Tests")
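        # The tests/unit/ path and the 'unit' marker are combined, so only unit-marked tests are selected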
success, stdout, stderr = self.run_command(
["pytest", "tests/unit/", "-v", "-m", "unit", "--junit-xml=junit-unit.xml"],
"Running unit tests"
)
self.results['unit_tests'] = {
'success': success,
'output': stdout,
'errors': stderr
}
return success
def run_integration_tests(self) -> bool:
"""Run integration tests."""
self.print_header("Integration Tests")
success, stdout, stderr = self.run_command(
["pytest", "tests/integration/", "-v", "-m", "integration", "--junit-xml=junit-integration.xml"],
"Running integration tests"
)
self.results['integration_tests'] = {
'success': success,
'output': stdout,
'errors': stderr
}
return success
def run_contract_tests(self) -> bool:
"""Run contract tests."""
self.print_header("Contract Tests")
success, stdout, stderr = self.run_command(
["pytest", "tests/contract/", "-v", "-m", "contract", "--junit-xml=junit-contract.xml"],
"Running contract tests"
)
self.results['contract_tests'] = {
'success': success,
'output': stdout,
'errors': stderr
}
return success
def run_edge_case_tests(self) -> bool:
"""Run edge case tests."""
self.print_header("Edge Case Tests")
success, stdout, stderr = self.run_command(
["pytest", "tests/edge_cases/", "-v", "-m", "edge_case", "--junit-xml=junit-edge.xml"],
"Running edge case tests"
)
self.results['edge_case_tests'] = {
'success': success,
'output': stdout,
'errors': stderr
}
return success
def run_performance_tests(self) -> bool:
"""Run performance tests."""
self.print_header("Performance Tests")
success, stdout, stderr = self.run_command(
["pytest", "tests/performance/", "-v", "-m", "performance", "--benchmark-only", "--benchmark-json=benchmark-results.json"],
"Running performance tests",
timeout=600 # Longer timeout for performance tests
)
self.results['performance_tests'] = {
'success': success,
'output': stdout,
'errors': stderr
}
return success
def run_typescript_tests(self) -> bool:
"""Run TypeScript tests."""
self.print_header("TypeScript Tests")
success, stdout, stderr = self.run_command(
["npm", "test"],
"Running TypeScript tests"
)
self.results['typescript_tests'] = {
'success': success,
'output': stdout,
'errors': stderr
}
return success
def run_live_api_tests(self, api_key: Optional[str] = None) -> bool:
"""Run live API tests if API key is provided."""
if not api_key:
self.print_warning("Skipping live API tests (no API key provided)")
return True
self.print_header("Live API Tests")
env = os.environ.copy()
env['USE_REAL_API'] = 'true'
env['API_KEY'] = api_key
        try:
            process = subprocess.run(
                ["pytest", "tests/live/", "-v", "-m", "real_api", "--junit-xml=junit-live.xml"],
                capture_output=True,
                text=True,
                timeout=900,  # 15 minutes for live API tests
                cwd=self.project_root,
                env=env
            )
        except subprocess.TimeoutExpired:
            # Mirror run_command's timeout handling so a hung live run doesn't abort the whole pipeline
            self.print_error("Live API tests timed out after 900s")
            self.results['live_api_tests'] = {'success': False, 'output': '', 'errors': 'Timeout expired'}
            return False
        success = process.returncode == 0
        if success:
            self.print_success("Live API tests completed")
        else:
            self.print_error("Live API tests failed")
            print(f"{Colors.RED}STDOUT: {process.stdout}{Colors.RESET}")
            print(f"{Colors.RED}STDERR: {process.stderr}{Colors.RESET}")
self.results['live_api_tests'] = {
'success': success,
'output': process.stdout,
'errors': process.stderr
}
return success
def run_docker_tests(self) -> bool:
"""Run Docker deployment tests."""
self.print_header("Docker Tests")
# Build Docker image
build_success, _, _ = self.run_command(
["docker", "build", "-t", "grants-mcp:test", "."],
"Building Docker image",
timeout=600
)
if not build_success:
return False
# Start container
start_success, _, _ = self.run_command(
["docker", "run", "-d", "--name", "grants-mcp-test", "-p", "8080:8080",
"-e", "SIMPLER_GRANTS_API_KEY=test_key", "grants-mcp:test"],
"Starting Docker container"
)
if not start_success:
return False
try:
# Wait for container to be ready
self.print_step("Waiting for container to be ready")
time.sleep(15)
# Test health endpoint
health_success, _, _ = self.run_command(
["curl", "-f", "http://localhost:8080/health"],
"Testing health endpoint"
)
# Test MCP endpoint
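            # Minimal JSON-RPC 2.0 request for the MCP 'tools/list' method (enumerates the server's tools)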
mcp_success, _, _ = self.run_command(
["curl", "-X", "POST", "http://localhost:8080/mcp",
"-H", "Content-Type: application/json",
"-d", '{"jsonrpc":"2.0","method":"tools/list","id":1}'],
"Testing MCP endpoint"
)
return health_success and mcp_success
finally:
# Cleanup container
self.run_command(
["docker", "stop", "grants-mcp-test"],
"Stopping Docker container"
)
self.run_command(
["docker", "rm", "grants-mcp-test"],
"Removing Docker container"
)
def run_coverage_tests(self) -> bool:
"""Run tests with coverage reporting."""
self.print_header("Coverage Tests")
success, stdout, stderr = self.run_command(
["pytest", "tests/unit/", "tests/integration/", "tests/contract/",
"--cov=src", "--cov-branch", "--cov-report=term-missing",
"--cov-report=html:htmlcov", "--cov-report=xml:coverage.xml"],
"Running tests with coverage"
)
if success:
self.print_success("Coverage report generated in htmlcov/index.html")
return success
def generate_report(self):
"""Generate a final test report."""
self.print_header("Test Results Summary")
total_duration = time.time() - self.start_time
print(f"Total execution time: {total_duration:.1f} seconds\n")
# Count results
total_tests = len(self.results)
passed_tests = sum(1 for result in self.results.values() if result.get('success', False))
failed_tests = total_tests - passed_tests
print(f"Test Categories: {total_tests}")
print(f"Passed: {passed_tests} ✅")
print(f"Failed: {failed_tests} ❌")
        success_rate = f"{(passed_tests / total_tests) * 100:.1f}%" if total_tests > 0 else "N/A"
        print(f"Success Rate: {success_rate}")
print("\nDetailed Results:")
for test_name, result in self.results.items():
status = "✅ PASS" if result.get('success', False) else "❌ FAIL"
print(f" {test_name}: {status}")
# Save results to JSON
report_data = {
'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
'duration_seconds': total_duration,
'total_tests': total_tests,
'passed_tests': passed_tests,
'failed_tests': failed_tests,
'success_rate': (passed_tests/total_tests)*100 if total_tests > 0 else 0,
'results': self.results
}
with open(self.project_root / 'test-report.json', 'w') as f:
json.dump(report_data, f, indent=2)
self.print_success("Test report saved to test-report.json")
return failed_tests == 0
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(description='Run comprehensive tests for Grants MCP Server')
parser.add_argument('--api-key', help='API key for live tests')
parser.add_argument('--skip-docker', action='store_true', help='Skip Docker tests')
parser.add_argument('--skip-performance', action='store_true', help='Skip performance tests')
parser.add_argument('--skip-live', action='store_true', help='Skip live API tests')
    parser.add_argument('--quick', action='store_true', help='Run only the core test phases (skips performance, Docker, and live API tests)')
args = parser.parse_args()
# Get project root
project_root = Path(__file__).parent.parent
# Initialize test runner
runner = TestRunner(project_root)
print(f"{Colors.BOLD}{Colors.MAGENTA}")
    print("╔════════════════════════════════════════════════╗")
    print("║  Grants MCP Server - Comprehensive Test Suite  ║")
    print("╚════════════════════════════════════════════════╝")
print(f"{Colors.RESET}\n")
# Track overall success
overall_success = True
# Phase 1: Setup and Dependencies
if not runner.check_dependencies():
runner.print_error("Dependency check failed")
sys.exit(1)
if not runner.install_dependencies():
runner.print_error("Failed to install dependencies")
sys.exit(1)
# Phase 2: Code Quality
if not runner.run_linting():
runner.print_error("Code quality checks failed")
overall_success = False
runner.run_security_scans() # Non-critical
# Phase 3: Core Tests
if not runner.run_unit_tests():
overall_success = False
if not runner.run_integration_tests():
overall_success = False
if not runner.run_contract_tests():
overall_success = False
if not runner.run_edge_case_tests():
overall_success = False
if not runner.run_typescript_tests():
overall_success = False
# Phase 4: Extended Tests (if not quick mode)
if not args.quick:
if not args.skip_performance and not runner.run_performance_tests():
overall_success = False
if not args.skip_docker and not runner.run_docker_tests():
overall_success = False
if not args.skip_live and not runner.run_live_api_tests(args.api_key):
overall_success = False
# Phase 5: Coverage Report
if not runner.run_coverage_tests():
overall_success = False
# Generate final report
report_success = runner.generate_report()
# Final status
if overall_success and report_success:
runner.print_success("All tests completed successfully! 🎉")
sys.exit(0)
else:
runner.print_error("Some tests failed. Check the report for details.")
sys.exit(1)
if __name__ == '__main__':
main()