#!/usr/bin/env python3
"""
Comprehensive test suite for Gemini MCP Server.
Runs 25+ tests to verify robustness and error handling.
"""
import sys
import os
# Add the server directory to the path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from server import (
run_gemini,
gemini_resume,
gemini_list_sessions,
gemini_list_extensions,
gemini_version,
strip_ansi,
filter_stderr,
process_gemini_output
)
import subprocess
from typing import Callable, Any
import time
# Test counters: module-level tallies updated by test() below.
passed = 0
failed = 0
test_results = []


def test(name: str, func: Callable, expected_check: Callable[[Any], bool], description: str = ""):
    """Run a single test and record results.

    Calls ``func`` with no arguments, passes its return value to
    ``expected_check`` and records the outcome exactly once in the
    module-level ``passed`` / ``failed`` counters and in ``test_results``.

    Args:
        name: Label printed next to the status and stored with the result.
        func: Zero-argument callable producing the value under test.
        expected_check: Predicate applied to that value; a truthy return
            counts as a pass.
        description: Optional free-text note stored with the result.

    An exception raised by ``func`` or ``expected_check`` is caught and
    recorded as an ERROR (counted as a failure); it is not re-raised.
    """
    global passed, failed

    # Only the user-supplied callables live inside the try block. Bookkeeping
    # and printing happen afterwards, so a reporting problem can never be
    # mistaken for a second failure of the same test.
    try:
        result = func()
        success = bool(expected_check(result))
    except Exception as e:
        failed += 1
        test_results.append({
            "name": name,
            "status": "❌ ERROR",
            "result": str(e),
            "description": description,
        })
        print(f"❌ ERROR: {name} - {e}")
        return

    if success:
        passed += 1
        status = "✅ PASS"
    else:
        failed += 1
        status = "❌ FAIL"

    # Stringify once so that non-string results (None, numbers, ...) can be
    # truncated and later sliced by the summary without raising TypeError.
    text = str(result)
    test_results.append({
        "name": name,
        "status": status,
        "result": text[:200] + "..." if len(text) > 200 else text,
        "description": description,
    })
    print(f"{status}: {name}")
    if not success:
        print(f" Result: {text[:100]}...")


# The helper's name matches pytest's default ``test*`` function pattern, but
# its parameters are not fixtures; opt it out of collection.
test.__test__ = False
def run_all_tests():
    """Execute all tests and print a summary.

    Runs every test case in order through ``test()``, grouped into printed
    categories, then prints totals and the list of failed tests.

    Returns:
        True when no test failed or errored, False otherwise.
    """
    print("=" * 60)
    print("GEMINI MCP SERVER - COMPREHENSIVE TEST SUITE")
    print("=" * 60)
    print()

    # ========== CATEGORY 1: BASIC FUNCTIONALITY ==========
    print("\n--- Category 1: Basic Functionality ---\n")

    # Test 1: Version check
    test(
        "T01: gemini_version()",
        gemini_version,
        lambda r: r and "." in r and not r.startswith("Error"),
        "Basic version retrieval"
    )

    # Test 2: List extensions
    test(
        "T02: gemini_list_extensions()",
        gemini_list_extensions,
        lambda r: "extensions" in r.lower() or "-" in r,
        "List available extensions"
    )

    # Test 3: List sessions
    test(
        "T03: gemini_list_sessions()",
        gemini_list_sessions,
        lambda r: r is not None and not r.startswith("Error: 'gemini'"),
        "List available sessions"
    )

    # ========== CATEGORY 2: MATH PROMPTS ==========
    print("\n--- Category 2: Math Prompts ---\n")

    # Test 4: Simple arithmetic
    test(
        "T04: Simple math (5 + 5)",
        lambda: run_gemini("What is 5 + 5? Answer with just the number."),
        lambda r: "10" in r,
        "Basic addition"
    )

    # Test 5: Multiplication
    test(
        "T05: Multiplication (12 * 12)",
        lambda: run_gemini("What is 12 * 12? Answer with just the number."),
        lambda r: "144" in r,
        "Basic multiplication"
    )

    # Test 6: Division
    test(
        "T06: Division (100 / 5)",
        lambda: run_gemini("What is 100 / 5? Answer with just the number."),
        lambda r: "20" in r,
        "Basic division"
    )

    # Test 7: Complex math
    test(
        "T07: Complex math (sqrt of 81)",
        lambda: run_gemini("What is the square root of 81? Answer with just the number."),
        lambda r: "9" in r,
        "Square root calculation"
    )

    # ========== CATEGORY 3: CODE GENERATION ==========
    print("\n--- Category 3: Code Generation ---\n")

    # Test 8: Python function
    test(
        "T08: Python function generation",
        lambda: run_gemini("Write a Python function that adds two numbers. Just the code, no explanation."),
        lambda r: "def " in r and "return" in r,
        "Generate Python function"
    )

    # Test 9: JavaScript code
    test(
        "T09: JavaScript code generation",
        lambda: run_gemini("Write a JavaScript arrow function that doubles a number. Just the code."),
        lambda r: "=>" in r or "function" in r,
        "Generate JavaScript function"
    )

    # Test 10: SQL query
    test(
        "T10: SQL query generation",
        lambda: run_gemini("Write a SQL query to select all users older than 18. Just the query."),
        lambda r: "SELECT" in r.upper() and "FROM" in r.upper(),
        "Generate SQL query"
    )

    # ========== CATEGORY 4: KNOWLEDGE QUERIES ==========
    print("\n--- Category 4: Knowledge Queries ---\n")

    # Test 11: General knowledge
    test(
        "T11: General knowledge (capital of France)",
        lambda: run_gemini("What is the capital of France? One word answer."),
        lambda r: "Paris" in r,
        "Basic factual question"
    )

    # Test 12: Science question
    test(
        "T12: Science (chemical symbol for water)",
        lambda: run_gemini("What is the chemical formula for water? Just the formula."),
        lambda r: "H2O" in r or "H₂O" in r,
        "Chemistry question"
    )

    # Test 13: Technology question
    test(
        "T13: Technology (what is JSON)",
        lambda: run_gemini("What does JSON stand for? Just the full name."),
        lambda r: "JavaScript" in r and "Object" in r and "Notation" in r,
        "Technology acronym"
    )

    # ========== CATEGORY 5: EDGE CASES ==========
    print("\n--- Category 5: Edge Cases ---\n")

    # Test 14: Very short prompt
    test(
        "T14: Very short prompt ('Hi')",
        lambda: run_gemini("Hi"),
        lambda r: r and len(r) > 0 and not r.startswith("Error"),
        "Handle minimal input"
    )

    # Test 15: Prompt with special characters
    test(
        "T15: Special characters prompt",
        lambda: run_gemini("What is 5 + 5? @#$%^& (ignore symbols)"),
        lambda r: "10" in r,
        "Handle special characters"
    )

    # Test 16: Prompt with quotes
    test(
        "T16: Prompt with quotes",
        lambda: run_gemini('What is the meaning of "hello"?'),
        lambda r: r and len(r) > 5 and not r.startswith("Error"),
        "Handle quotes in prompt"
    )

    # Test 17: Prompt with newlines
    # Parentheses spell out how Python already groups this expression
    # (``and`` binds tighter than ``or``); the accepted results are unchanged.
    test(
        "T17: Prompt with newlines",
        lambda: run_gemini("Line 1\nLine 2\nWhat is 2+2?"),
        lambda r: "4" in r or (r and not r.startswith("Error")),
        "Handle newlines in prompt"
    )

    # Test 18: Unicode prompt
    test(
        "T18: Unicode prompt",
        lambda: run_gemini("What is the emoji for a heart? Just the emoji."),
        lambda r: r and len(r) > 0,
        "Handle unicode/emoji"
    )

    # ========== CATEGORY 6: PARAMETER VARIATIONS ==========
    print("\n--- Category 6: Parameter Variations ---\n")

    # Test 19: With sandbox mode
    test(
        "T19: Sandbox mode enabled",
        lambda: run_gemini("What is 3 + 3?", sandbox=True),
        lambda r: r and not r.startswith("Error: 'gemini'"),
        "Run with sandbox mode"
    )

    # Test 20: Without yolo mode
    test(
        "T20: Without yolo mode",
        lambda: run_gemini("What is 4 + 4?", yolo=False),
        lambda r: r and not r.startswith("Error: 'gemini'"),
        "Run without yolo mode"
    )

    # Test 21: With specific model (if exists)
    test(
        "T21: With model parameter",
        lambda: run_gemini("Say 'test'", model="gemini-2.0-flash-exp"),
        lambda r: r and not r.startswith("Error: 'gemini'"),
        "Run with specific model"
    )

    # ========== CATEGORY 7: HELPER FUNCTIONS ==========
    print("\n--- Category 7: Helper Functions ---\n")

    # Test 22: strip_ansi function
    test(
        "T22: strip_ansi with ANSI codes",
        lambda: strip_ansi("\x1b[32mGreen\x1b[0m text"),
        lambda r: r == "Green text",
        "Strip ANSI escape codes"
    )

    # Test 23: strip_ansi with no ANSI
    test(
        "T23: strip_ansi without ANSI codes",
        lambda: strip_ansi("Plain text"),
        lambda r: r == "Plain text",
        "Handle plain text in strip_ansi"
    )

    # Test 24: filter_stderr with noise
    test(
        "T24: filter_stderr removes noise",
        lambda: filter_stderr("[STARTUP] 0.01s Loading extension: test\nReal error message"),
        lambda r: "Real error message" in r and "[STARTUP]" not in r,
        "Filter out stderr noise"
    )

    # Test 25: filter_stderr with empty input
    test(
        "T25: filter_stderr with empty string",
        lambda: filter_stderr(""),
        lambda r: r == "",
        "Handle empty stderr"
    )

    # Test 26: filter_stderr keep_extensions
    test(
        "T26: filter_stderr keep_extensions=True",
        lambda: filter_stderr("Installed extensions:\n- test-ext\n- another-ext", keep_extensions=True),
        lambda r: "test-ext" in r and "another-ext" in r,
        "Keep extensions when requested"
    )

    # ========== CATEGORY 8: SESSION HANDLING ==========
    print("\n--- Category 8: Session Handling ---\n")

    # Test 27: Resume with invalid session
    test(
        "T27: Resume invalid session",
        lambda: gemini_resume(session="nonexistent_session_12345"),
        lambda r: r is not None,  # Should return something, not crash
        "Handle invalid session gracefully"
    )

    # Test 28: Resume latest (may or may not exist)
    test(
        "T28: Resume latest session",
        lambda: gemini_resume(session="latest"),
        lambda r: r is not None,  # Should return something, not crash
        "Resume latest session"
    )

    # ========== CATEGORY 9: CREATIVE/COMPLEX PROMPTS ==========
    print("\n--- Category 9: Creative/Complex Prompts ---\n")

    known_languages = ['Python', 'Java', 'JavaScript', 'C', 'Go', 'Rust']

    def looks_like_language_list(r):
        """Accept a non-error reply with 3+ non-empty lines or a known language name."""
        # The previous check, len(r.split('\n')) >= 1, was always true
        # (str.split never returns an empty list), so this test could not fail.
        if r.startswith("Error"):
            return False
        non_empty_lines = [line for line in r.splitlines() if line.strip()]
        return len(non_empty_lines) >= 3 or any(lang in r for lang in known_languages)

    # Test 29: List generation
    test(
        "T29: Generate a list",
        lambda: run_gemini("List 3 programming languages. Just the names, one per line."),
        looks_like_language_list,
        "Generate list content"
    )

    # Test 30: Boolean question
    test(
        "T30: Boolean question",
        lambda: run_gemini("Is the sky blue? Answer only yes or no."),
        lambda r: "yes" in r.lower() or "no" in r.lower(),
        "Handle yes/no questions"
    )

    # ========== SUMMARY ==========
    total = passed + failed
    # Guard the percentage so an empty run cannot divide by zero.
    success_rate = (passed / total * 100) if total else 0.0

    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)
    print(f"Total Tests: {total}")
    print(f"Passed: {passed} ✅")
    print(f"Failed: {failed} ❌")
    print(f"Success Rate: {success_rate:.1f}%")
    print("=" * 60)

    if failed > 0:
        print("\nFailed Tests:")
        for t in test_results:
            if "FAIL" in t["status"] or "ERROR" in t["status"]:
                # str() keeps the slice safe when a stored result is not a string.
                print(f" - {t['name']}: {str(t['result'])[:80]}...")

    return failed == 0
if __name__ == "__main__":
    # Propagate the suite outcome as the process exit status:
    # 0 when run_all_tests() reports success, 1 otherwise.
    all_passed = run_all_tests()
    sys.exit(1 if not all_passed else 0)