#!/usr/bin/env python3
"""
Tiny LLM Testing Script
This script tests the MCP server with a tiny LLM implementation
that simulates slow response times for testing purposes.
"""
import asyncio
import os
import random
import sys
import time
from typing import Any, Dict, List
# Add the src directory to the Python path
sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))
)
try:
from structured_protocol import (
ProtocolOptimizer,
StructuredProtocol,
)
from tiny_llm import TinyLLMClient
except ImportError:
# Fallback for different import paths
sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
)
from src.structured_protocol import (
ProtocolOptimizer,
StructuredProtocol,
)
from src.tiny_llm import TinyLLMClient
class TinyLLMTestSuite:
"""Test suite for testing MCP server with tiny LLM."""
def __init__(self):
self.protocol = StructuredProtocol()
self.optimizer = ProtocolOptimizer()
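        # Stand-in model client: it injects artificial latency instead of
        # calling a real LLM.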
self.llm_client = TinyLLMClient("tiny-test-llm")
self.results = []
async def run_comprehensive_test(self):
"""Run comprehensive test suite with tiny LLM."""
print("TINY LLM TESTING SUITE")
print("=" * 60)
print("Testing MCP server with tiny LLM implementation")
print("Simulating slow response times for realistic testing")
print("=" * 60)
# Test scenarios
test_scenarios = [
{
"name": "Simple Host Query",
"query": "Show me all web servers",
"expected_tools": ["hosts"],
"expected_delay": 0.5,
},
{
"name": "Complex VM Query",
"query": "Find all virtual machines with high confidence scores in production environment",
"expected_tools": ["vms"],
"expected_delay": 1.0,
},
{
"name": "Network Search Query",
"query": "Search for all IP addresses in the 192.168.1.0/24 subnet",
"expected_tools": ["ips"],
"expected_delay": 0.8,
},
{
"name": "VLAN Analysis Query",
"query": "Analyze all VLANs and their associated IP addresses for security review",
"expected_tools": ["vlans", "ips"],
"expected_delay": 1.2,
},
{
"name": "Multi-Tool Query",
"query": "Give me a complete infrastructure overview with hosts, VMs, IPs, and VLANs",
"expected_tools": ["hosts", "vms", "ips", "vlans"],
"expected_delay": 1.5,
},
]
print(f"\nRunning {len(test_scenarios)} test scenarios...")
for i, scenario in enumerate(test_scenarios, 1):
print(f"\nTEST {i}: {scenario['name']}")
print("-" * 40)
result = await self._test_scenario(scenario)
self.results.append(result)
print(f"Result: {result['status']}")
print(f"Response Time: {result['total_time']:.2f}s")
print(f"Tokens Used: {result['tokens_used']}")
print(f"Confidence: {result['confidence']:.2f}")
print(f"Tools Called: {len(result['tool_calls'])}")
# Generate summary report
self._generate_summary_report()
async def _test_scenario(self, scenario: Dict[str, Any]) -> Dict[str, Any]:
"""Test a single scenario with tiny LLM."""
start_time = time.time()
try:
# Step 1: Create structured query
query = self.protocol.create_query(
user_id="test_user",
query=scenario["query"],
context={"test_scenario": scenario["name"]},
)
# Step 2: Router decision
decision = self.optimizer.optimize_router_decision(query)
# Step 3: LLM request
llm_request = self.optimizer.optimize_llm_request(query, decision)
# Step 4: LLM processing with tiny LLM
print(f" Sending to tiny LLM: {scenario['query'][:50]}...")
# Simulate chat completion
messages = [{"role": "user", "content": scenario["query"]}]
tools = llm_request.tools_available
llm_response = await self.llm_client.chat_completion(
messages=messages,
tools=tools,
max_tokens=llm_request.max_tokens,
)
# Extract tool calls
tool_calls = []
if llm_response["choices"][0]["message"].get("tool_calls"):
tool_calls = llm_response["choices"][0]["message"][
"tool_calls"
]
# Step 5: Simulate MCP responses
mcp_responses = []
for tool_call in tool_calls:
mcp_response = await self._simulate_mcp_call(
tool_call, query.id
)
mcp_responses.append(mcp_response)
total_time = time.time() - start_time
return {
"scenario": scenario["name"],
"status": "SUCCESS",
"total_time": total_time,
"tokens_used": llm_response["usage"]["total_tokens"],
"confidence": llm_response.get("confidence", 0.8),
"tool_calls": tool_calls,
"mcp_responses": len(mcp_responses),
"llm_response_time": llm_response.get("response_time", 0),
"expected_delay": scenario["expected_delay"],
"delay_accuracy": abs(total_time - scenario["expected_delay"])
/ scenario["expected_delay"],
}
except Exception as e:
total_time = time.time() - start_time
return {
"scenario": scenario["name"],
"status": "FAILED",
"error": str(e),
"total_time": total_time,
"tokens_used": 0,
"confidence": 0.0,
"tool_calls": [],
"mcp_responses": 0,
}
async def _simulate_mcp_call(
self, tool_call: Dict[str, Any], query_id: str
) -> Dict[str, Any]:
"""Simulate MCP tool call."""
# Simulate processing delay
await asyncio.sleep(random.uniform(0.1, 0.3))
# Generate mock data
mock_data = self._generate_mock_data(tool_call["tool_name"])
return {
"query_id": query_id,
"tool_name": tool_call["tool_name"],
"data": mock_data,
"processing_time": random.uniform(0.1, 0.3),
"confidence": random.uniform(0.85, 0.95),
}
def _generate_mock_data(self, tool_name: str) -> List[Dict[str, Any]]:
"""Generate mock data for testing."""
if "host" in tool_name:
return [
{
"id": 1,
"name": "web-server-01",
"status": "Active",
"ip": "192.168.1.10",
},
{
"id": 2,
"name": "db-server-01",
"status": "Active",
"ip": "192.168.1.20",
},
]
elif "vm" in tool_name:
return [
{
"id": 3,
"name": "app-vm-01",
"status": "Active",
"ip": "192.168.1.30",
}
]
elif "ip" in tool_name:
return [
{"id": 101, "address": "192.168.1.10/24", "status": "Active"},
{"id": 102, "address": "192.168.1.20/24", "status": "Active"},
]
elif "vlan" in tool_name:
return [
{
"id": 201,
"vid": 100,
"name": "Production VLAN",
"status": "Active",
}
]
else:
return [{"id": 1, "name": "test-item", "status": "Active"}]
def _generate_summary_report(self):
"""Generate comprehensive summary report."""
print("\n" + "=" * 60)
print("TINY LLM TEST RESULTS SUMMARY")
print("=" * 60)
# Calculate statistics
total_tests = len(self.results)
successful_tests = len(
[r for r in self.results if r["status"] == "SUCCESS"]
)
failed_tests = total_tests - successful_tests
avg_response_time = (
sum(r["total_time"] for r in self.results) / total_tests
)
avg_tokens = sum(r["tokens_used"] for r in self.results) / total_tests
avg_confidence = (
sum(r["confidence"] for r in self.results) / total_tests
)
print("\nOVERALL RESULTS:")
print(f"Total Tests: {total_tests}")
print(f"Successful: {successful_tests}")
print(f"Failed: {failed_tests}")
print(f"Success Rate: {successful_tests/total_tests*100:.1f}%")
print("\nPERFORMANCE METRICS:")
print(f"Average Response Time: {avg_response_time:.2f}s")
print(f"Average Tokens Used: {avg_tokens:.0f}")
print(f"Average Confidence: {avg_confidence:.2f}")
print("\nDETAILED RESULTS:")
for i, result in enumerate(self.results, 1):
status_icon = "[OK]" if result["status"] == "SUCCESS" else "[FAIL]"
print(f"{i}. {status_icon} {result['scenario']}")
print(
f" Time: {result['total_time']:.2f}s | Tokens: {result['tokens_used']} | Confidence: {result['confidence']:.2f}"
)
if result["status"] == "FAILED":
print(f" Error: {result.get('error', 'Unknown error')}")
# Performance analysis
print("\nPERFORMANCE ANALYSIS:")
slow_tests = [r for r in self.results if r["total_time"] > 1.0]
fast_tests = [r for r in self.results if r["total_time"] <= 0.5]
print(f"Slow Tests (>1.0s): {len(slow_tests)}")
print(f"Fast Tests (<=0.5s): {len(fast_tests)}")
        if slow_tests:
            slowest = max(slow_tests, key=lambda x: x["total_time"])
            print(
                f"Slowest Test: {slowest['scenario']} "
                f"({slowest['total_time']:.2f}s)"
            )
print("\nTINY LLM EVALUATION:")
print("[OK] Successfully simulated slow LLM responses")
print("[OK] Generated realistic tool calls")
print("[OK] Provided token counting and confidence scoring")
print("[OK] Suitable for GitHub Actions testing")
if successful_tests == total_tests:
print(
"\n[SUCCESS] ALL TESTS PASSED! Tiny LLM is working perfectly!"
)
else:
print(
"\n[WARNING] Some tests failed. Check the detailed results above."
)
async def main():
"""Main testing function."""
print("Starting Tiny LLM Testing Suite...")
# Create test suite
test_suite = TinyLLMTestSuite()
# Run comprehensive test
await test_suite.run_comprehensive_test()
print("\n[SUCCESS] Tiny LLM testing complete!")
print("[INFO] This tiny LLM is perfect for testing in GitHub Actions!")
if __name__ == "__main__":
import random
asyncio.run(main())