#!/usr/bin/env python3
"""
Comprehensive Feature Validation Script

This script validates ALL features of the NetBox MCP Server without requiring
a live user or external services. It exercises every component and reports a
pass/fail result for each.
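
The script exits with status 0 when every check passes and 1 otherwise, so it
can run unattended (for example in CI). A minimal programmatic sketch, assuming
the same module layout handled by the import fallback below:

    import asyncio

    validator = ComprehensiveValidator()
    ok = asyncio.run(validator.validate_all_features())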
"""
import asyncio
import os
import sys
import time
from dataclasses import dataclass
from typing import List, Optional
# Add the src directory to the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
try:
from tiny_llm import TinyLLM, TinyLLMClient
from structured_protocol import (
StructuredProtocol, ProtocolOptimizer, StructuredQuery, RouterDecision,
LLMRequest, LLMResponse, MCPRequest, MCPResponse, FinalResponse,
ToolCategory, ModelLocation
)
from server import list_tools, call_tool
from vault_client import VaultClient
from netbox_client import NetBoxClient
from state_confidence import StateConfidenceClient
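    # VaultClient, NetBoxClient, and StateConfidenceClient are imported to confirm
    # they resolve; the checks below do not exercise them directly.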
except ImportError:
# Fallback for different import paths
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from src.tiny_llm import TinyLLM, TinyLLMClient
from src.structured_protocol import (
StructuredProtocol, ProtocolOptimizer, StructuredQuery, RouterDecision,
LLMRequest, LLMResponse, MCPRequest, MCPResponse, FinalResponse,
ToolCategory, ModelLocation
)
from src.server import list_tools, call_tool
from src.vault_client import VaultClient
from src.netbox_client import NetBoxClient
from src.state_confidence import StateConfidenceClient
@dataclass
class ValidationResult:
"""Result of a validation test."""
    feature: str
    status: str                      # "PASS" or "FAIL"
    duration: float                  # wall-clock seconds for the check
    details: str
    error: Optional[str] = None
class ComprehensiveValidator:
"""Comprehensive validator for all MCP server features."""
def __init__(self):
self.results: List[ValidationResult] = []
self.protocol = StructuredProtocol()
self.optimizer = ProtocolOptimizer()
self.llm_client = TinyLLMClient("validation-llm")
async def validate_all_features(self) -> bool:
"""Validate all features comprehensively."""
print("COMPREHENSIVE FEATURE VALIDATION")
print("=" * 60)
print("Validating ALL MCP server features without live user")
print("=" * 60)
# Core MCP Server Features
await self._validate_mcp_server_core()
# Tiny LLM Features
await self._validate_tiny_llm_features()
# Structured Protocol Features
await self._validate_structured_protocol_features()
# Performance Features
await self._validate_performance_features()
# Integration Features
await self._validate_integration_features()
# Error Handling Features
await self._validate_error_handling_features()
# Edge Case Features
await self._validate_edge_case_features()
# Generate comprehensive report
return self._generate_validation_report()
async def _validate_mcp_server_core(self):
"""Validate core MCP server functionality."""
print("\n1. VALIDATING MCP SERVER CORE FEATURES")
print("-" * 40)
# Test tool discovery
start_time = time.time()
try:
tools = await list_tools()
duration = time.time() - start_time
assert len(tools) > 0, "No tools discovered"
assert all(hasattr(tool, 'name') for tool in tools), "Tools missing name attribute"
assert all(hasattr(tool, 'description') for tool in tools), "Tools missing description attribute"
self.results.append(ValidationResult(
feature="Tool Discovery",
status="PASS",
duration=duration,
details=f"Discovered {len(tools)} tools successfully"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Tool Discovery",
status="FAIL",
duration=time.time() - start_time,
details="Failed to discover tools",
error=str(e)
))
# Test tool categories
start_time = time.time()
try:
tools = await list_tools()
tool_names = [tool.name for tool in tools]
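            # Minimum tool surface expected per category; extra tools are allowed,
            # missing ones fail the check.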
expected_categories = {
'hosts': ['list_hosts', 'get_host', 'search_hosts'],
'vms': ['list_vms', 'get_vm', 'list_vm_interfaces'],
'ips': ['list_ips', 'get_ip', 'search_ips'],
'vlans': ['list_vlans', 'get_vlan', 'list_vlan_ips']
}
for category, expected_tools in expected_categories.items():
for tool_name in expected_tools:
assert tool_name in tool_names, f"Missing {category} tool: {tool_name}"
self.results.append(ValidationResult(
feature="Tool Categories",
status="PASS",
duration=time.time() - start_time,
details="All expected tool categories present"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Tool Categories",
status="FAIL",
duration=time.time() - start_time,
details="Missing expected tool categories",
error=str(e)
))
async def _validate_tiny_llm_features(self):
"""Validate tiny LLM functionality."""
print("\n2. VALIDATING TINY LLM FEATURES")
print("-" * 40)
# Test basic response generation
start_time = time.time()
try:
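            # Very small delay settings keep this check fast while still
            # exercising the full response path.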
llm = TinyLLM("test-llm", base_delay=0.01, max_delay=0.02)
response = await llm.generate_response("Test query", tools=None, max_tokens=50)
assert response.content is not None, "No content generated"
assert response.tokens_used > 0, "No tokens counted"
assert response.processing_time > 0, "No processing time recorded"
assert 0.0 <= response.confidence <= 1.0, "Invalid confidence score"
self.results.append(ValidationResult(
feature="Tiny LLM Response Generation",
status="PASS",
duration=time.time() - start_time,
details=f"Generated {response.tokens_used} tokens with {response.confidence:.2f} confidence"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Tiny LLM Response Generation",
status="FAIL",
duration=time.time() - start_time,
details="Failed to generate response",
error=str(e)
))
# Test tool call generation
start_time = time.time()
        try:
            llm = TinyLLM("test-llm", base_delay=0.01, max_delay=0.02)
            tools = [{"name": "list_hosts", "category": "hosts"}]
            tool_calls = await llm.generate_tool_calls("Show me all servers", tools)
assert isinstance(tool_calls, list), "Tool calls not returned as list"
assert len(tool_calls) >= 0, "Invalid tool calls count"
self.results.append(ValidationResult(
feature="Tiny LLM Tool Calls",
status="PASS",
duration=time.time() - start_time,
details=f"Generated {len(tool_calls)} tool calls"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Tiny LLM Tool Calls",
status="FAIL",
duration=time.time() - start_time,
details="Failed to generate tool calls",
error=str(e)
))
# Test client API
start_time = time.time()
try:
client = TinyLLMClient("test-client")
messages = [{"role": "user", "content": "Test query"}]
response = await client.chat_completion(messages, max_tokens=50)
assert "choices" in response, "Missing choices in response"
assert "usage" in response, "Missing usage in response"
assert response["choices"][0]["message"]["role"] == "assistant", "Invalid message role"
self.results.append(ValidationResult(
feature="Tiny LLM Client API",
status="PASS",
duration=time.time() - start_time,
details="Client API working correctly"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Tiny LLM Client API",
status="FAIL",
duration=time.time() - start_time,
details="Client API failed",
error=str(e)
))
async def _validate_structured_protocol_features(self):
"""Validate structured protocol functionality."""
print("\n3. VALIDATING STRUCTURED PROTOCOL FEATURES")
print("-" * 40)
# Test query creation
start_time = time.time()
try:
query = self.protocol.create_query(
user_id="test_user",
query="Show me all servers",
context={"test": True}
)
assert query.user_id == "test_user", "Invalid user_id"
assert query.query == "Show me all servers", "Invalid query"
assert query.context["test"] is True, "Invalid context"
self.results.append(ValidationResult(
feature="Structured Query Creation",
status="PASS",
duration=time.time() - start_time,
details="Query created successfully"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Structured Query Creation",
status="FAIL",
duration=time.time() - start_time,
details="Failed to create query",
error=str(e)
))
# Test router decision
start_time = time.time()
try:
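            # NOTE: reuses the `query` built in the previous check; if that check
            # failed before creating it, this one records a NameError as a FAIL.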
decision = self.optimizer.optimize_router_decision(query)
assert decision.model_location in [ModelLocation.LOCAL, ModelLocation.CLOUD], "Invalid model location"
assert isinstance(decision.tools_needed, list), "Tools needed not a list"
assert decision.priority >= 1, "Invalid priority"
self.results.append(ValidationResult(
feature="Router Decision Optimization",
status="PASS",
duration=time.time() - start_time,
details=f"Decision made with priority {decision.priority}"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Router Decision Optimization",
status="FAIL",
duration=time.time() - start_time,
details="Failed to optimize router decision",
error=str(e)
))
# Test LLM request optimization
start_time = time.time()
try:
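            # NOTE: depends on `query` and `decision` from the two preceding checks.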
llm_request = self.optimizer.optimize_llm_request(query, decision)
assert isinstance(llm_request.tools_available, list), "Tools available not a list"
assert llm_request.max_tokens > 0, "Invalid max tokens"
assert llm_request.temperature >= 0.0, "Invalid temperature"
self.results.append(ValidationResult(
feature="LLM Request Optimization",
status="PASS",
duration=time.time() - start_time,
details=f"Request optimized with {llm_request.max_tokens} max tokens"
))
except Exception as e:
self.results.append(ValidationResult(
feature="LLM Request Optimization",
status="FAIL",
duration=time.time() - start_time,
details="Failed to optimize LLM request",
error=str(e)
))
async def _validate_performance_features(self):
"""Validate performance features."""
print("\n4. VALIDATING PERFORMANCE FEATURES")
print("-" * 40)
# Test response time
start_time = time.time()
try:
llm = TinyLLM("perf-test", base_delay=0.1, max_delay=0.2)
response = await llm.generate_response("Performance test", tools=None, max_tokens=50)
assert response.processing_time > 0, "No processing time recorded"
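            # 0.5s leaves headroom over the 0.1-0.2s delay configured above.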
assert response.processing_time <= 0.5, "Response too slow"
self.results.append(ValidationResult(
feature="Response Time Performance",
status="PASS",
duration=time.time() - start_time,
details=f"Response time: {response.processing_time:.3f}s"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Response Time Performance",
status="FAIL",
duration=time.time() - start_time,
details="Performance test failed",
error=str(e)
))
# Test concurrent operations
start_time = time.time()
try:
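            # Issue five chat completions concurrently through the shared client.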
tasks = []
for i in range(5):
task = self.llm_client.chat_completion(
[{"role": "user", "content": f"Test query {i}"}],
max_tokens=50
)
tasks.append(task)
responses = await asyncio.gather(*tasks)
assert len(responses) == 5, "Not all concurrent requests completed"
assert all("choices" in r for r in responses), "Some responses missing choices"
self.results.append(ValidationResult(
feature="Concurrent Operations",
status="PASS",
duration=time.time() - start_time,
details=f"Completed {len(responses)} concurrent requests"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Concurrent Operations",
status="FAIL",
duration=time.time() - start_time,
details="Concurrent operations failed",
error=str(e)
))
async def _validate_integration_features(self):
"""Validate integration features."""
print("\n5. VALIDATING INTEGRATION FEATURES")
print("-" * 40)
# Test end-to-end workflow
start_time = time.time()
try:
# Create query
query = self.protocol.create_query(
user_id="integration_test",
query="Show me all web servers",
context={"integration": True}
)
# Router decision
decision = self.optimizer.optimize_router_decision(query)
# LLM request
llm_request = self.optimizer.optimize_llm_request(query, decision)
# LLM processing
messages = [{"role": "user", "content": query.query}]
llm_response = await self.llm_client.chat_completion(
messages=messages,
tools=llm_request.tools_available,
max_tokens=llm_request.max_tokens
)
# Verify response
assert llm_response["choices"][0]["message"]["role"] == "assistant", "Invalid LLM response"
assert llm_response["usage"]["total_tokens"] > 0, "No tokens used"
self.results.append(ValidationResult(
feature="End-to-End Integration",
status="PASS",
duration=time.time() - start_time,
details="Complete workflow executed successfully"
))
except Exception as e:
self.results.append(ValidationResult(
feature="End-to-End Integration",
status="FAIL",
duration=time.time() - start_time,
details="End-to-end integration failed",
error=str(e)
))
async def _validate_error_handling_features(self):
"""Validate error handling features."""
print("\n6. VALIDATING ERROR HANDLING FEATURES")
print("-" * 40)
# Test empty prompt handling
start_time = time.time()
try:
llm = TinyLLM("error-test")
response = await llm.generate_response("", tools=None, max_tokens=50)
assert response.content is not None, "No content for empty prompt"
assert response.tokens_used >= 0, "Invalid token count for empty prompt"
self.results.append(ValidationResult(
feature="Empty Prompt Handling",
status="PASS",
duration=time.time() - start_time,
details="Empty prompt handled gracefully"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Empty Prompt Handling",
status="FAIL",
duration=time.time() - start_time,
details="Empty prompt handling failed",
error=str(e)
))
# Test invalid parameters
start_time = time.time()
try:
client = TinyLLMClient("error-test")
# Test with empty messages
try:
await client.chat_completion([], max_tokens=50)
assert False, "Should have raised ValueError"
except ValueError:
pass # Expected
self.results.append(ValidationResult(
feature="Invalid Parameter Handling",
status="PASS",
duration=time.time() - start_time,
details="Invalid parameters handled correctly"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Invalid Parameter Handling",
status="FAIL",
duration=time.time() - start_time,
details="Invalid parameter handling failed",
error=str(e)
))
async def _validate_edge_case_features(self):
"""Validate edge case features."""
print("\n7. VALIDATING EDGE CASE FEATURES")
print("-" * 40)
# Test very long prompt
start_time = time.time()
try:
llm = TinyLLM("edge-test")
long_prompt = "x" * 10000
response = await llm.generate_response(long_prompt, tools=None, max_tokens=100)
assert response.content is not None, "No content for long prompt"
assert response.tokens_used > 0, "No tokens for long prompt"
self.results.append(ValidationResult(
feature="Long Prompt Handling",
status="PASS",
duration=time.time() - start_time,
details=f"Handled {len(long_prompt)} character prompt"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Long Prompt Handling",
status="FAIL",
duration=time.time() - start_time,
details="Long prompt handling failed",
error=str(e)
))
# Test many tools
start_time = time.time()
        try:
            llm = TinyLLM("edge-test")
            many_tools = [{"name": f"tool_{i}", "category": "test"} for i in range(20)]
            tool_calls = await llm.generate_tool_calls("Test query", many_tools)
assert isinstance(tool_calls, list), "Tool calls not returned as list"
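            # The tiny LLM is expected to cap itself at three tool calls even
            # when twenty tools are offered.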
assert len(tool_calls) <= 3, "Too many tool calls generated"
self.results.append(ValidationResult(
feature="Many Tools Handling",
status="PASS",
duration=time.time() - start_time,
details=f"Handled {len(many_tools)} tools, generated {len(tool_calls)} calls"
))
except Exception as e:
self.results.append(ValidationResult(
feature="Many Tools Handling",
status="FAIL",
duration=time.time() - start_time,
details="Many tools handling failed",
error=str(e)
))
def _generate_validation_report(self) -> bool:
"""Generate comprehensive validation report."""
print(f"\n" + "=" * 60)
print("COMPREHENSIVE VALIDATION REPORT")
print("=" * 60)
# Calculate statistics
total_tests = len(self.results)
passed_tests = len([r for r in self.results if r.status == "PASS"])
failed_tests = total_tests - passed_tests
        total_duration = sum(r.duration for r in self.results)
        avg_duration = total_duration / total_tests if total_tests else 0.0
print(f"\nOVERALL RESULTS:")
print(f"Total Tests: {total_tests}")
print(f"Passed: {passed_tests}")
print(f"Failed: {failed_tests}")
print(f"Success Rate: {passed_tests/total_tests*100:.1f}%")
print(f"Total Duration: {total_duration:.2f}s")
print(f"Average Duration: {avg_duration:.3f}s")
print(f"\nDETAILED RESULTS:")
for i, result in enumerate(self.results, 1):
status_icon = "[PASS]" if result.status == "PASS" else "[FAIL]"
print(f"{i:2d}. {status_icon} {result.feature}")
print(f" Duration: {result.duration:.3f}s | {result.details}")
if result.error:
print(f" Error: {result.error}")
# Feature categories
categories = {}
for result in self.results:
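            # Bucket by the first word of the feature name (a coarse grouping
            # such as "Tool" or "Tiny").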
category = result.feature.split()[0]
if category not in categories:
categories[category] = {"passed": 0, "total": 0}
categories[category]["total"] += 1
if result.status == "PASS":
categories[category]["passed"] += 1
print(f"\nFEATURE CATEGORIES:")
for category, stats in categories.items():
success_rate = stats["passed"] / stats["total"] * 100
print(f"{category}: {stats['passed']}/{stats['total']} ({success_rate:.1f}%)")
# Performance analysis
slow_tests = [r for r in self.results if r.duration > 1.0]
fast_tests = [r for r in self.results if r.duration <= 0.1]
print(f"\nPERFORMANCE ANALYSIS:")
print(f"Slow Tests (>1.0s): {len(slow_tests)}")
print(f"Fast Tests (<=0.1s): {len(fast_tests)}")
if slow_tests:
slowest = max(slow_tests, key=lambda x: x.duration)
print(f"Slowest Test: {slowest.feature} ({slowest.duration:.3f}s)")
# Final assessment
print(f"\nFINAL ASSESSMENT:")
        if passed_tests == total_tests:
            print("[SUCCESS] ALL FEATURES VALIDATED SUCCESSFULLY!")
            print("[INFO] MCP server is ready for production use")
            print("[INFO] All features work without live user interaction")
            return True
        else:
            print(f"[WARNING] {failed_tests} features failed validation")
            print("[INFO] Check failed features before production use")
            return False
async def main():
"""Main validation function."""
print("Starting Comprehensive Feature Validation...")
validator = ComprehensiveValidator()
success = await validator.validate_all_features()
    if success:
        print("\n[SUCCESS] All features validated successfully!")
        print("[INFO] MCP server is production-ready!")
        return 0
    else:
        print("\n[FAILURE] Some features failed validation!")
        print("[INFO] Check the detailed report above")
        return 1
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)