NetBox MCP Server

by fringemonkey
test_llm_optimization.py (14 kB)
"""Tests for LLM-specific optimizations.""" import pytest import time import json from unittest.mock import Mock, patch from src.llm_optimizer import LLMOptimizer, LLMResponseFormatter, LLMCache, TokenEstimator class TestLLMOptimizer: """Test LLM optimization functionality.""" @pytest.fixture def llm_optimizer(self): """Create LLM optimizer for testing.""" return LLMOptimizer(max_workers=2) @pytest.fixture def sample_netbox_data(self): """Sample NetBox data for testing.""" return [ { "id": 1, "name": "web-server-01", "display": "web-server-01", "status": {"label": "Active", "value": "active"}, "primary_ip4": {"address": "192.168.1.10/24", "id": 101}, "device_role": {"slug": "web-server", "display": "Web Server"}, "site": {"slug": "dc1", "display": "Data Center 1"}, "certainty_score": 0.95 }, { "id": 2, "name": "db-server-01", "display": "db-server-01", "status": {"label": "Active", "value": "active"}, "primary_ip4": {"address": "192.168.1.20/24", "id": 102}, "device_role": {"slug": "database-server", "display": "Database Server"}, "site": {"slug": "dc1", "display": "Data Center 1"}, "certainty_score": 0.92 } ] def test_optimize_list_response(self, llm_optimizer, sample_netbox_data): """Test optimization of list responses for LLMs.""" response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list") assert isinstance(response.content, str) assert "Found 2 items" in response.content assert "web-server-01" in response.content assert "db-server-01" in response.content assert "Device (2 items)" in response.content assert response.confidence > 0.9 assert response.token_count > 0 def test_optimize_detail_response(self, llm_optimizer, sample_netbox_data): """Test optimization of detail responses for LLMs.""" response = llm_optimizer.optimize_for_llm([sample_netbox_data[0]], "detail") assert isinstance(response.content, str) assert "web-server-01 Details" in response.content assert "**Name**: web-server-01" in response.content assert "**Status**:" in response.content and "Active" in response.content assert "**Primary Ip4**: 192.168.1.10/24" in response.content def test_optimize_search_response(self, llm_optimizer, sample_netbox_data): """Test optimization of search responses for LLMs.""" response = llm_optimizer.optimize_for_llm(sample_netbox_data, "search") assert isinstance(response.content, str) assert "Found 2 matching results" in response.content assert "1. **web-server-01**" in response.content assert "2. **db-server-01**" in response.content assert "Type: device" in response.content def test_empty_response_handling(self, llm_optimizer): """Test handling of empty responses.""" response = llm_optimizer.optimize_for_llm([], "list") assert response.content == "No items found." 
assert response.confidence == 0.0 assert response.token_count >= 0 # Empty response still has some tokens def test_confidence_calculation(self, llm_optimizer): """Test confidence score calculation.""" data_with_scores = [ {"name": "test1", "certainty_score": 0.8}, {"name": "test2", "certainty_score": 0.9} ] response = llm_optimizer.optimize_for_llm(data_with_scores, "list") assert abs(response.confidence - 0.85) < 0.01 # Average of 0.8 and 0.9 (with floating point tolerance) def test_metadata_creation(self, llm_optimizer, sample_netbox_data): """Test LLM metadata creation.""" response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list") assert response.metadata['count'] == 2 assert response.metadata['type'] == "list" assert response.metadata['optimized_for_llm'] is True assert 'device' in response.metadata['data_types'] assert response.metadata['has_confidence_scores'] is True def test_response_time_measurement(self, llm_optimizer, sample_netbox_data): """Test response time measurement.""" response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list") assert response.response_time >= 0 assert response.response_time < 1.0 # Should be very fast def test_token_estimation(self, llm_optimizer, sample_netbox_data): """Test token count estimation.""" response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list") assert response.token_count > 0 assert isinstance(response.token_count, int) @pytest.mark.asyncio async def test_batch_optimization(self, llm_optimizer, sample_netbox_data): """Test batch optimization of multiple responses.""" data_batches = [sample_netbox_data, sample_netbox_data] response_types = ["list", "detail"] responses = await llm_optimizer.batch_optimize(data_batches, response_types) assert len(responses) == 2 assert responses[0].metadata['type'] == "list" assert responses[1].metadata['type'] == "detail" def test_cleanup(self, llm_optimizer): """Test proper cleanup of resources.""" llm_optimizer.close() # Should not raise any exceptions class TestLLMResponseFormatter: """Test LLM response formatting.""" def test_format_for_chat_completion(self): """Test formatting for OpenAI chat completion API.""" from src.llm_optimizer import LLMResponse response = LLMResponse( content="Test content", metadata={"test": "value"}, confidence=0.95, response_time=0.1, token_count=10 ) formatted = LLMResponseFormatter.format_for_chat_completion(response) assert formatted['role'] == 'assistant' assert formatted['content'] == "Test content" assert formatted['metadata']['confidence'] == 0.95 assert formatted['metadata']['test'] == "value" def test_format_for_function_calling(self): """Test formatting for OpenAI function calling.""" from src.llm_optimizer import LLMResponse response = LLMResponse( content="Test content", metadata={"test": "value"}, confidence=0.95, response_time=0.1, token_count=10 ) formatted = LLMResponseFormatter.format_for_function_calling(response) assert formatted['function_name'] == 'netbox_query' assert formatted['arguments']['result'] == "Test content" assert formatted['arguments']['confidence'] == 0.95 def test_format_for_streaming(self): """Test formatting for streaming API.""" from src.llm_optimizer import LLMResponse response = LLMResponse( content="This is a test content that should be split into chunks for streaming.", metadata={"test": "value"}, confidence=0.95, response_time=0.1, token_count=10 ) chunks = LLMResponseFormatter.format_for_streaming(response) assert len(chunks) >= 1 # Should have at least one chunk assert all('delta' in chunk for 
chunk in chunks) assert all('content' in chunk['delta'] for chunk in chunks) assert chunks[0]['metadata'] is not None # First chunk has metadata class TestLLMCache: """Test LLM caching functionality.""" @pytest.fixture def llm_cache(self): """Create LLM cache for testing.""" return LLMCache(max_size=3, ttl=1) # Small cache for testing def test_cache_put_and_get(self, llm_cache): """Test basic cache operations.""" from src.llm_optimizer import LLMResponse response = LLMResponse( content="Test content", metadata={}, confidence=0.95, response_time=0.1, token_count=10 ) # Put in cache llm_cache.put("test_key", response) # Get from cache cached = llm_cache.get("test_key") assert cached is not None assert cached.content == "Test content" assert cached.confidence == 0.95 def test_cache_miss(self, llm_cache): """Test cache miss behavior.""" cached = llm_cache.get("nonexistent_key") assert cached is None def test_cache_ttl_expiration(self, llm_cache): """Test cache TTL expiration.""" from src.llm_optimizer import LLMResponse response = LLMResponse( content="Test content", metadata={}, confidence=0.95, response_time=0.1, token_count=10 ) # Put in cache llm_cache.put("test_key", response) # Wait for TTL to expire time.sleep(1.1) # Should be expired cached = llm_cache.get("test_key") assert cached is None def test_cache_eviction(self, llm_cache): """Test cache eviction when full.""" from src.llm_optimizer import LLMResponse # Fill cache beyond max_size for i in range(5): response = LLMResponse( content=f"Test content {i}", metadata={}, confidence=0.95, response_time=0.1, token_count=10 ) llm_cache.put(f"key_{i}", response) # Cache should only contain max_size items assert len(llm_cache._cache) == 3 def test_cache_clear(self, llm_cache): """Test cache clearing.""" from src.llm_optimizer import LLMResponse response = LLMResponse( content="Test content", metadata={}, confidence=0.95, response_time=0.1, token_count=10 ) llm_cache.put("test_key", response) assert len(llm_cache._cache) == 1 llm_cache.clear() assert len(llm_cache._cache) == 0 def test_cache_stats(self, llm_cache): """Test cache statistics.""" stats = llm_cache.stats() assert 'size' in stats assert 'max_size' in stats assert 'ttl' in stats assert 'hit_rate' in stats class TestTokenEstimator: """Test token estimation functionality.""" @pytest.fixture def token_estimator(self): """Create token estimator for testing.""" return TokenEstimator() def test_estimate_text(self, token_estimator): """Test text token estimation.""" text = "This is a test sentence with multiple words." tokens = token_estimator.estimate(text) assert tokens > 0 assert isinstance(tokens, int) # Should be roughly 1/4 of character count assert tokens <= len(text) // 3 def test_estimate_empty_text(self, token_estimator): """Test estimation of empty text.""" tokens = token_estimator.estimate("") assert tokens == 0 def test_estimate_json(self, token_estimator): """Test JSON token estimation.""" data = {"name": "test", "value": 123, "items": [1, 2, 3]} tokens = token_estimator.estimate_json(data) assert tokens > 0 assert isinstance(tokens, int) def test_caching(self, token_estimator): """Test that estimation results are cached.""" text = "This is a test sentence." 
# First call tokens1 = token_estimator.estimate(text) # Second call should use cache tokens2 = token_estimator.estimate(text) assert tokens1 == tokens2 class TestLLMPerformance: """Test LLM-specific performance optimizations.""" def test_optimization_speed(self): """Test that optimization is fast.""" optimizer = LLMOptimizer() data = [{"name": f"item_{i}", "value": i} for i in range(100)] start_time = time.time() response = optimizer.optimize_for_llm(data, "list") end_time = time.time() assert end_time - start_time < 0.1 # Should be very fast assert response.response_time < 0.1 def test_memory_efficiency(self): """Test that optimization doesn't use excessive memory.""" optimizer = LLMOptimizer() # Process large dataset data = [{"name": f"item_{i}", "value": i} for i in range(1000)] response = optimizer.optimize_for_llm(data, "list") # Should complete without memory issues assert response.content is not None assert len(response.content) > 0 def test_concurrent_optimization(self): """Test concurrent optimization performance.""" optimizer = LLMOptimizer(max_workers=4) data_sets = [ [{"name": f"item_{i}_{j}", "value": i} for i in range(50)] for j in range(10) ] start_time = time.time() # Process all datasets responses = [] for data in data_sets: response = optimizer.optimize_for_llm(data, "list") responses.append(response) end_time = time.time() assert len(responses) == 10 assert end_time - start_time < 1.0 # Should be fast even with 10 datasets
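A minimal sketch of how this module could be run locally, assuming pytest and the pytest-asyncio plugin are installed and the repository root (containing src/llm_optimizer.py) is on the import path; the tests/ path and the helper filename are assumptions, not shown on this page:

# run_llm_optimization_tests.py -- hypothetical helper, not part of the repository
import sys

import pytest

if __name__ == "__main__":
    # -v gives per-test output; the module path is an assumption about the repo layout
    sys.exit(pytest.main(["-v", "tests/test_llm_optimization.py"]))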
