"""Tests for LLM-specific optimizations."""
import pytest
import time
import json
from src.llm_optimizer import LLMOptimizer, LLMResponseFormatter, LLMCache, TokenEstimator
class TestLLMOptimizer:
"""Test LLM optimization functionality."""
@pytest.fixture
def llm_optimizer(self):
"""Create LLM optimizer for testing."""
return LLMOptimizer(max_workers=2)
@pytest.fixture
def sample_netbox_data(self):
"""Sample NetBox data for testing."""
return [
{
"id": 1,
"name": "web-server-01",
"display": "web-server-01",
"status": {"label": "Active", "value": "active"},
"primary_ip4": {"address": "192.168.1.10/24", "id": 101},
"device_role": {"slug": "web-server", "display": "Web Server"},
"site": {"slug": "dc1", "display": "Data Center 1"},
"certainty_score": 0.95
},
{
"id": 2,
"name": "db-server-01",
"display": "db-server-01",
"status": {"label": "Active", "value": "active"},
"primary_ip4": {"address": "192.168.1.20/24", "id": 102},
"device_role": {"slug": "database-server", "display": "Database Server"},
"site": {"slug": "dc1", "display": "Data Center 1"},
"certainty_score": 0.92
}
]
def test_optimize_list_response(self, llm_optimizer, sample_netbox_data):
"""Test optimization of list responses for LLMs."""
response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list")
assert isinstance(response.content, str)
assert "Found 2 items" in response.content
assert "web-server-01" in response.content
assert "db-server-01" in response.content
assert "Device (2 items)" in response.content
assert response.confidence > 0.9
assert response.token_count > 0
def test_optimize_detail_response(self, llm_optimizer, sample_netbox_data):
"""Test optimization of detail responses for LLMs."""
response = llm_optimizer.optimize_for_llm([sample_netbox_data[0]], "detail")
assert isinstance(response.content, str)
assert "web-server-01 Details" in response.content
assert "**Name**: web-server-01" in response.content
assert "**Status**:" in response.content and "Active" in response.content
assert "**Primary Ip4**: 192.168.1.10/24" in response.content
def test_optimize_search_response(self, llm_optimizer, sample_netbox_data):
"""Test optimization of search responses for LLMs."""
response = llm_optimizer.optimize_for_llm(sample_netbox_data, "search")
assert isinstance(response.content, str)
assert "Found 2 matching results" in response.content
assert "1. **web-server-01**" in response.content
assert "2. **db-server-01**" in response.content
assert "Type: device" in response.content
def test_empty_response_handling(self, llm_optimizer):
"""Test handling of empty responses."""
response = llm_optimizer.optimize_for_llm([], "list")
assert response.content == "No items found."
assert response.confidence == 0.0
        assert response.token_count >= 0  # Even the "No items found." message may count as a few tokens
def test_confidence_calculation(self, llm_optimizer):
"""Test confidence score calculation."""
data_with_scores = [
{"name": "test1", "certainty_score": 0.8},
{"name": "test2", "certainty_score": 0.9}
]
response = llm_optimizer.optimize_for_llm(data_with_scores, "list")
assert abs(response.confidence - 0.85) < 0.01 # Average of 0.8 and 0.9 (with floating point tolerance)
def test_metadata_creation(self, llm_optimizer, sample_netbox_data):
"""Test LLM metadata creation."""
response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list")
assert response.metadata['count'] == 2
assert response.metadata['type'] == "list"
assert response.metadata['optimized_for_llm'] is True
assert 'device' in response.metadata['data_types']
assert response.metadata['has_confidence_scores'] is True
def test_response_time_measurement(self, llm_optimizer, sample_netbox_data):
"""Test response time measurement."""
response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list")
assert response.response_time >= 0
assert response.response_time < 1.0 # Should be very fast
def test_token_estimation(self, llm_optimizer, sample_netbox_data):
"""Test token count estimation."""
response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list")
assert response.token_count > 0
assert isinstance(response.token_count, int)
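    # Hedged addition: a minimal sanity check, assuming the estimator follows the rough
    # "one token per few characters" heuristic referenced later in this file, so the
    # reported token count should never exceed the character length of the content.
    def test_token_count_tracks_content_length(self, llm_optimizer, sample_netbox_data):
        """Token count should not exceed the character length of the content."""
        response = llm_optimizer.optimize_for_llm(sample_netbox_data, "list")
        # A character-based heuristic should report at most one token per character
        assert response.token_count <= len(response.content)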
@pytest.mark.asyncio
async def test_batch_optimization(self, llm_optimizer, sample_netbox_data):
"""Test batch optimization of multiple responses."""
data_batches = [sample_netbox_data, sample_netbox_data]
response_types = ["list", "detail"]
responses = await llm_optimizer.batch_optimize(data_batches, response_types)
assert len(responses) == 2
assert responses[0].metadata['type'] == "list"
assert responses[1].metadata['type'] == "detail"
def test_cleanup(self, llm_optimizer):
"""Test proper cleanup of resources."""
llm_optimizer.close()
# Should not raise any exceptions
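    # Hedged addition: assumes close() is safe to call more than once, the usual contract
    # for executor-backed resources. If LLMOptimizer does not guarantee idempotent cleanup,
    # this test documents the assumption rather than the implementation.
    def test_cleanup_is_idempotent(self, llm_optimizer):
        """Calling close() twice should not raise."""
        llm_optimizer.close()
        llm_optimizer.close()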
class TestLLMResponseFormatter:
"""Test LLM response formatting."""
def test_format_for_chat_completion(self):
"""Test formatting for OpenAI chat completion API."""
from src.llm_optimizer import LLMResponse
response = LLMResponse(
content="Test content",
metadata={"test": "value"},
confidence=0.95,
response_time=0.1,
token_count=10
)
formatted = LLMResponseFormatter.format_for_chat_completion(response)
assert formatted['role'] == 'assistant'
assert formatted['content'] == "Test content"
assert formatted['metadata']['confidence'] == 0.95
assert formatted['metadata']['test'] == "value"
def test_format_for_function_calling(self):
"""Test formatting for OpenAI function calling."""
from src.llm_optimizer import LLMResponse
response = LLMResponse(
content="Test content",
metadata={"test": "value"},
confidence=0.95,
response_time=0.1,
token_count=10
)
formatted = LLMResponseFormatter.format_for_function_calling(response)
assert formatted['function_name'] == 'netbox_query'
assert formatted['arguments']['result'] == "Test content"
assert formatted['arguments']['confidence'] == 0.95
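    # Hedged addition: both formatter outputs are presumably destined for JSON APIs, so this
    # sketch checks they survive a json.dumps/json.loads round trip. It assumes the formatted
    # dicts contain only JSON-serializable values.
    def test_formatted_output_is_json_serializable(self):
        """Formatter outputs should survive a JSON round trip."""
        from src.llm_optimizer import LLMResponse
        response = LLMResponse(
            content="Test content",
            metadata={"test": "value"},
            confidence=0.95,
            response_time=0.1,
            token_count=10
        )
        chat = LLMResponseFormatter.format_for_chat_completion(response)
        call = LLMResponseFormatter.format_for_function_calling(response)
        # A TypeError here would indicate a non-serializable value in the payload
        assert json.loads(json.dumps(chat))['content'] == "Test content"
        assert json.loads(json.dumps(call))['arguments']['result'] == "Test content"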
def test_format_for_streaming(self):
"""Test formatting for streaming API."""
from src.llm_optimizer import LLMResponse
response = LLMResponse(
content="This is a test content that should be split into chunks for streaming.",
metadata={"test": "value"},
confidence=0.95,
response_time=0.1,
token_count=10
)
chunks = LLMResponseFormatter.format_for_streaming(response)
assert len(chunks) >= 1 # Should have at least one chunk
assert all('delta' in chunk for chunk in chunks)
assert all('content' in chunk['delta'] for chunk in chunks)
assert chunks[0]['metadata'] is not None # First chunk has metadata
class TestLLMCache:
"""Test LLM caching functionality."""
@pytest.fixture
def llm_cache(self):
"""Create LLM cache for testing."""
return LLMCache(max_size=3, ttl=1) # Small cache for testing
def test_cache_put_and_get(self, llm_cache):
"""Test basic cache operations."""
from src.llm_optimizer import LLMResponse
response = LLMResponse(
content="Test content",
metadata={},
confidence=0.95,
response_time=0.1,
token_count=10
)
# Put in cache
llm_cache.put("test_key", response)
# Get from cache
cached = llm_cache.get("test_key")
assert cached is not None
assert cached.content == "Test content"
assert cached.confidence == 0.95
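    # Hedged addition: assumes put() overwrites an existing key, the usual dict-like cache
    # contract; only the put/get API already exercised above is used.
    def test_cache_overwrite(self, llm_cache):
        """Putting the same key twice should return the most recent value."""
        from src.llm_optimizer import LLMResponse
        first = LLMResponse(
            content="First",
            metadata={},
            confidence=0.9,
            response_time=0.1,
            token_count=5
        )
        second = LLMResponse(
            content="Second",
            metadata={},
            confidence=0.95,
            response_time=0.1,
            token_count=5
        )
        llm_cache.put("test_key", first)
        llm_cache.put("test_key", second)
        cached = llm_cache.get("test_key")
        assert cached is not None
        assert cached.content == "Second"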
def test_cache_miss(self, llm_cache):
"""Test cache miss behavior."""
cached = llm_cache.get("nonexistent_key")
assert cached is None
def test_cache_ttl_expiration(self, llm_cache):
"""Test cache TTL expiration."""
from src.llm_optimizer import LLMResponse
response = LLMResponse(
content="Test content",
metadata={},
confidence=0.95,
response_time=0.1,
token_count=10
)
# Put in cache
llm_cache.put("test_key", response)
# Wait for TTL to expire
time.sleep(1.1)
# Should be expired
cached = llm_cache.get("test_key")
assert cached is None
def test_cache_eviction(self, llm_cache):
"""Test cache eviction when full."""
from src.llm_optimizer import LLMResponse
# Fill cache beyond max_size
for i in range(5):
response = LLMResponse(
content=f"Test content {i}",
metadata={},
confidence=0.95,
response_time=0.1,
token_count=10
)
llm_cache.put(f"key_{i}", response)
# Cache should only contain max_size items
assert len(llm_cache._cache) == 3
def test_cache_clear(self, llm_cache):
"""Test cache clearing."""
from src.llm_optimizer import LLMResponse
response = LLMResponse(
content="Test content",
metadata={},
confidence=0.95,
response_time=0.1,
token_count=10
)
llm_cache.put("test_key", response)
assert len(llm_cache._cache) == 1
llm_cache.clear()
assert len(llm_cache._cache) == 0
def test_cache_stats(self, llm_cache):
"""Test cache statistics."""
stats = llm_cache.stats()
assert 'size' in stats
assert 'max_size' in stats
assert 'ttl' in stats
assert 'hit_rate' in stats
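    # Hedged addition: assumes stats() echoes the constructor arguments and reports the
    # current entry count under 'size'; the fixture above creates LLMCache(max_size=3, ttl=1).
    def test_cache_stats_reflect_configuration(self, llm_cache):
        """Stats should mirror constructor settings and the current size."""
        from src.llm_optimizer import LLMResponse
        assert llm_cache.stats()['size'] == 0
        response = LLMResponse(
            content="Test content",
            metadata={},
            confidence=0.95,
            response_time=0.1,
            token_count=10
        )
        llm_cache.put("test_key", response)
        stats = llm_cache.stats()
        assert stats['size'] == 1
        assert stats['max_size'] == 3
        assert stats['ttl'] == 1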
class TestTokenEstimator:
"""Test token estimation functionality."""
@pytest.fixture
def token_estimator(self):
"""Create token estimator for testing."""
return TokenEstimator()
def test_estimate_text(self, token_estimator):
"""Test text token estimation."""
text = "This is a test sentence with multiple words."
tokens = token_estimator.estimate(text)
assert tokens > 0
assert isinstance(tokens, int)
        # Heuristic is roughly one token per four characters, so the estimate
        # should stay comfortably under a third of the character count
        assert tokens <= len(text) // 3
def test_estimate_empty_text(self, token_estimator):
"""Test estimation of empty text."""
tokens = token_estimator.estimate("")
assert tokens == 0
def test_estimate_json(self, token_estimator):
"""Test JSON token estimation."""
data = {"name": "test", "value": 123, "items": [1, 2, 3]}
tokens = token_estimator.estimate_json(data)
assert tokens > 0
assert isinstance(tokens, int)
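    # Hedged addition: assumes a larger JSON payload never yields a smaller estimate,
    # which holds for any length-based heuristic over the serialized form.
    def test_estimate_json_grows_with_payload(self, token_estimator):
        """A superset payload should cost at least as many tokens."""
        small = {"name": "test"}
        large = {"name": "test", "value": 123, "items": [1, 2, 3], "extra": "x" * 50}
        assert token_estimator.estimate_json(large) >= token_estimator.estimate_json(small)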
def test_caching(self, token_estimator):
"""Test that estimation results are cached."""
text = "This is a test sentence."
# First call
tokens1 = token_estimator.estimate(text)
# Second call should use cache
tokens2 = token_estimator.estimate(text)
assert tokens1 == tokens2
class TestLLMPerformance:
"""Test LLM-specific performance optimizations."""
def test_optimization_speed(self):
"""Test that optimization is fast."""
optimizer = LLMOptimizer()
data = [{"name": f"item_{i}", "value": i} for i in range(100)]
start_time = time.time()
response = optimizer.optimize_for_llm(data, "list")
end_time = time.time()
assert end_time - start_time < 0.1 # Should be very fast
assert response.response_time < 0.1
def test_memory_efficiency(self):
"""Test that optimization doesn't use excessive memory."""
optimizer = LLMOptimizer()
# Process large dataset
data = [{"name": f"item_{i}", "value": i} for i in range(1000)]
response = optimizer.optimize_for_llm(data, "list")
# Should complete without memory issues
assert response.content is not None
assert len(response.content) > 0
def test_concurrent_optimization(self):
"""Test concurrent optimization performance."""
optimizer = LLMOptimizer(max_workers=4)
data_sets = [
[{"name": f"item_{i}_{j}", "value": i} for i in range(50)]
for j in range(10)
]
start_time = time.time()
        # Process all datasets from one caller; any concurrency happens inside the optimizer's worker pool
responses = []
for data in data_sets:
response = optimizer.optimize_for_llm(data, "list")
responses.append(response)
end_time = time.time()
assert len(responses) == 10
assert end_time - start_time < 1.0 # Should be fast even with 10 datasets