# test_context_management.py
#!/usr/bin/env python3
"""
Test Suite for Context Management System
Tests token counting, compression, and large file handling
"""
import asyncio
import sys
import json
from pathlib import Path
# Add src to path
sys.path.insert(0, str(Path(__file__).parent))
from src.core.db_wrapper import ThreadSafeDB
from src.core.context_manager import ContextManager
from src.core.context_strategies import StrategyManager
from src.core.context_aware_orchestrator import ContextAwareOrchestrator
def test_context_manager():
    """Exercise core ContextManager behavior.

    Covers token estimation, priority-based retention under pressure,
    content compression, and the sliding-window summarizer.
    Returns True on success (asserts on failure).
    """
    print("\n=== Testing Context Manager ===")

    # --- 1. Token estimation against rough expected counts ---
    print("\n1. Token Estimation:")
    manager = ContextManager("claude-3-opus")
    samples = (
        ("Hello world", 3),  # ~3 tokens
        ("def test(): pass", 5),  # ~5 tokens for code
        ("x" * 1000, 250),  # 1000 chars ≈ 250 tokens
    )
    for text, approx in samples:
        tokens = manager.estimate_tokens(text)
        print(f" '{text[:20]}...' → {tokens} tokens (expected ~{approx})")
    print(" ✅ Token estimation working")

    # --- 2. Priority-based retention ---
    print("\n2. Priority Management:")
    manager.clear_context()
    for text, prio, kind in (
        ("Critical error", 1, "error"),
        ("Important finding", 3, "analysis"),
        ("Normal result", 5, "result"),
        ("Low priority info", 8, "history"),
    ):
        manager.add_context(text, priority=prio, content_type=kind)
    print(f" Items added: {len(manager.context_items)}")
    print(f" Total tokens: {manager.current_tokens}")
    # A huge low-priority blob should force an optimization pass.
    manager.add_context("x" * 100000, priority=9, content_type="file")
    print(f" After optimization: {len(manager.context_items)} items")
    # The highest-priority item (the error) must survive optimization.
    kept_error = False
    for item in manager.context_items:
        if item.content_type == "error":
            kept_error = True
            break
    assert kept_error, "Error should be kept during optimization"
    print(" ✅ Priority management working")

    # --- 3. Code compression strips comments and shrinks size ---
    print("\n3. Content Compression:")
    code_sample = """
def example_function():
# This is a comment that should be removed
x = 1 # Another comment
y = 2
# More comments
return x + y
"""
    compact = manager._compress_content(code_sample, "code", target_ratio=0.5)
    before = len(code_sample)
    after = len(compact)
    shrink = after / before
    print(f" Original: {before} chars")
    print(f" Compressed: {after} chars")
    print(f" Ratio: {shrink:.2f}")
    assert shrink < 0.8, "Code should be compressed"
    assert "comment" not in compact.lower(), "Comments should be removed"
    print(" ✅ Compression working")

    # --- 4. Sliding window collapses old items into a summary ---
    print("\n4. Sliding Window:")
    manager.clear_context()
    for idx in range(20):
        manager.add_context(f"Item {idx}", priority=5, content_type="analysis")
    print(f" Before window: {len(manager.context_items)} items")
    manager.create_sliding_window(window_size=5)
    print(f" After window: {len(manager.context_items)} items")
    summarized = any(
        entry.content_type == "history_summary" for entry in manager.context_items
    )
    assert summarized, "Should have history summary"
    print(" ✅ Sliding window working")
    return True
def test_context_strategies():
    """Verify StrategyManager: lookup, priority mapping, compression policy.

    Returns True on success (asserts on failure).
    """
    print("\n=== Testing Context Strategies ===")
    manager = StrategyManager()

    # --- 1. Enumerate the first few registered strategies ---
    print("\n1. Available Strategies:")
    names = manager.list_strategies()
    for name in names[:5]:
        strat = manager.get_strategy(name)
        print(f" - {name}: max_tokens={strat.max_file_tokens}")
    assert "instant_review" in names
    assert "debt_orchestrator" in names
    print(" ✅ Strategies loaded")

    # --- 2. Content-type priorities for the instant_review flow ---
    print("\n2. Priority Mapping:")
    err_prio = manager.get_priority_for_content("instant_review", "error")
    assert err_prio == 1, "Errors should be highest priority"
    ana_prio = manager.get_priority_for_content("instant_review", "analysis_result")
    assert ana_prio >= 5, "Analysis should be medium priority"
    print(f" instant_review/error: priority {err_prio}")
    print(f" instant_review/analysis: priority {ana_prio}")
    print(" ✅ Priority mapping working")

    # --- 3. Compression decisions by content type and success flag ---
    print("\n3. Compression Decisions:")
    compress_err = manager.should_compress("instant_review", "error", False)
    compress_ok = manager.should_compress("instant_review", "analysis", True)
    print(f" Compress error: {compress_err}")
    print(f" Compress success: {compress_ok}")
    assert not compress_err, "Errors should not be compressed"
    assert compress_ok, "Success results should be compressed"
    print(" ✅ Compression decisions working")
    return True
async def test_context_aware_orchestrator():
    """Run the context-aware orchestrator against the symbol database.

    Skips (returning True) when the database file is absent so the suite
    stays runnable on fresh checkouts. Returns True on success.
    """
    print("\n=== Testing Context-Aware Orchestrator ===")

    # These tests need a pre-built symbol DB; treat absence as a skip.
    db_path = ".claude-symbols/search.db"
    if not Path(db_path).exists():
        print(" ⚠️ Database not found, skipping orchestrator tests")
        return True

    orchestrator = ContextAwareOrchestrator(
        ThreadSafeDB(db_path), ".", "claude-3-opus"
    )

    # --- 1. A small flow should report context metadata on the result ---
    print("\n1. Context-Aware Execution:")
    snippet = """
import os
import sys
def test_function():
return "Hello"
"""
    outcome = await orchestrator.execute_flow("import_optimizer", {"code": snippet})
    if "_context_metadata" in outcome:
        meta = outcome["_context_metadata"]
        print(f" Model: {meta['model']}")
        print(f" Tokens used: {meta['total_tokens']}")
        print(f" Utilization: {meta['utilization']}")
        print(f" Items in context: {meta['items']}")
    print(" ✅ Context-aware execution working")

    # --- 2. Oversized input should be chunked transparently ---
    print("\n2. Large File Handling:")
    big_snippet = """
def function_1(): pass
def function_2(): pass
""" * 1000
    outcome = await orchestrator.execute_flow("instant_review", {"code": big_snippet})
    if "total_chunks" in outcome:
        print(f" File chunked into {outcome['total_chunks']} parts")
    print(" ✅ Large file handling working")

    # --- 3. Aggregate context statistics ---
    print("\n3. Context Statistics:")
    stats = orchestrator.get_context_statistics()
    print(f" Current tokens: {stats['current_tokens']}")
    print(f" Max tokens: {stats['max_tokens']}")
    print(f" Utilization: {stats['utilization']}")
    if "type_distribution" in stats:
        print(" Content types:")
        for kind, tok_count in stats["type_distribution"].items():
            print(f" - {kind}: {tok_count} tokens")
    print(" ✅ Statistics working")
    return True
def test_large_file_chunking():
    """Check intelligent chunking of code and overlap-based text chunking.

    Returns True on success (asserts/prints describe the outcome).
    """
    print("\n=== Testing File Chunking ===")
    manager = ContextManager("gpt-4")  # smaller context window makes chunking easy to trigger

    # --- 1. Code should split at logical (class/def) boundaries ---
    print("\n1. Code Chunking:")
    sample = """
class ClassOne:
def method1(self):
pass
def method2(self):
pass
class ClassTwo:
def method3(self):
pass
def method4(self):
pass
""" * 10
    pieces = manager._chunk_code_intelligently(sample, 500)
    print(f" Created {len(pieces)} chunks")
    print(f" First chunk size: {len(pieces[0])} chars")
    # Show where the first few chunks begin, to eyeball boundary quality.
    for idx, piece in enumerate(pieces[:3]):
        head = piece.split('\n')[0].strip()
        print(f" Chunk {idx+1} starts with: {head[:50]}")
    print(" ✅ Code chunking working")

    # --- 2. Plain text splits by size with overlap between neighbours ---
    print("\n2. Text Chunking:")
    prose = "Lorem ipsum dolor sit amet. " * 100
    pieces = manager._chunk_text(prose, 200)
    print(f" Created {len(pieces)} chunks")
    print(f" Chunk sizes: {[len(c) for c in pieces[:3]]}")
    if len(pieces) > 1:
        # Tail of chunk 0 should reappear at the start of chunk 1.
        overlapped = pieces[0][-50:] in pieces[1]
        print(f" Has overlap: {overlapped}")
    print(" ✅ Text chunking working")
    return True
def test_context_overflow_handling():
    """Test handling of context overflow scenarios.

    Fills a small-model ("local", 4096-token) context until usage passes
    80% of the target, then verifies optimize_context() brings usage back
    within the target while tracking compression/drop statistics.
    Returns True on success (asserts on failure).

    Fixes vs. original: the unused `success` binding from add_context()
    is dropped, and the placeholder-free f-string is a plain string.
    """
    print("\n=== Testing Context Overflow Handling ===")
    # Very small model so the limit is reachable with little content.
    cm = ContextManager("local", target_utilization=0.9)  # 4096 tokens
    print(f"\n1. Model limits: {cm.max_tokens} tokens")
    print(f" Target: {cm.target_tokens} tokens")

    # Add content until we approach the soft limit.
    print("\n2. Adding content until overflow:")
    for i in range(100):
        content = f"Content block {i} " * 50  # ~50 tokens each
        # Later items get lower priority (higher number) so they are
        # the first candidates for compression/dropping.
        cm.add_context(
            content,
            priority=5 if i < 50 else 8,
            content_type="analysis",
        )
        if cm.current_tokens > cm.target_tokens * 0.8:
            print(f" After {i+1} items: {cm.current_tokens}/{cm.target_tokens} tokens")
            break

    # Force optimization and verify we are back within limits.
    cm.optimize_context()
    print("\n3. After optimization:")
    print(f" Items: {len(cm.context_items)}")
    print(f" Tokens: {cm.current_tokens}")
    print(f" Compressed: {cm.compression_stats['items_compressed']}")
    print(f" Dropped: {cm.compression_stats['items_dropped']}")
    assert cm.current_tokens <= cm.target_tokens, "Should be within limits after optimization"
    print("\n ✅ Overflow handling working")
    return True
async def main():
    """Run all context management test suites.

    Suites run in isolation: an exception in one is reported and marks the
    run as failed, but later suites still execute. Async suites are awaited
    transparently. Returns True when every suite passed.

    Fixes vs. original: five copy-pasted try/except stanzas collapsed into
    one data-driven loop (labels preserve the exact failure messages).
    """
    print("=" * 60)
    print("CONTEXT MANAGEMENT TEST SUITE")
    print("=" * 60)
    # (label used in the failure message, test callable)
    suites = [
        ("Context manager", test_context_manager),
        ("Strategy", test_context_strategies),
        ("Orchestrator", test_context_aware_orchestrator),
        ("Chunking", test_large_file_chunking),
        ("Overflow", test_context_overflow_handling),
    ]
    all_passed = True
    for label, suite in suites:
        try:
            result = suite()
            # Await coroutine-returning (async def) suites transparently.
            if asyncio.iscoroutine(result):
                result = await result
            if not result:
                all_passed = False
        except Exception as e:
            print(f"❌ {label} test failed: {e}")
            all_passed = False

    print("\n" + "=" * 60)
    if all_passed:
        print("✅ ALL CONTEXT MANAGEMENT TESTS PASSED")
        print("\nKey Achievements:")
        print("- Token counting and estimation working")
        print("- Priority-based context management working")
        print("- Content compression working")
        print("- Large file chunking working")
        print("- Context overflow handling working")
        print("- Strategy-based management working")
        print("\n🎉 Context Management System Ready for Production!")
    else:
        print("❌ SOME TESTS FAILED")
        print("Please review the errors above")
    print("=" * 60)
    return all_passed
if __name__ == "__main__":
    # Exit status mirrors the overall pass/fail result of the suite.
    sys.exit(0 if asyncio.run(main()) else 1)