"""Unit tests for embedding functionality."""
import tempfile
from pathlib import Path
import numpy as np
import pytest
from amicus.embeddings import EmbeddingManager
@pytest.fixture
def temp_db():
    """Yield a ``test.db`` path located inside a throwaway directory.

    The directory is created by ``tempfile.TemporaryDirectory`` and removed
    when the fixture is torn down. The database file itself is not created
    here; only its path is provided.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        db_path = Path(scratch_dir).joinpath("test.db")
        yield db_path
def test_embedding_generation(temp_db):
    """Check the shape and dtype of a single encoded text."""
    encoder = EmbeddingManager(temp_db)
    vector = encoder.encode_text("Test message")

    expected_shape = (384,)
    assert vector.shape == expected_shape, "Embedding should be 384-dimensional"
    assert vector.dtype == np.float32, "Embedding should be float32"
def test_task_archival(temp_db):
    """Archive one task, then confirm a task search returns it."""
    store = EmbeddingManager(temp_db)
    store.archive_task(
        {
            "id": "task-1",
            "title": "Fix authentication bug",
            "description": "User login fails with 500 error",
            "created_at": "2026-02-01T10:00:00Z",
            "completed_at": "2026-02-01T12:00:00Z",
        }
    )

    # The archived task must come back from a search.
    hits = store.search_tasks("authentication", limit=1)
    assert len(hits) == 1
    top = hits[0]
    assert top["id"] == "task-1"
    assert top["title"] == "Fix authentication bug"
def test_message_archival(temp_db):
    """Archive one message, then confirm a message search returns it."""
    store = EmbeddingManager(temp_db)
    store.archive_message(
        {
            "id": "msg-1",
            "sender": "Agent-A",
            "message": "Starting work on database migration",
            "timestamp": "2026-02-01T10:00:00Z",
        }
    )

    # The archived message must come back from a search.
    hits = store.search_messages("database", limit=1)
    assert len(hits) == 1
    assert hits[0]["sender"] == "Agent-A"
def test_semantic_search(temp_db):
    """Test that semantic search prefers related content over unrelated content.

    Three tasks are archived: two about authentication (t1, t3) and one about
    a database schema (t2). An authentication query with ``limit=2`` is
    expected to return exactly the two authentication tasks.
    """
    manager = EmbeddingManager(temp_db)

    # Archive tasks with different content
    tasks = [
        {'id': 't1', 'title': 'Fix login bug', 'description': 'Users cannot authenticate'},
        {'id': 't2', 'title': 'Update database schema', 'description': 'Add new user table'},
        {'id': 't3', 'title': 'Improve authentication', 'description': 'Add 2FA support'},
    ]
    for task in tasks:
        manager.archive_task(task)

    # Search for authentication-related tasks
    results = manager.search_tasks("authentication problems", limit=2)

    # Fail with a clear assertion (not an IndexError on results[0] below)
    # if the search comes back short.
    assert len(results) == 2, "Search should return the two requested results"

    # Only one of the three archived tasks is unrelated, so any two distinct
    # results necessarily include t1 or t3; an "either one is present" check
    # could not fail. Assert the unrelated task is actually excluded.
    result_ids = {r['id'] for r in results}
    assert result_ids == {'t1', 't3'}, (
        "Should find auth-related tasks (t1 and t3), not database (t2)"
    )
    assert results[0]['similarity'] > 0.3, "Similarity should be reasonably high"
def test_search_empty_database(temp_db):
    """Searching before anything is archived yields an empty list."""
    store = EmbeddingManager(temp_db)
    hits = store.search_tasks("anything", limit=5)
    assert hits == [], "Empty database should return no results"
def test_cosine_similarity():
    """Check ``_cosine_similarity`` on identical and orthogonal vectors."""
    tolerance = 0.001
    cases = [
        # Identical vectors -> similarity 1.0
        (np.array([1.0, 0.0, 0.0]), np.array([1.0, 0.0, 0.0]), 1.0),
        # Orthogonal vectors -> similarity 0.0
        (np.array([1.0, 0.0]), np.array([0.0, 1.0]), 0.0),
    ]
    for left, right, expected in cases:
        score = EmbeddingManager._cosine_similarity(left, right)
        assert abs(score - expected) < tolerance
def test_task_with_metadata(temp_db):
    """A task's ``metadata`` dict is returned unchanged by search."""
    store = EmbeddingManager(temp_db)
    store.archive_task(
        {
            "id": "task-meta",
            "title": "Test task",
            "description": "A test",
            "metadata": {"priority": "high", "tags": ["bug", "urgent"]},
        }
    )

    hits = store.search_tasks("test", limit=1)
    assert len(hits) == 1
    # Compared against a fresh literal rather than the archived object.
    expected_metadata = {"priority": "high", "tags": ["bug", "urgent"]}
    assert hits[0]["metadata"] == expected_metadata
def test_message_without_id(temp_db):
    """A message dict lacking an ``id`` key can be archived and found."""
    store = EmbeddingManager(temp_db)
    # Note: no "id" entry in this payload.
    payload = {
        "sender": "Agent-B",
        "message": "Test message without ID",
        "timestamp": "2026-02-01T10:00:00Z",
    }
    store.archive_message(payload)

    hits = store.search_messages("test message", limit=1)
    assert len(hits) == 1
    assert hits[0]["sender"] == "Agent-B"
def test_search_with_limit(temp_db):
    """Confirm the number of hits matches the requested ``limit``."""
    store = EmbeddingManager(temp_db)

    # Seed ten tasks whose titles all mention the query term.
    for index in range(10):
        store.archive_task(
            {
                "id": f"task-{index}",
                "title": f"Task {index} about authentication",
                "description": "Testing search limits",
            }
        )

    # Run the same query under two limits, then compare the hit counts.
    top_three = store.search_tasks("authentication", limit=3)
    top_five = store.search_tasks("authentication", limit=5)
    assert len(top_three) == 3
    assert len(top_five) == 5
def test_embedding_consistency(temp_db):
    """Encode one text twice and check that both vectors agree."""
    encoder = EmbeddingManager(temp_db)
    sample = "Consistent test text"

    first = encoder.encode_text(sample)
    second = encoder.encode_text(sample)

    # Compared with np.allclose, i.e. equal within its default tolerances.
    assert np.allclose(first, second)
def test_database_persistence(temp_db):
    """A task archived via one manager is found by a second manager on the same path."""
    # First instance writes the task.
    writer = EmbeddingManager(temp_db)
    writer.archive_task(
        {
            "id": "persist-test",
            "title": "Persistence test",
            "description": "Testing database persistence",
        }
    )

    # Second instance, built from the same path, performs the search.
    reader = EmbeddingManager(temp_db)
    hits = reader.search_tasks("persistence", limit=1)
    assert len(hits) == 1
    assert hits[0]["id"] == "persist-test"
def test_special_characters_in_text(temp_db):
    """Tasks containing quotes and punctuation can be archived and found."""
    store = EmbeddingManager(temp_db)
    payload = {
        "id": "special-chars",
        "title": 'Fix bug with "quotes" and \'apostrophes\'',
        "description": "Handle special chars: @#$%^&*()",
    }
    store.archive_task(payload)

    hits = store.search_tasks("quotes apostrophes", limit=1)
    assert len(hits) == 1
    assert hits[0]["id"] == "special-chars"
def test_empty_string_handling(temp_db):
    """A task with an empty title is archived and its title is returned as ``''``."""
    store = EmbeddingManager(temp_db)
    # Title is deliberately empty; only the description carries text.
    payload = {
        "id": "empty-test",
        "title": "",
        "description": "Only description",
    }
    store.archive_task(payload)

    hits = store.search_tasks("description", limit=1)
    assert len(hits) == 1
    assert hits[0]["title"] == ""