AI Book Agent MCP Server

test_vector_store.py•14.4 KiB

from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import numpy as np
import pytest

from src.search.vector_store import BookSearchEngine, VectorStore


class TestVectorStore:
    """Unit tests for VectorStore, run against a mocked ChromaDB backend."""

    @pytest.fixture
    def mock_chromadb_collection(self):
        """Provide a MagicMock collection whose get/query return empty payloads."""
        empty_get = {'ids': [], 'documents': [], 'metadatas': []}
        empty_query = {
            'ids': [[]],
            'documents': [[]],
            'metadatas': [[]],
            'distances': [[]],
        }

        fake_collection = MagicMock()
        fake_collection.count.return_value = 0
        fake_collection.add = MagicMock()
        fake_collection.delete = MagicMock()
        fake_collection.get = MagicMock(return_value=empty_get)
        fake_collection.query = MagicMock(return_value=empty_query)
        return fake_collection

    @pytest.fixture
    def mock_chromadb_client(self, mock_chromadb_collection):
        """Provide a Mock client that hands back the mock collection."""
        fake_client = Mock()
        fake_client.get_or_create_collection.return_value = mock_chromadb_collection
        fake_client.delete_collection = MagicMock()
        return fake_client

    @pytest.fixture
    def vector_store(self, tmp_path, mock_chromadb_client):
        """Build a VectorStore while PersistentClient is patched to the mock client."""
        db_dir = tmp_path / "test_vector_db"
        patch_target = 'src.search.vector_store.chromadb.PersistentClient'
        with patch(patch_target, return_value=mock_chromadb_client):
            return VectorStore(
                persist_directory=str(db_dir),
                collection_name="test_collection",
            )

    def test_initialization(self, tmp_path):
        """A new store keeps its collection name and has an existing persist directory."""
        db_dir = tmp_path / "test_db"
        with patch('src.search.vector_store.chromadb.PersistentClient') as client_cls:
            fake_collection = Mock()
            fake_collection.count.return_value = 10
            client_cls.return_value.get_or_create_collection.return_value = fake_collection

            created = VectorStore(
                persist_directory=str(db_dir),
                collection_name="test_books",
            )

            assert created.collection_name == "test_books"
            assert created.persist_directory.exists()
            client_cls.assert_called_once()
            fake_collection.count.assert_called_once()

    def test_add_book_chunks(self, vector_store):
        """Adding chunks results in a single collection.add call with matching payload."""
        book_id = "test-book"
        chunks = ["Chunk 1 content", "Chunk 2 content", "Chunk 3 content"]
        embeddings = np.random.randn(3, 384)
        metadata_list = [
            {"chapter_id": 0, "chapter_title": "Chapter 1"},
            {"chapter_id": 0, "chapter_title": "Chapter 1"},
            {"chapter_id": 1, "chapter_title": "Chapter 2"},
        ]

        vector_store.add_book_chunks(book_id, chunks, embeddings, metadata_list)

        collection = vector_store.collection

        # The removal step is expected to look up this book's existing chunks.
        collection.get.assert_called_with(where={"book_id": book_id})

        # Exactly one add, carrying ids, documents, embeddings and metadata.
        collection.add.assert_called_once()
        _, add_kwargs = collection.add.call_args
        assert len(add_kwargs['ids']) == 3
        assert add_kwargs['ids'][0] == "test-book_chunk_0"
        assert add_kwargs['documents'] == chunks
        assert len(add_kwargs['embeddings']) == 3
        for stored_meta in add_kwargs['metadatas']:
            assert stored_meta["book_id"] == book_id

    def test_add_book_chunks_validation(self, vector_store):
        """Mismatched chunk and embedding counts raise ValueError."""
        two_chunks = ["chunk1", "chunk2"]
        three_embeddings = np.zeros((3, 384))  # one more row than there are chunks
        two_metadata = [{"meta": 1}, {"meta": 2}]

        with pytest.raises(ValueError):
            vector_store.add_book_chunks(
                "test-book",
                two_chunks,
                three_embeddings,
                two_metadata,
            )

    def test_remove_book(self, vector_store):
        """Removing a book deletes every chunk id returned for that book."""
        book_id = "test-book"
        existing_ids = ['test-book_chunk_0', 'test-book_chunk_1', 'test-book_chunk_2']

        # Pretend the collection already holds three chunks for the book.
        vector_store.collection.get.return_value = {'ids': existing_ids}

        vector_store.remove_book(book_id)

        vector_store.collection.get.assert_called_with(where={"book_id": book_id})
        vector_store.collection.delete.assert_called_with(
            ids=['test-book_chunk_0', 'test-book_chunk_1', 'test-book_chunk_2']
        )

    def test_search_with_results(self, vector_store):
        """Search hits come back ranked, with the closer hit scoring higher."""
        query_embedding = np.random.randn(384)

        # Canned query response with two hits.
        vector_store.collection.query.return_value = {
            'ids': [['id1', 'id2']],
            'documents': [['Document 1 content', 'Document 2 content']],
            'metadatas': [[
                {'book_id': 'book1', 'chapter_title': 'Chapter 1'},
                {'book_id': 'book2', 'chapter_title': 'Chapter 2'},
            ]],
            'distances': [[0.5, 0.8]],  # L2 distances
        }

        hits = vector_store.search(
            query_embedding, n_results=5, similarity_threshold=0.5
        )

        assert len(hits) == 2
        top_hit, second_hit = hits[0], hits[1]
        assert top_hit['content'] == 'Document 1 content'
        assert top_hit['similarity'] > second_hit['similarity']  # smaller distance wins
        assert top_hit['rank'] == 1
        assert second_hit['rank'] == 2

    def test_search_with_book_filter(self, vector_store):
        """A book filter is forwarded to the collection as an ``$in`` where-clause."""
        query_embedding = np.random.randn(384)
        book_filter = ['book1', 'book2']

        vector_store.search(query_embedding, n_results=5, book_filter=book_filter)

        _, query_kwargs = vector_store.collection.query.call_args
        assert query_kwargs['where'] == {"book_id": {"$in": book_filter}}

    def test_search_with_similarity_threshold(self, vector_store):
        """Hits scoring below the similarity threshold are dropped."""
        query_embedding = np.random.randn(384)

        # Three hits at increasing distance.
        vector_store.collection.query.return_value = {
            'ids': [['id1', 'id2', 'id3']],
            'documents': [['Doc 1', 'Doc 2', 'Doc 3']],
            'metadatas': [[{}, {}, {}]],
            'distances': [[0.1, 0.5, 2.0]],  # Corresponds to similarities ~0.91, 0.67, 0.33
        }

        hits = vector_store.search(
            query_embedding, n_results=5, similarity_threshold=0.6
        )

        # Only the first two hits clear the 0.6 threshold.
        assert len(hits) == 2
        for hit in hits:
            assert hit['similarity'] >= 0.6

    def test_get_book_chunks(self, vector_store):
        """Chunks for a book are returned as content/metadata pairs."""
        book_id = "test-book"
        vector_store.collection.get.return_value = {
            'documents': ['Chunk 1', 'Chunk 2'],
            'metadatas': [
                {'chapter_id': 0, 'chapter_title': 'Chapter 1'},
                {'chapter_id': 1, 'chapter_title': 'Chapter 2'},
            ],
        }

        fetched = vector_store.get_book_chunks(book_id)

        assert len(fetched) == 2
        first_chunk = fetched[0]
        assert first_chunk['content'] == 'Chunk 1'
        assert first_chunk['metadata']['chapter_id'] == 0
        vector_store.collection.get.assert_called_with(
            where={"book_id": book_id},
            include=["documents", "metadatas"],
        )

    def test_list_books(self, vector_store):
        """Book ids are listed once each, in sorted order."""
        stored_metadata = [
            {'book_id': 'book1'},
            {'book_id': 'book2'},
            {'book_id': 'book1'},  # Duplicate
            {'book_id': 'book3'},
        ]
        vector_store.collection.get.return_value = {'metadatas': stored_metadata}

        listed = vector_store.list_books()

        assert listed == ['book1', 'book2', 'book3']  # Sorted and unique

    def test_get_stats(self, vector_store):
        """Stats report chunk count, book count, collection name and directory."""
        vector_store.collection.count.return_value = 100
        vector_store.collection.get.return_value = {
            'metadatas': [
                {'book_id': 'book1'},
                {'book_id': 'book2'},
            ],
        }

        summary = vector_store.get_stats()

        assert summary['total_chunks'] == 100
        assert summary['total_books'] == 2
        assert summary['collection_name'] == 'test_collection'
        assert 'persist_directory' in summary

    def test_clear(self, vector_store, mock_chromadb_client):
        """Clearing deletes the collection and then asks the client for it again."""
        vector_store.clear()

        mock_chromadb_client.delete_collection.assert_called_with(name='test_collection')
        # One get_or_create_collection call from construction, one from clear().
        lookups = mock_chromadb_client.get_or_create_collection.call_count
        assert lookups == 2


class TestBookSearchEngine:
    """Unit tests for BookSearchEngine using mocked store and embedder."""

    @pytest.fixture
    def mock_vector_store(self):
        """Provide a VectorStore-spec'd Mock whose lookups return empty lists."""
        fake_store = Mock(spec=VectorStore)
        fake_store.search.return_value = []
        fake_store.get_book_chunks.return_value = []
        fake_store.list_books.return_value = []
        return fake_store

    @pytest.fixture
    def mock_embedding_generator(self):
        """Provide a Mock embedder returning a random 384-element vector."""
        fake_generator = Mock()
        fake_generator.embed_text.return_value = np.random.randn(384)
        return fake_generator

    @pytest.fixture
    def search_engine(self, mock_vector_store, mock_embedding_generator):
        """Wire a BookSearchEngine to the mock store and mock embedder."""
        return BookSearchEngine(mock_vector_store, mock_embedding_generator)

    def test_search_books(self, search_engine, mock_vector_store, mock_embedding_generator):
        """A text query is embedded once and the store's hit is surfaced in the results."""
        query = "machine learning algorithms"

        # Single hit returned by the mocked vector store.
        store_hit = {
            'content': 'Content about ML algorithms',
            'similarity': 0.85,
            'rank': 1,
            'metadata': {
                'book_id': 'ml-book',
                'book_title': 'ML Fundamentals',
                'author': 'John Doe',
                'chapter_title': 'Chapter 3',
                'chapter_id': 2,
            },
        }
        mock_vector_store.search.return_value = [store_hit]

        found = search_engine.search_books(query, max_results=5)

        assert len(found) == 1
        only_result = found[0]
        assert only_result['book_title'] == 'ML Fundamentals'
        assert only_result['similarity'] == 0.85
        assert only_result['word_count'] == 4
        mock_embedding_generator.embed_text.assert_called_once_with(query)
        mock_vector_store.search.assert_called_once()

    def test_get_book_content_full_book(self, search_engine, mock_vector_store):
        """Without a chapter id, content from every chunk of the book is returned."""
        book_id = "test-book"
        first_chunk = {
            'content': 'Chapter 1 content',
            'metadata': {
                'book_title': 'Test Book',
                'author': 'Test Author',
                'chapter_id': 0,
                'chapter_title': 'Chapter 1',
            },
        }
        second_chunk = {
            'content': 'Chapter 2 content',
            'metadata': {
                'book_title': 'Test Book',
                'author': 'Test Author',
                'chapter_id': 1,
                'chapter_title': 'Chapter 2',
            },
        }
        mock_vector_store.get_book_chunks.return_value = [first_chunk, second_chunk]

        book = search_engine.get_book_content(book_id)

        assert book['book_id'] == book_id
        assert book['book_title'] == 'Test Book'
        assert book['author'] == 'Test Author'
        assert 'Chapter 1 content' in book['content']
        assert 'Chapter 2 content' in book['content']
        assert book['chunk_count'] == 2

    def test_get_book_content_specific_chapter(self, search_engine, mock_vector_store):
        """With a chapter id, only that chapter's content is returned."""
        book_id = "test-book"
        chapter_id = 1
        other_chapter = {
            'content': 'Chapter 1 content',
            'metadata': {'chapter_id': 0, 'chapter_title': 'Chapter 1'},
        }
        wanted_chapter = {
            'content': 'Chapter 2 content',
            'metadata': {
                'chapter_id': 1,
                'chapter_title': 'Chapter 2',
                'book_title': 'Test Book',
                'author': 'Test Author',
            },
        }
        mock_vector_store.get_book_chunks.return_value = [other_chapter, wanted_chapter]

        chapter = search_engine.get_book_content(book_id, chapter_id)

        assert chapter['chapter_id'] == 1
        assert chapter['chapter_title'] == 'Chapter 2'
        assert 'Chapter 2 content' in chapter['content']
        assert 'Chapter 1 content' not in chapter['content']

    def test_get_book_content_not_found(self, search_engine, mock_vector_store):
        """An unknown book yields a result dict carrying an error message."""
        mock_vector_store.get_book_chunks.return_value = []

        outcome = search_engine.get_book_content("non-existent")

        assert 'error' in outcome
        assert 'No content found' in outcome['error']

    def test_find_related_content(self, search_engine, mock_vector_store):
        """Related-content lookup searches every listed book except the excluded one."""
        content = "Neural networks and deep learning"
        mock_vector_store.list_books.return_value = ['book1', 'book2', 'book3']

        # Swap search_books for a Mock so the delegated call can be inspected.
        canned_hits = [{'book_id': 'book2', 'content': 'Related content'}]
        search_engine.search_books = Mock(return_value=canned_hits)

        related = search_engine.find_related_content(
            content, max_results=3, exclude_book='book1'
        )

        assert len(related) == 1
        search_engine.search_books.assert_called_with(
            query=content,
            max_results=3,
            book_filter=['book2', 'book3'],
            similarity_threshold=0.6,
        )

    def test_error_handling_in_search(self, search_engine, mock_embedding_generator):
        """An embedding failure surfaces from search_books with its message intact."""
        mock_embedding_generator.embed_text.side_effect = Exception("Embedding error")

        with pytest.raises(Exception) as exc_info:
            search_engine.search_books("test query")

        raised_message = str(exc_info.value)
        assert "Embedding error" in raised_message

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/trakru/mcp-library-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_vector_store.py•14.4 KiB