Indigo MCP Server Plugin

test_lancedb_regression.py•7.46 KiB

""" Regression test for LanceDB default limit bug. This test ensures that all records are retrieved when validating vector store contents, not just the default 10 records that LanceDB returns without an explicit limit. Bug: https://github.com/[repo]/issues/[number] Fixed in: validation.py and main.py by adding .limit(999999) to search() calls """ import pytest import tempfile import os import sys from pathlib import Path # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent / "MCP Server.indigoPlugin/Contents/Server Plugin")) import lancedb import pyarrow as pa from mcp_server.common.vector_store.main import VectorStore from mcp_server.common.vector_store.validation import load_validation_data class TestLanceDBRegression: """Regression tests for LanceDB default limit bug.""" @pytest.fixture def temp_db_path(self): """Create a temporary database path.""" temp_dir = tempfile.mkdtemp(prefix="test_lancedb_regression_") yield temp_dir # Cleanup import shutil if os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) def test_validation_retrieves_all_records_not_just_10(self, temp_db_path, monkeypatch): """ Test that validation retrieves ALL records, not just LanceDB's default 10. This is the specific bug that caused embeddings to be regenerated unnecessarily. """ # Set required environment variable monkeypatch.setenv("OPENAI_API_KEY", "test-key-for-testing") # Create a vector store vector_store = VectorStore(temp_db_path) # Create test data with MORE than 10 records (to expose the bug) test_count = 15 test_devices = [] for i in range(1, test_count + 1): test_devices.append({ "id": i, "name": f"Test Device {i}", "description": f"Description for device {i}", "model": "TestModel", "deviceTypeId": "test.device", "address": f"addr_{i}", "protocol": "test" }) # Mock embedding generation to avoid OpenAI calls def mock_embeddings(texts, entity_names=None, progress_callback=None): return [[0.1] * 1536 for _ in texts] def mock_embedding(text): return [0.1] * 1536 # Replace embedding methods original_batch = vector_store._generate_embeddings_batch original_single = vector_store._generate_embedding vector_store._generate_embeddings_batch = mock_embeddings vector_store._generate_embedding = mock_embedding try: # Update embeddings (this creates the records) vector_store.update_embeddings( devices=test_devices, variables=[], actions=[] ) # Now test the validation data loading (where the bug was) table = vector_store.db.open_table("devices") # Load validation data using the function that had the bug validation_data = load_validation_data(table, vector_store.logger) # CRITICAL TEST: We should get ALL 15 records, not just 10 assert len(validation_data) == test_count, \ f"Expected {test_count} records in validation, but got {len(validation_data)}. " \ f"This indicates the LanceDB limit bug has regressed!" # Verify all IDs are present loaded_ids = set(validation_data.keys()) expected_ids = set(range(1, test_count + 1)) missing_ids = expected_ids - loaded_ids assert not missing_ids, \ f"Missing IDs in validation data: {missing_ids}. " \ f"The LanceDB default limit is cutting off records!" finally: # Restore original methods vector_store._generate_embeddings_batch = original_batch vector_store._generate_embedding = original_single vector_store.close() def test_vector_store_stats_counts_all_records(self, temp_db_path, monkeypatch): """Test that get_stats() returns correct counts for tables with >10 records.""" monkeypatch.setenv("OPENAI_API_KEY", "test-key") vector_store = VectorStore(temp_db_path) # Create 12 devices, 11 variables, 13 actions (all > 10) test_devices = [{"id": i, "name": f"Device {i}"} for i in range(1, 13)] test_variables = [{"id": i, "name": f"Var {i}"} for i in range(1, 12)] test_actions = [{"id": i, "name": f"Action {i}"} for i in range(1, 14)] # Mock embeddings vector_store._generate_embeddings_batch = lambda *args, **kwargs: [[0.1] * 1536] * 100 vector_store._generate_embedding = lambda text: [0.1] * 1536 # Update embeddings vector_store.update_embeddings( devices=test_devices, variables=test_variables, actions=test_actions ) # Get stats (this also had the bug) stats = vector_store.get_stats() # Verify counts are correct, not limited to 10 assert stats["tables"]["devices"] == 12, \ f"Expected 12 devices, got {stats['tables']['devices']}" assert stats["tables"]["variables"] == 11, \ f"Expected 11 variables, got {stats['tables']['variables']}" assert stats["tables"]["actions"] == 13, \ f"Expected 13 actions, got {stats['tables']['actions']}" vector_store.close() def test_raw_lancedb_behavior_confirmation(self, temp_db_path): """ Confirm that LanceDB actually has this default limit behavior. This test documents the underlying issue for future reference. """ db = lancedb.connect(temp_db_path) # Create a table with 15 records schema = pa.schema([ pa.field("id", pa.int64()), pa.field("name", pa.string()), pa.field("embedding", pa.list_(pa.float32(), 128)) ]) ids = list(range(1, 16)) names = [f"Item {i}" for i in ids] embeddings = [[0.1] * 128 for _ in ids] test_data = pa.Table.from_arrays( [ pa.array(ids), pa.array(names), pa.array(embeddings, type=pa.list_(pa.float32(), 128)) ], schema=schema ) table = db.create_table("test", test_data) # Test WITHOUT limit (default behavior) results_no_limit = table.search().to_list() # Test WITH explicit limit (returns all records) results_with_limit = table.search().limit(999999).to_list() # Document the behavior (updated for LanceDB 0.25.2) # Note: LanceDB 0.25.2 changed default from 10 to 15 assert len(results_no_limit) == 15, \ f"LanceDB behavior may have changed - expected 15 (v0.25.2 default), got {len(results_no_limit)}" assert len(results_with_limit) == 15, \ "LanceDB limit(999999) not working as expected" print(f"✅ Confirmed: LanceDB search() defaults to 15 records (v0.25.2)") print(f"✅ Confirmed: search().limit(999999) returns all {len(results_with_limit)} records") if __name__ == "__main__": # Run the tests pytest.main([__file__, "-v"])

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mlamoure/indigo-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_lancedb_regression.py•7.46 KiB