Session Buddy

Overview Schema Related Servers Score Discussions

test_hnsw_indexing.py•16.2 KiB

"""Tests for HNSW vector indexing performance optimization.

Tests HNSW (Hierarchical Navigable Small World) index creation, usage,
and performance improvements for vector similarity search.
"""

from __future__ import annotations

import typing as t
from datetime import UTC, datetime
from pathlib import Path

import pytest

from session_buddy.adapters.reflection_adapter_oneiric import (
    ReflectionDatabaseAdapterOneiric,
)
from session_buddy.adapters.settings import ReflectionAdapterSettings


class TestHNSWIndexCreation:
    """Test HNSW index creation and configuration.

    Every test opens its own adapter against a fresh DuckDB file and closes
    it in a ``finally`` block, so a failing assertion does not leave the
    connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_reflection.duckdb"

    @pytest.fixture
    def adapter_settings(self, temp_db_path: Path) -> ReflectionAdapterSettings:
        """Create adapter settings with HNSW enabled and explicit tuning values."""
        return ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_hnsw",
            enable_hnsw_index=True,
            hnsw_m=16,
            hnsw_ef_construction=200,
            hnsw_ef_search=64,
        )

    @pytest.mark.asyncio
    async def test_hnsw_index_creation_on_init(self, adapter_settings: ReflectionAdapterSettings) -> None:
        """Test that HNSW indexes are created during adapter initialization."""
        adapter = ReflectionDatabaseAdapterOneiric(settings=adapter_settings)

        # Initialize should create tables and HNSW indexes
        await adapter.initialize()
        try:
            # Select the column by name instead of relying on its position in
            # the duckdb_indexes() result set.
            rows = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%hnsw%'"
            ).fetchall()

            # The LIKE filter already narrows the rows; the lower-cased check
            # is kept as a defensive second filter.
            hnsw_indexes = [name for (name,) in rows if "hnsw" in name.lower()]

            # Should have 2 HNSW indexes (conversations and reflections)
            assert len(hnsw_indexes) >= 2, f"Expected at least 2 HNSW indexes, got {len(hnsw_indexes)}"
        finally:
            # Always release the connection, even when an assertion fails.
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_index_disabled_when_setting_false(self, temp_db_path: Path) -> None:
        """Test that HNSW indexes are not created when enable_hnsw_index=False."""
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_no_hnsw",
            enable_hnsw_index=False,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Filter for index names ending in 'hnsw' preceded by one more
            # character ('_' is a single-character wildcard in LIKE).
            rows = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%_hnsw'"
            ).fetchall()

            # Should have no HNSW indexes when disabled
            hnsw_indexes = [name for (name,) in rows]
            assert len(hnsw_indexes) == 0, f"Expected no HNSW indexes when disabled, got {len(hnsw_indexes)}: {hnsw_indexes}"
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_index_parameters_respected(self, adapter_settings: ReflectionAdapterSettings) -> None:
        """Test that custom HNSW parameters (M, ef_construction) are used.

        The parameter checks are conditional: they only fire when the
        parameter name appears in the SQL text reported by DuckDB.
        NOTE(review): if the reported SQL omits the parameters, only the
        index-count assertion is exercised.
        """
        adapter = ReflectionDatabaseAdapterOneiric(settings=adapter_settings)
        await adapter.initialize()
        try:
            # Column is 'sql' (index creation statement), selected by name.
            rows = adapter.conn.execute(
                """
            SELECT index_name, sql
            FROM duckdb_indexes()
            WHERE index_name LIKE '%hnsw%'
            """
            ).fetchall()

            assert len(rows) >= 2

            # Verify SQL contains our custom parameters
            for _index_name, index_sql in rows:
                # "M = 16" in sql implies "16" in sql, so one substring
                # check covers both spellings.
                if "M" in index_sql:
                    assert "16" in index_sql
                if "ef_construction" in index_sql:
                    assert "200" in index_sql
        finally:
            await adapter.aclose()


class TestHNSWSearchPerformance:
    """Test that vector search works with HNSW indexes.

    The ``populated_adapter`` fixture owns the adapter's lifetime: it yields
    the adapter to the test and closes it during teardown, so the tests do
    not close it themselves and a failing assertion cannot leak it.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_search.duckdb"

    @pytest.fixture
    async def populated_adapter(self, temp_db_path: Path) -> t.AsyncIterator[ReflectionDatabaseAdapterOneiric]:
        """Yield an initialized adapter holding five sample conversations.

        NOTE(review): an async fixture declared with plain ``pytest.fixture``
        relies on pytest-asyncio handling it (e.g. auto mode) -- confirm the
        project's configuration.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_search",
            enable_hnsw_index=True,
            # Set explicitly so the ef_search test below does not depend on
            # whatever the settings class uses as its default.
            hnsw_ef_search=64,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Add sample conversations with embeddings
            conversations = [
                "Python programming language",
                "Machine learning algorithms",
                "Web development with HTML",
                "Database management systems",
                "Cloud computing infrastructure",
            ]
            for conv in conversations:
                await adapter.store_conversation(conv, {"test": True})

            yield adapter
        finally:
            # Teardown runs whether the test passed, failed, or setup raised.
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_vector_search_with_hnsw(self, populated_adapter: ReflectionDatabaseAdapterOneiric) -> None:
        """Test that vector search works correctly with HNSW indexes."""
        # Search for similar conversations
        results = await populated_adapter.search_conversations(
            query="Python code", limit=5, threshold=0.0
        )

        # Should return results
        assert len(results) > 0

        # Check that results have expected structure
        for result in results:
            assert "id" in result
            assert "content" in result
            assert "score" in result
            assert isinstance(result["score"], float)
            assert 0.0 <= result["score"] <= 1.0

        # Top result should be about Python (highest similarity)
        assert "python" in results[0]["content"].lower()

    @pytest.mark.asyncio
    async def test_hnsw_ef_search_parameter_set(self, populated_adapter: ReflectionDatabaseAdapterOneiric) -> None:
        """Test that hnsw_ef_search parameter is set during search."""
        # Perform a search (should set hnsw_ef_search parameter)
        await populated_adapter.search_conversations("Python", limit=3)

        # Read the setting's value by column name rather than by position.
        row = populated_adapter.conn.execute(
            "SELECT value FROM duckdb_settings() WHERE name = 'hnsw_ef_search'"
        ).fetchone()

        # Parameter should be set to our configured value (64)
        assert row is not None
        assert row[0] == "64"  # compared as the string "64"


class TestHNSWGracefulFallback:
    """Test that storing and searching work with and without HNSW enabled.

    Adapters are closed in ``finally`` blocks so a failing assertion does
    not leave the database connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_fallback.duckdb"

    @pytest.mark.asyncio
    async def test_fallback_without_vss_extension(self, temp_db_path: Path) -> None:
        """Test that search still returns scored results with HNSW requested.

        NOTE(review): nothing in this test disables or removes the VSS
        extension, so the array_cosine_similarity fallback is presumably only
        exercised on environments where VSS fails to load -- confirm.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_fallback",
            enable_hnsw_index=True,  # Try to enable HNSW
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)

        # Initialize (should handle VSS extension gracefully)
        await adapter.initialize()
        try:
            # Add test data
            await adapter.store_conversation("Test conversation one", {"test": True})
            await adapter.store_conversation("Test conversation two", {"test": True})

            # Use threshold=0.0 to ensure we get results even with lower similarity
            results = await adapter.search_conversations("Test", limit=5, threshold=0.0)

            # Both stored conversations should come back
            assert len(results) >= 2, f"Expected at least 2 results, got {len(results)}"

            # Results should have valid scores even without HNSW
            for result in results:
                assert "score" in result
                assert isinstance(result["score"], float)
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_disabled_no_error(self, temp_db_path: Path) -> None:
        """Test that disabling HNSW doesn't cause any errors."""
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_disabled",
            enable_hnsw_index=False,  # Explicitly disable
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Add and search data
            await adapter.store_conversation("Data without HNSW", {"test": True})
            # Use threshold=0.0 to ensure we get results
            results = await adapter.search_conversations("Data", limit=5, threshold=0.0)

            # Should work fine without HNSW
            assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}"
        finally:
            await adapter.aclose()


class TestHNSWConfigurationOptions:
    """Test various HNSW configuration options.

    Adapters are closed in ``finally`` blocks so a failing assertion does
    not leave the database connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_config.duckdb"

    @pytest.mark.asyncio
    async def test_custom_hnsw_parameters(self, temp_db_path: Path) -> None:
        """Test creating HNSW indexes with custom M and ef_construction values.

        Only index existence is asserted; the parameter values themselves
        are not inspected here.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_custom",
            enable_hnsw_index=True,
            hnsw_m=32,  # Custom M value
            hnsw_ef_construction=400,  # Custom ef_construction
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Verify indexes were created successfully.
            # Note: The SQL returned by duckdb_indexes() may not include the
            # WITH clause, so this test checks index names only.
            rows = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%_hnsw'"
            ).fetchall()

            # Should have created HNSW indexes
            assert len(rows) >= 2, f"Expected at least 2 HNSW indexes, got {len(rows)}"
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_with_different_metrics(self, temp_db_path: Path) -> None:
        """Test that an HNSW index is created with the "l2sq" distance metric."""
        # Note: DuckDB VSS uses "l2sq" for Euclidean distance, not "euclidean"
        settings_l2 = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_l2",
            distance_metric="l2sq",
            enable_hnsw_index=True,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings_l2)
        await adapter.initialize()
        try:
            # Verify index was created successfully
            row = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%conv_embeddings_hnsw'"
            ).fetchone()

            assert row is not None
            # Column 0 is index_name (the only selected column)
            assert "conv_embeddings_hnsw" in row[0]
        finally:
            await adapter.aclose()


class TestHNSWPerformanceBasics:
    """Basic tests to verify HNSW functionality for performance improvements.

    Adapters are closed in ``finally`` blocks so a failing assertion (or a
    failing second ``initialize()``) does not leave a connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_perf.duckdb"

    @pytest.mark.asyncio
    async def test_index_exists_check(self, temp_db_path: Path) -> None:
        """Test that we can check if HNSW indexes exist."""
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_exist",
            enable_hnsw_index=True,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Verify we can query for HNSW indexes
            rows = adapter.conn.execute(
                """
            SELECT table_name, index_name
            FROM duckdb_indexes()
            WHERE index_name LIKE '%hnsw%'
            """
            ).fetchall()

            assert len(rows) >= 2  # At least conversations and reflections
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_multiple_collections_independent_indexes(self, temp_db_path: Path) -> None:
        """Test that different collections get independent HNSW indexes.

        Two adapters with different ``collection_name`` values are opened on
        the same database file; index names are then read through the first
        adapter's connection and grouped by collection name.
        """
        # Create first collection
        settings1 = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="collection1",
            enable_hnsw_index=True,
        )
        adapter1 = ReflectionDatabaseAdapterOneiric(settings=settings1)
        await adapter1.initialize()
        try:
            # Create second collection
            settings2 = ReflectionAdapterSettings(
                database_path=temp_db_path,
                collection_name="collection2",
                enable_hnsw_index=True,
            )
            adapter2 = ReflectionDatabaseAdapterOneiric(settings=settings2)
            await adapter2.initialize()
            try:
                # Check that both collections have their own HNSW indexes
                rows = adapter1.conn.execute(
                    "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%hnsw%'"
                ).fetchall()

                # Column 0 is index_name (from SELECT clause)
                index_names = [row[0] for row in rows]
                collection1_indexes = [name for name in index_names if "collection1" in name]
                collection2_indexes = [name for name in index_names if "collection2" in name]

                # Each collection should have at least 2 HNSW indexes
                # (conversations + reflections)
                assert len(collection1_indexes) >= 2
                assert len(collection2_indexes) >= 2
            finally:
                # Adapters are released in reverse order of creation.
                await adapter2.aclose()
        finally:
            await adapter1.aclose()


class TestHNSWWithRealData:
    """Integration-style ranking test on a small set of stored conversations."""

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_realdata.duckdb"

    @pytest.mark.asyncio
    async def test_search_accuracy_with_hnsw(self, temp_db_path: Path) -> None:
        """Test that Python content ranks ahead of JavaScript content.

        NOTE(review): the ranking assertion only runs when the results
        contain both a Python and a JavaScript entry; with ``threshold=0.5``
        the test passes without asserting anything if either is absent.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_accuracy",
            enable_hnsw_index=True,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Add related conversations
            await adapter.store_conversation("How to write Python functions", {"topic": "python"})
            await adapter.store_conversation("Python class inheritance tutorial", {"topic": "python"})
            await adapter.store_conversation("JavaScript function examples", {"topic": "javascript"})
            await adapter.store_conversation("Database query optimization", {"topic": "database"})

            # Search for Python-related content
            results = await adapter.search_conversations(
                "Python programming", limit=5, threshold=0.5
            )

            # Position of the first hit for each topic, or None when absent.
            first_python_idx = next(
                (i for i, r in enumerate(results) if "python" in r["content"].lower()),
                None,
            )
            first_js_idx = next(
                (i for i, r in enumerate(results) if "javascript" in r["content"].lower()),
                None,
            )

            # Python should appear before JavaScript in results
            if first_python_idx is not None and first_js_idx is not None:
                assert first_python_idx < first_js_idx, "Python results should rank higher"
        finally:
            # Always release the connection, even when an assertion fails.
            await adapter.aclose()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lesleslie/session-buddy'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_hnsw_indexing.py•16.2 KiB

"""Tests for HNSW vector indexing performance optimization.

Tests HNSW (Hierarchical Navigable Small World) index creation, usage,
and performance improvements for vector similarity search.
"""

from __future__ import annotations

import typing as t
from datetime import UTC, datetime
from pathlib import Path

import pytest

from session_buddy.adapters.reflection_adapter_oneiric import (
    ReflectionDatabaseAdapterOneiric,
)
from session_buddy.adapters.settings import ReflectionAdapterSettings


class TestHNSWIndexCreation:
    """Test HNSW index creation and configuration.

    Every test opens its own adapter against a fresh DuckDB file and closes
    it in a ``finally`` block, so a failing assertion does not leave the
    connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_reflection.duckdb"

    @pytest.fixture
    def adapter_settings(self, temp_db_path: Path) -> ReflectionAdapterSettings:
        """Create adapter settings with HNSW enabled and explicit tuning values."""
        return ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_hnsw",
            enable_hnsw_index=True,
            hnsw_m=16,
            hnsw_ef_construction=200,
            hnsw_ef_search=64,
        )

    @pytest.mark.asyncio
    async def test_hnsw_index_creation_on_init(self, adapter_settings: ReflectionAdapterSettings) -> None:
        """Test that HNSW indexes are created during adapter initialization."""
        adapter = ReflectionDatabaseAdapterOneiric(settings=adapter_settings)

        # Initialize should create tables and HNSW indexes
        await adapter.initialize()
        try:
            # Select the column by name instead of relying on its position in
            # the duckdb_indexes() result set.
            rows = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%hnsw%'"
            ).fetchall()

            # The LIKE filter already narrows the rows; the lower-cased check
            # is kept as a defensive second filter.
            hnsw_indexes = [name for (name,) in rows if "hnsw" in name.lower()]

            # Should have 2 HNSW indexes (conversations and reflections)
            assert len(hnsw_indexes) >= 2, f"Expected at least 2 HNSW indexes, got {len(hnsw_indexes)}"
        finally:
            # Always release the connection, even when an assertion fails.
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_index_disabled_when_setting_false(self, temp_db_path: Path) -> None:
        """Test that HNSW indexes are not created when enable_hnsw_index=False."""
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_no_hnsw",
            enable_hnsw_index=False,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Filter for index names ending in 'hnsw' preceded by one more
            # character ('_' is a single-character wildcard in LIKE).
            rows = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%_hnsw'"
            ).fetchall()

            # Should have no HNSW indexes when disabled
            hnsw_indexes = [name for (name,) in rows]
            assert len(hnsw_indexes) == 0, f"Expected no HNSW indexes when disabled, got {len(hnsw_indexes)}: {hnsw_indexes}"
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_index_parameters_respected(self, adapter_settings: ReflectionAdapterSettings) -> None:
        """Test that custom HNSW parameters (M, ef_construction) are used.

        The parameter checks are conditional: they only fire when the
        parameter name appears in the SQL text reported by DuckDB.
        NOTE(review): if the reported SQL omits the parameters, only the
        index-count assertion is exercised.
        """
        adapter = ReflectionDatabaseAdapterOneiric(settings=adapter_settings)
        await adapter.initialize()
        try:
            # Column is 'sql' (index creation statement), selected by name.
            rows = adapter.conn.execute(
                """
            SELECT index_name, sql
            FROM duckdb_indexes()
            WHERE index_name LIKE '%hnsw%'
            """
            ).fetchall()

            assert len(rows) >= 2

            # Verify SQL contains our custom parameters
            for _index_name, index_sql in rows:
                # "M = 16" in sql implies "16" in sql, so one substring
                # check covers both spellings.
                if "M" in index_sql:
                    assert "16" in index_sql
                if "ef_construction" in index_sql:
                    assert "200" in index_sql
        finally:
            await adapter.aclose()


class TestHNSWSearchPerformance:
    """Test that vector search works with HNSW indexes.

    The ``populated_adapter`` fixture owns the adapter's lifetime: it yields
    the adapter to the test and closes it during teardown, so the tests do
    not close it themselves and a failing assertion cannot leak it.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_search.duckdb"

    @pytest.fixture
    async def populated_adapter(self, temp_db_path: Path) -> t.AsyncIterator[ReflectionDatabaseAdapterOneiric]:
        """Yield an initialized adapter holding five sample conversations.

        NOTE(review): an async fixture declared with plain ``pytest.fixture``
        relies on pytest-asyncio handling it (e.g. auto mode) -- confirm the
        project's configuration.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_search",
            enable_hnsw_index=True,
            # Set explicitly so the ef_search test below does not depend on
            # whatever the settings class uses as its default.
            hnsw_ef_search=64,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Add sample conversations with embeddings
            conversations = [
                "Python programming language",
                "Machine learning algorithms",
                "Web development with HTML",
                "Database management systems",
                "Cloud computing infrastructure",
            ]
            for conv in conversations:
                await adapter.store_conversation(conv, {"test": True})

            yield adapter
        finally:
            # Teardown runs whether the test passed, failed, or setup raised.
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_vector_search_with_hnsw(self, populated_adapter: ReflectionDatabaseAdapterOneiric) -> None:
        """Test that vector search works correctly with HNSW indexes."""
        # Search for similar conversations
        results = await populated_adapter.search_conversations(
            query="Python code", limit=5, threshold=0.0
        )

        # Should return results
        assert len(results) > 0

        # Check that results have expected structure
        for result in results:
            assert "id" in result
            assert "content" in result
            assert "score" in result
            assert isinstance(result["score"], float)
            assert 0.0 <= result["score"] <= 1.0

        # Top result should be about Python (highest similarity)
        assert "python" in results[0]["content"].lower()

    @pytest.mark.asyncio
    async def test_hnsw_ef_search_parameter_set(self, populated_adapter: ReflectionDatabaseAdapterOneiric) -> None:
        """Test that hnsw_ef_search parameter is set during search."""
        # Perform a search (should set hnsw_ef_search parameter)
        await populated_adapter.search_conversations("Python", limit=3)

        # Read the setting's value by column name rather than by position.
        row = populated_adapter.conn.execute(
            "SELECT value FROM duckdb_settings() WHERE name = 'hnsw_ef_search'"
        ).fetchone()

        # Parameter should be set to our configured value (64)
        assert row is not None
        assert row[0] == "64"  # compared as the string "64"


class TestHNSWGracefulFallback:
    """Test that storing and searching work with and without HNSW enabled.

    Adapters are closed in ``finally`` blocks so a failing assertion does
    not leave the database connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_fallback.duckdb"

    @pytest.mark.asyncio
    async def test_fallback_without_vss_extension(self, temp_db_path: Path) -> None:
        """Test that search still returns scored results with HNSW requested.

        NOTE(review): nothing in this test disables or removes the VSS
        extension, so the array_cosine_similarity fallback is presumably only
        exercised on environments where VSS fails to load -- confirm.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_fallback",
            enable_hnsw_index=True,  # Try to enable HNSW
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)

        # Initialize (should handle VSS extension gracefully)
        await adapter.initialize()
        try:
            # Add test data
            await adapter.store_conversation("Test conversation one", {"test": True})
            await adapter.store_conversation("Test conversation two", {"test": True})

            # Use threshold=0.0 to ensure we get results even with lower similarity
            results = await adapter.search_conversations("Test", limit=5, threshold=0.0)

            # Both stored conversations should come back
            assert len(results) >= 2, f"Expected at least 2 results, got {len(results)}"

            # Results should have valid scores even without HNSW
            for result in results:
                assert "score" in result
                assert isinstance(result["score"], float)
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_disabled_no_error(self, temp_db_path: Path) -> None:
        """Test that disabling HNSW doesn't cause any errors."""
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_disabled",
            enable_hnsw_index=False,  # Explicitly disable
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Add and search data
            await adapter.store_conversation("Data without HNSW", {"test": True})
            # Use threshold=0.0 to ensure we get results
            results = await adapter.search_conversations("Data", limit=5, threshold=0.0)

            # Should work fine without HNSW
            assert len(results) >= 1, f"Expected at least 1 result, got {len(results)}"
        finally:
            await adapter.aclose()


class TestHNSWConfigurationOptions:
    """Test various HNSW configuration options.

    Adapters are closed in ``finally`` blocks so a failing assertion does
    not leave the database connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_config.duckdb"

    @pytest.mark.asyncio
    async def test_custom_hnsw_parameters(self, temp_db_path: Path) -> None:
        """Test creating HNSW indexes with custom M and ef_construction values.

        Only index existence is asserted; the parameter values themselves
        are not inspected here.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_custom",
            enable_hnsw_index=True,
            hnsw_m=32,  # Custom M value
            hnsw_ef_construction=400,  # Custom ef_construction
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Verify indexes were created successfully.
            # Note: The SQL returned by duckdb_indexes() may not include the
            # WITH clause, so this test checks index names only.
            rows = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%_hnsw'"
            ).fetchall()

            # Should have created HNSW indexes
            assert len(rows) >= 2, f"Expected at least 2 HNSW indexes, got {len(rows)}"
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_hnsw_with_different_metrics(self, temp_db_path: Path) -> None:
        """Test that an HNSW index is created with the "l2sq" distance metric."""
        # Note: DuckDB VSS uses "l2sq" for Euclidean distance, not "euclidean"
        settings_l2 = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_l2",
            distance_metric="l2sq",
            enable_hnsw_index=True,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings_l2)
        await adapter.initialize()
        try:
            # Verify index was created successfully
            row = adapter.conn.execute(
                "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%conv_embeddings_hnsw'"
            ).fetchone()

            assert row is not None
            # Column 0 is index_name (the only selected column)
            assert "conv_embeddings_hnsw" in row[0]
        finally:
            await adapter.aclose()


class TestHNSWPerformanceBasics:
    """Basic tests to verify HNSW functionality for performance improvements.

    Adapters are closed in ``finally`` blocks so a failing assertion (or a
    failing second ``initialize()``) does not leave a connection open.
    """

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_perf.duckdb"

    @pytest.mark.asyncio
    async def test_index_exists_check(self, temp_db_path: Path) -> None:
        """Test that we can check if HNSW indexes exist."""
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_exist",
            enable_hnsw_index=True,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Verify we can query for HNSW indexes
            rows = adapter.conn.execute(
                """
            SELECT table_name, index_name
            FROM duckdb_indexes()
            WHERE index_name LIKE '%hnsw%'
            """
            ).fetchall()

            assert len(rows) >= 2  # At least conversations and reflections
        finally:
            await adapter.aclose()

    @pytest.mark.asyncio
    async def test_multiple_collections_independent_indexes(self, temp_db_path: Path) -> None:
        """Test that different collections get independent HNSW indexes.

        Two adapters with different ``collection_name`` values are opened on
        the same database file; index names are then read through the first
        adapter's connection and grouped by collection name.
        """
        # Create first collection
        settings1 = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="collection1",
            enable_hnsw_index=True,
        )
        adapter1 = ReflectionDatabaseAdapterOneiric(settings=settings1)
        await adapter1.initialize()
        try:
            # Create second collection
            settings2 = ReflectionAdapterSettings(
                database_path=temp_db_path,
                collection_name="collection2",
                enable_hnsw_index=True,
            )
            adapter2 = ReflectionDatabaseAdapterOneiric(settings=settings2)
            await adapter2.initialize()
            try:
                # Check that both collections have their own HNSW indexes
                rows = adapter1.conn.execute(
                    "SELECT index_name FROM duckdb_indexes() WHERE index_name LIKE '%hnsw%'"
                ).fetchall()

                # Column 0 is index_name (from SELECT clause)
                index_names = [row[0] for row in rows]
                collection1_indexes = [name for name in index_names if "collection1" in name]
                collection2_indexes = [name for name in index_names if "collection2" in name]

                # Each collection should have at least 2 HNSW indexes
                # (conversations + reflections)
                assert len(collection1_indexes) >= 2
                assert len(collection2_indexes) >= 2
            finally:
                # Adapters are released in reverse order of creation.
                await adapter2.aclose()
        finally:
            await adapter1.aclose()


class TestHNSWWithRealData:
    """Integration-style ranking test on a small set of stored conversations."""

    @pytest.fixture
    def temp_db_path(self, tmp_path: Path) -> Path:
        """Return a database path inside pytest's per-test ``tmp_path``."""
        return tmp_path / "test_realdata.duckdb"

    @pytest.mark.asyncio
    async def test_search_accuracy_with_hnsw(self, temp_db_path: Path) -> None:
        """Test that Python content ranks ahead of JavaScript content.

        NOTE(review): the ranking assertion only runs when the results
        contain both a Python and a JavaScript entry; with ``threshold=0.5``
        the test passes without asserting anything if either is absent.
        """
        settings = ReflectionAdapterSettings(
            database_path=temp_db_path,
            collection_name="test_accuracy",
            enable_hnsw_index=True,
        )

        adapter = ReflectionDatabaseAdapterOneiric(settings=settings)
        await adapter.initialize()
        try:
            # Add related conversations
            await adapter.store_conversation("How to write Python functions", {"topic": "python"})
            await adapter.store_conversation("Python class inheritance tutorial", {"topic": "python"})
            await adapter.store_conversation("JavaScript function examples", {"topic": "javascript"})
            await adapter.store_conversation("Database query optimization", {"topic": "database"})

            # Search for Python-related content
            results = await adapter.search_conversations(
                "Python programming", limit=5, threshold=0.5
            )

            # Position of the first hit for each topic, or None when absent.
            first_python_idx = next(
                (i for i, r in enumerate(results) if "python" in r["content"].lower()),
                None,
            )
            first_js_idx = next(
                (i for i, r in enumerate(results) if "javascript" in r["content"].lower()),
                None,
            )

            # Python should appear before JavaScript in results
            if first_python_idx is not None and first_js_idx is not None:
                assert first_python_idx < first_js_idx, "Python results should rank higher"
        finally:
            # Always release the connection, even when an assertion fails.
            await adapter.aclose()