principia-mcp

test_search.py•5.17 KiB

"""Tests for FTS5 SearchIndex."""

import os
import tempfile
from pathlib import Path

import pytest

from principia_mcp.models import Paper
from principia_mcp.search import SearchIndex
from principia_mcp.zotero import ZoteroReader


@pytest.fixture
def temp_cache_dir():
    """Yield a throwaway directory, as a ``Path``, for the search index cache.

    The directory is deleted again once the requesting test has finished.
    """
    scratch = tempfile.TemporaryDirectory()
    try:
        yield Path(scratch.name)
    finally:
        # Mirrors what leaving a ``with`` block would do.
        scratch.cleanup()


@pytest.fixture
def sample_papers():
    """Return three hand-written ``Paper`` records used as index input.

    Papers 1 and 3 share the "AI Research" collection; paper 2 (quantum
    computing) sits in unrelated collections, which the collection-filter
    tests depend on.
    """
    nlp_paper = Paper(
        id=1,
        key="ABC123",
        title="Machine Learning for Natural Language Processing",
        authors=["Alice Smith", "Bob Jones"],
        abstract="This paper explores deep learning techniques for NLP tasks.",
        collections=["AI Research", "NLP"],
    )
    quantum_paper = Paper(
        id=2,
        key="DEF456",
        title="Quantum Computing Fundamentals",
        authors=["Carol White"],
        abstract="An introduction to quantum algorithms and quantum supremacy.",
        collections=["Physics", "Computing"],
    )
    vision_paper = Paper(
        id=3,
        key="GHI789",
        title="Neural Networks in Computer Vision",
        authors=["David Brown", "Eve Green"],
        abstract="Convolutional neural networks for image classification.",
        collections=["AI Research", "Computer Vision"],
    )
    return [nlp_paper, quantum_paper, vision_paper]


@pytest.fixture
def index(temp_cache_dir, sample_papers):
    """Return a ``SearchIndex`` already built from the sample papers.

    The index is built with ``zotero_mtime=1000.0``; the ``needs_rebuild``
    tests compare against that value.
    """
    populated = SearchIndex(cache_dir=temp_cache_dir)
    populated.build(sample_papers, zotero_mtime=1000.0)
    return populated


def test_index_build(temp_cache_dir, sample_papers):
    """A fresh index is empty and stops being empty after ``build``."""
    search_index = SearchIndex(cache_dir=temp_cache_dir)
    # Nothing has been indexed yet.
    assert search_index.is_empty()

    search_index.build(sample_papers, zotero_mtime=1000.0)
    # Building from the sample papers must populate the index.
    assert not search_index.is_empty()


def test_search_returns_results(index):
    """A query matching a paper yields that paper's ID."""
    hits = index.search("machine learning")
    # Paper 1 is the machine-learning paper in the sample set.
    assert 1 in hits


def test_search_ranking(index):
    """Test that the most relevant result comes first.

    Paper 3 is the only sample paper whose title and abstract both contain
    "neural networks", so it must head the result list.
    """
    results = index.search("neural networks")
    assert len(results) >= 1
    # A membership check would pass for any ordering; pinning the top hit
    # makes the test actually exercise ranking.
    assert results[0] == 3


def test_search_no_results(index):
    """A query that matches nothing yields an empty list."""
    # The token below appears in none of the sample papers.
    assert index.search("xyznonexistent123") == []


def test_collection_filtering(index):
    """Restricting a search to a collection keeps only that collection's papers."""
    # Limit the query to the "AI Research" collection.
    matches = index.search("neural", collection="AI Research")
    # Paper 3 (neural networks) belongs to "AI Research".
    assert 3 in matches
    # Paper 2 (quantum) does not belong to "AI Research".
    assert 2 not in matches


def test_collection_filtering_no_match(index):
    """A collection filter that excludes every matching paper yields ``[]``."""
    # The only quantum paper is outside "AI Research", so nothing survives.
    filtered = index.search("quantum", collection="AI Research")
    assert filtered == []


def test_needs_rebuild_new_index(temp_cache_dir):
    """An index that has never been built reports that it needs a rebuild."""
    unbuilt = SearchIndex(cache_dir=temp_cache_dir)
    assert unbuilt.needs_rebuild(1000.0)


def test_needs_rebuild_stale(index):
    """A Zotero mtime newer than the build mtime triggers a rebuild."""
    # The fixture built the index with mtime=1000.0, so 2000.0 is newer.
    newer_mtime = 2000.0
    assert index.needs_rebuild(newer_mtime)


def test_needs_rebuild_current(index):
    """An equal or older Zotero mtime does not trigger a rebuild."""
    # The fixture built the index with mtime=1000.0: first the same
    # timestamp, then an older one.
    for zotero_mtime in (1000.0, 500.0):
        assert not index.needs_rebuild(zotero_mtime)


def test_is_empty(temp_cache_dir):
    """``is_empty`` is true for an index nothing was built into."""
    fresh = SearchIndex(cache_dir=temp_cache_dir)
    assert fresh.is_empty()


def test_is_not_empty(index):
    """``is_empty`` is false once the sample papers have been indexed."""
    empty = index.is_empty()
    assert not empty


# Integration tests with real Zotero (skipped if not available).
# The location comes from the ZOTERO_DIR environment variable and falls back
# to ~/Zotero. An empty value is treated as unset: Path("") is the current
# directory, which always exists and would defeat the skip check. A leading
# "~" in the variable is expanded so values like "~/Zotero" work.
ZOTERO_DIR = Path(
    os.environ.get("ZOTERO_DIR") or Path.home() / "Zotero"
).expanduser()


@pytest.fixture
def reader():
    """Return a ``ZoteroReader`` for ``ZOTERO_DIR``, or skip the test.

    The requesting test is skipped when ``ZOTERO_DIR`` does not exist.
    """
    if ZOTERO_DIR.exists():
        return ZoteroReader(ZOTERO_DIR)
    # No local Zotero data to read from; pytest.skip raises to abort the test.
    pytest.skip("Zotero directory not found")


def test_get_mtime(reader):
    """``get_mtime`` yields a positive float."""
    stamp = reader.get_mtime()
    assert isinstance(stamp, float)
    assert stamp > 0


def test_get_all_papers(reader):
    """``get_all_papers`` hands back a list."""
    library = reader.get_all_papers()
    # Only the container type is checked. The intent is that this call
    # returns more than the default limit of list_papers (50), but that is
    # not asserted because the size of the real library is unknown.
    assert isinstance(library, list)


def test_integration_build_and_search(reader, temp_cache_dir):
    """Test building index from real Zotero and searching.

    Skipped when the Zotero library contains no papers. The index is built
    in a temporary cache directory from every paper the reader returns.
    """
    papers = reader.get_all_papers()
    if not papers:
        pytest.skip("No papers in Zotero library")

    idx = SearchIndex(cache_dir=temp_cache_dir)
    idx.build(papers, reader.get_mtime())

    # Search for a term that should exist
    results = idx.search("the", limit=5)
    assert isinstance(results, list)
    # limit=5 is expected to cap the number of returned IDs.
    assert len(results) <= 5

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/phelps-matthew/principia-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_search.py•5.17 KiB

"""Tests for FTS5 SearchIndex."""

import os
import tempfile
from pathlib import Path

import pytest

from principia_mcp.models import Paper
from principia_mcp.search import SearchIndex
from principia_mcp.zotero import ZoteroReader


@pytest.fixture
def temp_cache_dir():
    """Yield a throwaway directory, as a ``Path``, for the search index cache.

    The directory is deleted again once the requesting test has finished.
    """
    scratch = tempfile.TemporaryDirectory()
    try:
        yield Path(scratch.name)
    finally:
        # Mirrors what leaving a ``with`` block would do.
        scratch.cleanup()


@pytest.fixture
def sample_papers():
    """Return three hand-written ``Paper`` records used as index input.

    Papers 1 and 3 share the "AI Research" collection; paper 2 (quantum
    computing) sits in unrelated collections, which the collection-filter
    tests depend on.
    """
    nlp_paper = Paper(
        id=1,
        key="ABC123",
        title="Machine Learning for Natural Language Processing",
        authors=["Alice Smith", "Bob Jones"],
        abstract="This paper explores deep learning techniques for NLP tasks.",
        collections=["AI Research", "NLP"],
    )
    quantum_paper = Paper(
        id=2,
        key="DEF456",
        title="Quantum Computing Fundamentals",
        authors=["Carol White"],
        abstract="An introduction to quantum algorithms and quantum supremacy.",
        collections=["Physics", "Computing"],
    )
    vision_paper = Paper(
        id=3,
        key="GHI789",
        title="Neural Networks in Computer Vision",
        authors=["David Brown", "Eve Green"],
        abstract="Convolutional neural networks for image classification.",
        collections=["AI Research", "Computer Vision"],
    )
    return [nlp_paper, quantum_paper, vision_paper]


@pytest.fixture
def index(temp_cache_dir, sample_papers):
    """Return a ``SearchIndex`` already built from the sample papers.

    The index is built with ``zotero_mtime=1000.0``; the ``needs_rebuild``
    tests compare against that value.
    """
    populated = SearchIndex(cache_dir=temp_cache_dir)
    populated.build(sample_papers, zotero_mtime=1000.0)
    return populated


def test_index_build(temp_cache_dir, sample_papers):
    """A fresh index is empty and stops being empty after ``build``."""
    search_index = SearchIndex(cache_dir=temp_cache_dir)
    # Nothing has been indexed yet.
    assert search_index.is_empty()

    search_index.build(sample_papers, zotero_mtime=1000.0)
    # Building from the sample papers must populate the index.
    assert not search_index.is_empty()


def test_search_returns_results(index):
    """A query matching a paper yields that paper's ID."""
    hits = index.search("machine learning")
    # Paper 1 is the machine-learning paper in the sample set.
    assert 1 in hits


def test_search_ranking(index):
    """Test that the most relevant result comes first.

    Paper 3 is the only sample paper whose title and abstract both contain
    "neural networks", so it must head the result list.
    """
    results = index.search("neural networks")
    assert len(results) >= 1
    # A membership check would pass for any ordering; pinning the top hit
    # makes the test actually exercise ranking.
    assert results[0] == 3


def test_search_no_results(index):
    """A query that matches nothing yields an empty list."""
    # The token below appears in none of the sample papers.
    assert index.search("xyznonexistent123") == []


def test_collection_filtering(index):
    """Restricting a search to a collection keeps only that collection's papers."""
    # Limit the query to the "AI Research" collection.
    matches = index.search("neural", collection="AI Research")
    # Paper 3 (neural networks) belongs to "AI Research".
    assert 3 in matches
    # Paper 2 (quantum) does not belong to "AI Research".
    assert 2 not in matches


def test_collection_filtering_no_match(index):
    """A collection filter that excludes every matching paper yields ``[]``."""
    # The only quantum paper is outside "AI Research", so nothing survives.
    filtered = index.search("quantum", collection="AI Research")
    assert filtered == []


def test_needs_rebuild_new_index(temp_cache_dir):
    """An index that has never been built reports that it needs a rebuild."""
    unbuilt = SearchIndex(cache_dir=temp_cache_dir)
    assert unbuilt.needs_rebuild(1000.0)


def test_needs_rebuild_stale(index):
    """A Zotero mtime newer than the build mtime triggers a rebuild."""
    # The fixture built the index with mtime=1000.0, so 2000.0 is newer.
    newer_mtime = 2000.0
    assert index.needs_rebuild(newer_mtime)


def test_needs_rebuild_current(index):
    """An equal or older Zotero mtime does not trigger a rebuild."""
    # The fixture built the index with mtime=1000.0: first the same
    # timestamp, then an older one.
    for zotero_mtime in (1000.0, 500.0):
        assert not index.needs_rebuild(zotero_mtime)


def test_is_empty(temp_cache_dir):
    """``is_empty`` is true for an index nothing was built into."""
    fresh = SearchIndex(cache_dir=temp_cache_dir)
    assert fresh.is_empty()


def test_is_not_empty(index):
    """``is_empty`` is false once the sample papers have been indexed."""
    empty = index.is_empty()
    assert not empty


# Integration tests with real Zotero (skipped if not available).
# The location comes from the ZOTERO_DIR environment variable and falls back
# to ~/Zotero. An empty value is treated as unset: Path("") is the current
# directory, which always exists and would defeat the skip check. A leading
# "~" in the variable is expanded so values like "~/Zotero" work.
ZOTERO_DIR = Path(
    os.environ.get("ZOTERO_DIR") or Path.home() / "Zotero"
).expanduser()


@pytest.fixture
def reader():
    """Return a ``ZoteroReader`` for ``ZOTERO_DIR``, or skip the test.

    The requesting test is skipped when ``ZOTERO_DIR`` does not exist.
    """
    if ZOTERO_DIR.exists():
        return ZoteroReader(ZOTERO_DIR)
    # No local Zotero data to read from; pytest.skip raises to abort the test.
    pytest.skip("Zotero directory not found")


def test_get_mtime(reader):
    """``get_mtime`` yields a positive float."""
    stamp = reader.get_mtime()
    assert isinstance(stamp, float)
    assert stamp > 0


def test_get_all_papers(reader):
    """``get_all_papers`` hands back a list."""
    library = reader.get_all_papers()
    # Only the container type is checked. The intent is that this call
    # returns more than the default limit of list_papers (50), but that is
    # not asserted because the size of the real library is unknown.
    assert isinstance(library, list)


def test_integration_build_and_search(reader, temp_cache_dir):
    """Test building index from real Zotero and searching.

    Skipped when the Zotero library contains no papers. The index is built
    in a temporary cache directory from every paper the reader returns.
    """
    papers = reader.get_all_papers()
    if not papers:
        pytest.skip("No papers in Zotero library")

    idx = SearchIndex(cache_dir=temp_cache_dir)
    idx.build(papers, reader.get_mtime())

    # Search for a term that should exist
    results = idx.search("the", limit=5)
    assert isinstance(results, list)
    # limit=5 is expected to cap the number of returned IDs.
    assert len(results) <= 5