Simplenote MCP Server

test_search_edge_cases.py•9.85 KiB

"""Tests for edge cases in the advanced search functionality."""

from datetime import datetime, timedelta

import pytest

from simplenote_mcp.server.search.engine import SearchEngine
from simplenote_mcp.server.search.parser import QueryParser, TokenType


class TestQueryParserEdgeCases:
    """Edge-case coverage for how QueryParser turns query strings into tokens."""

    def test_empty_query(self) -> None:
        """An empty or whitespace-only query produces no tokens."""
        for blank_query in ("", "   "):
            assert len(QueryParser(blank_query).tokens) == 0

    def test_basic_query_structure(self) -> None:
        """A flat boolean query yields its operator tokens and all of its terms."""
        # Only a flat expression is exercised: grouping with parentheses
        # might not be fully implemented yet.
        token_list = QueryParser("term1 AND term2 OR term3").tokens
        kinds = [tok.type for tok in token_list]

        # Both boolean operators must be recognised as operators.
        assert TokenType.AND in kinds
        assert TokenType.OR in kinds

        # Every operand must come through as a TERM token.
        term_values = [tok.value for tok in token_list if tok.type == TokenType.TERM]
        for expected_term in ("term1", "term2", "term3"):
            assert expected_term in term_values

    def test_alternative_operators(self) -> None:
        """Lowercase spellings of AND / NOT are accepted alongside uppercase OR."""
        mixed_kinds = [
            tok.type for tok in QueryParser("term1 and term2 OR term3").tokens
        ]
        assert TokenType.AND in mixed_kinds
        assert TokenType.OR in mixed_kinds

        # The NOT operator is checked with a separate query.
        negated_kinds = [tok.type for tok in QueryParser("term1 not term2").tokens]
        assert TokenType.NOT in negated_kinds

    def test_special_characters(self) -> None:
        """Dashes, underscores and plus signs stay inside a single term."""
        query = "term-with-dashes term_with_underscores term+with+plus"
        term_values = [
            tok.value for tok in QueryParser(query).tokens if tok.type == TokenType.TERM
        ]

        # Each whitespace-separated word is expected back verbatim as one term.
        for expected_term in query.split(" "):
            assert expected_term in term_values

    def test_case_insensitivity(self) -> None:
        """Operators are recognised in lower, upper and mixed case."""
        boolean_kinds = (TokenType.AND, TokenType.OR, TokenType.NOT)
        parsed = QueryParser("term1 and term2 OR term3 NoT term4")

        # Keep only the operator tokens, in the order they were produced.
        found_kinds = [tok.type for tok in parsed.tokens if tok.type in boolean_kinds]

        # "and", "OR" and "NoT" must each have been mapped to its operator.
        for kind in boolean_kinds:
            assert kind in found_kinds


class TestSearchEngineEdgeCases:
    """Edge-case coverage for SearchEngine.search over a small note collection."""

    @pytest.fixture
    def edge_case_notes(self) -> dict[str, dict[str, str | list[str] | datetime]]:
        """Build the notes used by these tests, indexed by each note's key.

        The collection covers empty content, punctuation-heavy content, a
        heavily repeated term, mixed-case content and tags, non-Latin text,
        an unparseable ``modifydate`` and a note with no ``modifydate`` at all.
        Dates are stored as ISO-8601 strings relative to the current time.
        """
        reference = datetime.now()
        today_iso = reference.isoformat()
        yesterday_iso = (reference - timedelta(days=1)).isoformat()
        last_week_iso = (reference - timedelta(days=7)).isoformat()

        notes = [
            {"key": "empty", "content": "", "tags": [], "modifydate": today_iso},
            {
                "key": "special_chars",
                "content": "Contains special chars: !@#$%^&*()_-+={}[]|\\:;\"'<>,.?/",
                "tags": ["special"],
                "modifydate": yesterday_iso,
            },
            {
                "key": "duplicate_terms",
                "content": "test test test test this has the term test repeated many times test test",
                "tags": ["test"],
                "modifydate": last_week_iso,
            },
            {
                "key": "mixed_case",
                "content": "This Has Mixed CASE content Test tEsT TEST test",
                "tags": ["TEST"],
                "modifydate": today_iso,
            },
            {
                "key": "unicode",
                "content": "Contains unicode: 你好 привет こんにちは مرحبا",
                "tags": ["unicode"],
                "modifydate": yesterday_iso,
            },
            {
                "key": "invalid_date",
                "content": "This note has an invalid date format",
                "tags": [],
                "modifydate": "not-a-date",
            },
            # Deliberately carries no "modifydate" entry.
            {"key": "no_date", "content": "This note has no date", "tags": []},
        ]
        return {note["key"]: note for note in notes}

    def test_empty_query(self, edge_case_notes: dict) -> None:
        """An empty query returns results only when a filter is supplied."""
        engine = SearchEngine()

        # Tag filter alone: the lowercase filter is expected to hit both the
        # "test" and the "TEST" tag.
        tagged = engine.search(edge_case_notes, "", tag_filters=["test"])
        assert len(tagged) == 2
        tagged_keys = [hit["key"] for hit in tagged]
        assert "duplicate_terms" in tagged_keys
        assert "mixed_case" in tagged_keys

        # No query text and no filters: nothing comes back.
        unfiltered = engine.search(edge_case_notes, "")
        assert len(unfiltered) == 0

        # Date filter alone, over the last 24 hours. At minimum the two notes
        # stamped with the fixture's "now" ("empty" and "mixed_case") are
        # expected inside the window.
        until = datetime.now()
        since = until - timedelta(days=1)
        dated = engine.search(edge_case_notes, "", date_range=(since, until))
        assert len(dated) >= 2

    def test_complex_boolean_expression(self, edge_case_notes: dict) -> None:
        """Single-term searches and a NOT expression return the expected notes."""
        engine = SearchEngine()

        # Plain single-term searches first.
        test_hits = engine.search(edge_case_notes, "test")
        assert len(test_hits) >= 2  # duplicate_terms and mixed_case

        unicode_hits = engine.search(edge_case_notes, "unicode")
        assert len(unicode_hits) >= 1  # the unicode note

        # NOT operator: duplicate_terms must survive the exclusion.
        # NOTE(review): the intent is that mixed_case is excluded, but that
        # exclusion is not asserted here.
        negated_hits = engine.search(edge_case_notes, "test NOT mixed")
        assert len(negated_hits) >= 1
        assert "duplicate_terms" in [hit["key"] for hit in negated_hits]

    def test_case_insensitivity(self, edge_case_notes: dict) -> None:
        """The same term in lower and upper case finds the same number of notes."""
        engine = SearchEngine()

        # Both spellings should match duplicate_terms and mixed_case.
        for query in ("test", "TEST"):
            assert len(engine.search(edge_case_notes, query)) == 2

    def test_basic_relevance_scoring(self, edge_case_notes: dict) -> None:
        """Expected notes are present in results; ranking order is not checked."""
        engine = SearchEngine()

        baseline = engine.search(edge_case_notes, "test")
        assert len(baseline) >= 2

        # Only presence is verified, because scoring details might change.
        baseline_keys = [hit["key"] for hit in baseline]
        assert "duplicate_terms" in baseline_keys
        assert "mixed_case" in baseline_keys

        # Extend a shallow copy of the fixture with a note whose first line
        # contains the search term.
        extended_notes = {
            **edge_case_notes,
            "title_match": {
                "key": "title_match",
                "content": "Test in the title line\nThis is the rest of the content with no other test word",
                "tags": [],
                "modifydate": datetime.now().isoformat(),
            },
        }

        # The new note must be part of the results.
        extended_keys = [hit["key"] for hit in engine.search(extended_notes, "test")]
        assert "title_match" in extended_keys

    def test_date_parsing_edge_cases(self, edge_case_notes: dict) -> None:
        """Notes with a bad or missing date do not break date-filtered search."""
        engine = SearchEngine()

        # Mainly a crash check: filter over the last 24 hours.
        until = datetime.now()
        since = until - timedelta(days=1)

        # Neither the unparseable-date note nor the dateless note may appear.
        dated_keys = [
            hit["key"]
            for hit in engine.search(edge_case_notes, "", date_range=(since, until))
        ]
        assert "invalid_date" not in dated_keys
        assert "no_date" not in dated_keys

        # A content search must still be able to find such a note.
        content_hits = engine.search(edge_case_notes, "invalid")
        assert len(content_hits) == 1
        assert content_hits[0]["key"] == "invalid_date"

    def test_unicode_content(self, edge_case_notes: dict) -> None:
        """Both an ASCII word and a Cyrillic word locate the unicode note."""
        engine = SearchEngine()

        # First the plain word "unicode", then actual non-Latin text.
        for query in ("unicode", "привет"):
            hits = engine.search(edge_case_notes, query)
            assert len(hits) == 1
            assert hits[0]["key"] == "unicode"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/docdyhr/simplenote-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_search_edge_cases.py•9.85 KiB

"""Tests for edge cases in the advanced search functionality."""

from datetime import datetime, timedelta

import pytest

from simplenote_mcp.server.search.engine import SearchEngine
from simplenote_mcp.server.search.parser import QueryParser, TokenType


class TestQueryParserEdgeCases:
    """Edge-case coverage for how QueryParser turns query strings into tokens."""

    def test_empty_query(self) -> None:
        """An empty or whitespace-only query produces no tokens."""
        for blank_query in ("", "   "):
            assert len(QueryParser(blank_query).tokens) == 0

    def test_basic_query_structure(self) -> None:
        """A flat boolean query yields its operator tokens and all of its terms."""
        # Only a flat expression is exercised: grouping with parentheses
        # might not be fully implemented yet.
        token_list = QueryParser("term1 AND term2 OR term3").tokens
        kinds = [tok.type for tok in token_list]

        # Both boolean operators must be recognised as operators.
        assert TokenType.AND in kinds
        assert TokenType.OR in kinds

        # Every operand must come through as a TERM token.
        term_values = [tok.value for tok in token_list if tok.type == TokenType.TERM]
        for expected_term in ("term1", "term2", "term3"):
            assert expected_term in term_values

    def test_alternative_operators(self) -> None:
        """Lowercase spellings of AND / NOT are accepted alongside uppercase OR."""
        mixed_kinds = [
            tok.type for tok in QueryParser("term1 and term2 OR term3").tokens
        ]
        assert TokenType.AND in mixed_kinds
        assert TokenType.OR in mixed_kinds

        # The NOT operator is checked with a separate query.
        negated_kinds = [tok.type for tok in QueryParser("term1 not term2").tokens]
        assert TokenType.NOT in negated_kinds

    def test_special_characters(self) -> None:
        """Dashes, underscores and plus signs stay inside a single term."""
        query = "term-with-dashes term_with_underscores term+with+plus"
        term_values = [
            tok.value for tok in QueryParser(query).tokens if tok.type == TokenType.TERM
        ]

        # Each whitespace-separated word is expected back verbatim as one term.
        for expected_term in query.split(" "):
            assert expected_term in term_values

    def test_case_insensitivity(self) -> None:
        """Operators are recognised in lower, upper and mixed case."""
        boolean_kinds = (TokenType.AND, TokenType.OR, TokenType.NOT)
        parsed = QueryParser("term1 and term2 OR term3 NoT term4")

        # Keep only the operator tokens, in the order they were produced.
        found_kinds = [tok.type for tok in parsed.tokens if tok.type in boolean_kinds]

        # "and", "OR" and "NoT" must each have been mapped to its operator.
        for kind in boolean_kinds:
            assert kind in found_kinds


class TestSearchEngineEdgeCases:
    """Edge-case coverage for SearchEngine.search over a small note collection."""

    @pytest.fixture
    def edge_case_notes(self) -> dict[str, dict[str, str | list[str] | datetime]]:
        """Build the notes used by these tests, indexed by each note's key.

        The collection covers empty content, punctuation-heavy content, a
        heavily repeated term, mixed-case content and tags, non-Latin text,
        an unparseable ``modifydate`` and a note with no ``modifydate`` at all.
        Dates are stored as ISO-8601 strings relative to the current time.
        """
        reference = datetime.now()
        today_iso = reference.isoformat()
        yesterday_iso = (reference - timedelta(days=1)).isoformat()
        last_week_iso = (reference - timedelta(days=7)).isoformat()

        notes = [
            {"key": "empty", "content": "", "tags": [], "modifydate": today_iso},
            {
                "key": "special_chars",
                "content": "Contains special chars: !@#$%^&*()_-+={}[]|\\:;\"'<>,.?/",
                "tags": ["special"],
                "modifydate": yesterday_iso,
            },
            {
                "key": "duplicate_terms",
                "content": "test test test test this has the term test repeated many times test test",
                "tags": ["test"],
                "modifydate": last_week_iso,
            },
            {
                "key": "mixed_case",
                "content": "This Has Mixed CASE content Test tEsT TEST test",
                "tags": ["TEST"],
                "modifydate": today_iso,
            },
            {
                "key": "unicode",
                "content": "Contains unicode: 你好 привет こんにちは مرحبا",
                "tags": ["unicode"],
                "modifydate": yesterday_iso,
            },
            {
                "key": "invalid_date",
                "content": "This note has an invalid date format",
                "tags": [],
                "modifydate": "not-a-date",
            },
            # Deliberately carries no "modifydate" entry.
            {"key": "no_date", "content": "This note has no date", "tags": []},
        ]
        return {note["key"]: note for note in notes}

    def test_empty_query(self, edge_case_notes: dict) -> None:
        """An empty query returns results only when a filter is supplied."""
        engine = SearchEngine()

        # Tag filter alone: the lowercase filter is expected to hit both the
        # "test" and the "TEST" tag.
        tagged = engine.search(edge_case_notes, "", tag_filters=["test"])
        assert len(tagged) == 2
        tagged_keys = [hit["key"] for hit in tagged]
        assert "duplicate_terms" in tagged_keys
        assert "mixed_case" in tagged_keys

        # No query text and no filters: nothing comes back.
        unfiltered = engine.search(edge_case_notes, "")
        assert len(unfiltered) == 0

        # Date filter alone, over the last 24 hours. At minimum the two notes
        # stamped with the fixture's "now" ("empty" and "mixed_case") are
        # expected inside the window.
        until = datetime.now()
        since = until - timedelta(days=1)
        dated = engine.search(edge_case_notes, "", date_range=(since, until))
        assert len(dated) >= 2

    def test_complex_boolean_expression(self, edge_case_notes: dict) -> None:
        """Single-term searches and a NOT expression return the expected notes."""
        engine = SearchEngine()

        # Plain single-term searches first.
        test_hits = engine.search(edge_case_notes, "test")
        assert len(test_hits) >= 2  # duplicate_terms and mixed_case

        unicode_hits = engine.search(edge_case_notes, "unicode")
        assert len(unicode_hits) >= 1  # the unicode note

        # NOT operator: duplicate_terms must survive the exclusion.
        # NOTE(review): the intent is that mixed_case is excluded, but that
        # exclusion is not asserted here.
        negated_hits = engine.search(edge_case_notes, "test NOT mixed")
        assert len(negated_hits) >= 1
        assert "duplicate_terms" in [hit["key"] for hit in negated_hits]

    def test_case_insensitivity(self, edge_case_notes: dict) -> None:
        """The same term in lower and upper case finds the same number of notes."""
        engine = SearchEngine()

        # Both spellings should match duplicate_terms and mixed_case.
        for query in ("test", "TEST"):
            assert len(engine.search(edge_case_notes, query)) == 2

    def test_basic_relevance_scoring(self, edge_case_notes: dict) -> None:
        """Expected notes are present in results; ranking order is not checked."""
        engine = SearchEngine()

        baseline = engine.search(edge_case_notes, "test")
        assert len(baseline) >= 2

        # Only presence is verified, because scoring details might change.
        baseline_keys = [hit["key"] for hit in baseline]
        assert "duplicate_terms" in baseline_keys
        assert "mixed_case" in baseline_keys

        # Extend a shallow copy of the fixture with a note whose first line
        # contains the search term.
        extended_notes = {
            **edge_case_notes,
            "title_match": {
                "key": "title_match",
                "content": "Test in the title line\nThis is the rest of the content with no other test word",
                "tags": [],
                "modifydate": datetime.now().isoformat(),
            },
        }

        # The new note must be part of the results.
        extended_keys = [hit["key"] for hit in engine.search(extended_notes, "test")]
        assert "title_match" in extended_keys

    def test_date_parsing_edge_cases(self, edge_case_notes: dict) -> None:
        """Notes with a bad or missing date do not break date-filtered search."""
        engine = SearchEngine()

        # Mainly a crash check: filter over the last 24 hours.
        until = datetime.now()
        since = until - timedelta(days=1)

        # Neither the unparseable-date note nor the dateless note may appear.
        dated_keys = [
            hit["key"]
            for hit in engine.search(edge_case_notes, "", date_range=(since, until))
        ]
        assert "invalid_date" not in dated_keys
        assert "no_date" not in dated_keys

        # A content search must still be able to find such a note.
        content_hits = engine.search(edge_case_notes, "invalid")
        assert len(content_hits) == 1
        assert content_hits[0]["key"] == "invalid_date"

    def test_unicode_content(self, edge_case_notes: dict) -> None:
        """Both an ASCII word and a Cyrillic word locate the unicode note."""
        engine = SearchEngine()

        # First the plain word "unicode", then actual non-Latin text.
        for query in ("unicode", "привет"):
            hits = engine.search(edge_case_notes, query)
            assert len(hits) == 1
            assert hits[0]["key"] == "unicode"