We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/doobidoo/mcp-memory-service'
If you have feedback or need assistance with the MCP directory API, please join our Discord server
"""
Integration tests for lightweight ONNX quality scoring without transformers.
Tests the complete workflow of:
1. ONNX model loading with tokenizers package only
2. Quality scoring with both classifier and cross-encoder models
3. Auto quality scoring integration in memory service
4. Fallback behavior when dependencies unavailable
Author: Generated for PR #337
"""
import pytest
import asyncio
import numpy as np
from pathlib import Path
from unittest.mock import Mock, AsyncMock, patch
# Skip all tests if ONNX Runtime not available
try:
import onnxruntime as ort
ONNX_AVAILABLE = True
except ImportError:
ONNX_AVAILABLE = False
# Check tokenizers availability
try:
from tokenizers import Tokenizer
TOKENIZERS_AVAILABLE = True
except ImportError:
TOKENIZERS_AVAILABLE = False
@pytest.mark.skipif(not ONNX_AVAILABLE, reason="Requires ONNX Runtime")
class TestLightweightONNXSetup:
"""Test ONNX quality scoring without transformers dependency."""
MODEL_NAME = "nvidia-quality-classifier-deberta"
MODEL_PATH = Path.home() / ".cache" / "mcp_memory" / "onnx_models" / MODEL_NAME
@pytest.fixture
def mock_onnx_model(self):
"""Mock ONNX model for testing without actual inference."""
mock_model = Mock()
# Mock classifier output (3 classes: high, medium, low)
mock_model.run.return_value = [np.array([[2.0, 0.5, -1.0]])] # High quality
mock_model.get_providers.return_value = ['CPUExecutionProvider']
return mock_model
@pytest.fixture
def mock_tokenizer(self):
"""Mock tokenizers package tokenizer."""
mock_tok = Mock()
mock_encoding = Mock()
mock_encoding.ids = [101] + [1234] * 50 + [102] # CLS + tokens + SEP
mock_encoding.attention_mask = [1] * 52
mock_encoding.type_ids = [0] * 52
mock_tok.encode.return_value = mock_encoding
mock_tok.enable_truncation = Mock()
mock_tok.enable_padding = Mock()
return mock_tok
def test_onnx_model_exists(self):
"""Verify ONNX model files exist in cache."""
onnx_path = self.MODEL_PATH / "model.onnx"
tokenizer_json = self.MODEL_PATH / "tokenizer.json"
# If model doesn't exist, skip test (not an error, just not downloaded yet)
if not onnx_path.exists():
pytest.skip(f"ONNX model not downloaded yet: {onnx_path}")
assert onnx_path.exists(), "model.onnx should exist"
assert onnx_path.stat().st_size > 0, "model.onnx should not be empty"
# tokenizer.json is required for lightweight setup
if not tokenizer_json.exists():
pytest.skip(f"tokenizer.json not found, transformers fallback will be used")
assert tokenizer_json.exists(), "tokenizer.json should exist for lightweight setup"
@pytest.mark.skipif(not TOKENIZERS_AVAILABLE, reason="Requires tokenizers package")
def test_tokenizers_package_loading(self):
"""Test loading tokenizer using tokenizers package (not transformers)."""
tokenizer_json = self.MODEL_PATH / "tokenizer.json"
if not tokenizer_json.exists():
pytest.skip(f"tokenizer.json not found at {tokenizer_json}")
# Load tokenizer using tokenizers package
tokenizer = Tokenizer.from_file(str(tokenizer_json))
# Test single text encoding
text = "This is a high quality memory."
encoded = tokenizer.encode(text)
assert hasattr(encoded, 'ids'), "Should have ids attribute"
assert hasattr(encoded, 'attention_mask'), "Should have attention_mask attribute"
assert len(encoded.ids) > 0, "Should produce token IDs"
assert len(encoded.ids) == len(encoded.attention_mask), "IDs and mask should match length"
@pytest.mark.skipif(not TOKENIZERS_AVAILABLE, reason="Requires tokenizers package")
def test_tokenizers_pair_encoding(self):
"""Test text pair encoding for cross-encoder models."""
tokenizer_json = self.MODEL_PATH / "tokenizer.json"
if not tokenizer_json.exists():
pytest.skip(f"tokenizer.json not found at {tokenizer_json}")
tokenizer = Tokenizer.from_file(str(tokenizer_json))
tokenizer.enable_truncation(max_length=512)
tokenizer.enable_padding(length=512)
# Encode query-document pair
query = "python async patterns"
document = "Async/await enables concurrent I/O operations."
encoded = tokenizer.encode((query, document))
assert hasattr(encoded, 'type_ids'), "Should have type_ids for pairs"
assert len(encoded.ids) == 512, "Should pad/truncate to 512 tokens"
assert len(encoded.attention_mask) == 512, "Attention mask should be 512"
assert len(encoded.type_ids) == 512, "Type IDs should be 512"
# Verify token type IDs separate query (0) from document (1)
assert 0 in encoded.type_ids, "Should have query tokens (type 0)"
assert 1 in encoded.type_ids, "Should have document tokens (type 1)"
@pytest.mark.xfail(reason="Needs refactoring: tests mock internal implementation details that changed during refactoring. Should test behavior, not implementation.")
@patch('mcp_memory_service.quality.onnx_ranker.ort.InferenceSession')
@patch('tokenizers.Tokenizer')
def test_onnx_ranker_initialization_without_transformers(
self, mock_tokenizer_class, mock_inference_session, mock_tokenizer, mock_onnx_model
):
"""Test ONNXRankerModel initializes correctly without transformers."""
from mcp_memory_service.quality.onnx_ranker import ONNXRankerModel
# Setup mocks
mock_tokenizer_class.from_file.return_value = mock_tokenizer
mock_inference_session.return_value = mock_onnx_model
# Create model path mock
with patch('mcp_memory_service.quality.onnx_ranker.Path') as mock_path:
mock_model_path = Mock()
mock_model_path.exists.return_value = True
mock_onnx_path = Mock()
mock_onnx_path.exists.return_value = True
mock_tokenizer_json = Mock()
mock_tokenizer_json.exists.return_value = True
mock_path.home.return_value = Mock()
mock_path.home.return_value.__truediv__.return_value = mock_model_path
mock_model_path.__truediv__.side_effect = lambda x: {
'model.onnx': mock_onnx_path,
'tokenizer.json': mock_tokenizer_json
}.get(x, Mock())
# Initialize model
with patch('mcp_memory_service.quality.onnx_ranker.TRANSFORMERS_AVAILABLE', False):
ranker = ONNXRankerModel(model_name=self.MODEL_NAME, device="cpu")
# Verify tokenizers package was used (not transformers)
mock_tokenizer_class.from_file.assert_called_once()
assert ranker._use_fast_tokenizer is True, "Should use fast tokenizer"
@pytest.mark.xfail(reason="Needs refactoring: tests mock internal implementation details that changed during refactoring. Should test behavior, not implementation.")
@patch('mcp_memory_service.quality.onnx_ranker.ort.InferenceSession')
@patch('tokenizers.Tokenizer')
def test_quality_scoring_with_classifier(
self, mock_tokenizer_class, mock_inference_session, mock_tokenizer, mock_onnx_model
):
"""Test quality scoring using classifier model (DeBERTa)."""
from mcp_memory_service.quality.onnx_ranker import ONNXRankerModel
# Setup mocks
mock_tokenizer_class.from_file.return_value = mock_tokenizer
mock_inference_session.return_value = mock_onnx_model
with patch('mcp_memory_service.quality.onnx_ranker.Path') as mock_path:
# Mock path setup (same as above)
mock_model_path = Mock()
mock_model_path.exists.return_value = True
mock_onnx_path = Mock()
mock_onnx_path.exists.return_value = True
mock_tokenizer_json = Mock()
mock_tokenizer_json.exists.return_value = True
mock_path.home.return_value = Mock()
mock_path.home.return_value.__truediv__.return_value = mock_model_path
mock_model_path.__truediv__.side_effect = lambda x: {
'model.onnx': mock_onnx_path,
'tokenizer.json': mock_tokenizer_json
}.get(x, Mock())
with patch('mcp_memory_service.quality.onnx_ranker.TRANSFORMERS_AVAILABLE', False):
ranker = ONNXRankerModel(model_name=self.MODEL_NAME, device="cpu")
# Score quality
memory_content = "This is a high quality memory about Python async patterns."
score = ranker.score_quality(query="", memory_content=memory_content)
# Verify score is valid
assert 0.0 <= score <= 1.0, f"Score {score} should be between 0 and 1"
assert score > 0.5, "High quality logits should produce high score"
# Verify tokenizer was called
mock_tokenizer.encode.assert_called_once()
@pytest.mark.xfail(reason="Needs refactoring: tests mock internal implementation details that changed during refactoring. Should test behavior, not implementation.")
@patch('mcp_memory_service.quality.onnx_ranker.ort.InferenceSession')
@patch('tokenizers.Tokenizer')
def test_quality_scoring_with_cross_encoder(
self, mock_tokenizer_class, mock_inference_session, mock_tokenizer
):
"""Test quality scoring using cross-encoder model (MS-MARCO)."""
from mcp_memory_service.quality.onnx_ranker import ONNXRankerModel
# Mock cross-encoder output (binary classification)
mock_model = Mock()
mock_model.run.return_value = [np.array([[1.5]])] # Positive logit = high relevance
mock_model.get_providers.return_value = ['CPUExecutionProvider']
# Setup mocks
mock_tokenizer_class.from_file.return_value = mock_tokenizer
mock_inference_session.return_value = mock_model
with patch('mcp_memory_service.quality.onnx_ranker.Path') as mock_path:
# Mock path setup
mock_model_path = Mock()
mock_model_path.exists.return_value = True
mock_onnx_path = Mock()
mock_onnx_path.exists.return_value = True
mock_tokenizer_json = Mock()
mock_tokenizer_json.exists.return_value = True
mock_path.home.return_value = Mock()
mock_path.home.return_value.__truediv__.return_value = mock_model_path
mock_model_path.__truediv__.side_effect = lambda x: {
'model.onnx': mock_onnx_path,
'tokenizer.json': mock_tokenizer_json
}.get(x, Mock())
# Override model config to cross-encoder
with patch('mcp_memory_service.quality.onnx_ranker.validate_model_selection') as mock_validate:
mock_validate.return_value = {
'name': 'ms-marco-cross-encoder',
'type': 'cross-encoder',
'repo': 'cross-encoder/ms-marco-MiniLM-L-6-v2',
'onnx_file': 'model.onnx'
}
with patch('mcp_memory_service.quality.onnx_ranker.TRANSFORMERS_AVAILABLE', False):
ranker = ONNXRankerModel(model_name='ms-marco-cross-encoder', device="cpu")
# Score with query
query = "python async patterns"
document = "Async/await enables concurrent I/O operations."
score = ranker.score_quality(query=query, memory_content=document)
# Verify score is valid
assert 0.0 <= score <= 1.0, f"Score {score} should be between 0 and 1"
# Verify pair encoding was used (tokenizer.encode((query, document)))
# The mock should have been called with a tuple
call_args = mock_tokenizer.encode.call_args
assert call_args is not None, "Tokenizer should have been called"
@pytest.mark.xfail(reason="Needs refactoring: mock storage doesn't properly simulate real storage behavior. Rewrite to use actual test storage.")
@pytest.mark.asyncio
async def test_auto_quality_scoring_after_store(self):
"""Test automatic quality scoring is triggered after memory store."""
from mcp_memory_service.services.memory_service import MemoryService
from mcp_memory_service.models.memory import Memory
# Mock storage
mock_storage = AsyncMock()
mock_storage.store.return_value = (True, "Success")
# Mock async scorer
with patch('mcp_memory_service.services.memory_service.async_scorer') as mock_scorer:
mock_scorer.score_memory = AsyncMock()
# Enable quality boost
with patch('mcp_memory_service.services.memory_service.MCP_QUALITY_BOOST_ENABLED', True):
service = MemoryService(storage=mock_storage)
# Store memory
result = await service.store_memory(
content="Test memory content",
tags=["test"],
memory_type="note"
)
# Verify store succeeded
assert result["success"] is True
# Verify async scorer was called
mock_scorer.score_memory.assert_called_once()
call_args = mock_scorer.score_memory.call_args
assert call_args[1]['storage'] == mock_storage, "Should pass storage to scorer"
@pytest.mark.xfail(reason="Needs refactoring: mock storage doesn't properly simulate real storage behavior. Rewrite to use actual test storage.")
@pytest.mark.asyncio
async def test_auto_quality_scoring_after_retrieve(self):
"""Test automatic quality scoring is triggered after memory retrieval."""
from mcp_memory_service.services.memory_service import MemoryService
from mcp_memory_service.models.memory import Memory
from mcp_memory_service.storage.base import SearchResult
# Mock storage with retrieve results
mock_storage = AsyncMock()
mock_memory = Memory(
content="Test memory",
content_hash="abc123",
tags=["test"],
memory_type="note"
)
mock_storage.retrieve.return_value = [
SearchResult(memory=mock_memory, relevance_score=0.9)
]
# Mock async scorer
with patch('mcp_memory_service.services.memory_service.async_scorer') as mock_scorer:
mock_scorer.score_memory = AsyncMock()
# Enable quality boost
with patch('mcp_memory_service.services.memory_service.MCP_QUALITY_BOOST_ENABLED', True):
service = MemoryService(storage=mock_storage)
# Retrieve memories
result = await service.retrieve_memories(
query="test query",
n_results=5
)
# Verify retrieve succeeded
assert len(result["memories"]) == 1
# Verify async scorer was called
mock_scorer.score_memory.assert_called_once()
call_args = mock_scorer.score_memory.call_args
assert call_args[0][0] == mock_memory, "Should score retrieved memory"
assert call_args[1]['query'] == "test query", "Should pass query to scorer"
@pytest.mark.xfail(reason="Needs refactoring: mock storage doesn't properly simulate real storage behavior. Rewrite to use actual test storage.")
@pytest.mark.asyncio
async def test_quality_scoring_silent_failure(self):
"""Test quality scoring failures don't break memory operations."""
from mcp_memory_service.services.memory_service import MemoryService
# Mock storage
mock_storage = AsyncMock()
mock_storage.store.return_value = (True, "Success")
# Mock async scorer to raise exception
with patch('mcp_memory_service.services.memory_service.async_scorer') as mock_scorer:
mock_scorer.score_memory = AsyncMock(side_effect=Exception("Scorer failed"))
# Enable quality boost
with patch('mcp_memory_service.services.memory_service.MCP_QUALITY_BOOST_ENABLED', True):
service = MemoryService(storage=mock_storage)
# Store memory should still succeed even if scoring fails
result = await service.store_memory(
content="Test memory content",
tags=["test"],
memory_type="note"
)
# Verify store succeeded despite scorer failure
assert result["success"] is True, "Store should succeed even if quality scoring fails"
@pytest.mark.xfail(reason="Needs refactoring: tests mock internal implementation details that changed during refactoring. Should test behavior, not implementation.")
def test_fallback_to_transformers(self):
"""Test graceful fallback to transformers when tokenizers unavailable."""
from mcp_memory_service.quality.onnx_ranker import ONNXRankerModel
# Mock tokenizers import failure
with patch('mcp_memory_service.quality.onnx_ranker.TOKENIZERS_AVAILABLE', False):
with patch('mcp_memory_service.quality.onnx_ranker.TRANSFORMERS_AVAILABLE', True):
with patch('mcp_memory_service.quality.onnx_ranker.AutoTokenizer') as mock_auto_tokenizer:
with patch('mcp_memory_service.quality.onnx_ranker.ort.InferenceSession'):
with patch('mcp_memory_service.quality.onnx_ranker.Path') as mock_path:
# Mock path setup
mock_model_path = Mock()
mock_model_path.exists.return_value = True
mock_onnx_path = Mock()
mock_onnx_path.exists.return_value = True
mock_tokenizer_json = Mock()
mock_tokenizer_json.exists.return_value = False # No tokenizer.json
mock_path.home.return_value = Mock()
mock_path.home.return_value.__truediv__.return_value = mock_model_path
mock_model_path.__truediv__.side_effect = lambda x: {
'model.onnx': mock_onnx_path,
'tokenizer.json': mock_tokenizer_json
}.get(x, Mock())
ranker = ONNXRankerModel(model_name=self.MODEL_NAME, device="cpu")
# Verify transformers was used instead of tokenizers
mock_auto_tokenizer.from_pretrained.assert_called_once()
assert ranker._use_fast_tokenizer is False, "Should use slow tokenizer"
@pytest.mark.xfail(reason="Needs refactoring: tests mock internal implementation details that changed during refactoring. Should test behavior, not implementation.")
def test_error_on_missing_dependencies(self):
"""Test error when neither tokenizers nor transformers available."""
from mcp_memory_service.quality.onnx_ranker import ONNXRankerModel
# Mock both packages unavailable
with patch('mcp_memory_service.quality.onnx_ranker.TOKENIZERS_AVAILABLE', False):
with patch('mcp_memory_service.quality.onnx_ranker.TRANSFORMERS_AVAILABLE', False):
with patch('mcp_memory_service.quality.onnx_ranker.Path') as mock_path:
# Mock ONNX model already exists
mock_model_path = Mock()
mock_model_path.exists.return_value = True
mock_onnx_path = Mock()
mock_onnx_path.exists.return_value = True
mock_tokenizer_json = Mock()
mock_tokenizer_json.exists.return_value = False
mock_path.home.return_value = Mock()
mock_path.home.return_value.__truediv__.return_value = mock_model_path
mock_model_path.__truediv__.side_effect = lambda x: {
'model.onnx': mock_onnx_path,
'tokenizer.json': mock_tokenizer_json
}.get(x, Mock())
# Should raise ImportError
with pytest.raises(ImportError, match="Neither tokenizers nor transformers available"):
with patch('mcp_memory_service.quality.onnx_ranker.ort.InferenceSession'):
ranker = ONNXRankerModel(model_name=self.MODEL_NAME, device="cpu")
@pytest.mark.integration
class TestLightweightONNXEndToEnd:
"""End-to-end integration tests requiring actual model files."""
@pytest.mark.xfail(reason="Integration test requires ONNX models to be downloaded. Models may not be available in all test environments.")
@pytest.mark.skipif(
not ONNX_AVAILABLE or not TOKENIZERS_AVAILABLE,
reason="Requires ONNX Runtime and tokenizers package"
)
def test_real_onnx_inference(self):
"""Test actual ONNX inference with real model (if available)."""
from mcp_memory_service.quality.onnx_ranker import get_onnx_ranker_model
# Try to load real model
ranker = get_onnx_ranker_model(device="cpu")
if ranker is None:
pytest.skip("ONNX model not available (not downloaded yet)")
# Run real inference
memory_content = (
"This is a detailed explanation of Python's asyncio library. "
"It covers event loops, coroutines, tasks, and futures. "
"Best practices for async/await patterns are included."
)
score = ranker.score_quality(query="", memory_content=memory_content)
# Verify valid score
assert 0.0 <= score <= 1.0, f"Score {score} should be between 0 and 1"
assert score > 0.3, "Detailed content should score reasonably well"
@pytest.mark.skipif(
not ONNX_AVAILABLE or not TOKENIZERS_AVAILABLE,
reason="Requires ONNX Runtime and tokenizers package"
)
def test_disk_usage_reduction(self):
"""Verify lightweight setup doesn't require transformers installation."""
import sys
# Check if transformers is installed
transformers_installed = 'transformers' in sys.modules or \
any('transformers' in str(p) for p in sys.path)
# If transformers is installed, this test can't verify lightweight setup
if transformers_installed:
pytest.skip("Transformers already installed, can't verify lightweight setup")
# Try to use ONNX ranker without transformers
from mcp_memory_service.quality.onnx_ranker import get_onnx_ranker_model
ranker = get_onnx_ranker_model(device="cpu")
if ranker is None:
pytest.skip("ONNX model not available")
# Should work without transformers
assert ranker._use_fast_tokenizer is True, "Should use tokenizers package"
assert ranker._tokenizer is not None, "Should have loaded tokenizer"
if __name__ == "__main__":
# Run tests
pytest.main([__file__, "-v", "--tb=short"])