Skip to main content
Glama
test_list_command.py (13.5 kB)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Tests for the `local-faiss list` command.
"""

import json
import os
import tempfile
from pathlib import Path
from unittest.mock import patch, MagicMock

import pytest

from local_faiss_mcp.cli import cmd_list


class MockArgs:
    """Mock args object for testing.

    Exposes only the ``json`` attribute, set from ``json_output``.
    """

    def __init__(self, json_output=False):
        self.json = json_output


def _write_metadata(index_dir, metadata):
    """Serialize ``metadata`` as JSON into ``index_dir / "metadata.json"``.

    Shared by the tests below so the fixture-file setup lives in one place.

    Args:
        index_dir: Directory (a ``Path``) that should receive the file.
        metadata: JSON-serializable object to write.

    Returns:
        The ``Path`` of the written ``metadata.json`` file.
    """
    metadata_path = index_dir / "metadata.json"
    metadata_path.write_text(json.dumps(metadata))
    return metadata_path


@pytest.fixture
def temp_index_dir():
    """Create a temporary directory for test index."""
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir)


@pytest.fixture
def mock_config(temp_index_dir):
    """Mock get_faiss_config to return temp directory."""
    config = {
        'index_dir': str(temp_index_dir),
        'embed_model': 'all-MiniLM-L6-v2',
        'rerank_model': None
    }
    # Patch the name as looked up inside the CLI module for the duration
    # of the test that requested this fixture.
    with patch('local_faiss_mcp.cli.get_faiss_config', return_value=config):
        yield config


class TestListEmptyIndex:
    """Tests for listing an empty index."""

    def test_list_no_metadata_file(self, mock_config, capsys):
        """Test list when no metadata file exists."""
        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "No documents indexed yet" in captured.out

    def test_list_no_metadata_file_json(self, mock_config, capsys):
        """Test list with --json when no metadata file exists."""
        args = MockArgs(json_output=True)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        output = json.loads(captured.out)
        assert output == {"documents": [], "total": 0}

    def test_list_empty_documents_array(self, mock_config, temp_index_dir, capsys):
        """Test list when metadata exists but documents array is empty."""
        _write_metadata(temp_index_dir, {"documents": [], "model": "test"})

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "No documents indexed yet" in captured.out


class TestListWithDocuments:
    """Tests for listing indexed documents."""

    def test_list_single_document(self, mock_config, temp_index_dir, capsys):
        """Test listing a single document."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "Indexed Documents (1 total)" in captured.out
        assert "test.pdf" in captured.out
        assert "Chunks: 1" in captured.out

    def test_list_multiple_documents_grouped(self, mock_config, temp_index_dir, capsys):
        """Test listing multiple documents with chunk grouping."""
        metadata = {
            "documents": [
                {"id": 0, "source": "doc1.pdf", "text": "chunk 1"},
                {"id": 1, "source": "doc1.pdf", "text": "chunk 2"},
                {"id": 2, "source": "doc1.pdf", "text": "chunk 3"},
                {"id": 3, "source": "doc2.md", "text": "chunk 1"},
                {"id": 4, "source": "doc2.md", "text": "chunk 2"},
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "Indexed Documents (2 total)" in captured.out
        assert "doc1.pdf" in captured.out
        assert "Chunks: 3" in captured.out
        assert "doc2.md" in captured.out
        assert "Chunks: 2" in captured.out


class TestListJsonOutput:
    """Tests for JSON output format."""

    def test_list_json_single_document(self, mock_config, temp_index_dir, capsys):
        """Test JSON output with single document."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1"},
                {"id": 1, "source": "test.pdf", "text": "chunk 2"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=True)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        output = json.loads(captured.out)

        assert output["total"] == 1
        assert len(output["documents"]) == 1
        assert output["documents"][0]["source"] == "test.pdf"
        assert output["documents"][0]["chunks"] == 2

    def test_list_json_multiple_documents(self, mock_config, temp_index_dir, capsys):
        """Test JSON output with multiple documents."""
        metadata = {
            "documents": [
                {"id": 0, "source": "a_file.txt", "text": "chunk 1"},
                {"id": 1, "source": "b_file.md", "text": "chunk 1"},
                {"id": 2, "source": "b_file.md", "text": "chunk 2"},
                {"id": 3, "source": "c_file.pdf", "text": "chunk 1"},
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=True)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        output = json.loads(captured.out)

        assert output["total"] == 3

        # Verify alphabetical sorting
        # NOTE(review): the input above is already in alphabetical order, so
        # this assertion would also pass if output merely kept insertion
        # order. Consider shuffling the input once cmd_list's sorting is
        # confirmed.
        sources = [doc["source"] for doc in output["documents"]]
        assert sources == ["a_file.txt", "b_file.md", "c_file.pdf"]

        # Verify chunk counts
        chunks = {doc["source"]: doc["chunks"] for doc in output["documents"]}
        assert chunks["a_file.txt"] == 1
        assert chunks["b_file.md"] == 2
        assert chunks["c_file.pdf"] == 1

    def test_list_json_with_indexed_at(self, mock_config, temp_index_dir, capsys):
        """Test JSON output includes indexed_at field."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1",
                 "indexed_at": "2024-12-08T14:23:00"},
                {"id": 1, "source": "test.pdf", "text": "chunk 2",
                 "indexed_at": "2024-12-08T14:23:00"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=True)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        output = json.loads(captured.out)

        assert output["documents"][0]["indexed_at"] == "2024-12-08T14:23:00"


class TestListWithIndexedDate:
    """Tests for indexed date display."""

    def test_list_shows_indexed_date(self, mock_config, temp_index_dir, capsys):
        """Test that human-readable output shows indexed date."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1",
                 "indexed_at": "2024-12-08T14:23:00"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "Indexed: 2024-12-08 14:23" in captured.out

    def test_list_handles_missing_indexed_at(self, mock_config, temp_index_dir, capsys):
        """Test that list works when indexed_at is missing (backward compatible)."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1"}  # No indexed_at
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "test.pdf" in captured.out
        assert "Indexed:" not in captured.out  # Should not show "Indexed:" line

    def test_list_keeps_most_recent_indexed_at(self, mock_config, temp_index_dir, capsys):
        """Test that when a source has multiple chunks with different timestamps, the most recent is kept."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1",
                 "indexed_at": "2024-12-08T10:00:00"},
                {"id": 1, "source": "test.pdf", "text": "chunk 2",
                 "indexed_at": "2024-12-08T15:30:00"},  # Most recent
                {"id": 2, "source": "test.pdf", "text": "chunk 3",
                 "indexed_at": "2024-12-08T12:00:00"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        # Should show the most recent timestamp (15:30), not the first or middle one
        assert "Indexed: 2024-12-08 15:30" in captured.out
        assert "Chunks: 3" in captured.out

    def test_list_keeps_most_recent_indexed_at_json(self, mock_config, temp_index_dir, capsys):
        """Test that JSON output includes the most recent indexed_at."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1",
                 "indexed_at": "2024-12-08T10:00:00"},
                {"id": 1, "source": "test.pdf", "text": "chunk 2",
                 "indexed_at": "2024-12-08T15:30:00"},
                {"id": 2, "source": "test.pdf", "text": "chunk 3",
                 "indexed_at": "2024-12-08T12:00:00"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=True)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        output = json.loads(captured.out)

        assert output["documents"][0]["indexed_at"] == "2024-12-08T15:30:00"

    def test_list_invalid_date_format_fallback(self, mock_config, temp_index_dir, capsys):
        """Test that invalid date formats are handled gracefully."""
        metadata = {
            "documents": [
                {"id": 0, "source": "test.pdf", "text": "chunk 1",
                 "indexed_at": "not-a-valid-date"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        # Should show raw value when parsing fails
        assert "Indexed: not-a-valid-date" in captured.out


class TestListEdgeCases:
    """Tests for edge cases and error handling."""

    def test_list_missing_source_fallback(self, mock_config, temp_index_dir, capsys):
        """Test that documents without 'source' field default to 'unknown'."""
        metadata = {
            "documents": [
                {"id": 0, "text": "chunk 1"},  # No source field
                {"id": 1, "text": "chunk 2"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        assert "unknown" in captured.out
        assert "Chunks: 2" in captured.out

    def test_list_missing_source_fallback_json(self, mock_config, temp_index_dir, capsys):
        """Test that JSON output handles missing source field."""
        metadata = {
            "documents": [
                {"id": 0, "text": "chunk 1"},  # No source field
                {"id": 1, "text": "chunk 2"}
            ],
            "model": "test"
        }
        _write_metadata(temp_index_dir, metadata)

        args = MockArgs(json_output=True)
        result = cmd_list(args)

        assert result == 0
        captured = capsys.readouterr()
        output = json.loads(captured.out)

        assert output["documents"][0]["source"] == "unknown"
        assert output["documents"][0]["chunks"] == 2

    def test_list_malformed_metadata_json(self, mock_config, temp_index_dir, capsys):
        """Test that malformed JSON is handled gracefully."""
        metadata_path = temp_index_dir / "metadata.json"
        # Write invalid JSON (deliberately bypasses _write_metadata, which
        # would serialize a valid document)
        metadata_path.write_text("{invalid json content")

        args = MockArgs(json_output=False)
        result = cmd_list(args)

        # Should return error code
        assert result == 1
        captured = capsys.readouterr()
        assert "Failed to read metadata" in captured.err

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nonatofabio/local_faiss_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.