#!/usr/bin/env python3
"""Test error recovery in document processing."""
import tempfile
import threading
import time
from pathlib import Path
from unittest.mock import patch
import pytest
from mcp_server.plugins.markdown_plugin import MarkdownPlugin
from mcp_server.plugins.plaintext_plugin import PlainTextPlugin
from mcp_server.plugins.python_plugin.plugin import Plugin as PythonPlugin
from mcp_server.storage.sqlite_store import SQLiteStore
class TestDocumentErrorRecovery:
"""Test error recovery mechanisms in document processing."""
@pytest.fixture
def setup_plugins(self):
"""Setup plugins for testing."""
with tempfile.TemporaryDirectory() as tmpdir:
store = SQLiteStore(str(Path(tmpdir) / "test.db"))
markdown_plugin = MarkdownPlugin(enable_semantic=False)
plaintext_config = {
"name": "plaintext",
"code": "plaintext",
"extensions": [".txt", ".text"],
"file_pattern": r".*\.(txt|text)$",
}
plaintext_plugin = PlainTextPlugin(plaintext_config, enable_semantic=False)
python_plugin = PythonPlugin()
yield {
"markdown": markdown_plugin,
"plaintext": plaintext_plugin,
"python": python_plugin,
"store": store,
"tmpdir": tmpdir,
}
def test_parser_error_recovery(self, setup_plugins):
"""Test recovery from parser errors."""
plugins = setup_plugins
# Markdown that might cause parser issues
problematic_md = """# Title
```python
def broken():
# Unclosed string
text = "this string never ends...
# Next section should still parse
## Valid Section
This content should be extracted despite the error above."""
# Should not raise exception
result = plugins["markdown"].indexFile("broken.md", problematic_md)
assert result is not None
# Should still extract valid sections
assert any("Valid Section" in sym.name for sym in result.symbols)
# Chunks should be created
chunks = plugins["markdown"].chunk_document(problematic_md, Path("broken.md"))
assert len(chunks) > 0
assert any("Valid Section" in chunk.content for chunk in chunks)
def test_memory_error_recovery(self, setup_plugins):
"""Test recovery from memory-related errors."""
plugin = setup_plugins["markdown"]
# Generate large content that might cause memory issues
large_content = "# Title\n\n"
# Add 1000 sections with substantial content
for i in range(1000):
large_content += f"## Section {i}\n\n"
large_content += "Large paragraph. " * 100 + "\n\n"
# Mock memory error during chunk creation
original_chunk = plugin.chunk_document
call_count = 0
def chunk_with_memory_error(*args, **kwargs):
nonlocal call_count
call_count += 1
if call_count == 1:
raise MemoryError("Simulated memory error")
return original_chunk(*args, **kwargs)
with patch.object(plugin, "chunk_document", side_effect=chunk_with_memory_error):
            # First attempt should fail
            with pytest.raises(MemoryError):
                plugin.chunk_document(large_content, Path("large.md"))
# Second attempt should work (simulating recovery)
chunks = plugin.chunk_document(large_content, Path("large.md"))
assert chunks is not None
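    def test_memory_error_retry_sketch(self, setup_plugins):
        """Sketch of the retry-once pattern the previous test simulates.
        `_retry` is a hypothetical helper written for this test, not part of
        the plugin API."""
        plugin = setup_plugins["markdown"]
        def _retry(fn, attempts, *args, **kwargs):
            # Retry after MemoryError; re-raise once attempts are exhausted
            for attempt in range(attempts):
                try:
                    return fn(*args, **kwargs)
                except MemoryError:
                    if attempt == attempts - 1:
                        raise
        chunks = _retry(plugin.chunk_document, 2, "# Title\n\nBody", Path("retry.md"))
        assert chunks is not None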
def test_encoding_error_recovery(self, setup_plugins):
"""Test recovery from encoding errors."""
plugins = setup_plugins
# Content with potential encoding issues
mixed_bytes = b"# Title\n\nValid text\xff\xfe\xfdInvalid bytes\n\nMore valid text"
# Convert with error handling
content = mixed_bytes.decode("utf-8", errors="replace")
# Should process despite encoding issues
result = plugins["markdown"].indexFile("encoding.md", content)
assert result is not None
chunks = plugins["markdown"].chunk_document(content, Path("encoding.md"))
assert len(chunks) > 0
assert any("Title" in chunk.content for chunk in chunks)
assert any("valid text" in chunk.content for chunk in chunks)
def test_concurrent_access_recovery(self, setup_plugins):
"""Test recovery from concurrent access issues."""
plugins = setup_plugins
store = plugins["store"]
content = "# Concurrent Test\n\nTesting concurrent access"
results = []
errors = []
def index_file(plugin, filename, thread_id):
try:
result = plugin.indexFile(filename, f"{content}\n\nThread {thread_id}")
results.append(result)
except Exception as e:
errors.append(e)
# Simulate concurrent indexing
threads = []
for i in range(5):
thread = threading.Thread(
target=index_file, args=(plugins["markdown"], "concurrent.md", i)
)
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
# All operations should complete (some might fail but should recover)
assert len(results) + len(errors) == 5
assert len(results) > 0 # At least some should succeed
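    def test_concurrent_access_with_executor_sketch(self, setup_plugins):
        """Same scenario written with concurrent.futures, which surfaces
        worker exceptions through Future.result() instead of a shared error
        list. A sketch of an alternative harness, not a different recovery
        mechanism."""
        from concurrent.futures import ThreadPoolExecutor
        plugin = setup_plugins["markdown"]
        content = "# Concurrent Test\n\nTesting concurrent access"
        with ThreadPoolExecutor(max_workers=5) as pool:
            futures = [
                pool.submit(plugin.indexFile, "concurrent.md", f"{content}\n\nThread {i}")
                for i in range(5)
            ]
            outcomes = []
            for future in futures:
                try:
                    outcomes.append(future.result())
                except Exception:
                    outcomes.append(None)
        # Every submission resolves; at least one should succeed
        assert len(outcomes) == 5
        assert any(o is not None for o in outcomes)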
def test_partial_chunk_failure_recovery(self, setup_plugins):
"""Test recovery when some chunks fail to process."""
plugin = setup_plugins["markdown"]
content = """# Document
## Section 1
Normal content here.
## Section 2
This section is fine too.
## Section 3
Also good content."""
# Mock chunk creation to fail on specific chunks
original_chunk_method = plugin.chunk_strategy.create_chunks
        def failing_chunk_creation(*args, **kwargs):
            chunks = original_chunk_method(*args, **kwargs)
            # Simulate a failure on the second chunk, then recover by
            # filtering it out, as a resilient pipeline would
            if len(chunks) > 1:
                chunks[1] = None
            return [c for c in chunks if c is not None]
with patch.object(
plugin.chunk_strategy, "create_chunks", side_effect=failing_chunk_creation
):
chunks = plugin.chunk_document(content, Path("partial.md"))
# Should still return valid chunks
assert chunks is not None
assert len(chunks) > 0
assert all(chunk is not None for chunk in chunks)
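    def test_chunk_filter_sketch(self):
        """Isolated sketch of the drop-failed-chunks step used above, with
        plain strings standing in for real chunk objects."""
        raw_chunks = ["chunk-0", None, "chunk-2"]  # None marks a failed chunk
        survivors = [c for c in raw_chunks if c is not None]
        assert survivors == ["chunk-0", "chunk-2"]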
def test_metadata_extraction_failure_recovery(self, setup_plugins):
"""Test recovery from metadata extraction failures."""
plugin = setup_plugins["markdown"]
# Content with problematic frontmatter
content = """---
title: Valid Title
date: not-a-valid-date
tags: [unclosed, list
author:
name: Test
email: invalid@email
---
# Content
This content should still be processed."""
# Should handle metadata errors gracefully
metadata = plugin.extract_metadata(content, Path("meta_error.md"))
assert metadata is not None
assert isinstance(metadata, dict)
# Document should still be indexed
result = plugin.indexFile("meta_error.md", content)
assert result is not None
assert len(result.symbols) > 0
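    def test_frontmatter_salvage_sketch(self):
        """Sketch of tolerant frontmatter handling: fall back to an empty
        mapping when the YAML block does not parse. Assumes PyYAML is
        available as a test dependency; skipped otherwise."""
        yaml = pytest.importorskip("yaml")
        bad_frontmatter = "title: Valid Title\ntags: [unclosed, list"
        try:
            metadata = yaml.safe_load(bad_frontmatter) or {}
        except yaml.YAMLError:
            metadata = {}  # Degrade to no metadata rather than failing
        assert isinstance(metadata, dict)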
def test_file_system_error_recovery(self, setup_plugins):
"""Test recovery from file system errors."""
plugins = setup_plugins
tmpdir = Path(plugins["tmpdir"])
# Create a file
test_file = tmpdir / "test.md"
test_file.write_text("# Test\n\nContent")
# Make file read-only
test_file.chmod(0o444)
try:
# Try to process (might fail on some operations)
content = test_file.read_text()
result = plugins["markdown"].indexFile(str(test_file), content)
assert result is not None
finally:
# Restore permissions
test_file.chmod(0o644)
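    def test_readonly_write_rejection_sketch(self, setup_plugins):
        """Companion sketch: writing to the read-only file should raise
        PermissionError. Assumes POSIX permission semantics and a non-root
        user; skipped otherwise."""
        import os
        if os.name != "posix" or os.geteuid() == 0:
            pytest.skip("requires POSIX permissions and a non-root user")
        locked = Path(setup_plugins["tmpdir"]) / "locked.md"
        locked.write_text("# Locked")
        locked.chmod(0o444)
        try:
            with pytest.raises(PermissionError):
                locked.write_text("overwrite attempt")
        finally:
            locked.chmod(0o644)  # Restore so the tempdir can be cleaned up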
def test_infinite_recursion_prevention(self, setup_plugins):
"""Test prevention of infinite recursion in document structure."""
plugin = setup_plugins["markdown"]
# Create content that might cause recursive parsing
content = """# Root
[Link to self](#root)
## Section A
See [Section B](#section-b)
## Section B
See [Section A](#section-a)
### Subsection
[Back to Root](#root)"""
# Should handle without stack overflow
result = plugin.indexFile("recursive.md", content)
assert result is not None
structure = plugin.extract_structure(content, Path("recursive.md"))
assert structure is not None
assert len(structure.sections) > 0
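    def test_link_cycle_guard_sketch(self):
        """Illustrates the visited-set idiom that typically prevents the
        recursion tested above. `follow_links` is a hypothetical stand-in,
        not the plugin's actual traversal code."""
        links = {
            "root": ["section-a"],
            "section-a": ["section-b"],
            "section-b": ["section-a", "root"],
        }
        def follow_links(anchor, seen=None):
            # Track visited anchors so cyclic references terminate
            seen = set() if seen is None else seen
            if anchor in seen:
                return seen
            seen.add(anchor)
            for target in links.get(anchor, []):
                follow_links(target, seen)
            return seen
        assert follow_links("root") == {"root", "section-a", "section-b"}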
def test_timeout_recovery(self, setup_plugins):
"""Test recovery from operations that might timeout."""
plugin = setup_plugins["plaintext"]
        # Generate content that takes a while to process
        slow_content = "".join(
            f"Line {i}: " + "word " * 50 + "\n" for i in range(10000)
        )
# Mock a slow operation
original_chunk = plugin.chunk_document
        def slow_chunk_document(*args, **kwargs):
            # Simulate slow processing (time is already imported at module level)
            time.sleep(0.001)  # Small delay to simulate work
            return original_chunk(*args, **kwargs)
with patch.object(plugin, "chunk_document", side_effect=slow_chunk_document):
            start_time = time.time()
            chunks = plugin.chunk_document(slow_content, Path("slow.txt"))
            elapsed = time.time() - start_time
            # Should complete even with the injected delays; the generous
            # bound keeps the assertion from being flaky
            assert chunks is not None
            assert len(chunks) > 0
            assert elapsed < 60
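    def test_hard_deadline_sketch(self, setup_plugins):
        """Sketch of enforcing a real deadline with concurrent.futures. The
        30-second budget is an assumption chosen for this test, not a
        documented plugin limit."""
        from concurrent.futures import ThreadPoolExecutor
        from concurrent.futures import TimeoutError as FutureTimeout
        plugin = setup_plugins["plaintext"]
        content = "word " * 10000
        with ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(plugin.chunk_document, content, Path("deadline.txt"))
            try:
                chunks = future.result(timeout=30)
            except FutureTimeout:
                pytest.fail("chunk_document exceeded the 30s budget")
        assert chunks is not None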
def test_malformed_ast_recovery(self, setup_plugins):
"""Test recovery from malformed AST structures."""
plugin = setup_plugins["python"]
# Python code that might produce unusual AST
malformed_code = '''# Malformed Python
def function(
param1,
param2 # Missing closing paren on purpose
class IncompleteClass:
def method(self
pass
try:
something()
except
pass
# Valid code after errors
def valid_function():
"""This should still be found."""
return 42
'''
# Should handle AST errors gracefully
result = plugin.indexFile("malformed.py", malformed_code)
assert result is not None
# Should still find valid symbols
assert any("valid_function" in sym.name for sym in result.symbols)
def test_resource_cleanup_on_error(self, setup_plugins):
"""Test proper resource cleanup when errors occur."""
plugin = setup_plugins["markdown"]
# Mock an error during processing
original_index = plugin.indexFile
def failing_index(*args, **kwargs):
# Allocate some resources (simulated)
temp_data = ["resource"] * 1000
# Fail partway through
raise RuntimeError("Simulated processing error")
with patch.object(plugin, "indexFile", side_effect=failing_index):
try:
plugin.indexFile("test.md", "# Test")
except RuntimeError:
pass
# Should be able to process normally after error
result = original_index("test2.md", "# Test 2")
assert result is not None
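    def test_cleanup_context_manager_sketch(self):
        """Shows the try/finally idiom the cleanup test relies on.
        `scratch_resource` is a hypothetical helper, not plugin API."""
        from contextlib import contextmanager
        released = []
        @contextmanager
        def scratch_resource():
            resource = ["resource"] * 1000
            try:
                yield resource
            finally:
                released.append(True)  # Cleanup runs even when the body raises
        with pytest.raises(RuntimeError):
            with scratch_resource():
                raise RuntimeError("Simulated processing error")
        assert released == [True]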
def test_partial_semantic_index_failure(self, setup_plugins):
"""Test recovery when semantic indexing partially fails."""
# Create plugin with semantic indexing
with tempfile.TemporaryDirectory() as tmpdir:
store = SQLiteStore(str(Path(tmpdir) / "test.db"))
plugin = MarkdownPlugin(enable_semantic=True)
content = """# Document
## Section 1
Content for embedding.
## Section 2
More content.
## Section 3
Final content."""
# Mock embedding generation to fail on some chunks
if hasattr(plugin, "semantic_indexer"):
            def failing_embeddings(texts):
                # Fail on the second text, succeed on the rest
                results = []
                for i, _ in enumerate(texts):
                    if i == 1:
                        results.append(None)  # Simulated failure
                    else:
                        results.append([0.1] * 384)  # Mock embedding
                return results
with patch.object(
plugin.semantic_indexer, "generate_embeddings", side_effect=failing_embeddings
):
                # Should still index despite embedding failures
                result = plugin.indexFile("semantic_fail.md", content)
                assert result is not None
                assert len(result.symbols) > 0
        else:
            pytest.skip("plugin was built without a semantic_indexer attribute")
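    def test_embedding_failure_filter_sketch(self):
        """Isolated sketch of the skip-failed-embeddings step: None entries
        from a failing embedder are dropped before storage. The vector size
        of 384 matches the mock above."""
        embeddings = [[0.1] * 384, None, [0.1] * 384]
        usable = [(i, e) for i, e in enumerate(embeddings) if e is not None]
        assert [i for i, _ in usable] == [0, 2]
        assert all(len(e) == 384 for _, e in usable)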
def test_search_error_recovery(self, setup_plugins):
"""Test recovery from search errors."""
plugins = setup_plugins
store = plugins["store"]
# Index some content
plugins["markdown"].indexFile("test1.md", "# Test 1\n\nContent one")
plugins["markdown"].indexFile("test2.md", "# Test 2\n\nContent two")
# Mock a search error
original_search = store.search_symbols_fts
call_count = 0
def failing_search(query, **kwargs):
nonlocal call_count
call_count += 1
if call_count == 1:
raise RuntimeError("Search database error")
return original_search(query, **kwargs)
with patch.object(store, "search_symbols_fts", side_effect=failing_search):
            # First search should fail
            with pytest.raises(RuntimeError):
                store.search_symbols_fts("test")
# Second search should work
results = store.search_symbols_fts("test")
assert len(results) > 0
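    def test_search_retry_wrapper_sketch(self, setup_plugins):
        """Sketch of the retry-once pattern the previous test exercises by
        hand. `_search_with_retry` is a hypothetical wrapper, not part of
        SQLiteStore."""
        store = setup_plugins["store"]
        def _search_with_retry(query, attempts=2):
            # Retry transient RuntimeErrors; re-raise once attempts run out
            for attempt in range(attempts):
                try:
                    return store.search_symbols_fts(query)
                except RuntimeError:
                    if attempt == attempts - 1:
                        raise
        # With a healthy store this succeeds on the first attempt
        assert _search_with_retry("test") is not None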
def test_graceful_degradation(self, setup_plugins):
"""Test graceful degradation when features fail."""
plugin = setup_plugins["markdown"]
content = """# Document
## Section with Code
```python
def example():
return 42
```
## Section with Table
| Col1 | Col2 |
|------|------|
| A | B |
## Regular Section
Just text here."""
# Mock various feature extractions to fail
with patch.object(plugin.parser, "parse", side_effect=Exception("Parser failed")):
            # Should fall back to basic processing rather than crash; a
            # handled exception or a degraded result are both acceptable here
            try:
                plugin.indexFile("degraded.md", content)
            except Exception:
                pass
# Normal processing should work after error
result = plugin.indexFile("normal.md", content)
assert result is not None
if __name__ == "__main__":
pytest.main([__file__, "-v"])