Qdrant RAG MCP Server

proposed_fix_specialized_embeddings.py•7.02 KiB

#!/usr/bin/env python3 """Proposed fix for the dimension mismatch issue in specialized embeddings""" def show_proposed_fixes(): """Show the proposed fixes for the dimension mismatch issue""" print("=" * 80) print("PROPOSED FIXES FOR DIMENSION MISMATCH ISSUE") print("=" * 80) print("\nPROBLEM SUMMARY:") print("- During batch reindexing, Python files sometimes get 384D embeddings instead of 768D") print("- This happens when CodeRankEmbed is evicted and the system falls back to general model (384D)") print("- The fallback chain is: CodeRankEmbed (768D) -> General (384D)") print("\nROOT CAUSES:") print("1. Model eviction due to 2-model memory limit") print("2. Fallback to general model (384D) when code model fails") print("3. No dimension validation before fallback") print("4. Possible race conditions in multi-threaded access") print("\nPROPOSED FIXES:") print("\n1. FIX THE FALLBACK CHAIN (Immediate fix)") print(" In specialized_embeddings.py, modify the encode() method:") print(""" # Around line 736-739, replace: if content_type != 'general': logger.warning(f"Falling back to general model for encoding") return self.encode(texts, 'general', batch_size, show_progress_bar, normalize_embeddings) # With: if content_type != 'general': # Check if we have a dimension-compatible fallback current_dim = self.model_configs[content_type]['dimension'] # For code content (768D), try fallback model first if content_type == 'code' and self.model_configs[content_type].get('fallback'): logger.warning(f"Trying fallback model for {content_type}") try: # Temporarily update model config to use fallback original_model = self.model_configs[content_type]['name'] self.model_configs[content_type]['name'] = self.model_configs[content_type]['fallback'] result = self.encode(texts, content_type, batch_size, show_progress_bar, normalize_embeddings) # Restore original model self.model_configs[content_type]['name'] = original_model return result except Exception as fallback_e: logger.error(f"Fallback model also failed: {fallback_e}") # Restore original model self.model_configs[content_type]['name'] = original_model # Only fall back to general if dimensions match general_dim = self.model_configs['general']['dimension'] if general_dim == current_dim: logger.warning(f"Falling back to general model for encoding (same dimension)") return self.encode(texts, 'general', batch_size, show_progress_bar, normalize_embeddings) else: logger.error(f"Cannot fall back to general model: dimension mismatch ({current_dim}D vs {general_dim}D)") raise ValueError(f"Failed to encode {content_type} content and no compatible fallback available") """) print("\n2. IMPROVE MEMORY MANAGEMENT (Better fix)") print(" Increase model limit for Apple Silicon during batch operations:") print(""" # Add a context manager for batch operations: @contextmanager def batch_operation_mode(self): \"\"\"Temporarily increase model limit for batch operations\"\"\" original_max = self.max_models_in_memory try: # Increase limit during batch operations self.max_models_in_memory = min(4, original_max * 2) logger.info(f"Batch mode: increased model limit to {self.max_models_in_memory}") yield finally: self.max_models_in_memory = original_max # Evict excess models if needed while len(self.loaded_models) > original_max: self._evict_lru_model() """) print("\n3. ADD DIMENSION VALIDATION (Defensive fix)") print(" In load_model() method, validate dimensions:") print(""" # After loading a model, validate its dimension: def load_model(self, content_type: str) -> Tuple[SentenceTransformer, Dict[str, Any]]: # ... existing code ... # After successfully loading model: # Validate dimension by doing a test encoding test_embedding = model.encode(["test"], show_progress_bar=False) actual_dimension = test_embedding.shape[-1] expected_dimension = model_config['dimension'] if actual_dimension != expected_dimension: logger.error(f"Model {model_name} dimension mismatch: " f"expected {expected_dimension}, got {actual_dimension}") # Remove the incorrectly loaded model self.loaded_models.pop(model_name, None) raise ValueError(f"Model dimension mismatch for {model_name}") return model, model_config """) print("\n4. ADD THREAD SAFETY (Robustness fix)") print(" Add proper locking to prevent race conditions:") print(""" # In __init__: self._lock = threading.RLock() # Wrap critical sections: def load_model(self, content_type: str) -> Tuple[SentenceTransformer, Dict[str, Any]]: with self._lock: # Check if already loaded if model_name in self.loaded_models: self.loaded_models.move_to_end(model_name) return self.loaded_models[model_name], model_config # ... rest of method def _evict_lru_model(self, protect_model=None): with self._lock: # ... existing eviction code """) print("\n5. SPECIFIC CODERANKEDEMBED FIX (Targeted fix)") print(" Protect CodeRankEmbed from eviction during code file processing:") print(""" # In encode() method for code content: if content_type == 'code': # Ensure CodeRankEmbed stays loaded during entire operation with self._lock: self.active_models.add(model_name) # Temporarily increase protection original_max = self.max_models_in_memory if len(self.loaded_models) >= original_max: self.max_models_in_memory = len(self.loaded_models) + 1 try: # ... do encoding ... finally: with self._lock: self.max_models_in_memory = original_max self.active_models.discard(model_name) """) print("\nRECOMMENDED APPROACH:") print("1. Implement Fix #1 (fallback chain) immediately - prevents 384D fallback") print("2. Add Fix #3 (dimension validation) for safety") print("3. Consider Fix #2 (batch mode) for better performance during reindexing") print("4. Add Fix #4 (thread safety) if concurrent access is expected") print("\nTESTING THE FIX:") print("After implementing, test with:") print("- Run the reindex operation that was failing") print("- Run test_concurrent_dimension_issue.py") print("- Monitor for any 'dimension mismatch' errors") if __name__ == "__main__": show_proposed_fixes()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ancoleman/qdrant-rag-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

proposed_fix_specialized_embeddings.py•7.02 KiB