MCP Server for Qdrant

#!/bin/sh # Minimal installation script for mcp-server-qdrant on Alpine Linux set -e # Exit on any error # Install core system packages apk add --update python3 py3-pip git # Install uv if needed pip install uv # Clone the repo if not already present if [ ! -d "/tmp/mcp-server-qdrant" ]; then echo "Cloning repository..." git clone https://github.com/Jimmy974/mcp-server-qdrant.git /tmp/mcp-server-qdrant cd /tmp/mcp-server-qdrant git checkout ef795ae51801ac7bc875f0e1f9c3c3422c61d70b else cd /tmp/mcp-server-qdrant fi # Install only the minimal dependencies with uv echo "Installing minimal dependencies..." uv pip install --system numpy uv pip install --system --no-deps mcp[cli]>=1.3.0 qdrant-client>=1.12.0 pydantic>=2.10.6 pydantic-settings>=2.0.0 python-dotenv>=1.0.0 # Copy our custom minimal embedding provider echo "Installing minimal embedding provider..." mkdir -p /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/ cat > /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/minimal_embed.py << 'EOF' """ Minimal embedding provider with no external ML dependencies. Uses simple text processing techniques to create embeddings. """ import asyncio import logging import hashlib import numpy as np import re import math from typing import List, Dict, Any, Tuple from collections import Counter from mcp_server_qdrant.embeddings.base import EmbeddingProvider # Configure logging logger = logging.getLogger(__name__) class MinimalEmbedProvider(EmbeddingProvider): """ Minimal embedding provider that works with no ML dependencies. Uses a combination of character n-grams and word frequencies for embedding. :param model_name: Ignored, but required by the interface (can be any string). """ def __init__(self, model_name: str): self.model_name = model_name self.vector_size = 512 # Fixed size for our embeddings self.vocab_size = 10000 # Maximum vocabulary size to consider self.ngram_ranges = [(1, 1), (2, 2), (3, 3)] # Unigrams, bigrams, trigrams # Precomputed prime numbers for hashing self.primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47] logger.info(f"Initialized MinimalEmbedProvider with vector size {self.vector_size}") def _preprocess_text(self, text: str) -> str: """Basic text preprocessing.""" # Convert to lowercase text = text.lower() # Replace multiple spaces with single space text = re.sub(r'\s+', ' ', text) # Remove special characters and digits text = re.sub(r'[^\w\s]', '', text) # Strip whitespace return text.strip() def _extract_features(self, text: str) -> Tuple[List[str], Counter]: """Extract features from text (ngrams and word frequencies).""" text = self._preprocess_text(text) tokens = text.split() # Word frequency features word_freq = Counter(tokens) # Character n-gram features ngrams = [] for n_min, n_max in self.ngram_ranges: for n in range(n_min, n_max + 1): for i in range(len(text) - n + 1): ngrams.append(text[i:i+n]) return ngrams, word_freq def _compute_hash_embedding(self, features: List[str], word_freq: Counter) -> np.ndarray: """Compute embedding using a locality-sensitive hashing approach.""" embedding = np.zeros(self.vector_size, dtype=np.float32) # Process up to vocab_size features to limit computation for i, feature in enumerate(features[:self.vocab_size]): # Use multiple hash functions for better distribution for j, prime in enumerate(self.primes): # Simple hash function using different primes h = int(hashlib.md5(feature.encode()).hexdigest(), 16) % prime idx = (h + j * prime) % self.vector_size embedding[idx] += 1.0 # Add weighted word frequency components for word, count in word_freq.most_common(1000): # Get a hash for the word h = int(hashlib.md5(word.encode()).hexdigest(), 16) # Use a different part of the vector for word frequencies idx = h % (self.vector_size // 4) # Add weighted by log frequency embedding[idx] += math.log(count + 1) # Normalize the embedding norm = np.linalg.norm(embedding) if norm > 0: embedding = embedding / norm return embedding async def _embed_text(self, text: str) -> List[float]: """Embed a single text string.""" if not text or not text.strip(): return [0.0] * self.vector_size # Extract features and compute embedding features, word_freq = self._extract_features(text) embedding = self._compute_hash_embedding(features, word_freq) return embedding.tolist() async def embed_documents(self, documents: List[str]) -> List[List[float]]: """Embed a list of documents into vectors.""" if not documents: return [] embeddings = [] for doc in documents: embedding = await self._embed_text(doc) embeddings.append(embedding) return embeddings async def embed_query(self, query: str) -> List[float]: """Embed a query into a vector.""" return await self._embed_text(query) def get_vector_name(self) -> str: """ Return the name of the vector for the Qdrant collection. """ return "minimal-embed" EOF # Update the embedding types to include MINIMAL provider cat > /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/types.py << 'EOF' from enum import Enum class EmbeddingProviderType(Enum): MINIMAL = "minimal" EOF # Update the factory to use our minimal provider cat > /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/factory.py << 'EOF' from mcp_server_qdrant.embeddings.base import EmbeddingProvider from mcp_server_qdrant.embeddings.types import EmbeddingProviderType from mcp_server_qdrant.settings import EmbeddingProviderSettings import logging # Set up logger logger = logging.getLogger(__name__) def create_embedding_provider(settings: EmbeddingProviderSettings) -> EmbeddingProvider: """ Create a minimal embedding provider regardless of settings. :param settings: The settings for the embedding provider. :return: An instance of the minimal embedding provider. """ # Always use minimal provider try: # Import here to avoid circular imports from mcp_server_qdrant.embeddings.minimal_embed import MinimalEmbedProvider logger.info(f"Creating minimal embedding provider") return MinimalEmbedProvider('minimal') except ImportError as e: logger.error(f"Failed to import minimal provider: {e}") raise ValueError( "Minimal embedding provider is not available. " "This is a critical error as the minimal provider has no external dependencies." ) EOF # Update settings to use minimal provider by default cat > /tmp/mcp-server-qdrant/src/mcp_server_qdrant/settings.py << 'EOF' from typing import Optional from pydantic import Field from pydantic_settings import BaseSettings from mcp_server_qdrant.embeddings.types import EmbeddingProviderType DEFAULT_TOOL_STORE_DESCRIPTION = ( "Keep the memory for later use, when you are asked to remember something." ) DEFAULT_TOOL_FIND_DESCRIPTION = ( "Look up memories in Qdrant. Use this tool when you need to: \n" " - Find memories by their content \n" " - Access memories for further analysis \n" " - Get some personal information about the user" ) class ToolSettings(BaseSettings): """ Configuration for all the tools. """ tool_store_description: str = Field( default=DEFAULT_TOOL_STORE_DESCRIPTION, validation_alias="TOOL_STORE_DESCRIPTION", ) tool_find_description: str = Field( default=DEFAULT_TOOL_FIND_DESCRIPTION, validation_alias="TOOL_FIND_DESCRIPTION", ) class EmbeddingProviderSettings(BaseSettings): """ Configuration for the embedding provider. """ provider_type: EmbeddingProviderType = Field( default=EmbeddingProviderType.MINIMAL, validation_alias="EMBEDDING_PROVIDER", ) model_name: str = Field( default="minimal", validation_alias="EMBEDDING_MODEL", ) class QdrantSettings(BaseSettings): """ Configuration for the Qdrant connector. """ location: Optional[str] = Field(default=None, validation_alias="QDRANT_URL") api_key: Optional[str] = Field(default=None, validation_alias="QDRANT_API_KEY") collection_name: str = Field(default="memories", validation_alias="COLLECTION_NAME") local_path: Optional[str] = Field( default=None, validation_alias="QDRANT_LOCAL_PATH" ) def get_qdrant_location(self) -> str: """ Get the Qdrant location, either the URL or the local path. """ return self.location or self.local_path EOF # Run the server with UV echo "========================================================" echo "Installation complete! Run with:" echo "cd /tmp/mcp-server-qdrant && uv run -p /tmp/mcp-server-qdrant python -m mcp_server_qdrant.main" echo "========================================================"