MCP Server for Qdrant
by Jimmy974
#!/bin/sh
# Patch script for mcp-server-qdrant to run on Alpine Linux
# Run this after cloning the repository but before running with uv
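#
# Example usage (illustrative sketch; the clone URL points at the upstream
# project and the script name is whatever you saved this file as):
#   git clone https://github.com/qdrant/mcp-server-qdrant /tmp/mcp-server-qdrant
#   sh alpine-patch.sh
#   cd /tmp/mcp-server-qdrant && uv run mcp-server-qdrant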
# Make sure the repository has been cloned to the expected location
if [ ! -d "/tmp/mcp-server-qdrant" ]; then
    echo "Error: /tmp/mcp-server-qdrant directory not found" >&2
    exit 1
fi
# Create the Alpine embedding provider file
cat > /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/alpine_compat.py << 'EOF'
"""
Alpine Linux compatible embedding provider.
This module provides an embedding implementation that works well on Alpine Linux
with minimal dependencies and no PyTorch requirement.
"""
import asyncio
import logging
from typing import List, Dict, Any, Optional
from mcp_server_qdrant.embeddings.base import EmbeddingProvider
# Configure logging
logger = logging.getLogger(__name__)


class AlpineEmbedProvider(EmbeddingProvider):
    """
    Alpine-optimized embedding provider that works without PyTorch.

    This provider attempts to use fastembed first, falling back to a
    lightweight ONNX model if fastembed is unavailable.

    :param model_name: The name of the embedding model to use.
    """

    def __init__(self, model_name: str):
        self.model_name = model_name
        self.embedding_model = None
        self.embedding_type = None
        self.tokenizer = None
        self.vector_size = 384  # Default size for small embedding models

        # Try fastembed first; it runs on ONNX and needs no PyTorch
        try:
            from fastembed import TextEmbedding

            logger.info(f"Initializing FastEmbed with model {model_name}")
            self.embedding_model = TextEmbedding(model_name)
            self.embedding_type = "fastembed"
            logger.info("Successfully initialized FastEmbed")
        except Exception as e:
            logger.warning(f"Failed to initialize FastEmbed: {e}")
            logger.warning("Will attempt to use alternative lightweight embedding")
            self._init_fallback_embedding()

    def _init_fallback_embedding(self):
        """Initialize a fallback ONNX embedding model if fastembed fails."""
        try:
            import os
            from urllib.request import urlretrieve

            import onnxruntime as ort

            # Use a small, pre-quantized ONNX model
            model_url = "https://huggingface.co/optimum/all-MiniLM-L6-v2-onnx/resolve/main/model_quantized.onnx"
            model_path = "/tmp/model_quantized.onnx"
            if not os.path.exists(model_path):
                logger.info(f"Downloading ONNX model to {model_path}")
                urlretrieve(model_url, model_path)

            logger.info("Initializing ONNX runtime session")
            self.ort_session = ort.InferenceSession(model_path)
            self.embedding_type = "onnx"
            logger.info("Successfully initialized ONNX fallback model")

            try:
                # Try to load the matching tokenizer if available
                from tokenizers import Tokenizer

                tokenizer_url = "https://huggingface.co/optimum/all-MiniLM-L6-v2-onnx/resolve/main/tokenizer.json"
                tokenizer_path = "/tmp/tokenizer.json"
                if not os.path.exists(tokenizer_path):
                    urlretrieve(tokenizer_url, tokenizer_path)
                self.tokenizer = Tokenizer.from_file(tokenizer_path)
                logger.info("Successfully loaded tokenizer")
            except Exception as te:
                logger.warning(f"Could not load tokenizer: {te}")
                logger.warning("Will fall back to a very basic tokenization method")
        except Exception as e:
            logger.error(f"Failed to initialize any embedding model: {e}")
            raise RuntimeError(f"Could not initialize any embedding model: {e}") from e

    async def embed_documents(self, documents: List[str]) -> List[List[float]]:
        """Embed a list of documents into vectors."""
        if not documents:
            return []
        if self.embedding_type == "fastembed":
            # Run in a thread pool since FastEmbed is synchronous
            loop = asyncio.get_running_loop()
            embeddings = await loop.run_in_executor(
                None, lambda: list(self.embedding_model.embed(documents))
            )
            return [embedding.tolist() for embedding in embeddings]
        elif self.embedding_type == "onnx":
            # Use the ONNX session for inference, one document at a time
            return [await self._embed_with_onnx(doc) for doc in documents]
        else:
            raise ValueError(f"Unknown embedding type: {self.embedding_type}")

    async def embed_query(self, query: str) -> List[float]:
        """Embed a query into a vector."""
        if not query or not query.strip():
            # Return a zero vector of the right dimension to avoid errors
            return [0.0] * self.vector_size
        if self.embedding_type == "fastembed":
            # Run in a thread pool since FastEmbed is synchronous
            loop = asyncio.get_running_loop()
            embeddings = await loop.run_in_executor(
                None, lambda: list(self.embedding_model.embed([query]))
            )
            return embeddings[0].tolist()
        elif self.embedding_type == "onnx":
            return await self._embed_with_onnx(query)
        else:
            raise ValueError(f"Unknown embedding type: {self.embedding_type}")

    async def _embed_with_onnx(self, text: str) -> List[float]:
        """Use the ONNX runtime session to embed a single text."""
        import numpy as np

        max_length = 512
        if self.tokenizer:
            # Use the proper tokenizer, truncating to the model's max length
            encoding = self.tokenizer.encode(text)
            input_ids = encoding.ids[:max_length]
            attention_mask = encoding.attention_mask[:max_length]
            token_type_ids = [0] * len(input_ids)
            # Convert to numpy arrays with a batch dimension
            input_ids = np.array([input_ids], dtype=np.int64)
            attention_mask = np.array([attention_mask], dtype=np.int64)
            token_type_ids = np.array([token_type_ids], dtype=np.int64)
        else:
            # Very basic tokenization as a fallback. This is a simplified
            # approach that does not use the model's real vocabulary, so the
            # resulting vectors will be much lower quality.
            words = text.lower().split()[:max_length]
            if not words:
                return [0.0] * self.vector_size
            input_ids = np.array([[i + 1 for i in range(len(words))]], dtype=np.int64)
            attention_mask = np.array([[1] * len(words)], dtype=np.int64)
            token_type_ids = np.array([[0] * len(words)], dtype=np.int64)

        # Run inference in a thread pool since onnxruntime is synchronous
        loop = asyncio.get_running_loop()
        outputs = await loop.run_in_executor(
            None,
            lambda: self.ort_session.run(
                None,
                {
                    "input_ids": input_ids,
                    "attention_mask": attention_mask,
                    "token_type_ids": token_type_ids,
                },
            ),
        )

        # Mean-pool the token embeddings, weighted by the attention mask
        embeddings = outputs[0]
        mask_expanded = np.expand_dims(attention_mask, axis=-1)
        sum_embeddings = np.sum(embeddings * mask_expanded, axis=1)
        sum_mask = np.clip(np.sum(attention_mask, axis=1, keepdims=True), 1e-9, None)
        mean_embeddings = sum_embeddings / sum_mask

        # L2-normalize the pooled vector
        norm = np.clip(np.linalg.norm(mean_embeddings, axis=1, keepdims=True), 1e-9, None)
        return (mean_embeddings / norm)[0].tolist()

    def get_vector_name(self) -> str:
        """Return the name of the vector for the Qdrant collection."""
        model_name = self.model_name.split("/")[-1].lower()
        return f"alpine-{model_name}"
EOF
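
# Optional sanity check (a small addition; assumes python3 is on PATH): the
# heredoc above writes a whole Python module, so verify it parses before
# patching anything else.
if command -v python3 >/dev/null 2>&1; then
    python3 -c "import ast; ast.parse(open('/tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/alpine_compat.py').read())" \
        && echo "alpine_compat.py parses OK"
fi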
# Update the embedding types to include Alpine provider
sed -i '/class EmbeddingProviderType(Enum):/,/SENTENCE_TRANSFORMERS/ s/SENTENCE_TRANSFORMERS = "sentence-transformers"/SENTENCE_TRANSFORMERS = "sentence-transformers"\n    ALPINE = "alpine"/' /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/types.py
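# After that edit, the enum should look roughly like this (illustrative sketch
# based on the member names matched above, not a verbatim copy of the file):
#   class EmbeddingProviderType(Enum):
#       FASTEMBED = "fastembed"
#       SENTENCE_TRANSFORMERS = "sentence-transformers"
#       ALPINE = "alpine"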
# Update the factory to support our new provider
sed -i '/def create_embedding_provider/,/return/ {
/if settings.provider_type == EmbeddingProviderType.FASTEMBED:/i\
    if settings.provider_type == EmbeddingProviderType.ALPINE:\
        try:\
            from mcp_server_qdrant.embeddings.alpine_compat import AlpineEmbedProvider\
            return AlpineEmbedProvider(settings.model_name)\
        except ImportError as e:\
            logger.error(f"Failed to import Alpine provider: {e}")\
            logger.info("Falling back to FastEmbed provider")\
            # Fall back to FastEmbed if the Alpine provider fails\
            settings.provider_type = EmbeddingProviderType.FASTEMBED
}' /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/factory.py
# Add logger import if not already there
sed -i 's/from mcp_server_qdrant.settings import EmbeddingProviderSettings/import logging\n\nfrom mcp_server_qdrant.settings import EmbeddingProviderSettings\n\n# Set up logger\nlogger = logging.getLogger(__name__)/' /tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/factory.py
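
# Optional: verify the two factory edits landed. The sed patterns above are
# brittle and will silently no-op if the upstream file layout has changed.
FACTORY=/tmp/mcp-server-qdrant/src/mcp_server_qdrant/embeddings/factory.py
grep -q "EmbeddingProviderType.ALPINE" "$FACTORY" || echo "Warning: ALPINE branch not found in factory.py"
grep -q "logging.getLogger" "$FACTORY" || echo "Warning: logger setup not found in factory.py"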
# Create a .env file with Alpine provider settings
cat > /tmp/mcp-server-qdrant/.env << EOF
EMBEDDING_PROVIDER=alpine
EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
COLLECTION_NAME=memories
LOG_LEVEL=INFO
EOF
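
# The server reads these settings from the environment/.env at startup. A
# minimal launch sketch (assumes uv is installed and a Qdrant instance is
# reachable; QDRANT_URL is the upstream setting for its address):
#   cd /tmp/mcp-server-qdrant
#   QDRANT_URL=http://localhost:6333 uv run mcp-server-qdrant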
echo "Patch applied successfully! The code should now work on Alpine Linux."
echo "Install the required dependencies with:"
echo "pip install fastembed onnxruntime>=1.14.0 numpy tokenizers"