# Qdrant RAG MCP Server Configuration Example
# Copy this file to .env and update with your values
# ====================
# Core Configuration
# ====================
# Qdrant Database Configuration
# Connection settings for the Qdrant instance; the values below point at a
# Qdrant server on this machine.
QDRANT_HOST=localhost
QDRANT_PORT=6333
# Left empty in this example; the cloud example below shows where a key goes.
QDRANT_API_KEY=
QDRANT_GRPC_PORT=6334
# For cloud deployment, replace the values above (do not leave two
# assignments of the same key in the file):
# QDRANT_HOST=your-cluster-url.qdrant.io
# QDRANT_URL=https://your-cluster-url.qdrant.io:6333
# QDRANT_API_KEY=your-api-key
# Embedding Model Configuration
# NOTE: comments in this section are kept on their own lines. Some env-file
# loaders (for example `docker run --env-file`) only recognise full-line
# comments and would treat a trailing "# ..." as part of the value.
# Default model (small, fast, works on all platforms)
EMBEDDING_MODEL=all-MiniLM-L12-v2
# For macOS with Apple Silicon (M1/M2/M3/M4):
# Better accuracy, still fast (currently the same model as the default above)
# EMBEDDING_MODEL=all-MiniLM-L12-v2
# Avoid tokenizer warnings
TOKENIZERS_PARALLELISM=false
# Enable Metal Performance Shaders
MPS_DEVICE_ENABLE=1
# Apple Silicon MPS Optimizations (automatically set on Apple Silicon)
# PYTORCH_ENABLE_MPS_FALLBACK=1
# PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
# PYTORCH_MPS_LOW_WATERMARK_RATIO=0.0
# For better accuracy (larger models):
# Best general-purpose:
# EMBEDDING_MODEL=all-mpnet-base-v2
# Optimized for Q&A:
# EMBEDDING_MODEL=multi-qa-MiniLM-L6-cos-v1
# For code-specific embeddings:
# Trained on code:
# EMBEDDING_MODEL=microsoft/codebert-base
# Code generation model:
# EMBEDDING_MODEL=Salesforce/codet5-base
# Model Cache Directory (important for macOS)
# Keep exactly one SENTENCE_TRANSFORMERS_HOME line active.
# Docker:
SENTENCE_TRANSFORMERS_HOME=/app/data/models
# macOS (recommended):
# SENTENCE_TRANSFORMERS_HOME=~/Library/Caches/qdrant-mcp/models
# Linux/Windows:
# SENTENCE_TRANSFORMERS_HOME=~/mcp-servers/qdrant-rag/data/models
# Server Configuration
# NOTE(review): two ports are configured; presumably SERVER_PORT is the MCP
# server port and HTTP_PORT a separate HTTP endpoint — confirm before changing.
SERVER_PORT=8080
HTTP_PORT=8081
# NOTE(review): QDRANT_LOG_LEVEL is also set under "Logging Configuration";
# confirm which of the two variables the server reads.
LOG_LEVEL=INFO
# ====================
# Performance Tuning
# ====================
# Chunking Configuration
# Default chunk size and overlap, with separate overrides for code and config
# files below. Units are not stated in this file — TODO confirm whether these
# are characters or tokens.
CHUNK_SIZE=1500
CHUNK_OVERLAP=200
# For code files
CODE_CHUNK_SIZE=1000
CODE_CHUNK_OVERLAP=100
# For config files
CONFIG_CHUNK_SIZE=800
CONFIG_CHUNK_OVERLAP=50
# Search Configuration
# Presumably the maximum number of results per search and the minimum
# similarity score a result must reach — verify against the server code.
MAX_SEARCH_RESULTS=5
SIMILARITY_THRESHOLD=0.7
# Hybrid search weight (0.0 = pure vector, 1.0 = pure keyword)
HYBRID_ALPHA=0.7
# ====================
# Auto-Indexing (Optional)
# ====================
# Enable automatic indexing when files change (disabled in this example)
QDRANT_RAG_AUTO_INDEX=false
# Debounce time in seconds (wait before indexing after changes)
QDRANT_RAG_DEBOUNCE=5.0
# ====================
# GitHub Integration (v0.3.0)
# ====================
# GitHub Authentication (Choose ONE method below)
# Option 1: Personal Access Token (recommended for individual use)
# 1. Go to https://github.com/settings/tokens
# 2. Click "Generate new token" > "Generate new token (classic)"
# 3. Select scopes: repo, workflow
#    (classic tokens have no separate "issues" or "pull_requests" scope;
#    access to issues and pull requests is included in "repo")
# 4. Copy the generated token (starts with ghp_)
# Replace the placeholder below with your token, or comment this line out if
# you use Option 2 or do not need the GitHub integration.
GITHUB_TOKEN=ghp_your_personal_access_token_here
# Option 2: GitHub App Authentication (recommended for organizations)
# 1. Go to https://github.com/settings/apps
# 2. Click "New GitHub App"
# 3. Configure permissions: Contents (R&W), Issues (R&W), Pull requests (R&W), Metadata (R)
# 4. Generate and download private key
# 5. Install app on repositories and note installation ID
# GITHUB_APP_ID=123456
# GITHUB_PRIVATE_KEY_PATH=/path/to/your/github-app-private-key.pem
# GITHUB_INSTALLATION_ID=12345678
# Optional: Default repository context (saves calling github_switch_repository)
# GITHUB_REPO_OWNER=your-username-or-org
# GITHUB_REPO_NAME=your-repository-name
# GitHub API Configuration (Advanced)
# API base URL. For GitHub Enterprise use e.g. https://github.company.com/api/v3
# GITHUB_API_URL=https://api.github.com
# Request timeout in seconds
# GITHUB_API_TIMEOUT=30
# Number of retry attempts for failed requests
# GITHUB_RETRY_ATTEMPTS=3
# Initial delay between retries (exponential backoff)
# GITHUB_RETRY_DELAY=1.0
# Stop when this many requests remain
# GITHUB_RATE_LIMIT_BUFFER=100
# GitHub Workflow Configuration
# Safety: preview changes before applying
# GITHUB_DRY_RUN_DEFAULT=true
# Require user confirmation for destructive actions
# GITHUB_REQUIRE_CONFIRMATION=true
# Maximum files to include in a single PR
# GITHUB_MAX_FILES_PER_PR=10
# Create PRs as drafts by default
# GITHUB_DRAFT_PR_DEFAULT=true
# ====================
# MCP Client Context
# ====================
# Claude Code Integration
# NOTE(review): env-file loaders generally do not expand "~"; this assumes the
# server expands it itself — confirm, or use an absolute path.
CLAUDE_CODE_CONFIG_PATH=~/.claude-code
# MCP Client Working Directory
# Set this to pass the client's actual working directory to the MCP server.
# Use it when the server's working directory differs from the client's.
# For Claude Code, this is automatically set to ${workspaceFolder}
# MCP_CLIENT_CWD=/path/to/your/project
# ====================
# Logging Configuration
# ====================
# Log level: DEBUG, INFO, WARNING, ERROR
# NOTE(review): LOG_LEVEL is also set under "Server Configuration"; confirm
# which of the two variables the server reads.
QDRANT_LOG_LEVEL=INFO
# Log file location (project-aware logging)
# Defaults to ~/.mcp-servers/qdrant-rag/logs/
# LOG_DIR=/custom/log/directory
# ====================
# Advanced Configuration
# ====================
# Collection naming strategy
# PROJECT_COLLECTION_PREFIX=project
# GLOBAL_COLLECTION_PREFIX=global
# File exclusion patterns (in addition to .ragignore)
# EXCLUDE_PATTERNS=*.log,*.tmp,*.cache,*.pyc,__pycache__
# Maximum file size to index (in MB)
# MAX_FILE_SIZE_MB=10
# Connection pool settings
# CONNECTION_POOL_SIZE=10
# CONNECTION_TIMEOUT=30
# Vector index parameters
# Distance metric: Cosine, Euclid, or Dot
# VECTOR_DISTANCE_METRIC=Cosine
# VECTOR_HNSW_M=16
# VECTOR_HNSW_EF_CONSTRUCT=100
# ====================
# Development Options
# ====================
# Enable debug mode (disabled in this example)
DEBUG=false
# Disable SSL warnings (development only)
# DISABLE_SSL_WARNINGS=true
# Use local Qdrant instance without authentication
# QDRANT_LOCAL_MODE=true
# ====================
# Specialized Embeddings Configuration (v0.3.3+)
# ====================
# Enable specialized embeddings for different content types
# QDRANT_SPECIALIZED_EMBEDDINGS_ENABLED=true
# Content-Type Specific Models
# Code embeddings (optimized for code search)
# QDRANT_CODE_EMBEDDING_MODEL=nomic-ai/CodeRankEmbed
# QDRANT_CODE_EMBEDDING_FALLBACK=microsoft/codebert-base
# Custom query prefix (optional, defaults to model requirements)
# QDRANT_CODE_QUERY_PREFIX=
# Config file embeddings (JSON, YAML, XML, etc.)
# QDRANT_CONFIG_EMBEDDING_MODEL=jinaai/jina-embeddings-v3
# QDRANT_CONFIG_EMBEDDING_FALLBACK=jinaai/jina-embeddings-v2-base-en
# Documentation embeddings (markdown, text files)
# QDRANT_DOC_EMBEDDING_MODEL=hkunlp/instructor-large
# QDRANT_DOC_EMBEDDING_FALLBACK=sentence-transformers/all-mpnet-base-v2
# QDRANT_DOC_INSTRUCTION_PREFIX="Represent the technical documentation for retrieval:"
# General/fallback embeddings
# QDRANT_GENERAL_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L12-v2
# Memory Management for Specialized Embeddings
# Maximum number of models to keep in memory (2 for Apple Silicon)
# QDRANT_MAX_MODELS_IN_MEMORY=3
# Total memory limit for all models, in GB (3.0 for Apple Silicon)
# QDRANT_MEMORY_LIMIT_GB=4.0
# Where to cache downloaded models
# QDRANT_MODEL_CACHE_DIR=~/.cache/qdrant-mcp/models
# Performance Settings
# Batch size for encoding (reduced for memory-intensive models)
# QDRANT_EMBEDDING_BATCH_SIZE=32
# Device selection: auto, cuda, mps, cpu
# QDRANT_EMBEDDING_DEVICE=auto
# HuggingFace Hub Settings
# HF_HOME=~/mcp-servers/qdrant-rag/data/models
# HF_HUB_CACHE=~/mcp-servers/qdrant-rag/data/models
# HF_HUB_DISABLE_TELEMETRY=1
# HF_HUB_DISABLE_SYMLINKS_WARNING=1
# HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1
# ====================
# Memory Management Configuration (v0.3.3+)
# ====================
# Global Memory Management
# Enable unified memory management
# QDRANT_MEMORY_MANAGEMENT_ENABLED=true
# Total memory limit for all components (8GB)
# QDRANT_TOTAL_MEMORY_LIMIT_MB=8000
# Start cleanup when memory exceeds this (6GB)
# QDRANT_CLEANUP_THRESHOLD_MB=6000
# Aggressive cleanup threshold (7GB)
# QDRANT_AGGRESSIVE_THRESHOLD_MB=7000
# Check memory every 3 minutes
# QDRANT_CLEANUP_INTERVAL_SECONDS=180
# Run garbage collection every 5 minutes
# QDRANT_GC_INTERVAL_SECONDS=300
# Component-Specific Memory Limits
# Specialized Embeddings
# Max memory for embedding models (4GB)
# QDRANT_EMBEDDINGS_MAX_MEMORY_MB=4000
# Max number of models in memory
# QDRANT_EMBEDDINGS_MAX_MODELS=3
# Progressive Context Cache
# Max memory for semantic cache (200MB)
# QDRANT_PROGRESSIVE_CACHE_MAX_MEMORY_MB=200
# Max cached search results
# QDRANT_PROGRESSIVE_CACHE_MAX_ITEMS=100
# Context Tracking
# Max memory for session tracking (100MB)
# QDRANT_CONTEXT_TRACKING_MAX_MEMORY_MB=100
# Max files to track
# QDRANT_CONTEXT_TRACKING_MAX_FILES=100
# Max timeline events
# QDRANT_CONTEXT_TRACKING_MAX_EVENTS=500