# MCP Knowledge Server - Configuration
#
# This configuration file contains all supported settings.
# For environment variables, use format: KNOWLEDGE_<SECTION>__<KEY>=value
# Example: export KNOWLEDGE_EMBEDDING__DEVICE=cuda
# Storage paths
storage:
  documents_path: ./data/documents  # Where uploaded documents are stored
  vector_db_path: ./data/chromadb  # ChromaDB database location
  # NOTE(review): YAML does not expand '~' — confirm the app expands the user home.
  # Quoted so no YAML parser mis-handles the leading tilde.
  model_cache_path: "~/.cache/huggingface"  # Model cache directory
# Embedding model settings
embedding:
  model_name: sentence-transformers/all-MiniLM-L6-v2  # HuggingFace model
  batch_size: 32  # Batch size for embeddings (1-128)
  # NOTE(review): 'cuda' requires a GPU at runtime — confirm deployment targets
  # have one, or the consumer falls back to cpu gracefully.
  device: cuda  # Device: cpu or cuda
# Text chunking settings
chunking:
  chunk_size: 500  # Characters per chunk (100-2000)
  chunk_overlap: 50  # Overlap between chunks (0-500); must stay below chunk_size
  strategy: sentence  # Strategy: sentence, paragraph, or fixed
# Processing settings
processing:
  max_concurrent_tasks: 3  # Concurrent document processing (1-10)
  # NOTE(review): this conflicts with ocr.confidence_threshold (0.0) elsewhere in
  # this file — confirm which setting the OCR pipeline actually reads.
  ocr_confidence_threshold: 0.6  # OCR quality threshold (0.0-1.0)
  max_file_size_mb: 100  # Maximum file size (1-1000 MB)
# OCR settings
ocr:
  enabled: true  # Enable OCR processing
  language: eng  # OCR language code (eng, fra, deu, etc.)
  force_ocr: false  # Force OCR even when text extraction is available
  # NOTE(review): processing.ocr_confidence_threshold is 0.6 elsewhere in this
  # file — confirm which of the two thresholds the OCR pipeline actually reads.
  confidence_threshold: 0.0  # Accept all OCR results (per requirements)
# MCP server settings
mcp:
  host: 127.0.0.1  # Server host (127.0.0.1 for localhost only)
  port: 3000  # Server port (1024-65535)
  transport: http-streamable  # Transport: http-streamable (recommended), http (SSE), or stdio