# .env.example
# CodeGraph Configuration Example
# Copy this file to .env and update the values for your environment
# ============================================================================
# CodeGraph Core Configuration (Simplified Setup)
# ============================================================================
# Minimal setup: let CodeGraph auto-detect the embedding provider
# (ONNX, Ollama, or OpenAI). To pin a provider explicitly, see the
# "Embedding Provider Configuration" section for the accepted values.
CODEGRAPH_EMBEDDING_PROVIDER=auto
# That's all that is needed for basic usage; CodeGraph will auto-detect everything else.
# Uncomment and customize the settings below if you need more control.
# ============================================================================
# Storage and Installation Configuration
# ============================================================================
# Storage path for CodeGraph data (indexes, cache, etc.)
# CODEGRAPH_STORAGE=/path/to/storage
# CODEGRAPH_STORAGE_PATH=/path/to/storage # Alternative name
# Installation directory for binaries
# CODEGRAPH_INSTALL_DIR=$HOME/.local/bin
# Project identification (for multi-project setups)
# CODEGRAPH_PROJECT_ID=my-project
# CODEGRAPH_ORGANIZATION_ID=my-org
# CODEGRAPH_REPOSITORY_URL=https://github.com/user/repo
# CODEGRAPH_DOMAIN=example.com
# ============================================================================
# Embedding Provider Configuration
# ============================================================================
# Provider options: "auto", "onnx", "ollama", "openai", "jina", or "lmstudio"
# CODEGRAPH_EMBEDDING_PROVIDER=auto
# ONNX: Specify model path (or leave empty for auto-detection from HuggingFace cache)
# CODEGRAPH_LOCAL_MODEL=/path/to/your/onnx/model
# Ollama: Specify embedding model name
# CODEGRAPH_EMBEDDING_MODEL=all-minilm:latest
# CODEGRAPH_OLLAMA_URL=http://localhost:11434
# LM Studio: Best for MLX + Flash Attention 2 (recommended on macOS)
# Default: jina-embeddings-v4 (2048 or 1024 dimensions)
# CODEGRAPH_EMBEDDING_PROVIDER=lmstudio
# CODEGRAPH_EMBEDDING_MODEL=jinaai/jina-embeddings-v4
# CODEGRAPH_LMSTUDIO_URL=http://localhost:1234
# CODEGRAPH_EMBEDDING_DIMENSION=2048
# Batch size for embedding generation (applies to all providers)
# CODEGRAPH_EMBEDDING_BATCH_SIZE=32 # Default: 32, valid range: 1-4096
# OpenAI: Model name (API key: set OPENAI_API_KEY, shown in the LLM Configuration section below)
# CODEGRAPH_EMBEDDING_MODEL=text-embedding-3-small
# Jina AI: Cloud embeddings with reranking (requires JINA_API_KEY)
# CODEGRAPH_EMBEDDING_PROVIDER=jina
# JINA_EMBEDDING_MODEL=jina-embeddings-v4
# JINA_EMBEDDINGS_MODEL=jina-embeddings-v4 # Alternative name
# JINA_EMBEDDING_DIMENSION=2048 # also supports 1024, 512, 256, but only 1024 has a pre-defined column and HNSW index in the SurrealDB schema
# JINA_API_KEY=your-jina-api-key-here
# JINA_MAX_TEXTS=512 # Uses the Jina API batch functionality (max 512 documents with 8192 tokens each); remember to set --max-concurrent 1 when indexing
# JINA_MAX_TOKENS=7000
# JINA_API_BASE=https://api.jina.ai/v1
# JINA_API_TASK=code.passage # used when embedding data, code.query is used when searching data
# JINA_TASK=code.passage # Alternative name
# JINA_LATE_CHUNKING=false
# JINA_TRUNCATE=true # truncate texts and embeddings if over limit
# JINA_REQUEST_DELAY_MS=600 # small delay not to throttle the API when batching
# Jina Reranking Configuration
# CODEGRAPH_RERANKING_PROVIDER=jina
# CODEGRAPH_RERANK_CANDIDATES=512 # Leverage Jina API Batch functionality also here
# JINA_ENABLE_RERANKING=true
# JINA_RERANKING_ENABLED=true # Alternative name
# JINA_RERANKING_MODEL=jina-reranker-v3
# JINA_RERANKING_TOP_N=10
# Jina Relationship Embeddings Configuration
# JINA_REL_BATCH_SIZE=50
# JINA_REL_MAX_TEXTS=50
# Jina Batching for Large Indexing Operations
# CODEGRAPH_JINA_BATCH_SIZE=2000
# CODEGRAPH_JINA_BATCH_MINUTES=9.0 # used by estimate command, this is how long it took on average to index the codegraph codebase nodes with Jina
# ============================================================================
# Dual-Mode Search Configuration
# ============================================================================
# CodeGraph supports two search modes based on CODEGRAPH_EMBEDDING_PROVIDER:
#
# Local Mode (FAISS + local/ollama embeddings)
# ---------------------------------------------
# - Uses FAISS for in-memory vector search
# - Embeddings: ONNX, Ollama, or LM Studio
# - Best for: Desktop development, privacy-focused setups
# - Requires: Build with --features faiss
# Example:
# CODEGRAPH_EMBEDDING_PROVIDER=onnx # or ollama or lmstudio
#
# Cloud Mode (SurrealDB HNSW + Jina embeddings + reranking)
# ----------------------------------------------------------
# - Uses SurrealDB HNSW indexes for scalable vector search
# - Embeddings: Jina AI (variable Matryoshka dimensions - check what the model outputs and adjust the HNSW vector dims in schema/codegraph.surql)
# - Supported Jina AI embedding models include e.g. jina-embeddings-v4
# - Reranking: Jina reranker-v3 for improved relevance
# - Best for: Cloud deployments, multi-user systems, scalability
# - Requires: SurrealDB instance, Jina API key
# Example:
# CODEGRAPH_EMBEDDING_PROVIDER=jina
# JINA_API_KEY=your-jina-api-key-here
# Cloud features toggle
# CODEGRAPH_CLOUD_ENABLED=false
# ============================================================================
# SurrealDB Configuration (required for cloud mode)
# ============================================================================
# SurrealDB connection (local or cloud)
# CODEGRAPH_SURREALDB_URL=ws://localhost:3004
# SURREALDB_URL=ws://localhost:3004 # Alternative name
# CODEGRAPH_SURREALDB_NAMESPACE=codegraph
# SURREALDB_NAMESPACE=codegraph # Alternative name
# CODEGRAPH_SURREALDB_DATABASE=main
# SURREALDB_DATABASE=main # Alternative name
# CODEGRAPH_SURREALDB_USERNAME=root
# SURREALDB_USERNAME=root # Alternative name
# CODEGRAPH_SURREALDB_PASSWORD=root
# SURREALDB_PASSWORD=root # Alternative name
#
# Important: HNSW index dimension must match embedding provider
# - Jina: Variable Matryoshka dimensions depending on model: 2048, 1024, 512, 256
# - OpenAI: Small 1536 dimensions, Large 3072 dimensions
# - Local ONNX: typically 384 qdrant/all-mini-llm-onnx
# - Local Ollama: qwen3-embedding (0.6b-8b): 1024, 2048, 4096
# - 384 (all-minilm:latest)
# - 1024 (qwen3-embedding:0.6b)
# - 1536 (text-embedding-3-small)
# - 2048 (qwen3-embedding:4b, jina-embeddings-v4)
# - 3072 (text-embedding-3-large)
# - 4096 (qwen3-embedding:8b)
# The embedding fields and HNSW indexes for the dimensions listed above have been built into the SurrealDB schema and CodeGraph codebase
# For pure speed use onnx or ollama all-minilm
# For considerably better retrieval switch to Ollama and qwen3-embedding:0.6b
# Scale the qwen3-embedding model size on Ollama according to how critical accuracy is
# For enhancing local accuracy enable CODEGRAPH_RERANKING_PROVIDER=lmstudio and e.g. CODEGRAPH_RERANKING_MODEL=qwen-reranker-3:0.6b
# For true SOTA use jina provider and jina-embeddings-v4 with the jina-reranker-v3 - takes longer to index but works better
# Hierarchical config system also supports CODEGRAPH__* prefix for nested config
# Example: CODEGRAPH__DATABASE__BACKEND=surrealdb
# Example: CODEGRAPH__DATABASE__SURREALDB__CONNECTION=ws://localhost:8000
# Example: CODEGRAPH__DATABASE__SURREALDB__PASSWORD=your_password
# Example: CODEGRAPH__SERVER__PORT=8080
# Example: CODEGRAPH__LOGGING__LEVEL=debug
# ============================================================================
# LLM Configuration (for local insights generation)
# ============================================================================
# Leave empty to use context-only mode (fastest, recommended for agents like Claude/GPT-4)
# Set to enable local LLM insights generation
# LM Studio with DeepSeek Coder v2 Lite Instruct (or whatever fits in your vGPU memory)
# Supported LLM provider options: "auto", "onnx", "lmstudio", "openai", "anthropic" or "ollama"
# Superior MLX support, Flash Attention 2, KV-cache and Distillation model support on macOS
# CODEGRAPH_LLM_PROVIDER=lmstudio
# LLM_PROVIDER=lmstudio # Alternative name
# CODEGRAPH_MODEL=lmstudio-community/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
# CODEGRAPH_LMSTUDIO_URL=http://localhost:1234
# CODEGRAPH_CONTEXT_WINDOW=32000
# CODEGRAPH_TEMPERATURE=0.1
# Reasoning effort for complex queries (affects token usage)
# CODEGRAPH_REASONING_EFFORT=medium
# Ollama (alternative)
# LLM model (e.g., "qwen2.5-coder:14b", "Kimi-K2-Instruct")
# CODEGRAPH_MODEL=qwen2.5-coder:14b
# CODEGRAPH_OLLAMA_URL=http://localhost:11434
# CODEGRAPH_CONTEXT_WINDOW=252000 # Max for Ollama depends on the model though
# Anthropic (cloud - 200K/1M tokens)
# CODEGRAPH_LLM_PROVIDER=anthropic
# CODEGRAPH_MODEL=sonnet[1m]
# ANTHROPIC_API_KEY=sk-ant-your-key-here
# CODEGRAPH_CONTEXT_WINDOW=1000000 # 200K/1M tokens
# OpenAI (cloud - 200K/400K tokens)
# CODEGRAPH_LLM_PROVIDER=openai
# CODEGRAPH_MODEL=gpt-5-codex-mini
# OPENAI_API_KEY=sk-your-key-here
# OPENAI_ORG_ID=your_fabulous_org
# CODEGRAPH_CONTEXT_WINDOW=200000 # 200K/400K tokens
# CODEGRAPH_REASONING_BUDGET=medium
# xAI (cloud - 252K/2M context window, $0.50-$1.50/M tokens!)
# CODEGRAPH_LLM_PROVIDER=openai # OpenAI Responses API compatible providers work
# CODEGRAPH_MODEL=grok-4-fast # or grok-code-fast-1 252K tokens the price is same
# XAI_API_KEY=xai-your-key-here
# CODEGRAPH_CONTEXT_WINDOW=2000000 # 2M tokens!
# CODEGRAPH_REASONING_BUDGET=high
# MCP-server code insights agent max output tokens - uses the CODEGRAPH_MODEL
# MCP_CODE_AGENT_MAX_OUTPUT_TOKENS=52000
# ============================================================================
# Performance & Caching Configuration
# ============================================================================
# Performance mode (affects optimization settings)
# CODEGRAPH_PERFORMANCE_MODE=balanced # Options: high_speed, balanced, low_memory
# Cache configuration
# CODEGRAPH_CACHE_SIZE=1000 # Maximum cache entries
# CODEGRAPH_CACHE_TTL=1800 # Cache TTL in seconds
# CODEGRAPH_ENABLE_CACHE=true
# Symbol indexing batch sizes
# CODEGRAPH_SYMBOL_BATCH_SIZE=500 # Batch size for symbol processing
# CODEGRAPH_SYMBOL_MAX_CONCURRENT=4 # Max concurrent symbol processing
# CODEGRAPH_SYMBOL_DB_BATCH_SIZE=1000 # Batch size for database writes
# ============================================================================
# Qwen-Coder-2.5-128K Integration (Legacy)
# ============================================================================
# Qwen model configuration
# CODEGRAPH_ENABLE_QWEN=true
# CODEGRAPH_QWEN_MAX_TOKENS=128000 # Max completion tokens (0 disables limit)
# CODEGRAPH_QWEN_TIMEOUT_SECS=180 # Request timeout before fallback (0 disables)
# CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000 # Connection timeout to Ollama
# ============================================================================
# Server Configuration
# ============================================================================
# Server host and port (for HTTP/REST API)
# CODEGRAPH_HOST=127.0.0.1
# CODEGRAPH_PORT=3000
# Logging
# -------
# RUST_LOG sets the log level for CodeGraph core logging.
# Accepted levels: trace, debug, info, warn, error
# - "warn": recommended during indexing; keeps the TUI output clean
# - "info": useful for development/debugging
RUST_LOG=warn
# ============================================================================
# Testing & Development Configuration
# ============================================================================
# Path to codegraph binary (for testing)
# CODEGRAPH_BIN=./target/release/codegraph
# CODEGRAPH_BIN=./target/debug/codegraph
# Command to run codegraph (alternative to binary path)
# CODEGRAPH_CMD="cargo run -p codegraph-mcp --bin codegraph --"
# ============================================================================
# Security Configuration (for production deployments)
# ============================================================================
# JWT Authentication
# The JWT_SECRET value below is a placeholder: replace it with a secure random
# secret of at least 32 characters, and never commit the real value.
JWT_SECRET=replace_with_secure_random_secret_minimum_32_characters_long
# NOTE(review): presumably the token lifetime, in hours — confirm
JWT_EXPIRY_HOURS=24
# API Key Configuration
# NOTE(review): presumably the prefix prepended to generated API keys — confirm
API_KEY_PREFIX=cgk
# Server Configuration
# NOTE(review): HOST/PORT are separate variables from CODEGRAPH_HOST/CODEGRAPH_PORT
# and CODEGRAPH_HTTP_HOST/CODEGRAPH_HTTP_PORT elsewhere in this file — confirm
# which component reads each pair.
HOST=127.0.0.1
PORT=8080
ENVIRONMENT=development
# TLS/HTTPS Configuration (for production)
# TLS_CERT_PATH=/path/to/certificate.pem
# TLS_KEY_PATH=/path/to/private-key.pem
# REQUIRE_TLS=true
# Database Configuration
# DATABASE_URL=postgresql://user:password@localhost/codegraph
# REDIS_URL=redis://localhost:6379
# Rate Limiting
# One limit per client tier (anonymous, user, premium, admin).
# NOTE(review): the unit (e.g. requests per minute) is not stated here — confirm
RATE_LIMIT_ANONYMOUS=60
RATE_LIMIT_USER=1000
RATE_LIMIT_PREMIUM=5000
RATE_LIMIT_ADMIN=10000
# Security Settings
# Maximum request size: 10485760 = 10 * 1024 * 1024 bytes (10MB).
# The note is kept on its own line because some env-file consumers treat a
# trailing "# ..." as part of the value.
MAX_REQUEST_SIZE=10485760
SESSION_TIMEOUT_HOURS=24
PASSWORD_MIN_LENGTH=12
# Logging (see RUST_LOG above for CodeGraph core logging)
# LOG_LEVEL=info # For application-level logging
# NOTE(review): presumably the level for security-related log events — confirm
SECURITY_LOG_LEVEL=warn
# Log output format
LOG_FORMAT=json
# Monitoring
# NOTE(review): presumably PROMETHEUS_PORT is where metrics are exposed when
# METRICS_ENABLED is true — confirm
METRICS_ENABLED=true
PROMETHEUS_PORT=9090
# External Services
# SENTRY_DSN=https://your-sentry-dsn
# ANALYTICS_KEY=your_analytics_key
# Development/Testing Only
# Review every flag in this group before deploying to production.
DEV_MODE=true
# Never set DISABLE_AUTH to true in production!
# (Warning kept on its own line: some env-file consumers treat a trailing
# "# ..." as part of the value, which would corrupt this security flag.)
DISABLE_AUTH=false
ENABLE_DEBUG_ENDPOINTS=false
# ============================================================================
# HTTP Server Configuration (when using --transport http)
# ============================================================================
# Host address the HTTP server binds to (default: 127.0.0.1, the loopback address)
# Use 0.0.0.0 to allow external connections
CODEGRAPH_HTTP_HOST=127.0.0.1
# Port for the HTTP server (default: 3000)
CODEGRAPH_HTTP_PORT=3000
# SSE (Server-Sent Events) keep-alive interval in seconds (default: 15)
# Prevents proxy timeouts for long-running agentic operations
CODEGRAPH_HTTP_KEEP_ALIVE=15