# Registry Review MCP Configuration
# Copy this file to .env and fill in your values
# ============================================================================
# LLM Extraction (Phase 4.2)
# ============================================================================
# Anthropic API key for LLM-powered field extraction.
# Get your API key from: https://console.anthropic.com/
# Keep the real key only in your local .env file; never commit it.
REGISTRY_REVIEW_ANTHROPIC_API_KEY=
# Enable LLM extraction (true/false).
# Set to false to use regex-only extraction.
# NOTE(review): presumably the API key above must be set when this is true - confirm.
REGISTRY_REVIEW_LLM_EXTRACTION_ENABLED=false
# Claude model ID to use for extraction.
# Examples: claude-sonnet-4-5-20250929 (the value set below), claude-sonnet-4-20250514.
# NOTE(review): use a full model ID from Anthropic's published model list;
# verify any other ID before switching.
REGISTRY_REVIEW_LLM_MODEL=claude-sonnet-4-5-20250929
# Maximum tokens for each LLM response (1-8000).
REGISTRY_REVIEW_LLM_MAX_TOKENS=4000
# Sampling temperature for the LLM (0.0-1.0).
# Lower values give more repeatable output; 0.0 is the most deterministic setting,
# though identical output across calls is still not guaranteed.
REGISTRY_REVIEW_LLM_TEMPERATURE=0.0
# Confidence threshold for extracted fields (0.0-1.0).
# Fields scoring below this threshold are excluded from validation.
REGISTRY_REVIEW_LLM_CONFIDENCE_THRESHOLD=0.7
# ----------------------------------------------------------------------------
# Document Chunking (for large documents)
# ----------------------------------------------------------------------------
# Maximum input size in characters before chunking applies (default: 100000, ~25K tokens).
# Documents larger than this are split into overlapping chunks.
REGISTRY_REVIEW_LLM_MAX_INPUT_CHARS=100000
# Enable automatic chunking for large documents (true/false).
# When disabled, large documents may be truncated.
REGISTRY_REVIEW_LLM_ENABLE_CHUNKING=true
# Size of each chunk in characters (default: 80000, ~20K tokens).
# Must be less than REGISTRY_REVIEW_LLM_MAX_INPUT_CHARS.
REGISTRY_REVIEW_LLM_CHUNK_SIZE=80000
# Overlap between consecutive chunks in characters (default: 2000).
# Helps avoid missing information at chunk boundaries.
# NOTE(review): presumably must be smaller than REGISTRY_REVIEW_LLM_CHUNK_SIZE - confirm.
REGISTRY_REVIEW_LLM_CHUNK_OVERLAP=2000
# ----------------------------------------------------------------------------
# Image Processing
# ----------------------------------------------------------------------------
# Maximum number of images sent in a single API call (default: 20).
# Higher values give more context but raise the cost of each call.
REGISTRY_REVIEW_LLM_MAX_IMAGES_PER_CALL=20
# Warn when a document contains more images than this threshold (default: 10).
# Helps identify documents that may have high extraction costs.
REGISTRY_REVIEW_LLM_WARN_IMAGE_THRESHOLD=10
# ----------------------------------------------------------------------------
# Cost Management
# ----------------------------------------------------------------------------
# Maximum number of API calls allowed per session (cost management).
REGISTRY_REVIEW_MAX_API_CALLS_PER_SESSION=50
# Timeout for each API call, in seconds.
REGISTRY_REVIEW_API_CALL_TIMEOUT_SECONDS=30
# ============================================================================
# Logging
# ============================================================================
# Log level.
# NOTE(review): presumably a standard Python logging level name
# (DEBUG, INFO, WARNING, ERROR, CRITICAL) - confirm against the settings loader.
REGISTRY_REVIEW_LOG_LEVEL=INFO
# Log record format. The value uses %-style placeholders such as %(asctime)s and
# %(message)s, which match Python's logging format syntax.
# The value is unquoted and contains spaces and parentheses, so load this file
# with a dotenv parser rather than sourcing it from a shell.
REGISTRY_REVIEW_LOG_FORMAT=%(asctime)s [%(levelname)s] %(name)s: %(message)s
# ============================================================================
# Paths (optional - defaults to ./data)
# ============================================================================
# REGISTRY_REVIEW_DATA_DIR=/absolute/path/to/data
# REGISTRY_REVIEW_CHECKLISTS_DIR=/absolute/path/to/data/checklists
# REGISTRY_REVIEW_SESSIONS_DIR=/absolute/path/to/data/sessions
# REGISTRY_REVIEW_CACHE_DIR=/absolute/path/to/data/cache
# ============================================================================
# Performance
# ============================================================================
# Enable caching (true/false).
REGISTRY_REVIEW_ENABLE_CACHING=true
# Enable compression of cached data (true/false).
# NOTE(review): presumably only takes effect when caching is enabled - confirm.
REGISTRY_REVIEW_CACHE_COMPRESSION=true
# Maximum number of extractions allowed to run concurrently.
REGISTRY_REVIEW_MAX_CONCURRENT_EXTRACTIONS=5
# ============================================================================
# Validation
# ============================================================================
# Maximum gap, in days, allowed between dates for alignment validation.
REGISTRY_REVIEW_DATE_ALIGNMENT_MAX_DELTA_DAYS=120
# Enable fuzzy matching for land tenure names (true/false).
REGISTRY_REVIEW_LAND_TENURE_FUZZY_MATCH=true
# Minimum number of project ID occurrences required for the consistency check.
REGISTRY_REVIEW_PROJECT_ID_MIN_OCCURRENCES=3