Skip to main content
Glama
brockwebb

Open Census MCP Server

by brockwebb
config.py (10.6 kB)
""" Configuration management for Census MCP Server Handles paths, environment variables, and settings for containerized deployment. Supports both local development and Docker container environments. Pure Python implementation with direct Census API access. Updated for dual-path vector database architecture with OpenAI embeddings. """ import os import logging from pathlib import Path from typing import Dict, Any, Optional import json logger = logging.getLogger(__name__) class Config: """ Configuration manager for Census MCP Server. Loads settings from environment variables, config files, and defaults. Container-aware for seamless Docker deployment. Pure Python with direct Census API access. Supports dual-path vector database architecture with OpenAI embeddings. """ def __init__(self, config_file: Optional[str] = None): """ Initialize configuration. Args: config_file: Optional path to JSON config file """ self.config_file = config_file self._load_config() self._validate_config() def _load_config(self): """Load configuration from environment, files, and defaults.""" # Load .env file first try: from dotenv import load_dotenv env_file = self.base_dir / ".env" if hasattr(self, 'base_dir') else Path(__file__).parent.parent.parent / ".env" load_dotenv(env_file) logger.info(f"✅ Loaded .env file from {env_file}") except ImportError: logger.warning("python-dotenv not installed. 
Install with: pip install python-dotenv") except Exception as e: logger.warning(f"Could not load .env file: {e}") # Base paths - container-aware self.base_dir = Path(os.getenv('CENSUS_MCP_BASE', Path(__file__).parent.parent.parent)) self.data_dir = self.base_dir / "data" self.config_dir = self.base_dir / "config" # Knowledge Base paths self.knowledge_corpus_path = self.data_dir / "knowledge_corpus" # Dual-Path Vector Database Configuration self.knowledge_base_dir = self.base_dir / "knowledge-base" self.variables_db_path = self.knowledge_base_dir / "variables-faiss" self.methodology_db_path = self.knowledge_base_dir / "methodology-db" # Legacy vector DB path (for backward compatibility) self.vector_db_path = self.data_dir / "vector_db" self.vector_db_type = os.getenv('VECTOR_DB_TYPE', 'dual-path') # Census API Configuration self.census_api_key = os.getenv('CENSUS_API_KEY') self.census_base_url = os.getenv('CENSUS_BASE_URL', 'https://api.census.gov/data') # LLM Configuration self.default_llm = os.getenv('DEFAULT_LLM', 'claude') self.claude_api_key = os.getenv('ANTHROPIC_API_KEY') self.openai_api_key = os.getenv('OPENAI_API_KEY') # Embedding Configuration - Updated for OpenAI self.embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-large') self.embedding_dimension = self._get_embedding_dimension() # Server Configuration self.log_level = os.getenv('LOG_LEVEL', 'INFO') self.max_concurrent_requests = int(os.getenv('MAX_CONCURRENT_REQUESTS', '10')) self.request_timeout = int(os.getenv('REQUEST_TIMEOUT', '300')) # Vector Search Configuration self.vector_search_top_k = int(os.getenv('VECTOR_SEARCH_TOP_K', '5')) self.vector_search_threshold = float(os.getenv('VECTOR_SEARCH_THRESHOLD', '0.3')) # Container-specific settings self.is_container = os.getenv('CENSUS_MCP_CONTAINER', 'false').lower() == 'true' self.container_data_mount = os.getenv('CONTAINER_DATA_MOUNT', '/app/data') # Load from config file if provided if self.config_file and 
Path(self.config_file).exists(): self._load_config_file() # Load default config if it exists default_config = self.config_dir / "mcp_config.json" if default_config.exists(): self._load_config_file(str(default_config)) def _get_embedding_dimension(self) -> int: """Get embedding dimension based on model.""" model_dimensions = { # OpenAI models 'text-embedding-3-large': 3072, 'text-embedding-3-small': 1536, 'text-embedding-ada-002': 1536, # Legacy sentence transformer models (deprecated) 'all-mpnet-base-v2': 768, 'all-MiniLM-L6-v2': 384, 'all-MiniLM-L12-v2': 384, 'all-distilroberta-v1': 768, 'paraphrase-mpnet-base-v2': 768, } return int(os.getenv('EMBEDDING_DIMENSION', model_dimensions.get(self.embedding_model, 3072))) def _load_config_file(self, config_path: Optional[str] = None): """Load configuration from JSON file.""" try: config_path = config_path or self.config_file with open(config_path, 'r') as f: file_config = json.load(f) for key, value in file_config.items(): if not hasattr(self, key) or getattr(self, key) is None: setattr(self, key, value) logger.info(f"Loaded configuration from {config_path}") except Exception as e: logger.warning(f"Could not load config file {config_path}: {e}") def _validate_config(self): """Validate configuration and create necessary directories.""" # Create data directories self.data_dir.mkdir(parents=True, exist_ok=True) self.knowledge_corpus_path.mkdir(parents=True, exist_ok=True) # Dual-path database directories self.knowledge_base_dir.mkdir(parents=True, exist_ok=True) # Note: Don't auto-create variables-faiss and methodology-db as they should be built explicitly # Legacy vector DB directory (for backward compatibility) self.vector_db_path.mkdir(parents=True, exist_ok=True) # Validate embedding model if not self._validate_embedding_model(): logger.warning(f"Embedding model {self.embedding_model} may not be available.") # Log configuration status self._log_config_status() def _validate_embedding_model(self) -> bool: """Validate 
that the embedding model is available.""" try: if self.embedding_model.startswith('text-embedding-'): # OpenAI model - check for API key return bool(self.openai_api_key) elif self.embedding_model in ['all-mpnet-base-v2', 'all-MiniLM-L6-v2', 'all-MiniLM-L12-v2']: # Legacy sentence transformers model try: import sentence_transformers return True except ImportError: logger.warning("sentence-transformers not installed. Install with: pip install sentence-transformers") return False return True except Exception: return False def _log_config_status(self): """Log current configuration status.""" logger.info("Census MCP Server Configuration:") logger.info(f" Base directory: {self.base_dir}") logger.info(f" Data directory: {self.data_dir}") # Dual-path vector database status logger.info(f" Knowledge base directory: {self.knowledge_base_dir}") logger.info(f" Variables DB path: {self.variables_db_path}") logger.info(f" Variables DB exists: {self.variables_db_path.exists()}") logger.info(f" Methodology DB path: {self.methodology_db_path}") logger.info(f" Methodology DB exists: {self.methodology_db_path.exists()}") # Legacy path logger.info(f" Legacy vector DB path: {self.vector_db_path}") logger.info(f" Container mode: {self.is_container}") logger.info(f" Vector DB type: {self.vector_db_type}") # Updated embedding model logging if self.embedding_model.startswith('text-embedding-'): api_status = "with API key" if self.openai_api_key else "missing API key" logger.info(f" Embedding model: {self.embedding_model} (OpenAI, {api_status})") else: logger.info(f" Embedding model: {self.embedding_model} (local)") logger.info(f" Embedding dimensions: {self.embedding_dimension}") logger.info(f" Census API base: {self.census_base_url}") logger.info(f" Log level: {self.log_level}") # Check for API keys with updated messages api_keys_status = { "Census API": "✓" if self.census_api_key else "⚠ (optional)", "Claude API": "✓" if self.claude_api_key else "✗", } # OpenAI key status depends on 
embedding model if self.embedding_model.startswith('text-embedding-'): api_keys_status["OpenAI API"] = "✓" if self.openai_api_key else "✗ (required for embeddings)" else: api_keys_status["OpenAI API"] = "✓" if self.openai_api_key else "⚠ (not needed for local embeddings)" for api, status in api_keys_status.items(): logger.info(f" {api} key: {status}") def to_dict(self) -> Dict[str, Any]: """Convert configuration to dictionary (excluding sensitive data).""" config_dict = {} for attr_name in dir(self): if not attr_name.startswith('_') and not callable(getattr(self, attr_name)): value = getattr(self, attr_name) if 'api_key' not in attr_name.lower(): config_dict[attr_name] = str(value) if isinstance(value, Path) else value return config_dict # Global configuration instance _config_instance = None def get_config() -> Config: """Get the global configuration instance.""" global _config_instance if _config_instance is None: _config_instance = Config() return _config_instance def reload_config(config_file: Optional[str] = None) -> Config: """Reload the global configuration.""" global _config_instance _config_instance = Config(config_file) return _config_instance

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/brockwebb/open-census-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.