#!/usr/bin/env python3
"""
Configuration management for Generic PDF MCP Server
Handles loading, validation, and management of server configuration
"""
import json
import os
import sys
from pathlib import Path
from typing import Dict, Any, Optional, List
from dataclasses import dataclass, asdict
import jsonschema
@dataclass
class ServerConfig:
    """Identity metadata describing one server instance.

    The first three fields are mandatory; ``version`` falls back to
    ``"1.0.0"`` when it is not supplied.
    """

    # Identifier of the server.
    name: str
    # Label intended for display.
    display_name: str
    # Free-text description of the server.
    description: str
    # Version string of the server.
    version: str = "1.0.0"
@dataclass
class StorageConfig:
    """Storage and file management settings.

    ``markdown_folder`` and ``domain_keywords`` accept ``None`` as a
    "not provided" placeholder; ``__post_init__`` swaps the placeholder
    for a concrete value so the rest of the code never sees ``None``.
    """

    # Folder containing the PDF files.
    pdf_folder: str
    # Folder for markdown output; None means "<pdf_folder>/markdown".
    markdown_folder: Optional[str] = None
    # Keyword list; None means "no keywords" and becomes a fresh empty list.
    # Annotated Optional because the default really is None (a mutable list
    # default is not allowed on a dataclass field).
    domain_keywords: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Fill in defaults for fields that were left as ``None``."""
        if self.domain_keywords is None:
            # A new list per instance, so instances never share state.
            self.domain_keywords = []
        if self.markdown_folder is None:
            self.markdown_folder = os.path.join(self.pdf_folder, "markdown")
@dataclass
class ToolConfig:
    """Name and description of a single tool."""

    # Identifier of the tool.
    name: str
    # Text describing what the tool does.
    description: str
@dataclass
class ToolsConfig:
    """Bundle of the three tool definitions plus a shared result limit."""

    # Configuration of the search tool.
    search: ToolConfig
    # Configuration of the listing tool (field name is part of the public
    # interface, so it keeps the name ``list``).
    list: ToolConfig
    # Configuration of the content-retrieval tool.
    content: ToolConfig
    # Default number of results; 5 unless overridden.
    max_results_default: int = 5
@dataclass
class ProcessingConfig:
    """Document processing options; every field has a default."""

    # Caching toggle (on by default).
    cache_enabled: bool = True
    # Parallel processing toggle (on by default).
    parallel_processing: bool = True
    # File size limit, in megabytes.
    max_file_size_mb: int = 50
    # Context size setting (unit not stated in this file).
    context_size: int = 500
@dataclass
class GenericPDFServerConfig:
    """Top-level configuration object aggregating all four sections."""

    # Server identity section.
    server: ServerConfig
    # Storage / folder section.
    storage: StorageConfig
    # Tool definitions section.
    tools: ToolsConfig
    # Document processing section.
    processing: ProcessingConfig
class ConfigManager:
    """Configuration manager with validation and loading capabilities.

    Holds the path of the configuration file (``config_path``) and, once
    ``load_config`` has succeeded, the parsed configuration (``config``).
    """

    # JSON Schema for configuration validation
    CONFIG_SCHEMA = {
        "type": "object",
        "properties": {
            "server": {
                "type": "object",
                "properties": {
                    "name": {"type": "string", "pattern": "^[a-z0-9-_]+$"},
                    "display_name": {"type": "string", "minLength": 1},
                    "description": {"type": "string", "minLength": 1},
                    "version": {"type": "string", "pattern": r"^\d+\.\d+\.\d+$"}
                },
                "required": ["name", "display_name", "description"],
                "additionalProperties": False
            },
            "storage": {
                "type": "object",
                "properties": {
                    "pdf_folder": {"type": "string", "minLength": 1},
                    "markdown_folder": {"type": ["string", "null"]},
                    "domain_keywords": {
                        "type": "array",
                        "items": {"type": "string"},
                        "uniqueItems": True
                    }
                },
                "required": ["pdf_folder"],
                "additionalProperties": False
            },
            "tools": {
                "type": "object",
                "properties": {
                    "search": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string", "pattern": "^[a-z0-9_]+$"},
                            "description": {"type": "string", "minLength": 1}
                        },
                        "required": ["name", "description"],
                        "additionalProperties": False
                    },
                    "list": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string", "pattern": "^[a-z0-9_]+$"},
                            "description": {"type": "string", "minLength": 1}
                        },
                        "required": ["name", "description"],
                        "additionalProperties": False
                    },
                    "content": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string", "pattern": "^[a-z0-9_]+$"},
                            "description": {"type": "string", "minLength": 1}
                        },
                        "required": ["name", "description"],
                        "additionalProperties": False
                    },
                    "max_results_default": {"type": "integer", "minimum": 1, "maximum": 50}
                },
                "required": ["search", "list", "content"],
                "additionalProperties": False
            },
            "processing": {
                "type": "object",
                "properties": {
                    "cache_enabled": {"type": "boolean"},
                    "parallel_processing": {"type": "boolean"},
                    "max_file_size_mb": {"type": "integer", "minimum": 1, "maximum": 1000},
                    "context_size": {"type": "integer", "minimum": 100, "maximum": 2000}
                },
                "additionalProperties": False
            }
        },
        "required": ["server", "storage", "tools"],
        "additionalProperties": False
    }

    def __init__(self, config_path: Optional[str] = None):
        """Initialize configuration manager.

        Args:
            config_path: Path of the configuration file. When omitted (or
                empty), the standard locations are searched instead.
        """
        self.config_path = config_path or self._find_config_file()
        self.config: Optional[GenericPDFServerConfig] = None

    def _find_config_file(self) -> str:
        """Find configuration file in standard locations.

        Returns:
            The first candidate path that exists, or
            ``"./server_config.json"`` when none of them does.
        """
        possible_paths = [
            "./server_config.json",
            "./config/server_config.json",
            os.path.expanduser("~/.config/generic_pdf_server/config.json"),
            "/etc/generic_pdf_server/config.json"
        ]
        # Candidates are checked in order; the default is returned even
        # though it does not exist, so load_config can report it as missing.
        return next(
            (path for path in possible_paths if os.path.exists(path)),
            "./server_config.json",
        )

    def load_config(self) -> GenericPDFServerConfig:
        """Load and validate configuration from file.

        Returns:
            The parsed configuration, which is also stored in ``self.config``.

        Raises:
            FileNotFoundError: ``config_path`` does not exist.
            ValueError: The file is not valid JSON, fails schema validation,
                or cannot be converted into the configuration dataclasses.
                The original exception is attached as ``__cause__``.
        """
        # Checked up front (rather than relying on open()) so that every
        # "cannot see the file" case surfaces as FileNotFoundError, which
        # callers use as the signal to fall back to other sources.
        if not os.path.exists(self.config_path):
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")

        try:
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config_data = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in configuration file: {e}") from e

        # Validate against schema
        try:
            jsonschema.validate(config_data, self.CONFIG_SCHEMA)
        except jsonschema.ValidationError as e:
            raise ValueError(f"Configuration validation error: {e.message}") from e

        # Convert to dataclass structure
        try:
            server_config = ServerConfig(**config_data["server"])
            storage_config = StorageConfig(**config_data["storage"])

            # Tools need explicit handling because of the nested ToolConfig objects
            tools_data = config_data["tools"]
            tools_config = ToolsConfig(
                search=ToolConfig(**tools_data["search"]),
                list=ToolConfig(**tools_data["list"]),
                content=ToolConfig(**tools_data["content"]),
                max_results_default=tools_data.get("max_results_default", 5)
            )

            # "processing" is optional in the schema; defaults apply when absent
            processing_config = ProcessingConfig(
                **config_data.get("processing", {})
            )

            self.config = GenericPDFServerConfig(
                server=server_config,
                storage=storage_config,
                tools=tools_config,
                processing=processing_config
            )
            return self.config
        except Exception as e:
            # Kept broad so callers only ever have to handle ValueError here;
            # the underlying error stays reachable through __cause__.
            raise ValueError(f"Error creating configuration objects: {e}") from e

    def validate_paths(self) -> List[str]:
        """Validate that configured paths exist and are accessible.

        Note that this is not a pure check: a missing markdown folder is
        created (including parents).

        Returns:
            A list of human-readable error messages; empty when everything
            is fine. If no configuration is loaded the list contains the
            single entry ``"Configuration not loaded"``.
        """
        errors = []
        if not self.config:
            errors.append("Configuration not loaded")
            return errors

        # Check PDF folder
        pdf_path = Path(self.config.storage.pdf_folder)
        if not pdf_path.exists():
            errors.append(f"PDF folder does not exist: {pdf_path}")
        elif not pdf_path.is_dir():
            errors.append(f"PDF folder is not a directory: {pdf_path}")
        elif not os.access(pdf_path, os.R_OK):
            errors.append(f"PDF folder is not readable: {pdf_path}")

        # Check markdown folder (create if doesn't exist)
        markdown_path = Path(self.config.storage.markdown_folder)
        if not markdown_path.exists():
            try:
                markdown_path.mkdir(parents=True, exist_ok=True)
            except Exception as e:
                errors.append(f"Cannot create markdown folder {markdown_path}: {e}")
        elif not markdown_path.is_dir():
            errors.append(f"Markdown folder is not a directory: {markdown_path}")
        elif not os.access(markdown_path, os.W_OK):
            errors.append(f"Markdown folder is not writable: {markdown_path}")

        return errors

    def save_config(self, config: GenericPDFServerConfig, path: Optional[str] = None) -> None:
        """Save configuration to file.

        The configuration is validated against ``CONFIG_SCHEMA`` before
        anything is written.

        Args:
            config: Configuration to serialize.
            path: Destination file; defaults to ``self.config_path``.

        Raises:
            ValueError: The configuration does not satisfy the schema. The
                validation error is attached as ``__cause__``.
        """
        save_path = path or self.config_path

        # Convert dataclass to dict
        config_dict = {
            "server": asdict(config.server),
            "storage": asdict(config.storage),
            "tools": {
                "search": asdict(config.tools.search),
                "list": asdict(config.tools.list),
                "content": asdict(config.tools.content),
                "max_results_default": config.tools.max_results_default
            },
            "processing": asdict(config.processing)
        }

        # Validate before saving
        try:
            jsonschema.validate(config_dict, self.CONFIG_SCHEMA)
        except jsonschema.ValidationError as e:
            raise ValueError(f"Configuration validation error: {e.message}") from e

        # Save to file; a bare filename has an empty dirname, so use "."
        parent_dir = os.path.dirname(save_path) or "."
        os.makedirs(parent_dir, exist_ok=True)
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(config_dict, f, indent=2, ensure_ascii=False)

    def create_default_config(self, server_name: str, pdf_folder: str) -> GenericPDFServerConfig:
        """Create a default configuration.

        Args:
            server_name: Server identifier; hyphens are turned into spaces
                when building the display name and description.
            pdf_folder: Folder containing the PDF files.

        Returns:
            A new configuration object. It is not stored on the manager and
            is not validated against the schema.
        """
        spaced_name = server_name.replace('-', ' ')
        return GenericPDFServerConfig(
            server=ServerConfig(
                name=server_name,
                display_name=f"{spaced_name.title()} PDF Server",
                description=f"Search and retrieve information from {spaced_name} PDF documentation",
                version="1.0.0"
            ),
            storage=StorageConfig(
                pdf_folder=pdf_folder,
                markdown_folder=None,  # Will be set to pdf_folder/markdown
                domain_keywords=[]
            ),
            tools=ToolsConfig(
                search=ToolConfig(
                    name="search_docs",
                    description="Search through PDF documentation for specific topics or keywords"
                ),
                list=ToolConfig(
                    name="list_docs",
                    description="List all available PDF documents"
                ),
                content=ToolConfig(
                    name="get_document_content",
                    description="Get the full content of a specific PDF document"
                ),
                max_results_default=5
            ),
            processing=ProcessingConfig()
        )

    def get_env_var_names(self) -> Dict[str, str]:
        """Get environment variable names based on server name.

        The server name is upper-cased and hyphens become underscores, e.g.
        ``my-docs`` yields ``MY_DOCS_PDF_FOLDER``.

        Returns:
            Mapping with the keys ``"pdf_folder"`` and ``"markdown_folder"``.

        Raises:
            ValueError: No configuration has been loaded.
        """
        if not self.config:
            raise ValueError("Configuration not loaded")

        server_name_upper = self.config.server.name.upper().replace('-', '_')
        return {
            "pdf_folder": f"{server_name_upper}_PDF_FOLDER",
            "markdown_folder": f"{server_name_upper}_MARKDOWN_FOLDER"
        }
def load_config_from_env_or_file(config_path: Optional[str] = None) -> GenericPDFServerConfig:
    """Return the server configuration, preferring the file over the environment.

    The configuration file is tried first. Only when it is missing does the
    function build a default configuration from environment variables:
    ``QUANTCONNECT_PDF_FOLDER`` / ``PDF_FOLDER`` for the PDF folder,
    ``QUANTCONNECT_MARKDOWN_FOLDER`` / ``MARKDOWN_FOLDER`` for the markdown
    folder, and ``SERVER_NAME`` (default ``"generic-pdf-server"``).

    Raises:
        ValueError: No configuration file exists and no PDF folder variable
            is set. Errors other than a missing file raised while loading
            the file are propagated unchanged.
    """
    manager = ConfigManager(config_path)

    try:
        return manager.load_config()
    except FileNotFoundError:
        # A missing file is not fatal; continue with the environment.
        pass

    env = os.environ
    # Legacy-prefixed names win over the generic ones (backward compatibility).
    pdf_dir = env.get("QUANTCONNECT_PDF_FOLDER") or env.get("PDF_FOLDER")
    markdown_dir = env.get("QUANTCONNECT_MARKDOWN_FOLDER") or env.get("MARKDOWN_FOLDER")

    if not pdf_dir:
        raise ValueError(
            "No configuration file found and no PDF_FOLDER environment variable set. "
            "Please create a server_config.json file or set PDF_FOLDER environment variable."
        )

    fallback = manager.create_default_config(
        env.get("SERVER_NAME", "generic-pdf-server"),
        pdf_dir,
    )
    if markdown_dir:
        # Override the derived "<pdf_folder>/markdown" default.
        fallback.storage.markdown_folder = markdown_dir
    return fallback
if __name__ == "__main__":
    # Manual smoke test: load the configuration and print a short summary.
    # Any failure is reported on stderr and turned into exit status 1.
    try:
        config = load_config_from_env_or_file()
        server = config.server
        storage = config.storage
        tool_names = ", ".join(
            tool.name
            for tool in (config.tools.search, config.tools.list, config.tools.content)
        )
        print("Configuration loaded successfully:")
        print(f"Server: {server.name} ({server.version})")
        print(f"PDF Folder: {storage.pdf_folder}")
        print(f"Markdown Folder: {storage.markdown_folder}")
        print(f"Tools: {tool_names}")
    except Exception as e:
        print(f"Configuration error: {e}", file=sys.stderr)
        sys.exit(1)