MCP Document Indexer

MIT License

Overview InspectNew Endpoints Schema Related Servers Reviews Score

document-mcp
src

config.py•3.79 kB

"""Configuration management for MCP Document Indexer.""" import os from pathlib import Path from typing import List, Optional from pydantic import BaseModel, Field from dotenv import load_dotenv load_dotenv() class Config(BaseModel): """Configuration for the document indexer.""" watch_folders: List[Path] = Field( default_factory=list, description="Folders to monitor for documents" ) lancedb_path: Path = Field( default=Path("./vector_index"), description="Path to LanceDB storage" ) llm_model: str = Field( default="llama3.2:3b", description="Ollama model for document summarization" ) chunk_size: int = Field( default=1000, description="Size of text chunks in characters" ) chunk_overlap: int = Field( default=200, description="Overlap between chunks in characters" ) embedding_model: str = Field( default="all-MiniLM-L6-v2", description="Sentence transformer model for embeddings" ) file_extensions: List[str] = Field( default=[".pdf", ".docx", ".doc", ".txt", ".md", ".rtf"], description="File extensions to index" ) max_file_size_mb: int = Field( default=100, description="Maximum file size in MB to process" ) ollama_base_url: str = Field( default="http://localhost:11434", description="Ollama API base URL" ) batch_size: int = Field( default=10, description="Batch size for processing documents" ) @classmethod def from_env(cls) -> "Config": """Create config from environment variables.""" watch_folders_str = os.getenv("WATCH_FOLDERS", "") watch_folders = [] if watch_folders_str: for folder in watch_folders_str.split(","): folder = folder.strip() if folder: path = Path(folder).expanduser().absolute() if path.exists() and path.is_dir(): watch_folders.append(path) else: print(f"Warning: Folder {folder} does not exist or is not a directory") lancedb_path = os.getenv("LANCEDB_PATH", "./vector_index") llm_model = os.getenv("LLM_MODEL", "llama3.2:3b") chunk_size = int(os.getenv("CHUNK_SIZE", "1000")) chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "200")) embedding_model = 
os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2") file_extensions_str = os.getenv("FILE_EXTENSIONS", ".pdf,.docx,.doc,.txt,.md,.rtf") file_extensions = [ext.strip() for ext in file_extensions_str.split(",")] max_file_size_mb = int(os.getenv("MAX_FILE_SIZE_MB", "100")) ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") batch_size = int(os.getenv("BATCH_SIZE", "10")) return cls( watch_folders=watch_folders, lancedb_path=Path(lancedb_path).expanduser().absolute(), llm_model=llm_model, chunk_size=chunk_size, chunk_overlap=chunk_overlap, embedding_model=embedding_model, file_extensions=file_extensions, max_file_size_mb=max_file_size_mb, ollama_base_url=ollama_base_url, batch_size=batch_size ) def ensure_dirs(self): """Ensure all required directories exist.""" self.lancedb_path.mkdir(parents=True, exist_ok=True) _config: Optional[Config] = None def get_config() -> Config: """Get or create the global configuration.""" global _config if _config is None: _config = Config.from_env() _config.ensure_dirs() return _config

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yairwein/document-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.