# base.py • 5.76 kB
#!/usr/bin/env python3
"""
Base Document Adapter
Defines the interface that all document adapters must implement.
"""
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

from mcp.types import Resource
@dataclass
class SearchResult:
    """Represents a search result from a document source.

    Attributes:
        title: Title of the matching document.
        content: Text associated with the match.
        url: URL of the matching document.
        source: Identifier of the document source that produced the result.
        score: Numeric score for the result (presumably relevance —
            confirm against the adapters); defaults to 0.0.
        metadata: Free-form extra data. Every instance gets its own empty
            dict when the argument is omitted.
    """

    title: str
    content: str
    url: str
    source: str
    score: float = 0.0
    # default_factory gives each instance a fresh dict and keeps the default
    # consistent with the annotation (previously the default was None).
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Coerce an explicitly passed ``metadata=None`` to an empty dict."""
        # Kept for backward compatibility: callers that pass None explicitly
        # have always received an empty dict on the instance.
        if self.metadata is None:
            self.metadata = {}
@dataclass
class DocumentContent:
    """Represents content from a document.

    Attributes:
        title: Title of the document.
        content: Body of the document.
        url: URL of the document.
        source: Identifier of the document source the content came from.
        content_type: Type label for ``content``; defaults to
            ``"text/markdown"``.
        last_modified: Last-modification timestamp, or ``None`` (the default)
            when not provided.
        metadata: Free-form extra data. Every instance gets its own empty
            dict when the argument is omitted.
    """

    title: str
    content: str
    url: str
    source: str
    content_type: str = "text/markdown"
    last_modified: Optional[datetime] = None
    # default_factory gives each instance a fresh dict and keeps the default
    # consistent with the annotation (previously the default was None).
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Coerce an explicitly passed ``metadata=None`` to an empty dict."""
        # Kept for backward compatibility: callers that pass None explicitly
        # have always received an empty dict on the instance.
        if self.metadata is None:
            self.metadata = {}
class BaseDocumentAdapter(ABC):
    """Base class for all document adapters.

    Each adapter is responsible for connecting to a specific documentation
    platform (GitBook, Notion, Confluence, etc.) and providing a standardized
    interface for accessing documentation content.

    Attributes:
        config: The configuration dictionary passed to the constructor.
        name: The class name with every ``"Adapter"`` substring removed and
            the remainder lower-cased (e.g. ``GitBookAdapter`` -> ``"gitbook"``).
        _initialized: Starts as ``False``. This base class never sets it to
            ``True``; subclasses are expected to do so (presumably at the end
            of ``initialize()``) so that ``_ensure_initialized()`` passes.
    """

    def __init__(self, config: Dict[str, Any]):
        """Initialize the adapter with configuration.

        Args:
            config: Configuration dictionary specific to the adapter
        """
        self.config = config
        self.name = type(self).__name__.replace("Adapter", "").lower()
        self._initialized = False

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the adapter.

        This method should perform any necessary setup, such as:
        - Validating configuration
        - Testing API connections
        - Caching authentication tokens
        - Building initial resource indexes

        Raises:
            ValueError: If configuration is invalid
            ConnectionError: If unable to connect to the service
        """

    @abstractmethod
    async def list_resources(self) -> List[Resource]:
        """List all available resources from this document source.

        Returns:
            List of MCP Resource objects representing available documents

        Raises:
            RuntimeError: If adapter is not initialized
            ConnectionError: If unable to fetch resources
        """

    @abstractmethod
    async def get_content(self, resource_path: str) -> DocumentContent:
        """Get content for a specific resource.

        Args:
            resource_path: Path/identifier for the resource

        Returns:
            DocumentContent object with the resource content

        Raises:
            ValueError: If resource_path is invalid
            FileNotFoundError: If resource doesn't exist
            ConnectionError: If unable to fetch content
        """

    @abstractmethod
    async def search(self, query: str, limit: int = 10) -> List[SearchResult]:
        """Search for content within this document source.

        Args:
            query: Search query string
            limit: Maximum number of results to return

        Returns:
            List of SearchResult objects ordered by relevance

        Raises:
            ValueError: If query is empty or invalid
            ConnectionError: If unable to perform search
        """

    @abstractmethod
    async def get_structure(self) -> str:
        """Get the hierarchical structure of the documentation.

        Returns:
            String representation of the document structure (e.g., markdown outline)

        Raises:
            ConnectionError: If unable to fetch structure
        """

    async def health_check(self) -> bool:
        """Check if the adapter is healthy and can connect to its service.

        The default check simply awaits ``list_resources()``. Any
        ``Exception`` it raises is logged (with traceback) and reported as
        unhealthy instead of propagating.

        Returns:
            True if healthy, False otherwise
        """
        try:
            # Basic health check - try to list resources
            await self.list_resources()
        except Exception:
            # Deliberately best-effort: never raise from a health probe, but
            # record why it failed so the cause is not silently lost.
            logging.getLogger(__name__).warning(
                "Health check failed for adapter %s", self.name, exc_info=True
            )
            return False
        return True

    def get_resource_uri(self, resource_path: str) -> str:
        """Generate a standardized URI for a resource.

        The path is interpolated as-is; no escaping or slash normalization
        is applied.

        Args:
            resource_path: Path/identifier for the resource

        Returns:
            Standardized URI in format: anydocs://{adapter_name}/{resource_path}
        """
        return f"anydocs://{self.name}/{resource_path}"

    def _ensure_initialized(self) -> None:
        """Ensure the adapter is initialized before operations.

        Raises:
            RuntimeError: If adapter is not initialized
        """
        if not self._initialized:
            raise RuntimeError(f"Adapter {self.name} is not initialized. Call initialize() first.")

    def _validate_config(self, required_keys: List[str]) -> None:
        """Validate that required configuration keys are present.

        Only key presence in ``self.config`` is checked; the values
        themselves are not inspected.

        Args:
            required_keys: List of required configuration keys

        Raises:
            ValueError: If required keys are missing
        """
        missing_keys = [key for key in required_keys if key not in self.config]
        if missing_keys:
            raise ValueError(f"Missing required configuration keys: {missing_keys}")

    def __str__(self) -> str:
        """Return ``ClassName(name=..., initialized=...)``."""
        return f"{self.__class__.__name__}(name={self.name}, initialized={self._initialized})"

    def __repr__(self) -> str:
        """Return the same text as ``__str__``."""
        return self.__str__()