MCP Memory Service

Apache 2.0

774

Overview InspectNew Endpoints Schema Related Servers Reviews Score

base.py•5.5 kB

# Copyright 2024 Heinrich Krupp # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Base classes and interfaces for document ingestion. """ import logging from abc import ABC, abstractmethod from dataclasses import dataclass from pathlib import Path from typing import List, Dict, Any, Optional, AsyncGenerator from datetime import datetime logger = logging.getLogger(__name__) @dataclass class DocumentChunk: """ Represents a chunk of text extracted from a document. Attributes: content: The extracted text content metadata: Additional metadata about the chunk chunk_index: Position of this chunk in the document source_file: Original file path """ content: str metadata: Dict[str, Any] chunk_index: int source_file: Path def __post_init__(self): """Add default metadata after initialization.""" if 'source' not in self.metadata: self.metadata['source'] = str(self.source_file) if 'chunk_index' not in self.metadata: self.metadata['chunk_index'] = self.chunk_index if 'extracted_at' not in self.metadata: self.metadata['extracted_at'] = datetime.now().isoformat() @dataclass class IngestionResult: """ Result of document ingestion operation. Attributes: success: Whether ingestion was successful chunks_processed: Number of chunks created chunks_stored: Number of chunks successfully stored errors: List of error messages encountered source_file: Original file that was processed processing_time: Time taken to process in seconds """ success: bool chunks_processed: int chunks_stored: int errors: List[str] source_file: Path processing_time: float @property def success_rate(self) -> float: """Calculate success rate as percentage.""" if self.chunks_processed == 0: return 0.0 return (self.chunks_stored / self.chunks_processed) * 100 class DocumentLoader(ABC): """ Abstract base class for document loaders. Each document format (PDF, text, etc.) should implement this interface to provide consistent document processing capabilities. """ def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200): """ Initialize document loader. Args: chunk_size: Target size for text chunks in characters chunk_overlap: Number of characters to overlap between chunks """ self.chunk_size = chunk_size self.chunk_overlap = chunk_overlap self.supported_extensions: List[str] = [] @abstractmethod def can_handle(self, file_path: Path) -> bool: """ Check if this loader can handle the given file. Args: file_path: Path to the file to check Returns: True if this loader can process the file """ pass @abstractmethod async def extract_chunks(self, file_path: Path, **kwargs) -> AsyncGenerator[DocumentChunk, None]: """ Extract text chunks from a document. Args: file_path: Path to the document file **kwargs: Additional options specific to the loader Yields: DocumentChunk objects containing extracted text and metadata Raises: FileNotFoundError: If the file doesn't exist ValueError: If the file format is not supported Exception: Other processing errors """ pass async def validate_file(self, file_path: Path) -> None: """ Validate that a file can be processed. Args: file_path: Path to validate Raises: FileNotFoundError: If file doesn't exist ValueError: If file is not supported or invalid """ if not file_path.exists(): raise FileNotFoundError(f"File not found: {file_path}") if not file_path.is_file(): raise ValueError(f"Path is not a file: {file_path}") if not self.can_handle(file_path): raise ValueError(f"File format not supported: {file_path.suffix}") def get_base_metadata(self, file_path: Path) -> Dict[str, Any]: """ Get base metadata common to all document types. Args: file_path: Path to the file Returns: Dictionary of base metadata """ stat = file_path.stat() return { 'source_file': str(file_path), 'file_name': file_path.name, 'file_extension': file_path.suffix.lower(), 'file_size': stat.st_size, 'modified_time': datetime.fromtimestamp(stat.st_mtime).isoformat(), 'loader_type': self.__class__.__name__ }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/doobidoo/mcp-memory-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server