Skip to main content
Glama

MCP RAG

by kalicyh
document_model.py3.61 kB
""" MCP 服务器文档模型 ================================== 此模块为 RAG 系统中的文档和元数据定义数据结构。 """ from datetime import datetime from typing import Dict, Any, List, Optional from dataclasses import dataclass, field @dataclass class DocumentModel: """ 表示已处理文档的数据模型。 """ # 文档基本信息 file_path: str file_name: str file_type: str file_size: int # 处理后的内容 content: str processed_content: str # 元数据 metadata: Dict[str, Any] = field(default_factory=dict) # 处理信息 processing_method: str = "unknown" processing_date: datetime = field(default_factory=datetime.now) # 结构信息 structural_elements: List[Any] = field(default_factory=list) total_elements: int = 0 titles_count: int = 0 tables_count: int = 0 lists_count: int = 0 # 分块信息 chunks: List[str] = field(default_factory=list) chunk_count: int = 0 def __post_init__(self): """数据类构造后初始化。""" if not self.file_name: self.file_name = self.file_path.split('/')[-1] if '/' in self.file_path else self.file_path.split('\\')[-1] if not self.file_type: self.file_type = self.file_name.split('.')[-1].lower() if '.' in self.file_name else "unknown" def to_dict(self) -> Dict[str, Any]: """ Convierte el modelo a un diccionario. Returns: Diccionario con todos los datos del documento """ return { 'file_path': self.file_path, 'file_name': self.file_name, 'file_type': self.file_type, 'file_size': self.file_size, 'content': self.content, 'processed_content': self.processed_content, 'metadata': self.metadata, 'processing_method': self.processing_method, 'processing_date': self.processing_date.isoformat(), 'structural_elements': len(self.structural_elements), 'total_elements': self.total_elements, 'titles_count': self.titles_count, 'tables_count': self.tables_count, 'lists_count': self.lists_count, 'chunk_count': self.chunk_count } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'DocumentModel': """ Crea un DocumentModel desde un diccionario. Args: data: Diccionario con los datos del documento Returns: Instancia de DocumentModel """ # Convertir la fecha de string a datetime if 'processing_date' in data and isinstance(data['processing_date'], str): data['processing_date'] = datetime.fromisoformat(data['processing_date']) return cls(**data) def get_summary(self) -> str: """ Obtiene un resumen del documento. Returns: Resumen del documento """ return f"Documento: {self.file_name} ({self.file_type.upper()}) - {len(self.processed_content)} caracteres - {self.chunk_count} chunks" def is_valid(self) -> bool: """ Verifica si el documento es válido. Returns: True si el documento es válido """ return ( bool(self.file_path) and bool(self.file_name) and bool(self.processed_content) and len(self.processed_content.strip()) > 0 )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kalicyh/mcp-rag'

If you have feedback or need assistance with the MCP directory API, please join our Discord server