MCP RAG

mcp-rag
src
models

document_model.py•3.52 KiB

"""
MCP server document model
=========================

This module defines the data structures used for documents and their
metadata in the RAG system.
"""

from datetime import datetime
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field


@dataclass
class DocumentModel:
    """Data model representing a processed document.

    The first six fields are required; every other field has a default.
    ``file_name`` and ``file_type`` may be passed as empty strings, in which
    case they are derived from ``file_path`` / ``file_name`` after
    construction (see ``__post_init__``).
    """

    # Basic file information
    file_path: str
    file_name: str
    file_type: str
    file_size: int

    # Raw and processed content
    content: str
    processed_content: str

    # Free-form metadata
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Processing information
    processing_method: str = "unknown"
    # Defaults to the (naive, local) time at which the instance is created
    processing_date: datetime = field(default_factory=datetime.now)

    # Structural information
    structural_elements: List[Any] = field(default_factory=list)
    total_elements: int = 0
    titles_count: int = 0
    tables_count: int = 0
    lists_count: int = 0

    # Chunking information
    chunks: List[str] = field(default_factory=list)
    chunk_count: int = 0

    def __post_init__(self):
        """Derive ``file_name`` and ``file_type`` when they were given empty."""
        if not self.file_name:
            # Use '/' as the separator when the path contains one; otherwise
            # fall back to the Windows-style backslash.
            separator = '/' if '/' in self.file_path else '\\'
            self.file_name = self.file_path.split(separator)[-1]

        if not self.file_type:
            if '.' in self.file_name:
                # Text after the last dot, lower-cased
                self.file_type = self.file_name.split('.')[-1].lower()
            else:
                self.file_type = "unknown"

    def to_dict(self) -> Dict[str, Any]:
        """Convert the model to a dictionary.

        ``processing_date`` is exported as an ISO-8601 string and
        ``structural_elements`` is exported as the *number* of elements, not
        the elements themselves. ``chunks`` is not included in the output.

        Returns:
            Dictionary with the document data.
        """
        return {
            'file_path': self.file_path,
            'file_name': self.file_name,
            'file_type': self.file_type,
            'file_size': self.file_size,
            'content': self.content,
            'processed_content': self.processed_content,
            'metadata': self.metadata,
            'processing_method': self.processing_method,
            'processing_date': self.processing_date.isoformat(),
            'structural_elements': len(self.structural_elements),
            'total_elements': self.total_elements,
            'titles_count': self.titles_count,
            'tables_count': self.tables_count,
            'lists_count': self.lists_count,
            'chunk_count': self.chunk_count,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'DocumentModel':
        """Create a DocumentModel from a dictionary.

        Accepts both dictionaries holding real field values and dictionaries
        produced by ``to_dict``. The input dictionary is never modified.

        Args:
            data: Dictionary with the document data. Keys must be field
                names of this dataclass.

        Returns:
            A DocumentModel instance.

        Raises:
            TypeError: If ``data`` contains keys that are not fields, or
                lacks a required field.
            ValueError: If ``processing_date`` is a string that is not a
                valid ISO-8601 date.
        """
        # Shallow copy so the conversions below do not leak into the
        # caller's dictionary.
        values = dict(data)

        # Convert the date from its ISO string form back to a datetime
        raw_date = values.get('processing_date')
        if isinstance(raw_date, str):
            values['processing_date'] = datetime.fromisoformat(raw_date)

        # ``to_dict`` exports only the element count. Storing that int in the
        # list field would make a later ``to_dict`` fail on ``len(int)``, so
        # restore an empty list and keep the count as ``total_elements`` when
        # the dictionary does not already carry one.
        elements = values.get('structural_elements')
        if isinstance(elements, int):
            values['structural_elements'] = []
            values.setdefault('total_elements', elements)

        return cls(**values)

    def get_summary(self) -> str:
        """Return a one-line summary of the document.

        Returns:
            A string with the file name, upper-cased file type, length of
            the processed content in characters, and the chunk count.
        """
        return f"Documento: {self.file_name} ({self.file_type.upper()}) - {len(self.processed_content)} caracteres - {self.chunk_count} chunks"

    def is_valid(self) -> bool:
        """Check whether the document is valid.

        Returns:
            True if ``file_path`` and ``file_name`` are non-empty and
            ``processed_content`` contains at least one non-whitespace
            character.
        """
        return bool(
            self.file_path
            and self.file_name
            and self.processed_content
            and self.processed_content.strip()
        )

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kalicyh/mcp-rag'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

document_model.py•3.52 KiB