# rag_models.py
"""RAG相关数据模型"""
from datetime import datetime
from typing import Optional, List, Dict, Any
from sqlalchemy import (
    Column, Integer, String, Text, DateTime, JSON,
    ForeignKey, Float, ARRAY, UniqueConstraint
)
from sqlalchemy.orm import relationship
from pgvector.sqlalchemy import Vector
from src.core.database import Base
class Document(Base):
"""文档表"""
__tablename__ = "documents"
id = Column(Integer, primary_key=True, autoincrement=True)
title = Column(String(255), nullable=False, index=True)
content = Column(Text, nullable=False)
    # `metadata` is reserved on declarative models (it is the MetaData object
    # on Base), so the attribute is doc_metadata; the column name stays "metadata".
    doc_metadata = Column("metadata", JSON, default=dict)
file_type = Column(String(50), index=True)
file_size = Column(Integer)
file_hash = Column(String(64), unique=True, index=True)
source_url = Column(Text)
    # Timestamps
created_at = Column(DateTime, default=datetime.utcnow, index=True)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
indexed_at = Column(DateTime, nullable=True)
    # Relationships
chunks = relationship("DocumentChunk", back_populates="document", cascade="all, delete-orphan")
def __repr__(self):
return f"<Document(id={self.id}, title='{self.title}', file_type='{self.file_type}')>"
def to_dict(self) -> Dict[str, Any]:
"""转换为字典"""
return {
"id": self.id,
"title": self.title,
"content": self.content,
"metadata": self.metadata,
"file_type": self.file_type,
"file_size": self.file_size,
"file_hash": self.file_hash,
"source_url": self.source_url,
"created_at": self.created_at.isoformat() if self.created_at else None,
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
"indexed_at": self.indexed_at.isoformat() if self.indexed_at else None,
"chunk_count": len(self.chunks) if self.chunks else 0,
}
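
# --- Usage sketch (illustrative, not part of the schema) ---------------------
# How a Document row might be created with a content hash sized for the
# 64-character file_hash column (a SHA-256 hex digest). The `session` argument
# is an assumed, caller-provided SQLAlchemy Session; adapt to your setup.
def create_document(session, title: str, content: str) -> "Document":
    """Insert a Document, deriving file_hash from the content (sketch only)."""
    import hashlib

    data = content.encode("utf-8")
    doc = Document(
        title=title,
        content=content,
        file_hash=hashlib.sha256(data).hexdigest(),
        file_size=len(data),
    )
    session.add(doc)
    session.commit()
    return doc
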
class DocumentChunk(Base):
"""文档块表"""
__tablename__ = "document_chunks"
id = Column(Integer, primary_key=True, autoincrement=True)
document_id = Column(Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False, index=True)
chunk_index = Column(Integer, nullable=False)
content = Column(Text, nullable=False)
    # Same reserved-name workaround as Document.doc_metadata.
    doc_metadata = Column("metadata", JSON, default=dict)
    # Vector embedding (1536 dimensions, matching OpenAI text-embedding-ada-002)
embedding = Column(Vector(1536), nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)
    # Relationships
document = relationship("Document", back_populates="chunks")
    # Unique constraint: chunk_index is unique within each document
__table_args__ = (
UniqueConstraint('document_id', 'chunk_index', name='uq_document_chunk'),
)
def __repr__(self):
return f"<DocumentChunk(id={self.id}, document_id={self.document_id}, chunk_index={self.chunk_index})>"
def to_dict(self) -> Dict[str, Any]:
"""转换为字典"""
return {
"id": self.id,
"document_id": self.document_id,
"chunk_index": self.chunk_index,
"content": self.content,
"metadata": self.metadata,
"has_embedding": self.embedding is not None,
"created_at": self.created_at.isoformat() if self.created_at else None,
}
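
# --- Usage sketch (illustrative) ----------------------------------------------
# A minimal semantic search over DocumentChunk.embedding using pgvector's
# cosine_distance comparator. `session` and `query_embedding` (a 1536-dim
# vector) are assumed to be supplied by the caller.
def search_similar_chunks(session, query_embedding, limit: int = 5):
    """Return the `limit` chunks nearest to query_embedding (sketch only)."""
    return (
        session.query(DocumentChunk)
        .filter(DocumentChunk.embedding.isnot(None))
        .order_by(DocumentChunk.embedding.cosine_distance(query_embedding))
        .limit(limit)
        .all()
    )
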
class QueryHistory(Base):
"""查询历史表"""
__tablename__ = "query_history"
id = Column(Integer, primary_key=True, autoincrement=True)
query_text = Column(Text, nullable=False)
query_embedding = Column(Vector(1536), nullable=True)
user_id = Column(String(255), index=True)
session_id = Column(String(255), index=True)
    # Query results
    results = Column(JSON, default=dict)
    matched_chunks = Column(ARRAY(Integer), default=list)
    similarity_scores = Column(ARRAY(Float), default=list)
    # Performance metrics
execution_time_ms = Column(Integer)
total_chunks_searched = Column(Integer)
created_at = Column(DateTime, default=datetime.utcnow, index=True)
def __repr__(self):
return f"<QueryHistory(id={self.id}, user_id='{self.user_id}', query_length={len(self.query_text)})>"
def to_dict(self) -> Dict[str, Any]:
"""转换为字典"""
return {
"id": self.id,
"query_text": self.query_text,
"user_id": self.user_id,
"session_id": self.session_id,
"results": self.results,
"matched_chunks": self.matched_chunks,
"similarity_scores": self.similarity_scores,
"execution_time_ms": self.execution_time_ms,
"total_chunks_searched": self.total_chunks_searched,
"created_at": self.created_at.isoformat() if self.created_at else None,
}
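
# --- Usage sketch (illustrative) ----------------------------------------------
# Persisting one QueryHistory row. The chunk ids, scores, timing, and the
# number of chunks scanned are assumed to come from the search pipeline;
# nothing here is computed.
def record_query(session, query_text, chunk_ids, scores, elapsed_ms, total_searched):
    """Log a query and its matches to query_history (sketch only)."""
    entry = QueryHistory(
        query_text=query_text,
        matched_chunks=list(chunk_ids),
        similarity_scores=list(scores),
        execution_time_ms=elapsed_ms,
        total_chunks_searched=total_searched,
    )
    session.add(entry)
    session.commit()
    return entry
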
# Pydantic models (for the API layer)
from pydantic import BaseModel, Field
class DocumentCreate(BaseModel):
"""创建文档的请求模型"""
title: str = Field(..., min_length=1, max_length=255)
content: str = Field(..., min_length=1)
metadata: Dict[str, Any] = Field(default_factory=dict)
file_type: Optional[str] = None
source_url: Optional[str] = None
class DocumentUpdate(BaseModel):
"""更新文档的请求模型"""
title: Optional[str] = Field(None, min_length=1, max_length=255)
content: Optional[str] = Field(None, min_length=1)
metadata: Optional[Dict[str, Any]] = None
source_url: Optional[str] = None
class DocumentResponse(BaseModel):
"""文档响应模型"""
id: int
title: str
content: str
metadata: Dict[str, Any]
file_type: Optional[str]
file_size: Optional[int]
file_hash: Optional[str]
source_url: Optional[str]
created_at: Optional[str]
updated_at: Optional[str]
indexed_at: Optional[str]
chunk_count: int = 0
class Config:
from_attributes = True
class DocumentChunkResponse(BaseModel):
"""文档块响应模型"""
id: int
document_id: int
chunk_index: int
content: str
metadata: Dict[str, Any]
has_embedding: bool
created_at: Optional[str]
class Config:
from_attributes = True
class QueryRequest(BaseModel):
"""查询请求模型"""
query: str = Field(..., min_length=1)
user_id: Optional[str] = None
session_id: Optional[str] = None
similarity_threshold: Optional[float] = Field(None, ge=0.0, le=1.0)
max_results: Optional[int] = Field(None, ge=1, le=100)
    search_type: str = Field(default="semantic", pattern="^(semantic|fulltext|hybrid)$")
class SearchResult(BaseModel):
"""搜索结果模型"""
chunk_id: int
document_id: int
document_title: str
content: str
similarity_score: float
metadata: Dict[str, Any] = Field(default_factory=dict)
class QueryResponse(BaseModel):
"""查询响应模型"""
query: str
results: List[SearchResult]
total_results: int
execution_time_ms: int
search_type: str
request_id: Optional[str] = None
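
# --- Usage sketch (illustrative) ----------------------------------------------
# How the API models fit together: an incoming payload is validated into a
# QueryRequest, and a QueryResponse is assembled from SearchResult items.
# The values below are placeholders, not real search output.
if __name__ == "__main__":
    request = QueryRequest(query="what is pgvector?", search_type="hybrid")
    response = QueryResponse(
        query=request.query,
        results=[
            SearchResult(
                chunk_id=1,
                document_id=1,
                document_title="pgvector notes",
                content="pgvector adds vector similarity search to PostgreSQL.",
                similarity_score=0.92,
            )
        ],
        total_results=1,
        execution_time_ms=12,
        search_type=request.search_type,
    )
    print(response.model_dump_json(indent=2))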