"""Pydantic models for semantic search responses."""
from typing import List, Optional
from pydantic import BaseModel, Field
from .base import BaseResponse
class SemanticSearchResult(BaseModel):
    """A single hit from semantic search, with chunk-level metadata.

    Carries the matched document's identity, an excerpt of the matching
    chunk, the relevance score, chunk/page position details, and — when
    context expansion is requested — the surrounding text of the match.
    """

    # --- Document identity ---
    id: int = Field(description="Document ID (int for all document types)")
    doc_type: str = Field(
        description="Document type (note, calendar_event, deck_card, etc.)"
    )
    title: str = Field(description="Document title")
    category: str = Field(
        description="Document category (notes) or location (calendar)", default=""
    )

    # --- Match content and scoring ---
    excerpt: str = Field(description="Excerpt from matching chunk")
    score: float = Field(
        description=(
            "Relevance score (≥ 0.0, higher is better). Score range "
            "depends on fusion method: RRF produces scores in [0.0, 1.0], DBSF "
            "can exceed 1.0 (sum of normalized scores from multiple systems)"
        )
    )

    # --- Chunk position within the document ---
    chunk_index: int = Field(description="Index of matching chunk in document")
    total_chunks: int = Field(description="Total number of chunks in document")
    chunk_start_offset: Optional[int] = Field(
        description="Character position where chunk starts in document", default=None
    )
    chunk_end_offset: Optional[int] = Field(
        description="Character position where chunk ends in document", default=None
    )

    # --- PDF pagination (only set for paged documents) ---
    page_number: Optional[int] = Field(
        description="Page number for PDF documents", default=None
    )
    page_count: Optional[int] = Field(
        description="Total number of pages in PDF document", default=None
    )

    # --- Context expansion (populated only when include_context=True) ---
    has_context_expansion: bool = Field(
        description="Whether context expansion was performed", default=False
    )
    marked_text: Optional[str] = Field(
        description="Full text with position markers around matched chunk",
        default=None,
    )
    before_context: Optional[str] = Field(
        description="Text before the matched chunk", default=None
    )
    after_context: Optional[str] = Field(
        description="Text after the matched chunk", default=None
    )
    has_before_truncation: Optional[bool] = Field(
        description="Whether before_context was truncated", default=None
    )
    has_after_truncation: Optional[bool] = Field(
        description="Whether after_context was truncated", default=None
    )
class SemanticSearchResponse(BaseResponse):
    """Response payload for a semantic search across all indexed Nextcloud apps.

    Attributes:
        results: Ranked semantic search hits.
        query: The query string that produced these results.
        total_found: Count of matching documents.
        search_method: Retrieval strategy that produced the results.
    """

    results: List[SemanticSearchResult] = Field(
        description="Semantic search results with similarity scores"
    )
    query: str = Field(description="The search query used")
    total_found: int = Field(description="Total number of documents found")
    search_method: str = Field(
        description="Search method used (semantic or hybrid)", default="semantic"
    )
class SamplingSearchResponse(BaseResponse):
    """Semantic search response enriched with an LLM answer via MCP sampling.

    Pairs a natural-language answer — produced by the MCP client's LLM
    through the sampling protocol — with the source documents that were
    supplied to the model as context, so callers can both read the answer
    for quick information and audit the underlying evidence.

    Attributes:
        query: Original user query.
        generated_answer: Answer text produced by the client's LLM.
        sources: Search results that served as generation context.
        total_found: Number of matching documents found.
        search_method: Always "semantic_sampling" for this response type.
        model_used: Identifier of the answering model (e.g. "claude-3-5-sonnet"),
            if reported.
        stop_reason: Why generation halted ("endTurn", "maxTokens", etc.).
    """

    query: str = Field(default=..., description="Original user query")
    generated_answer: str = Field(
        default=...,
        description="LLM-generated answer based on retrieved documents",
    )
    sources: List[SemanticSearchResult] = Field(
        description="Source documents with excerpts and relevance scores",
        default_factory=list,
    )
    total_found: int = Field(default=..., description="Total matching documents")
    search_method: str = Field(
        description="Search method used", default="semantic_sampling"
    )
    model_used: Optional[str] = Field(
        description="Model that generated the answer", default=None
    )
    stop_reason: Optional[str] = Field(
        description="Reason generation stopped", default=None
    )
class VectorSyncStatusResponse(BaseResponse):
    """Current state of the vector sync pipeline.

    Reports how many documents are already indexed in the Qdrant vector
    database, how many are still queued for processing, and whether the
    sync feature is enabled at all.

    Attributes:
        indexed_count: Documents present in the Qdrant vector database.
        pending_count: Documents waiting in the processing queue.
        status: One of "idle", "syncing", or "disabled".
        enabled: Whether vector sync is turned on.
    """

    indexed_count: int = Field(
        description="Number of documents indexed in vector database", default=0
    )
    pending_count: int = Field(
        description="Number of documents pending processing", default=0
    )
    status: str = Field(
        description='Sync status: "idle", "syncing", or "disabled"',
        default="disabled",
    )
    enabled: bool = Field(description="Whether vector sync is enabled", default=False)
# Names exported when this module is star-imported (explicit public API).
__all__ = [
    "SemanticSearchResult",
    "SemanticSearchResponse",
    "SamplingSearchResponse",
    "VectorSyncStatusResponse",
]