YouTube Knowledge Base MCP

models.py•4.18 KiB

""" Pydantic models for the knowledge base entities. """ from datetime import datetime from typing import Optional, List, Dict from pydantic import BaseModel, Field import uuid class Source(BaseModel): """A source of knowledge (YouTube video, article, etc.).""" id: str = Field(default_factory=lambda: str(uuid.uuid4())) source_type: str = "youtube" # "youtube", "article", "podcast", etc. source_id: str # video_id, url hash, etc. # Core metadata title: str channel: Optional[str] = None description: Optional[str] = None url: Optional[str] = None # Extended metadata (flexible JSON for source-type-specific data) # Example for YouTube: {"duration": 3600, "view_count": 1000000, "upload_date": "2024-01-15"} metadata: dict = Field(default_factory=dict) # Organization tags: List[str] = Field(default_factory=list) collections: List[str] = Field(default_factory=list) # User additions user_summary: Optional[str] = None # Processing state is_processed: bool = False chunk_count: int = 0 embedding_model: Optional[str] = None # Timestamps - use datetime objects for PyArrow compatibility created_at: datetime = Field(default_factory=datetime.utcnow) updated_at: datetime = Field(default_factory=datetime.utcnow) class Chunk(BaseModel): """A semantic chunk of content from a source.""" id: str = Field(default_factory=lambda: str(uuid.uuid4())) source_id: str # Foreign key to Source.source_id # Content content: str chunk_index: int # Vector (populated during embedding) - excluded from JSON to reduce response size vector: Optional[List[float]] = Field(default=None, exclude=True) # Timestamps (for time-based media) timestamp_start: Optional[float] = None # seconds timestamp_end: Optional[float] = None # Denormalized source metadata (for efficient filtered search) # NOTE: source_title removed - fetched from Source in search enrichment # NOTE: collections removed - dead code, not used in MCP source_type: str = "youtube" source_channel: Optional[str] = None tags: List[str] = Field(default_factory=list) # Contextual retrieval context: Optional[str] = None # LLM-generated context for this chunk context_model: Optional[str] = None # Model used for context generation # Future SOTA RAG fields parent_id: Optional[str] = None # Small-to-Big Retrieval: link to parent chunk speakers: List[str] = Field(default_factory=list) # Speaker diarization for filtering chapter_index: Optional[int] = None # Chapter number for long videos # Timestamps - use datetime objects for PyArrow compatibility created_at: datetime = Field(default_factory=datetime.utcnow) class SearchResult(BaseModel): """A search result with relevance scoring.""" chunk: Chunk score: float # Raw similarity score recency_weight: float = 1.0 final_score: float = 0.0 # score * recency_weight # Source info (denormalized for display) source_title: str source_url: Optional[str] = None timestamp_link: Optional[str] = None # YouTube link with timestamp # === MCP Tool Result Models === class ProcessResult(BaseModel): """Result of processing a source.""" success: bool source_id: Optional[str] = None title: Optional[str] = None chunk_count: int = 0 error: Optional[str] = None class SearchResults(BaseModel): """Search results container.""" query: str total_results: int results: List[SearchResult] search_time_ms: float = 0.0 class OperationResult(BaseModel): """Generic operation result.""" success: bool message: str affected_count: int = 0 class StatsResult(BaseModel): """Knowledge base statistics.""" total_sources: int total_chunks: int sources_by_type: Dict[str, int] unique_tags: int unique_collections: int embedding_model: str class LibraryStats(BaseModel): """Library statistics for explore_library(view='stats').""" total_sources: int total_chunks: int sources_by_type: Dict[str, int] unique_tags: int tags: List[str] embedding_model: str data_path: str # Enables LLM to guide users on data location

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Miandari/youtube-knowledge-base-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

models.py•4.18 KiB