Skip to main content
Glama
models.py (5.91 kB)
"""Data models for document representation based on DOM-like tree structure.""" from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional @dataclass class DocumentNode: """Document node - similar to DOM node structure. Represents a hierarchical element in a document tree structure, supporting various content types like headings, paragraphs, lists, etc. """ type: str # node type: heading, paragraph, list, code, etc. level: Optional[int] = None # hierarchy level for headings id: str = "" title: str = "" # display title for headings content: str = "" # actual text content attributes: Dict[str, Any] = field(default_factory=dict) # additional metadata children: List["DocumentNode"] = field(default_factory=list) # child nodes parent: Optional["DocumentNode"] = None # parent node reference def __post_init__(self) -> None: """Initialize default values after dataclass creation.""" if self.attributes is None: self.attributes = {} if self.children is None: self.children = [] def add_child(self, child: "DocumentNode") -> None: """Add a child node and set parent reference.""" child.parent = self self.children.append(child) def get_ancestors(self) -> List["DocumentNode"]: """Get list of ancestor nodes from root to parent.""" ancestors = [] current = self.parent while current: ancestors.insert(0, current) current = current.parent return ancestors def get_depth(self) -> int: """Get the depth of this node in the tree.""" depth = 0 current = self.parent while current: depth += 1 current = current.parent return depth @dataclass class SearchResult: """Represents a search result within a document.""" node_id: str section: str # section title where match was found section_id: str # section node ID content: str # matching content type: str # node type line_number: Optional[int] = None context_before: str = "" context_after: str = "" @dataclass class NavigationContext: """Navigation context for a specific document node.""" current: 
Dict[str, str] # current node info parent: Optional[Dict[str, str]] = None # parent node info siblings: List[Dict[str, Any]] = field(default_factory=list) # sibling nodes children: List[Dict[str, str]] = field(default_factory=list) # child nodes breadcrumbs: List[Dict[str, str]] = field(default_factory=list) # ancestor path @dataclass class Document: """Represents a processed document with hierarchical structure. Uses a tree-based approach for better navigation and content organization, similar to DOM structure for web documents. """ file_path: Path title: str source_text: str # original document content source_format: str = "markdown" # document format root: Optional[DocumentNode] = None # document root node index: Dict[str, DocumentNode] = field(default_factory=dict) # node lookup index metadata: Dict[str, Any] = field(default_factory=dict) def __post_init__(self) -> None: """Initialize document structure after creation.""" if self.metadata is None: self.metadata = {} if self.root is None: self.root = DocumentNode(type="document", id="root", title=self.title) if not self.index: self.rebuild_index() def rebuild_index(self) -> None: """Rebuild the node lookup index.""" self.index.clear() if self.root: self._traverse_and_index(self.root) def _traverse_and_index(self, node: DocumentNode) -> None: """Recursively traverse and index all nodes.""" if node.id: self.index[node.id] = node for child in node.children: self._traverse_and_index(child) def get_node(self, node_id: str) -> Optional[DocumentNode]: """Get a node by ID using the index.""" return self.index.get(node_id) def get_nodes_by_type(self, node_type: str) -> List[DocumentNode]: """Get all nodes of a specific type.""" return [node for node in self.index.values() if node.type == node_type] def get_headings(self, max_level: Optional[int] = None) -> List[DocumentNode]: """Get all heading nodes, optionally filtered by maximum level.""" headings = [node for node in self.index.values() if node.type == "heading"] if 
max_level is not None: headings = [h for h in headings if h.level and h.level <= max_level] return sorted(headings, key=lambda h: (h.level or 0, h.id)) def get_outline(self, max_depth: int = 3) -> List[Dict[str, Any]]: """Get document outline as a structured list.""" outline = [] def build_outline(node: DocumentNode, depth: int = 0) -> None: if depth > max_depth: return if node.type == "heading" and node.level: outline.append( { "id": node.id, "title": node.title, "level": node.level, "depth": depth, "has_children": bool( [ child for child in node.children if child.type == "heading" ] ), } ) for child in node.children: build_outline(child, depth + 1 if node.type == "heading" else depth) if self.root: build_outline(self.root) return outline

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/shenyimings/DocNav-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.