Memex

MIT License
Overview InspectNew Schema Related Servers Reviews Score
mcp-memex
src
mcp_memex
import re
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class MarkdownSection:
    """One header-delimited section of a markdown document.

    Sections refer to one another by ``index`` rather than by object
    reference: ``parent_index`` and ``child_indices`` hold those indices.
    """

    # Path identifying the document this section belongs to
    path: str
    # Position of this section within the list of sections for the document
    index: int
    # Header level (1-6); 0 is used for a document that has no headers
    level: int
    # The header text, without the leading '#' characters
    header: str
    # Full content of the section, including the header line itself
    content: str
    # Index of the parent section, or None for a top-level section
    parent_index: Optional[int]
    # Indices of the sections nested directly beneath this one
    child_indices: List[int]

def _find_headers(markdown_text: str) -> List[tuple]:
    """Locate ATX headers (``# Title``) that lie outside fenced code blocks.

    Returns a list of ``(hashes, header_text, start_offset)`` tuples in
    document order, where ``start_offset`` is the index of the first ``#``
    of the header line within ``markdown_text``.
    """
    # [ \t]+ rather than \s+: \s also matches "\n", which let a bare "##"
    # line swallow the next non-blank line as its title. \S requires the
    # header to have visible text on the same line.
    header_pattern = re.compile(r'(#{1,6})[ \t]+(\S.*)')
    # A fence is up to 3 spaces of indent followed by 3+ backticks or tildes.
    fence_pattern = re.compile(r' {0,3}(`{3,}|~{3,})(.*)')

    headers = []
    open_fence = ""  # Marker that opened the code block we are inside, if any
    offset = 0  # Offset of the current line within markdown_text
    for line in markdown_text.split("\n"):
        fence = fence_pattern.match(line)
        if open_fence:
            # Inside a code block: only a matching closing fence matters.
            # It must use the same character, be at least as long as the
            # opening marker, and carry no trailing text.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not fence.group(2).strip()
            ):
                open_fence = ""
        elif fence and not (fence.group(1)[0] == "`" and "`" in fence.group(2)):
            # A backtick fence whose trailing text contains a backtick is
            # inline code (e.g. ```x```), not the start of a code block.
            open_fence = fence.group(1)
        else:
            match = header_pattern.fullmatch(line)
            if match:
                headers.append((match.group(1), match.group(2), offset))
        offset += len(line) + 1  # +1 for the "\n" removed by split()
    return headers


def split_markdown_sections(markdown_text: str, path: str) -> List[MarkdownSection]:
    """
    Split markdown text into sections based on headers while maintaining hierarchy.
    Returns a list of MarkdownSection objects.

    A header is a line that starts with 1-6 '#' characters, followed by
    spaces or tabs and some text. Lines inside fenced code blocks (``` or
    ~~~) are never treated as headers, so '#' comments in code samples do
    not split a section.

    Each section's content runs from its header line up to the next header
    (or the end of the text) and is stripped of surrounding whitespace.
    Text that precedes the first header is not part of any section. If the
    text contains no headers at all, a single section with level 0 and an
    empty header holding the entire text is returned.

    Example:
    # Header 1
    Content 1
    ## Subheader 1
    Content 2
    # Header 2
    Content 3

    Will create 3 sections with appropriate parent-child relationships.

    Args:
        markdown_text: The markdown document to split.
        path: Value stored in the ``path`` field of every returned section.

    Returns:
        The sections in document order; each section's ``index`` equals its
        position in the returned list.
    """
    # Find all headers with their positions
    headers = _find_headers(markdown_text)

    if not headers:
        # If no headers found, treat entire text as one section
        return [MarkdownSection(
            path=path,
            index=0,
            level=0,
            header="",
            content=markdown_text,
            parent_index=None,
            child_indices=[],
        )]

    # Create sections
    sections: List[MarkdownSection] = []
    # Chain of currently open ancestors, outermost first
    section_stack: List[MarkdownSection] = []

    for i, (hashes, header_text, start_pos) in enumerate(headers):
        level = len(hashes)
        # Get section content (from this header to next header or end)
        end_pos = headers[i + 1][2] if i < len(headers) - 1 else len(markdown_text)
        content = markdown_text[start_pos:end_pos].strip()

        # Create new section
        section = MarkdownSection(
            path=path,
            index=i,
            level=level,
            header=header_text.strip(),
            content=content,
            parent_index=None,
            child_indices=[],
        )

        # Update parent-child relationships: drop every open section at the
        # same or a deeper level, so the top of the stack (if any) is the
        # nearest preceding section with a strictly smaller level.
        while section_stack and section_stack[-1].level >= level:
            section_stack.pop()

        if section_stack:
            parent = section_stack[-1]
            section.parent_index = parent.index
            parent.child_indices.append(section.index)

        section_stack.append(section)
        sections.append(section)

    return sections