Skip to main content
Glama
omniwaifu

Pydantic AI Documentation Server

by omniwaifu
parser.py — 4.59 kB
"""Parsing utilities for the cloned Pydantic documentation tree.

Walks the repository's ``docs`` directory, reads each Markdown file, and
turns it into a :class:`ParsedDocument` keyed by a URL-safe document ID.
"""

import logging
import re
from pathlib import Path
from typing import Dict, List, Optional

from .models import ParsedDocument
from .repository_manager import get_repo_path

logger = logging.getLogger(__name__)


def get_pydantic_docs_path() -> Path:
    """Returns the absolute path to the 'docs' directory within the cloned Pydantic repository."""
    return get_repo_path() / "docs"


def extract_title_from_content(content: str, file_path: Path) -> str:
    """
    Extracts the title from Markdown content.

    Looks for the first H1 header (``# Title``). If not found, uses the
    filename (stem) as a fallback, formatted to be more readable.
    """
    heading_match = re.search(r"^\s*#\s+([^\n]+)", content, re.MULTILINE)
    if heading_match:
        return heading_match.group(1).strip()
    # Fallback: 'my_page-name' -> 'My Page Name'
    return file_path.stem.replace("-", " ").replace("_", " ").title()


def create_document_id(relative_path_str: str) -> str:
    """
    Creates a unique and clean ID for a document based on its relative path string.

    Example: 'usage/models.md' -> 'usage-models'
    """
    if relative_path_str.lower().endswith(".md"):
        clean_path = relative_path_str[:-3]
    else:
        clean_path = relative_path_str
    clean_path = clean_path.replace("/", "-")
    clean_path = re.sub(r"[^a-zA-Z0-9-]", "", clean_path)
    clean_path = clean_path.lower()
    # Collapse runs of dashes and trim them from the ends.
    clean_path = re.sub(r"-+", "-", clean_path).strip("-")
    return clean_path if clean_path else "root"


def parse_markdown_file(
    file_path: Path, docs_base_dir: Path
) -> Optional[ParsedDocument]:
    """
    Parses a single Markdown file into a ParsedDocument object.

    Args:
        file_path: Absolute path to the Markdown file.
        docs_base_dir: Absolute path to the root of the documentation
            directory (e.g., .../pydantic_repo/docs). Used to determine
            the relative path for the document.

    Returns:
        A ParsedDocument object if successful, None otherwise. Errors are
        logged rather than raised so one bad file cannot abort a batch.
    """
    try:
        if not file_path.is_file():
            logger.warning("Path is not a file, skipping: %s", file_path)
            return None
        # read_text opens and closes the file for us; docs are UTF-8.
        content = file_path.read_text(encoding="utf-8")
        relative_path_str = file_path.relative_to(docs_base_dir).as_posix()
        return ParsedDocument(
            id=create_document_id(relative_path_str),
            path=relative_path_str,
            title=extract_title_from_content(content, file_path),
            content=content,
        )
    except Exception as e:  # deliberate best-effort: log and skip the file
        logger.error("Error parsing Markdown file %s: %s", file_path, e, exc_info=True)
        return None


def find_all_markdown_files(start_dir: Path) -> List[Path]:
    """
    Recursively finds all Markdown (.md) files in the given directory.

    Returns an empty list (with a warning) if start_dir is not a directory.
    """
    if not start_dir.is_dir():
        logger.warning("Cannot find Markdown files: %s is not a directory.", start_dir)
        return []
    # rglob can match directories named '*.md'; keep regular files only.
    return [path for path in start_dir.rglob("*.md") if path.is_file()]


def parse_all_documents() -> Dict[str, ParsedDocument]:
    """
    Parses all Markdown documents from the Pydantic documentation directory.

    Skips files if they cannot be parsed. Duplicate IDs are logged and the
    later document overwrites the earlier one.

    Returns:
        A dictionary mapping document IDs to ParsedDocument objects.
    """
    documents: Dict[str, ParsedDocument] = {}
    pydantic_docs_dir = get_pydantic_docs_path()
    # is_dir() is False for missing paths too, so a separate exists() check is redundant.
    if not pydantic_docs_dir.is_dir():
        logger.error(
            "Pydantic documentation directory not found or is not a directory: %s",
            pydantic_docs_dir,
        )
        return documents

    logger.info("Starting parsing of Markdown files from: %s", pydantic_docs_dir)
    parsed_count = 0
    failed_count = 0
    for md_file_path in find_all_markdown_files(pydantic_docs_dir):
        doc = parse_markdown_file(md_file_path, pydantic_docs_dir)
        if doc is None:
            failed_count += 1
            continue
        if doc.id in documents:
            logger.warning(
                "Duplicate document ID '%s' generated for paths: '%s' and '%s'. Overwriting.",
                doc.id,
                documents[doc.id].path,
                doc.path,
            )
        documents[doc.id] = doc
        parsed_count += 1

    logger.info(
        "Finished parsing. Successfully parsed: %d documents. Failed to parse: %d files.",
        parsed_count,
        failed_count,
    )
    return documents

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/omniwaifu/pydantic-ai-docs-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.