"""Shared export utilities for HTML and PDF exporters.
Functions extracted from html.py and pdf.py to eliminate duplication
of TOC rendering, breadcrumb building, and title extraction logic.
"""
from pathlib import Path
from typing import Any
from local_deepwiki.logging import get_logger
logger = get_logger(__name__)
def extract_title(md_file: Path) -> str:
    """Extract a title from a markdown file.

    Scans the file line by line and returns the text of the first
    ``# heading`` line or the first line wrapped entirely in ``**bold**``
    markers, whichever appears first. Bold-looking lines that carry no text
    between the markers (``**``, ``****``, or a ``***`` horizontal rule) are
    skipped instead of being returned as an empty title. When nothing
    matches, or the file cannot be read, the title is derived from the
    filename.

    Args:
        md_file: Path to the markdown file.

    Returns:
        Extracted title string.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        content = md_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # OSError: file access issues
        # UnicodeDecodeError: file encoding issues
        logger.debug(f"Could not extract title from {md_file}: {e}")
    else:
        for raw_line in content.split("\n"):
            line = raw_line.strip()
            if line.startswith("# "):
                return line[2:].strip()
            if line.startswith("**") and line.endswith("**"):
                title = line[2:-2].strip()
                # Ignore runs of asterisks (e.g. a "***" rule): they would
                # otherwise yield an empty or asterisk-only title.
                if title.strip("* "):
                    return title
    # Fallback: "getting_started-guide.md" -> "Getting Started Guide"
    return md_file.stem.replace("_", " ").replace("-", " ").title()
def render_toc_entry(entry: dict[str, Any], current_path: str, root_path: str) -> str:
    """Render a single TOC entry recursively as HTML.

    An entry with a truthy ``path`` is rendered as a link; otherwise it is
    rendered as a plain grouping label. Children, if any, are rendered
    recursively inside a ``toc-nested`` container.

    The number, title and link target are HTML-escaped before being
    interpolated, so characters such as ``<`` or ``&`` in a title cannot
    break the generated markup.

    Args:
        entry: TOC entry dict with number, title, path, children.
        current_path: Current page path for highlighting the active link.
        root_path: Relative path to root (e.g. ``"../"``).

    Returns:
        HTML string for this entry and its children.
    """
    # Function-scope import: stdlib only, and keeps this block self-contained.
    from html import escape

    children = entry.get("children")
    has_children = bool(children)
    parent_class = "toc-parent" if has_children else ""

    number = escape(str(entry.get("number", "")))
    title = escape(str(entry.get("title", "")))
    label = f'<span class="toc-number">{number}</span>\n<span>{title}</span>\n'

    pieces = [f'<div class="toc-item {parent_class}">']

    path = entry.get("path")
    if path:
        # Convert .md to .html for static export. Only the file extension is
        # rewritten (a ".md" elsewhere in the path is left alone), and any
        # "#fragment" suffix is preserved.
        page, hash_sep, fragment = path.partition("#")
        if page.endswith(".md"):
            page = f"{page[:-3]}.html"
        href = escape(f"{root_path}{page}{hash_sep}{fragment}")
        active = "active" if path == current_path else ""
        pieces.append(f'<a href="{href}" class="{active}">\n{label}</a>')
    else:
        # No link, just a grouping label
        pieces.append(f'<span class="toc-parent">\n{label}</span>')

    if has_children:
        pieces.append('<div class="toc-nested">')
        pieces.extend(
            render_toc_entry(child, current_path, root_path) for child in children
        )
        pieces.append("</div>")

    pieces.append("</div>")
    return "".join(pieces)
def render_toc(entries: list[dict[str, Any]], current_path: str, root_path: str) -> str:
    """Render a sequence of top-level TOC entries as one HTML string.

    Each entry is passed to ``render_toc_entry`` together with
    ``current_path`` and ``root_path``; the resulting fragments are joined
    with newlines.

    Args:
        entries: List of TOC entry dicts.
        current_path: Current page path for highlighting the active link.
        root_path: Relative path to root (e.g. ``"../"``).

    Returns:
        Combined HTML string for all entries (empty when ``entries`` is empty).
    """
    fragments = (
        render_toc_entry(item, current_path, root_path) for item in entries
    )
    return "\n".join(fragments)
def build_breadcrumb(rel_path: Path, root_path: str, wiki_path: Path) -> str:
    """Build breadcrumb navigation HTML.

    The trail always starts with a "Home" link, followed by one item per
    intermediate directory and finally the current page. A directory is
    rendered as a link only when it contains an ``index.md`` under
    ``wiki_path``; otherwise it is rendered as plain text. Display names are
    derived from path components (``_``/``-`` become spaces, then title-cased)
    and are HTML-escaped.

    Args:
        rel_path: Relative path of the current page within the wiki.
        root_path: Relative path to root (e.g. ``"../"``).
        wiki_path: Absolute path to the wiki directory (used to check
            for ``index.md`` files in intermediate directories).

    Returns:
        HTML string for the breadcrumb, or empty string for root pages
        (including an empty relative path).
    """
    # Function-scope import: stdlib only, and keeps this block self-contained.
    from html import escape

    parts = list(rel_path.parts)

    # Root pages don't need breadcrumbs. An empty path (e.g. Path("."), whose
    # parts are ()) is treated the same way instead of failing on parts[-1].
    if len(parts) <= 1:
        return ""

    *folders, current_page = parts

    # Always start with Home
    home_href = escape(f"{root_path}index.html")
    breadcrumb_items = [f'<a href="{home_href}">Home</a>']

    # Build path progressively; "/" is used because this becomes a URL.
    cumulative_path = ""
    for folder in folders:
        cumulative_path = f"{cumulative_path}/{folder}" if cumulative_path else folder
        display_name = escape(folder.replace("_", " ").replace("-", " ").title())

        # Link the folder only if it has its own index page
        if (wiki_path / cumulative_path / "index.md").exists():
            folder_href = escape(f"{root_path}{cumulative_path}/index.html")
            breadcrumb_items.append(f'<a href="{folder_href}">{display_name}</a>')
        else:
            breadcrumb_items.append(f"<span>{display_name}</span>")

    # Add current page name (without the .md extension)
    if current_page.endswith(".md"):
        current_page = current_page[:-3]
    current_page = escape(current_page.replace("_", " ").replace("-", " ").title())
    breadcrumb_items.append(f'<span class="current">{current_page}</span>')

    return (
        '<div class="breadcrumb">'
        + ' <span class="separator">›</span> '.join(breadcrumb_items)
        + "</div>"
    )