"""HTML export functionality for DeepWiki documentation."""
from __future__ import annotations

import argparse
import asyncio
import json
import re
import shutil
import sys
import time
from pathlib import Path
from typing import Any, cast

import markdown

from local_deepwiki.cli_progress import create_progress, is_interactive
from local_deepwiki.export.html_template import STATIC_HTML_TEMPLATE
from local_deepwiki.export.shared import build_breadcrumb
from local_deepwiki.export.shared import extract_title as _shared_extract_title
from local_deepwiki.export.shared import render_toc, render_toc_entry
from local_deepwiki.export.streaming import (
    ExportConfig,
    ExportResult,
    ProgressCallback,
    StreamingExporter,
    WikiPage,
    WikiPageIterator,
)
from local_deepwiki.logging import get_logger
logger = get_logger(__name__)
def render_markdown(content: str) -> str:
"""Render markdown to HTML."""
md = markdown.Markdown(
extensions=[
"fenced_code",
"tables",
"toc",
"nl2br",
]
)
return md.convert(content)
def fix_internal_links(html_content: str) -> str:
"""Convert internal .md links to .html links in rendered HTML.
Args:
html_content: HTML content with potential .md links.
Returns:
HTML content with .md links converted to .html links.
"""
# Match href attributes pointing to .md files (internal links only)
# Excludes http://, https://, and other protocol links
pattern = r'href="((?!https?://|mailto:|#)[^"]*\.md)(#[^"]*)?"'
def replace_link(match: re.Match[str]) -> str:
md_path = match.group(1)
anchor = match.group(2) or ""
html_path = md_path[:-3] + ".html" # Replace .md with .html
return f'href="{html_path}{anchor}"'
return re.sub(pattern, replace_link, html_content)
def add_external_link_targets(html_content: str) -> str:
"""Add target="_blank" to external links for opening in new tab.
Args:
html_content: HTML content with potential external links.
Returns:
HTML content with external links opening in new tabs.
"""
# Match href attributes pointing to http:// or https:// URLs
# that don't already have a target attribute
pattern = r'<a\s+href="(https?://[^"]+)"(?![^>]*target=)'
def add_target(match: re.Match[str]) -> str:
url = match.group(1)
return f'<a href="{url}" target="_blank" rel="noopener noreferrer"'
return re.sub(pattern, add_target, html_content)
def extract_title(md_file: Path) -> str:
"""Extract title from markdown file.
Delegates to ``shared.extract_title``.
"""
return _shared_extract_title(md_file)
class StreamingHtmlExporter(StreamingExporter):
"""Memory-efficient HTML exporter using streaming page iteration.
Writes each page to disk as it's processed, avoiding loading all
pages into memory at once. Suitable for large wikis.
"""
def __init__(
self,
wiki_path: Path,
output_path: Path,
config: ExportConfig | None = None,
*,
no_progress: bool = False,
):
"""Initialize the streaming HTML exporter.
Args:
wiki_path: Path to the .deepwiki directory.
output_path: Output directory for HTML files.
config: Export configuration.
no_progress: If True, disable progress bars.
"""
super().__init__(wiki_path, output_path, config)
self._no_progress = no_progress
async def export(
self, progress_callback: ProgressCallback | None = None
) -> ExportResult:
"""Export wiki to HTML with streaming.
Args:
progress_callback: Optional callback for progress updates.
Returns:
ExportResult with export statistics.
"""
start_time = time.monotonic()
errors: list[str] = []
logger.info(
"Starting streaming HTML export from %s to %s",
self.wiki_path,
self.output_path,
)
# Load TOC for navigation
await asyncio.to_thread(self.load_toc)
# Create output directory
await asyncio.to_thread(self.output_path.mkdir, parents=True, exist_ok=True)
# Copy search.json
search_src = self.wiki_path / "search.json"
if search_src.exists():
await asyncio.to_thread(
shutil.copy, search_src, self.output_path / "search.json"
)
logger.debug("Copied search.json to output directory")
# Get page count for progress
iterator = self.get_page_iterator()
total_pages = iterator.get_page_count()
# Report total pages at start
if progress_callback:
progress_callback(
0, total_pages, f"Starting HTML export ({total_pages} pages)"
)
# Export pages one at a time
exported = 0
async for page in iterator:
try:
await asyncio.to_thread(self._export_wiki_page, page)
exported += 1
if progress_callback:
progress_callback(exported, total_pages, f"Exported {page.path}")
# Release content from memory after writing
page.release_content()
except Exception as e: # noqa: BLE001 — export error boundary: one page failure must not abort entire export
error_msg = f"Failed to export {page.path}: {e}"
logger.warning(error_msg)
errors.append(error_msg)
# Report completion
if progress_callback:
progress_callback(
exported, total_pages, f"HTML export complete ({exported} pages)"
)
duration_ms = int((time.monotonic() - start_time) * 1000)
logger.info(
"Streaming HTML export complete: %d pages in %dms", exported, duration_ms
)
return ExportResult(
pages_exported=exported,
output_path=self.output_path,
duration_ms=duration_ms,
errors=errors,
)
def _export_wiki_page(self, page: WikiPage) -> None:
"""Export a single wiki page to HTML.
Args:
page: WikiPage object with content loaded on demand.
"""
rel_path = page.metadata.relative_path
logger.debug("Exporting page: %s", rel_path)
# Render markdown to HTML, fix internal links, and set external link targets
html_content = render_markdown(page.content)
html_content = fix_internal_links(html_content)
html_content = add_external_link_targets(html_content)
# Calculate depth for relative paths
depth = len(rel_path.parts) - 1
root_path = "../" * depth if depth > 0 else "./"
# Build TOC HTML with correct relative paths
toc_html = self._render_toc(self._toc_entries, str(rel_path), root_path)
# Build breadcrumb HTML
breadcrumb_html = self._build_breadcrumb(rel_path, root_path)
# Calculate search.json path relative to this page
search_json_path = root_path + "search.json"
# Render full HTML
html = STATIC_HTML_TEMPLATE.format(
title=page.title,
toc_html=toc_html,
breadcrumb_html=breadcrumb_html,
content_html=html_content,
search_json_path=search_json_path,
root_path=root_path,
)
# Write output file
output_file = self.output_path / rel_path.with_suffix(".html")
output_file.parent.mkdir(parents=True, exist_ok=True)
output_file.write_text(html)
@staticmethod
def _render_toc(
entries: list[dict[str, Any]], current_path: str, root_path: str
) -> str:
"""Render TOC entries as HTML. Delegates to shared.render_toc."""
return render_toc(entries, current_path, root_path)
@staticmethod
def _render_toc_entry(
entry: dict[str, Any], current_path: str, root_path: str
) -> str:
"""Render a single TOC entry recursively. Delegates to shared.render_toc_entry."""
return render_toc_entry(entry, current_path, root_path)
def _build_breadcrumb(self, rel_path: Path, root_path: str) -> str:
"""Build breadcrumb navigation HTML. Delegates to shared.build_breadcrumb."""
return build_breadcrumb(rel_path, root_path, self.wiki_path)
class HtmlExporter:
"""Export wiki markdown to static HTML files.
This is the synchronous wrapper class that maintains backwards compatibility.
For large wikis, use StreamingHtmlExporter directly for async streaming export.
"""
def __init__(
self,
wiki_path: Path,
output_path: Path,
*,
no_progress: bool = False,
):
"""Initialize the exporter.
Args:
wiki_path: Path to the .deepwiki directory
output_path: Output directory for HTML files
no_progress: If True, disable progress bars
"""
self.wiki_path = Path(wiki_path)
self.output_path = Path(output_path)
self.toc_entries: list[dict] = []
self._no_progress = no_progress
def export(self) -> int:
"""Export all wiki pages to HTML.
Returns:
Number of pages exported
"""
logger.info(
"Starting HTML export from %s to %s", self.wiki_path, self.output_path
)
# Check if we should use streaming mode
iterator = WikiPageIterator(self.wiki_path)
use_streaming = iterator.should_use_streaming()
if use_streaming:
logger.info("Large wiki detected, using streaming export mode")
return self._export_streaming()
return self._export_standard()
def _export_streaming(self) -> int:
"""Export using streaming mode for large wikis."""
streaming_exporter = StreamingHtmlExporter(
self.wiki_path,
self.output_path,
no_progress=self._no_progress,
)
# Run async export in event loop
with create_progress(disable=self._no_progress) as progress:
task_id = progress.add_task("Exporting HTML (streaming)", total=None)
def progress_callback(current: int, total: int, message: str) -> None:
progress.update(
task_id, total=total, completed=current, description=message
)
loop = asyncio.new_event_loop()
try:
result = loop.run_until_complete(
streaming_exporter.export(progress_callback=progress_callback)
)
finally:
loop.close()
return result.pages_exported
def _export_standard(self) -> int:
"""Export using standard mode (loads all pages in memory)."""
# Load TOC
toc_path = self.wiki_path / "toc.json"
if toc_path.exists():
toc_data = json.loads(toc_path.read_text())
self.toc_entries = toc_data.get("entries", [])
logger.debug("Loaded %s TOC entries", len(self.toc_entries))
# Create output directory
self.output_path.mkdir(parents=True, exist_ok=True)
# Copy search.json
search_src = self.wiki_path / "search.json"
if search_src.exists():
shutil.copy(search_src, self.output_path / "search.json")
logger.debug("Copied search.json to output directory")
# Find all markdown files
md_files = list(self.wiki_path.rglob("*.md"))
# Export with progress bar
exported = 0
with create_progress(disable=self._no_progress) as progress:
task = progress.add_task("Exporting HTML", total=len(md_files))
for md_file in md_files:
rel_path = md_file.relative_to(self.wiki_path)
progress.update(task, description=f"Exporting {rel_path.name}")
self._export_page(md_file, rel_path)
exported += 1
progress.update(task, advance=1)
logger.info("Exported %s pages to HTML", exported)
return exported
def _export_page(self, md_file: Path, rel_path: Path) -> None:
"""Export a single markdown page to HTML.
Args:
md_file: Path to the markdown file
rel_path: Relative path from wiki root
"""
logger.debug("Exporting page: %s", rel_path)
# Read and convert markdown, fix internal links, set external link targets
content = md_file.read_text()
html_content = render_markdown(content)
html_content = fix_internal_links(html_content)
html_content = add_external_link_targets(html_content)
title = extract_title(md_file)
# Calculate depth for relative paths
depth = len(rel_path.parts) - 1
root_path = "../" * depth if depth > 0 else "./"
# Build TOC HTML with correct relative paths
toc_html = self._render_toc(self.toc_entries, str(rel_path), root_path)
# Build breadcrumb HTML
breadcrumb_html = self._build_breadcrumb(rel_path, root_path)
# Calculate search.json path relative to this page
search_json_path = root_path + "search.json"
# Render full HTML
html = STATIC_HTML_TEMPLATE.format(
title=title,
toc_html=toc_html,
breadcrumb_html=breadcrumb_html,
content_html=html_content,
search_json_path=search_json_path,
root_path=root_path,
)
# Write output file
output_file = self.output_path / rel_path.with_suffix(".html")
output_file.parent.mkdir(parents=True, exist_ok=True)
output_file.write_text(html)
@staticmethod
def _render_toc(entries: list[dict], current_path: str, root_path: str) -> str:
"""Render TOC entries as HTML. Delegates to shared.render_toc."""
return render_toc(entries, current_path, root_path)
@staticmethod
def _render_toc_entry(entry: dict, current_path: str, root_path: str) -> str:
"""Render a single TOC entry recursively. Delegates to shared.render_toc_entry."""
return render_toc_entry(entry, current_path, root_path)
def _build_breadcrumb(self, rel_path: Path, root_path: str) -> str:
"""Build breadcrumb navigation HTML. Delegates to shared.build_breadcrumb."""
return build_breadcrumb(rel_path, root_path, self.wiki_path)
def export_to_html(
wiki_path: str | Path,
output_path: str | Path | None = None,
*,
no_progress: bool = False,
) -> str:
"""Export wiki to static HTML files.
Args:
wiki_path: Path to the .deepwiki directory
output_path: Output directory (default: {wiki_path}_html)
no_progress: If True, disable progress bars
Returns:
Path to the output directory
"""
wiki_path = Path(wiki_path)
if output_path is None:
output_path = wiki_path.parent / f"{wiki_path.name}_html"
else:
output_path = Path(output_path)
logger.info("Exporting wiki from %s to %s", wiki_path, output_path)
exporter = HtmlExporter(wiki_path, output_path, no_progress=no_progress)
count = exporter.export()
logger.info("HTML export complete: %s pages", count)
return f"Exported {count} pages to {output_path}"
def main() -> int:
"""CLI entry point for HTML export."""
parser = argparse.ArgumentParser(
description="Export DeepWiki documentation to static HTML"
)
parser.add_argument(
"wiki_path",
nargs="?",
default=".deepwiki",
help="Path to the .deepwiki directory (default: .deepwiki)",
)
parser.add_argument(
"--output", "-o", help="Output directory (default: {wiki_path}_html)"
)
parser.add_argument(
"--no-progress",
action="store_true",
help="Disable progress bars (for non-interactive use)",
)
args = parser.parse_args()
wiki_path = Path(args.wiki_path).resolve()
if not wiki_path.exists():
print(f"Error: Wiki path does not exist: {wiki_path}")
return 1
output_path = Path(args.output).resolve() if args.output else None
result = export_to_html(wiki_path, output_path, no_progress=args.no_progress)
print(result)
# Print location hint
actual_output = output_path or (wiki_path.parent / f"{wiki_path.name}_html")
print(f"\nOpen {actual_output}/index.html in a browser to view the documentation.")
return 0
if __name__ == "__main__":
exit(main())