"""Wiki generation status management for incremental updates."""
from __future__ import annotations
import asyncio
import hashlib
import json
import time
from pathlib import Path
from typing import Any
from local_deepwiki.logging import get_logger
from local_deepwiki.models import (
IndexStatus,
WikiGenerationStatus,
WikiPage,
WikiPageStatus,
)
logger = get_logger(__name__)
class WikiStatusManager:
    """Manage wiki generation status for incremental updates."""

    WIKI_STATUS_FILE = "wiki_status.json"

    def __init__(self, wiki_path: Path):
        """Initialize the status manager.

        Args:
            wiki_path: Path to wiki output directory.
        """
        self.wiki_path = wiki_path
        # Track file hashes from index_status for incremental generation
        self._file_hashes: dict[str, str] = {}
        # Previous wiki generation status for incremental updates
        self._previous_status: WikiGenerationStatus | None = None
        # New page statuses for current generation
        self._page_statuses: dict[str, WikiPageStatus] = {}
        # Line info for source files (computed from chunks): path -> (start, end)
        self._file_line_info: dict[str, tuple[int, int]] = {}

    @property
    def file_hashes(self) -> dict[str, str]:
        """Get file hashes map."""
        return self._file_hashes

    @file_hashes.setter
    def file_hashes(self, value: dict[str, str]) -> None:
        """Set file hashes map."""
        self._file_hashes = value

    @property
    def file_line_info(self) -> dict[str, tuple[int, int]]:
        """Get file line info map."""
        return self._file_line_info

    @file_line_info.setter
    def file_line_info(self, value: dict[str, tuple[int, int]]) -> None:
        """Set file line info map."""
        self._file_line_info = value

    @property
    def page_statuses(self) -> dict[str, WikiPageStatus]:
        """Get page statuses map."""
        return self._page_statuses

    @property
    def previous_status(self) -> WikiGenerationStatus | None:
        """Get previous wiki generation status."""
        return self._previous_status

    async def load_status(self) -> WikiGenerationStatus | None:
        """Load previous wiki generation status.

        Returns:
            WikiGenerationStatus or None if not found.
        """
        status_path = self.wiki_path / self.WIKI_STATUS_FILE
        if not status_path.exists():
            return None

        def _read_status() -> WikiGenerationStatus | None:
            try:
                # JSON is UTF-8 by spec; be explicit so Windows' locale
                # default encoding cannot corrupt the round-trip.
                with open(status_path, encoding="utf-8") as f:
                    data = json.load(f)
                return WikiGenerationStatus.model_validate(data)
            except (json.JSONDecodeError, OSError, ValueError) as e:
                # json.JSONDecodeError: Corrupted or invalid JSON
                # OSError: File read issues
                # ValueError: Pydantic validation failure
                logger.warning("Failed to load wiki status from %s: %s", status_path, e)
                return None

        self._previous_status = await asyncio.to_thread(_read_status)
        return self._previous_status

    async def save_status(self, status: WikiGenerationStatus) -> None:
        """Save wiki generation status atomically.

        Writes to a sibling temp file and renames it into place so that a
        crash mid-write cannot leave a truncated status file (which would
        force a full rebuild on the next run).

        Args:
            status: The WikiGenerationStatus to save.
        """
        status_path = self.wiki_path / self.WIKI_STATUS_FILE
        tmp_path = status_path.with_name(status_path.name + ".tmp")
        data = status.model_dump()

        def _write_status() -> None:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            # Atomic on POSIX; on Windows this still replaces in one step.
            tmp_path.replace(status_path)

        await asyncio.to_thread(_write_status)

    @staticmethod
    def compute_content_hash(content: str) -> str:
        """Compute hash of page content.

        Args:
            content: Page content.

        Returns:
            SHA256 hash of content (first 16 chars).
        """
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def needs_regeneration(
        self,
        page_path: str,
        source_files: list[str],
    ) -> bool:
        """Check if a page needs regeneration based on source file changes.

        Args:
            page_path: Wiki page path.
            source_files: List of source files that contribute to this page.

        Returns:
            True if page needs regeneration, False if it can be skipped.
        """
        if self._previous_status is None:
            logger.debug("needs_regeneration(%s): no previous status", page_path)
            return True
        prev_page = self._previous_status.pages.get(page_path)
        if prev_page is None:
            logger.debug("needs_regeneration(%s): new page", page_path)
            return True
        # Check if source files list changed
        if set(source_files) != set(prev_page.source_files):
            added = set(source_files) - set(prev_page.source_files)
            removed = set(prev_page.source_files) - set(source_files)
            logger.debug(
                "needs_regeneration(%s): source files changed +%d -%d",
                page_path,
                len(added),
                len(removed),
            )
            return True
        # Check if any source file has changed
        for source_file in source_files:
            current_hash = self._file_hashes.get(source_file)
            prev_hash = prev_page.source_hashes.get(source_file)
            if current_hash is None:
                logger.debug(
                    "needs_regeneration(%s): no current hash for %s",
                    page_path,
                    source_file,
                )
                return True
            if not prev_hash:
                # Guard against empty-string hash from previous poisoned runs
                logger.debug(
                    "needs_regeneration(%s): empty/missing prev hash for %s",
                    page_path,
                    source_file,
                )
                return True
            if current_hash != prev_hash:
                logger.debug(
                    "needs_regeneration(%s): hash changed for %s",
                    page_path,
                    source_file,
                )
                return True
        logger.debug("needs_regeneration(%s): up to date, skipping", page_path)
        return False

    async def load_existing_page(self, page_path: str) -> WikiPage | None:
        """Load an existing wiki page from disk.

        Args:
            page_path: Relative path to the page.

        Returns:
            WikiPage if found, None otherwise.
        """
        full_path = self.wiki_path / page_path
        if not full_path.exists():
            return None
        # Capture values needed for the sync function
        prev_page = (
            self._previous_status.pages.get(page_path)
            if self._previous_status
            else None
        )
        title = Path(page_path).stem.replace("_", " ").title()
        # Preserve the original generation time when known; otherwise "now".
        generated_at = prev_page.generated_at if prev_page else time.time()

        def _read_page() -> WikiPage | None:
            try:
                content = full_path.read_text()
                return WikiPage(
                    path=page_path,
                    title=title,
                    content=content,
                    generated_at=generated_at,
                )
            except (OSError, UnicodeDecodeError) as e:
                # OSError: File read issues
                # UnicodeDecodeError: File encoding issues
                logger.warning("Failed to load existing page %s: %s", page_path, e)
                return None

        return await asyncio.to_thread(_read_page)

    def _collect_source_hashes(
        self,
        page_path: str,
        source_files: list[str],
    ) -> dict[str, str]:
        """Map source files to their current hashes, omitting hashless files.

        Files with a missing or empty hash are dropped (with a warning)
        rather than recorded, because an empty-string hash would poison
        future ``needs_regeneration`` checks.

        Args:
            page_path: Wiki page path (for log context).
            source_files: Source files to look up.

        Returns:
            Dict of source file path to non-empty hash.
        """
        source_hashes = {f: h for f in source_files if (h := self._file_hashes.get(f))}
        if len(source_hashes) < len(source_files):
            missing = [f for f in source_files if f not in source_hashes]
            logger.warning(
                "record_page_status(%s): %d source files have no hash, "
                "omitting to prevent poisoned empty-string hashes: %s",
                page_path,
                len(missing),
                missing[:5],
            )
        return source_hashes

    def _collect_line_info(self, source_files: list[str]) -> dict[str, dict[str, int]]:
        """Build start/end line info for source files that have it.

        Args:
            source_files: Source files to look up.

        Returns:
            Dict of file path to {"start_line": int, "end_line": int}.
        """
        return {
            f: {
                "start_line": self._file_line_info[f][0],
                "end_line": self._file_line_info[f][1],
            }
            for f in source_files
            if f in self._file_line_info
        }

    def record_page_status(
        self,
        page: WikiPage,
        source_files: list[str],
    ) -> None:
        """Record status for a generated/loaded page.

        Args:
            page: The wiki page.
            source_files: Source files that contributed to this page.
        """
        self._page_statuses[page.path] = WikiPageStatus(
            path=page.path,
            source_files=source_files,
            source_hashes=self._collect_source_hashes(page.path, source_files),
            source_line_info=self._collect_line_info(source_files),
            content_hash=self.compute_content_hash(page.content),
            generated_at=page.generated_at,
        )

    def get_changed_files(self) -> set[str]:
        """Get set of files that have changed since last generation.

        Compares current file hashes with previous generation's hashes.

        Returns:
            Set of file paths that have changed or are new.
        """
        if self._previous_status is None:
            # No previous status means all files are "new"
            return set(self._file_hashes.keys())
        # Merge previous per-page hashes into one lookup table (first page
        # that tracked a file wins, matching the old first-match scan) so
        # the comparison is O(files + pages) instead of O(files * pages).
        prev_hashes: dict[str, str] = {}
        for page_status in self._previous_status.pages.values():
            for file_path, prev_hash in page_status.source_hashes.items():
                prev_hashes.setdefault(file_path, prev_hash)
        # Missing previous hash (get() -> None) also counts as changed.
        return {
            file_path
            for file_path, current_hash in self._file_hashes.items()
            if prev_hashes.get(file_path) != current_hash
        }

    def build_reverse_index(self) -> dict[str, set[str]]:
        """Build reverse index mapping source files to dependent wiki pages.

        Uses previous generation's page statuses to build the mapping.

        Returns:
            Dict mapping source file path to set of wiki page paths that depend on it.
        """
        reverse_index: dict[str, set[str]] = {}
        if self._previous_status is None:
            return reverse_index
        for page_path, page_status in self._previous_status.pages.items():
            for source_file in page_status.source_files:
                reverse_index.setdefault(source_file, set()).add(page_path)
        return reverse_index

    def get_affected_pages(self, changed_files: set[str] | None = None) -> set[str]:
        """Get set of wiki pages affected by file changes.

        Uses reverse index to efficiently find all pages that depend on changed files.

        Args:
            changed_files: Optional set of changed files. If None, computes automatically.

        Returns:
            Set of wiki page paths that need regeneration.
        """
        if changed_files is None:
            changed_files = self.get_changed_files()
        if not changed_files:
            return set()
        reverse_index = self.build_reverse_index()
        affected: set[str] = set()
        for file_path in changed_files:
            if file_path in reverse_index:
                affected.update(reverse_index[file_path])
        return affected

    def get_regeneration_summary(self) -> dict[str, Any]:
        """Get a summary of what will be regenerated and why.

        Returns:
            Dict with 'changed_files', 'affected_pages', 'unchanged_pages' counts.
        """
        changed_files = self.get_changed_files()
        affected_pages = self.get_affected_pages(changed_files)
        total_previous_pages = (
            len(self._previous_status.pages) if self._previous_status else 0
        )
        unchanged_pages = total_previous_pages - len(affected_pages)
        return {
            "changed_files": list(changed_files),
            "changed_file_count": len(changed_files),
            "affected_pages": list(affected_pages),
            "affected_page_count": len(affected_pages),
            "unchanged_page_count": max(0, unchanged_pages),
            "is_full_rebuild": self._previous_status is None,
        }

    @staticmethod
    def compute_structural_fingerprint(index_status: IndexStatus) -> str:
        """Compute a structural fingerprint from the index status.

        The fingerprint changes when files are added, removed, or renamed,
        but NOT when file content changes. This allows summary pages
        (index.md, architecture.md, etc.) to skip regeneration on
        content-only edits.

        Args:
            index_status: Current index status.

        Returns:
            SHA-256 hex digest (first 16 chars) of the structural data.
        """
        sorted_paths = sorted(f.path for f in index_status.files)
        sorted_languages = sorted(index_status.languages.items())
        payload = json.dumps(
            {
                "files": sorted_paths,
                "languages": sorted_languages,
                "total_files": index_status.total_files,
                "total_chunks": index_status.total_chunks,
            },
            sort_keys=True,
        )
        return hashlib.sha256(payload.encode()).hexdigest()[:16]

    def needs_regeneration_structural(
        self,
        page_path: str,
        index_status: IndexStatus,
    ) -> bool:
        """Check if a summary page needs regeneration using structural fingerprint.

        Unlike ``needs_regeneration`` which compares per-file content hashes,
        this only checks whether the repository *structure* has changed
        (files added/removed/renamed, language distribution, totals).

        Args:
            page_path: Wiki page path.
            index_status: Current index status.

        Returns:
            True if the page needs regeneration.
        """
        if self._previous_status is None:
            return True
        prev_page = self._previous_status.pages.get(page_path)
        if prev_page is None:
            return True
        # Empty fingerprint means pre-migration data — force one-time rebuild
        if not prev_page.structural_fingerprint:
            return True
        current_fp = self.compute_structural_fingerprint(index_status)
        return current_fp != prev_page.structural_fingerprint

    def record_summary_page_status(
        self,
        page: WikiPage,
        all_source_files: list[str],
        index_status: IndexStatus,
    ) -> None:
        """Record status for a summary page, including the structural fingerprint.

        Like ``record_page_status`` but also stores the structural fingerprint
        so that future incremental runs can use ``needs_regeneration_structural``.

        Args:
            page: The wiki page.
            all_source_files: All source files in the repo.
            index_status: Current index status for fingerprint computation.
        """
        # Use the same omit-and-warn hash collection as record_page_status.
        # (Previously this stored "" for hashless files — exactly the
        # "poisoned empty-string hash" that needs_regeneration guards against.)
        self._page_statuses[page.path] = WikiPageStatus(
            path=page.path,
            source_files=all_source_files,
            source_hashes=self._collect_source_hashes(page.path, all_source_files),
            source_line_info=self._collect_line_info(all_source_files),
            structural_fingerprint=self.compute_structural_fingerprint(index_status),
            content_hash=self.compute_content_hash(page.content),
            generated_at=page.generated_at,
        )