"""Search index generator for wiki pages.
This module generates a JSON search index that enables client-side
full-text search across wiki documentation. It includes both page-level
and entity-level (function, class, and method) search entries.
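
Typical shape of the emitted index (see ``generate_full_search_index``)::

    {"pages": [...], "entities": [...], "meta": {"total_pages": ..., "total_entities": ...}}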
"""
from __future__ import annotations
import json
import re
from pathlib import Path
from local_deepwiki.core.vectorstore import VectorStore
from local_deepwiki.models import ChunkType, IndexStatus, WikiPage
def extract_headings(content: str) -> list[str]:
"""Extract all headings from markdown content.
Args:
content: Markdown content.
Returns:
List of heading texts (without # prefixes).
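
    Example:
        >>> extract_headings("## **Setup** guide")
        ['Setup guide']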
"""
headings = []
for line in content.split("\n"):
line = line.strip()
if line.startswith("#"):
# Remove # prefix and clean up
heading = re.sub(r"^#+\s*", "", line)
# Remove markdown formatting like ** or `
heading = re.sub(r"[*`]", "", heading)
if heading:
headings.append(heading)
return headings
def extract_code_terms(content: str) -> list[str]:
"""Extract code terms (class names, function names) from content.
Args:
content: Markdown content.
Returns:
List of code terms found in backticks.
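
    Example:
        Order is unspecified (a set is used internally), so sort for display:

        >>> sorted(extract_code_terms("Call `VectorStore.search` or `query`."))
        ['VectorStore.search', 'query', 'search']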
"""
terms = set()
# Match inline code: `term`
for match in re.finditer(r"`([^`]+)`", content):
term = match.group(1)
        # Skip code that looks like a full statement: anything long or
        # containing spaces or newlines
        if len(term) < 50 and " " not in term and "\n" not in term:
# Extract the main identifier if it's a qualified name
            # split() always returns at least one part, so no emptiness check
            parts = term.split(".")
            terms.add(parts[-1])  # Last part of qualified name
            if len(parts) > 1:
                terms.add(term)  # Also add full qualified name
return list(terms)
def extract_snippet(content: str, max_length: int = 200) -> str:
"""Extract a text snippet from markdown content.
Args:
content: Markdown content.
max_length: Maximum snippet length.
Returns:
Plain text snippet.
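
    Example:
        >>> extract_snippet("Use the *fast* [API](https://example.com) today.")
        'Use the fast API today.'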
"""
# Remove code blocks
text = re.sub(r"```[\s\S]*?```", "", content)
# Remove headings
text = re.sub(r"^#+\s+.*$", "", text, flags=re.MULTILINE)
# Remove links but keep text: [text](url) -> text
text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
# Remove markdown formatting
text = re.sub(r"[*_`]", "", text)
# Collapse whitespace
text = " ".join(text.split())
if len(text) > max_length:
text = text[:max_length].rsplit(" ", 1)[0] + "..."
return text.strip()
def generate_search_entry(page: WikiPage) -> dict:
"""Generate a search index entry for a wiki page.
Args:
page: The wiki page.
Returns:
Dictionary with searchable fields.
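
    Example:
        A minimal sketch, assuming ``WikiPage`` accepts these fields as
        keyword arguments (only ``path``, ``title``, and ``content`` are
        read here)::

            page = WikiPage(path="guide.md", title="Guide", content="# Guide")
            entry = generate_search_entry(page)
            # entry["title"] == "Guide", entry["headings"] == ["Guide"]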
"""
headings = extract_headings(page.content)
terms = extract_code_terms(page.content)
snippet = extract_snippet(page.content)
return {
"path": page.path,
"title": page.title,
"headings": headings,
"terms": terms,
"snippet": snippet,
}
def generate_search_index(pages: list[WikiPage]) -> list[dict]:
"""Generate a search index from wiki pages.
Args:
pages: List of wiki pages.
Returns:
List of search entries.
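
    Example:
        >>> generate_search_index([])
        []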
"""
return [generate_search_entry(page) for page in pages]
async def generate_entity_entries(
index_status: IndexStatus,
vector_store: VectorStore,
) -> list[dict]:
"""Generate search entries for individual code entities.
Creates searchable entries for each function, class, and method
with type information for filtering.
Args:
index_status: Index status with file information.
vector_store: Vector store with code chunks.
Returns:
List of entity search entries.
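
    Example:
        A hedged usage sketch; ``status`` and ``store`` are assumed to be a
        populated ``IndexStatus`` and ``VectorStore`` from the indexing step::

            entries = await generate_entity_entries(status, store)
            # each entry looks like:
            # {"type": "entity", "entity_type": "method", "name": ..., ...}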
"""
entries: list[dict] = []
for file_info in index_status.files:
chunks = await vector_store.get_chunks_by_file(file_info.path)
for chunk in chunks:
if chunk.chunk_type not in (
ChunkType.CLASS,
ChunkType.FUNCTION,
ChunkType.METHOD,
):
continue
# Determine entity type
entity_type = chunk.chunk_type.value # 'class', 'function', 'method'
# Build display name
if chunk.chunk_type == ChunkType.METHOD and chunk.parent_name:
display_name = f"{chunk.parent_name}.{chunk.name}"
else:
display_name = chunk.name or "Unknown"
# Extract metadata
metadata = chunk.metadata or {}
param_types = metadata.get("parameter_types", {})
return_type = metadata.get("return_type")
raises = metadata.get("raises", [])
is_async = metadata.get("is_async", False)
# Build signature preview
signature = ""
if entity_type != "class":
params = list(param_types.keys())[:3]
if len(param_types) > 3:
params.append("...")
signature = f"({', '.join(params)})"
if return_type:
signature += f" → {return_type}"
# Build brief description from docstring
description = ""
if chunk.docstring:
first_line = chunk.docstring.split("\n")[0].strip()
if len(first_line) > 80:
description = first_line[:77] + "..."
else:
description = first_line
            # Build wiki path (replace only the trailing .py, not every occurrence)
            wiki_path = "files/" + re.sub(r"\.py$", ".md", file_info.path)
entry = {
"type": "entity",
"entity_type": entity_type,
"name": chunk.name or "Unknown",
"display_name": display_name,
"path": wiki_path,
"file": file_info.path,
"signature": signature,
"description": description,
"is_async": is_async,
"raises": raises,
# Keywords for search matching
"keywords": _build_keywords(
chunk.name, param_types, return_type, raises
),
}
entries.append(entry)
# Sort by name for consistent output
entries = sorted(entries, key=lambda e: e["display_name"].lower())
return entries
def _build_keywords(
name: str | None,
param_types: dict[str, str],
return_type: str | None,
raises: list[str],
) -> list[str]:
"""Build search keywords from entity metadata.
Args:
name: Entity name.
param_types: Parameter types mapping.
return_type: Return type string.
raises: List of exception types.
Returns:
List of searchable keywords.
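
    Example:
        Order is unspecified, so sort for display:

        >>> sorted(_build_keywords("parse_file", {"path": "Path"}, "list[str]", ["ValueError"]))
        ['file', 'list', 'parse', 'parse_file', 'path', 'value', 'valueerror']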
"""
keywords: set[str] = set()
# Add name parts (split on underscore and camelCase)
if name:
keywords.add(name.lower())
# Split on underscores
for part in name.split("_"):
if len(part) > 2:
keywords.add(part.lower())
# Split camelCase
camel_parts = re.findall(r"[A-Z][a-z]+|[a-z]+", name)
for part in camel_parts:
if len(part) > 2:
keywords.add(part.lower())
# Add type keywords
for type_str in param_types.values():
if type_str:
# Extract base type (e.g., "list" from "list[str]")
base_type = re.split(r"[\[\]|,\s]", type_str)[0].lower()
if base_type:
keywords.add(base_type)
if return_type:
base_return = re.split(r"[\[\]|,\s]", return_type)[0].lower()
if base_return:
keywords.add(base_return)
# Add exception keywords
for exc in raises:
keywords.add(exc.lower())
# Also add without "Error" suffix for easier search
if exc.endswith("Error"):
keywords.add(exc[:-5].lower())
return list(keywords)
async def generate_full_search_index(
pages: list[WikiPage],
index_status: IndexStatus | None = None,
vector_store: VectorStore | None = None,
) -> dict:
"""Generate a comprehensive search index with pages and entities.
Args:
pages: List of wiki pages.
index_status: Optional index status for entity extraction.
vector_store: Optional vector store for entity extraction.
Returns:
Dictionary with 'pages' and 'entities' lists.
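
    Example:
        Without an index status and vector store, only pages are indexed::

            index = await generate_full_search_index(pages)
            # {"pages": [...], "entities": [], "meta": {...}}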
"""
page_entries = generate_search_index(pages)
entity_entries: list[dict] = []
if index_status and vector_store:
entity_entries = await generate_entity_entries(index_status, vector_store)
return {
"pages": page_entries,
"entities": entity_entries,
"meta": {
"total_pages": len(page_entries),
"total_entities": len(entity_entries),
},
}
def write_search_index(wiki_path: Path, pages: list[WikiPage]) -> Path:
"""Generate and write search index to disk (legacy page-only version).
Args:
wiki_path: Path to wiki directory.
pages: List of wiki pages.
Returns:
Path to the generated search.json file.
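
    Example:
        A minimal sketch, assuming ``wiki_path`` is an existing directory::

            out = write_search_index(Path("wiki"), pages)
            assert out == Path("wiki/search.json")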
"""
index = generate_search_index(pages)
index_path = wiki_path / "search.json"
index_path.write_text(json.dumps(index, indent=2))
return index_path
async def write_full_search_index(
wiki_path: Path,
pages: list[WikiPage],
index_status: IndexStatus,
vector_store: VectorStore,
) -> Path:
"""Generate and write comprehensive search index to disk.
Includes both page-level and entity-level search entries.
Args:
wiki_path: Path to wiki directory.
pages: List of wiki pages.
index_status: Index status with file information.
vector_store: Vector store with code chunks.
Returns:
Path to the generated search.json file.
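
    Example:
        A hedged sketch; ``pages``, ``status``, and ``store`` come from the
        surrounding indexing pipeline::

            out = await write_full_search_index(Path("wiki"), pages, status, store)
            # writes wiki/search.json with "pages", "entities", and "meta" keys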
"""
index = await generate_full_search_index(pages, index_status, vector_store)
index_path = wiki_path / "search.json"
index_path.write_text(json.dumps(index, indent=2))
return index_path