Documentation Search MCP Server

by gemini2026

preindex_docs

Cache documentation site search indexes locally to enable offline or Serper-free searching across supported programming libraries and frameworks.

Instructions

Pre-download and persist docs site indexes for Serper-free search.

This caches MkDocs/Sphinx search indexes (and optionally sitemaps) to disk so the server can search supported documentation sites without requiring Serper.
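
MkDocs and Sphinx both emit a client-side search index at build time, which is what makes Serper-free search possible: MkDocs sites conventionally publish search/search_index.json and Sphinx sites publish searchindex.js at the docs root. The exact candidate URLs this server probes come from its _mkdocs_index_candidates and _sphinx_index_candidates helpers (not shown on this page); the sketch below only illustrates the conventional locations against a public MkDocs-built site.

    # Illustrative probe of the conventional index locations; not the server's own logic.
    import asyncio
    import httpx

    async def probe(site_url: str) -> None:
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            for path in ("search/search_index.json", "searchindex.js"):
                resp = await client.get(site_url.rstrip("/") + "/" + path)
                print(f"{path}: HTTP {resp.status_code}")

    asyncio.run(probe("https://www.mkdocs.org/"))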

Input Schema

  • libraries (required): library names whose documentation sites should be preindexed; an empty list preindexes every configured library.
  • include_sitemap (optional, default false): also fetch and cache each site's sitemap URLs.
  • persist_path (optional): file path where the preindexed state is written; defaults to the server's configured index path.
  • max_concurrent_sites (optional, default 3): maximum number of sites fetched concurrently, clamped to the range 1-10.
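
For orientation, here is a minimal client-side sketch of invoking the tool with these parameters, assuming the official MCP Python SDK. The launch command and the library names "fastapi" and "httpx" are illustrative assumptions, not values taken from this server's configuration.

    import asyncio
    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    async def main() -> None:
        # Hypothetical launch command; substitute however you actually run this server.
        params = StdioServerParameters(command="python", args=["main.py"])
        async with stdio_client(params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                result = await session.call_tool(
                    "preindex_docs",
                    {
                        "libraries": ["fastapi", "httpx"],
                        "include_sitemap": True,
                        "max_concurrent_sites": 2,
                    },
                )
                print(result)

    asyncio.run(main())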

Implementation Reference

  • The main handler function for the 'preindex_docs' MCP tool. It processes a list of libraries, calls preindex_site for each, and saves the preindexed state. The shape of the payload it returns is sketched after this list.
    async def preindex_docs(
        libraries: LibrariesParam,
        include_sitemap: bool = False,
        persist_path: Optional[str] = None,
        max_concurrent_sites: int = 3,
    ):
        """
        Pre-download and persist docs site indexes for Serper-free search.

        This caches MkDocs/Sphinx search indexes (and optionally sitemaps) to disk
        so the server can search supported documentation sites without requiring Serper.
        """
        await enforce_rate_limit("preindex_docs")

        targets = libraries or sorted(docs_urls.keys())
        if not targets:
            return {
                "status": "no_targets",
                "message": "No libraries configured to preindex",
            }

        global http_client
        if http_client is None:
            http_client = httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=60.0))

        concurrency = max(1, min(int(max_concurrent_sites), 10))
        semaphore = asyncio.Semaphore(concurrency)

        async def _run_one(library: str) -> Dict[str, Any]:
            docs_root = docs_urls.get(library)
            if not docs_root:
                return {"library": library, "status": "unsupported"}
            async with semaphore:
                summary = await preindex_site(
                    docs_root,
                    http_client,
                    user_agent=USER_AGENT,
                    include_sitemap=include_sitemap,
                )
            summary["library"] = library
            return summary

        results = await asyncio.gather(*[_run_one(lib) for lib in targets])

        path = persist_path or site_index_path
        try:
            save_preindexed_state(path)
            persisted: Dict[str, Any] = {"status": "ok", "path": path}
        except Exception as e:
            persisted = {"status": "error", "path": path, "error": str(e)}

        return {
            "status": "ok",
            "persist": persisted,
            "real_time_search_enabled": real_time_search_enabled,
            "include_sitemap": include_sitemap,
            "max_concurrent_sites": concurrency,
            "total_libraries": len(targets),
            "results": results,
        }
  • Core helper function that performs the actual preindexing of a single documentation site by fetching MkDocs/Sphinx search indexes and optionally sitemaps. A standalone usage sketch follows this list.
    async def preindex_site(
        site_url: str,
        client: httpx.AsyncClient,
        *,
        user_agent: str,
        include_sitemap: bool = False,
    ) -> Dict[str, Any]:
        """Fetch and cache on-site search indexes for a docs site."""
        parsed = urlparse(site_url)
        if not parsed.scheme or not parsed.netloc:
            return {"site_url": site_url, "status": "invalid_url"}

        origin = f"{parsed.scheme}://{parsed.netloc}"
        results: Dict[str, Any] = {
            "site_url": site_url,
            "origin": origin,
            "mkdocs_index": None,
            "sphinx_index": None,
            "sitemap": None,
            "errors": [],
        }

        for index_url in _mkdocs_index_candidates(site_url):
            try:
                docs = await _get_cached_index(
                    client,
                    index_url,
                    user_agent=user_agent,
                    kind="mkdocs",
                    timeout_seconds=20.0,
                )
            except Exception as e:
                results["errors"].append(f"mkdocs:{index_url}: {e}")
                continue
            if docs:
                results["mkdocs_index"] = {"index_url": index_url, "documents": len(docs)}
                break

        for index_url in _sphinx_index_candidates(site_url):
            try:
                index = await _get_cached_index(
                    client,
                    index_url,
                    user_agent=user_agent,
                    kind="sphinx",
                    timeout_seconds=20.0,
                )
            except Exception as e:
                results["errors"].append(f"sphinx:{index_url}: {e}")
                continue
            if isinstance(index, dict):
                filenames = index.get("filenames")
                results["sphinx_index"] = {
                    "index_url": index_url,
                    "documents": len(filenames) if isinstance(filenames, list) else None,
                }
                break

        if include_sitemap:
            try:
                urls = await _load_site_sitemap_urls(
                    client, site_url, user_agent=user_agent
                )
                if urls:
                    _sitemap_cache[origin] = _SitemapCacheEntry(
                        fetched_at=datetime.now(), urls=tuple(urls)
                    )
                    results["sitemap"] = {"urls": len(urls)}
            except Exception as e:
                results["errors"].append(f"sitemap:{origin}: {e}")

        results["status"] = (
            "ok"
            if results.get("mkdocs_index")
            or results.get("sphinx_index")
            or results.get("sitemap")
            else "no_index_found"
        )
        return results
  • Helper function to save the preindexed state to disk, called by the tool handler. See the note on atomic writes after this list.
    def save_preindexed_state(path: str) -> None:
        """Persist current in-memory sitemap/index caches to disk."""
        if not path:
            raise ValueError("persist path must be non-empty")
        state = export_preindexed_state()
        tmp_path = f"{path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(state, fh)
        os.replace(tmp_path, path)
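
Reading the return statements in preindex_docs above, a successful call produces a payload shaped roughly as follows. Only the keys are taken from the code; every value here is illustrative. Libraries not present in docs_urls come back as {"library": ..., "status": "unsupported"} entries in results.

    example_response = {
        "status": "ok",
        "persist": {"status": "ok", "path": "/data/preindexed_docs.json"},  # illustrative path
        "real_time_search_enabled": False,
        "include_sitemap": True,
        "max_concurrent_sites": 2,
        "total_libraries": 2,
        "results": [
            {   # one entry per requested library, produced by preindex_site
                "library": "fastapi",
                "site_url": "https://fastapi.tiangolo.com",
                "origin": "https://fastapi.tiangolo.com",
                "mkdocs_index": {"index_url": "https://fastapi.tiangolo.com/search/search_index.json", "documents": 1200},
                "sphinx_index": None,
                "sitemap": {"urls": 350},
                "errors": [],
                "status": "ok",
            },
        ],
    }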
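
To exercise preindex_site on its own, here is a minimal sketch. It assumes preindex_site is importable from the server module, and the docs URL and user agent string are illustrative choices, not the server's configured values.

    import asyncio
    import httpx

    async def demo() -> None:
        # Assumes preindex_site has been imported from the server module.
        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=60.0)) as client:
            summary = await preindex_site(
                "https://www.mkdocs.org/",
                client,
                user_agent="docs-search-mcp-demo/0.1",  # illustrative UA string
                include_sitemap=True,
            )
        print(summary["status"], summary.get("mkdocs_index"))

    asyncio.run(demo())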
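
save_preindexed_state writes to a temporary file and then swaps it into place with os.replace, so an interrupted run never clobbers an existing good index file. The persisted state is plain JSON and can be inspected directly; the path below is illustrative.

    import json

    with open("preindexed_docs.json", encoding="utf-8") as fh:  # illustrative path
        state = json.load(fh)
    print(sorted(state.keys()))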

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gemini2026/documentation-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.