
Documentation Search MCP Server

by gemini2026

preindex_docs

Cache documentation-site search indexes locally so supported programming libraries and frameworks can be searched offline or without the Serper API.

Instructions

Pre-download and persist docs site indexes for Serper-free search.

This caches MkDocs/Sphinx search indexes (and optionally sitemaps) to disk so the server can search supported documentation sites without requiring Serper.

Input Schema

Name                  Required  Description                                 Default
libraries             Yes       Libraries to preindex (keys of docs_urls)  -
include_sitemap       No        Also fetch and cache each site's sitemap   false
persist_path          No        Destination file for the persisted state   site_index_path
max_concurrent_sites  No        Concurrent site fetches, clamped to 1-10   3
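
Illustrative arguments for a preindex_docs call (the library names here are hypothetical and must match keys in the server's docs_urls mapping; the exact invocation syntax depends on your MCP client):

    # Hypothetical tool-call arguments for preindex_docs
    arguments = {
        "libraries": ["fastapi", "httpx"],  # assumed entries in docs_urls
        "include_sitemap": True,            # also cache each site's sitemap
        "persist_path": None,               # fall back to site_index_path
        "max_concurrent_sites": 5,          # clamped server-side to 1-10
    }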

Implementation Reference

  • The main handler function for the 'preindex_docs' MCP tool. It processes a list of libraries, calls preindex_site for each, and saves the preindexed state.
    async def preindex_docs(
        libraries: LibrariesParam,
        include_sitemap: bool = False,
        persist_path: Optional[str] = None,
        max_concurrent_sites: int = 3,
    ):
        """
        Pre-download and persist docs site indexes for Serper-free search.
    
        This caches MkDocs/Sphinx search indexes (and optionally sitemaps) to disk so the
        server can search supported documentation sites without requiring Serper.
        """
        await enforce_rate_limit("preindex_docs")
    
        targets = libraries or sorted(docs_urls.keys())
        if not targets:
            return {
                "status": "no_targets",
                "message": "No libraries configured to preindex",
            }
    
        global http_client
        if http_client is None:
            # Lazily create the shared HTTP client (30s default timeout, 60s reads).
            http_client = httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=60.0))
    
        # Clamp the requested concurrency to 1-10 before sizing the semaphore.
        concurrency = max(1, min(int(max_concurrent_sites), 10))
        semaphore = asyncio.Semaphore(concurrency)
    
        async def _run_one(library: str) -> Dict[str, Any]:
            docs_root = docs_urls.get(library)
            if not docs_root:
                return {"library": library, "status": "unsupported"}
    
            async with semaphore:
                summary = await preindex_site(
                    docs_root,
                    http_client,
                    user_agent=USER_AGENT,
                    include_sitemap=include_sitemap,
                )
                summary["library"] = library
                return summary
    
        results = await asyncio.gather(*[_run_one(lib) for lib in targets])
    
        path = persist_path or site_index_path
        try:
            save_preindexed_state(path)
            persisted: Dict[str, Any] = {"status": "ok", "path": path}
        except Exception as e:
            persisted = {"status": "error", "path": path, "error": str(e)}
    
        return {
            "status": "ok",
            "persist": persisted,
            "real_time_search_enabled": real_time_search_enabled,
            "include_sitemap": include_sitemap,
            "max_concurrent_sites": concurrency,
            "total_libraries": len(targets),
            "results": results,
        }
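  • Illustrative shape of the value preindex_docs returns, assembled from the keys in the handler above (all values are made up; the URLs and counts are placeholders, not real measurements):
    example_result = {
        "status": "ok",
        "persist": {"status": "ok", "path": "/data/site_index.json"},
        "real_time_search_enabled": False,
        "include_sitemap": True,
        "max_concurrent_sites": 5,
        "total_libraries": 1,
        "results": [
            {
                "library": "fastapi",
                "site_url": "https://example.com/docs/",
                "origin": "https://example.com",
                "status": "ok",
                "mkdocs_index": {
                    "index_url": "https://example.com/docs/search/search_index.json",
                    "documents": 1200,
                },
                "sphinx_index": None,
                "sitemap": {"urls": 300},
                "errors": [],
            }
        ],
    }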
  • Core helper function that performs the actual preindexing of a single documentation site by fetching MkDocs/Sphinx search indexes and optionally sitemaps.
    async def preindex_site(
        site_url: str,
        client: httpx.AsyncClient,
        *,
        user_agent: str,
        include_sitemap: bool = False,
    ) -> Dict[str, Any]:
        """Fetch and cache on-site search indexes for a docs site."""
        parsed = urlparse(site_url)
        if not parsed.scheme or not parsed.netloc:
            return {"site_url": site_url, "status": "invalid_url"}
    
        origin = f"{parsed.scheme}://{parsed.netloc}"
        results: Dict[str, Any] = {
            "site_url": site_url,
            "origin": origin,
            "mkdocs_index": None,
            "sphinx_index": None,
            "sitemap": None,
            "errors": [],
        }
    
        for index_url in _mkdocs_index_candidates(site_url):
            try:
                docs = await _get_cached_index(
                    client,
                    index_url,
                    user_agent=user_agent,
                    kind="mkdocs",
                    timeout_seconds=20.0,
                )
            except Exception as e:
                results["errors"].append(f"mkdocs:{index_url}: {e}")
                continue
            if docs:
                results["mkdocs_index"] = {"index_url": index_url, "documents": len(docs)}
                break
    
        for index_url in _sphinx_index_candidates(site_url):
            try:
                index = await _get_cached_index(
                    client,
                    index_url,
                    user_agent=user_agent,
                    kind="sphinx",
                    timeout_seconds=20.0,
                )
            except Exception as e:
                results["errors"].append(f"sphinx:{index_url}: {e}")
                continue
            if isinstance(index, dict):
                filenames = index.get("filenames")
                results["sphinx_index"] = {
                    "index_url": index_url,
                    "documents": len(filenames) if isinstance(filenames, list) else None,
                }
                break
    
        if include_sitemap:
            try:
                urls = await _load_site_sitemap_urls(
                    client, site_url, user_agent=user_agent
                )
                if urls:
                    _sitemap_cache[origin] = _SitemapCacheEntry(
                        fetched_at=datetime.now(), urls=tuple(urls)
                    )
                    results["sitemap"] = {"urls": len(urls)}
            except Exception as e:
                results["errors"].append(f"sitemap:{origin}: {e}")
    
        results["status"] = (
            "ok"
            if results.get("mkdocs_index")
            or results.get("sphinx_index")
            or results.get("sitemap")
            else "no_index_found"
        )
        return results
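  • The candidate-URL helpers (_mkdocs_index_candidates, _sphinx_index_candidates) are not shown on this page. A minimal sketch of what they plausibly yield, based on where MkDocs and Sphinx conventionally publish their search indexes rather than on the server's actual source:
    from typing import Iterator
    from urllib.parse import urljoin
    
    def _mkdocs_index_candidates(site_url: str) -> Iterator[str]:
        # MkDocs' search plugin writes its index to search/search_index.json
        # relative to the site root.
        base = site_url if site_url.endswith("/") else site_url + "/"
        yield urljoin(base, "search/search_index.json")
    
    def _sphinx_index_candidates(site_url: str) -> Iterator[str]:
        # Sphinx writes searchindex.js at the root of the built HTML docs.
        base = site_url if site_url.endswith("/") else site_url + "/"
        yield urljoin(base, "searchindex.js")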
  • Helper function to save the preindexed state to disk, called by the tool handler.
    def save_preindexed_state(path: str) -> None:
        """Persist current in-memory sitemap/index caches to disk."""
        if not path:
            raise ValueError("persist path must be non-empty")
        state = export_preindexed_state()
        # Write to a temp file, then atomically swap it into place so a crash
        # mid-write cannot leave a truncated state file behind.
        tmp_path = f"{path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as fh:
            json.dump(state, fh)
        os.replace(tmp_path, path)
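  • A matching loader for the persisted state (hypothetical; the server's actual rehydration helper is not shown on this page):
    import json
    
    def load_preindexed_state(path: str) -> dict:
        """Read a state file previously written by save_preindexed_state."""
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)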
