Skip to main content
Glama
cache.py3.21 kB
"""Cache management for documentation index and pages.""" from typing import Final from ..config import doc_config from . import doc_fetcher, text_processor from .doc_fetcher import Page from .indexer import Doc, IndexSearch # Python 3.13 type aliases (PEP 695) type UrlCache = dict[str, Page | None] type UrlTitles = dict[str, str] # Global state _INDEX: IndexSearch | None = None _URL_CACHE: UrlCache = {} _URL_TITLES: UrlTitles = {} _LINKS_LOADED: bool = False SNIPPET_HYDRATE_MAX: Final[int] = 5 def load_links_only() -> None: """Parse llms.txt files and index curated titles without fetching content. This function initializes the search index with document titles and URLs from configured llms.txt files. Content is not fetched during initialization for faster startup times. """ global _INDEX, _LINKS_LOADED, _URL_TITLES, _URL_CACHE if _INDEX is None: _INDEX = IndexSearch() for src in doc_config.llm_texts_url: for title, url in doc_fetcher.parse_llms_txt(src): # Record curated display title and placeholder cache _URL_TITLES[url] = title _URL_CACHE.setdefault(url, None) # Normalize title for display and indexing display_title = text_processor.normalize(title) index_title = text_processor.index_title_variants(display_title, url) # Index with empty content for fast startup _INDEX.add( Doc( uri=url, display_title=display_title, content="", index_title=index_title, ) ) _LINKS_LOADED = True def ensure_ready() -> None: """Ensure the search index is initialized and ready for use. Calls load_links_only() if the index hasn't been loaded yet. This is the main entry point for index initialization. """ if not _LINKS_LOADED: load_links_only() def ensure_page(url: str) -> Page | None: """Ensure a page is cached, fetching it if necessary. 
Args: url: The URL of the page to ensure is cached Returns: The cached or newly fetched Page object, or None if fetch failed """ page = _URL_CACHE.get(url) if page is not None: return page try: raw = doc_fetcher.fetch_and_clean(url) display_title = text_processor.format_display_title(url, raw.title, _URL_TITLES) page = Page(url=url, title=display_title, content=raw.content) _URL_CACHE[url] = page return page except Exception: return None def get_index() -> IndexSearch | None: """Get the current search index instance. Returns: The initialized IndexSearch instance, or None if not yet loaded """ return _INDEX def get_url_cache() -> UrlCache: """Get the URL cache dictionary. Returns: Dictionary mapping URLs to cached Page objects (or None if not fetched) """ return _URL_CACHE def get_url_titles() -> UrlTitles: """Get the curated URL titles mapping. Returns: Dictionary mapping URLs to their curated display titles from llms.txt """ return _URL_TITLES

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/praveenc/mcp-server-builder'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.