# index_cortex_api_docs
Index Cortex Cloud API documentation to enable search functionality. Call this tool first before querying the documentation.
## Instructions
Index Cortex Cloud API documentation. Call this first before searching.
## Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| max_pages | No | Maximum number of documentation pages to crawl and index | 50 |
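As a rough illustration of the intended call order, a client invokes this tool once before issuing any documentation searches. The sketch below uses the MCP Python SDK over stdio; the server command (`python server.py`) and the `max_pages` value are assumptions chosen for illustration, not taken from this documentation.

```python
# Minimal sketch of calling index_cortex_api_docs from an MCP client over stdio.
# The server command ("python", ["server.py"]) and max_pages value are assumptions.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    params = StdioServerParameters(command="python", args=["server.py"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Index first, so later documentation searches have cached pages to match against.
            result = await session.call_tool("index_cortex_api_docs", {"max_pages": 25})
            print(result.content[0].text)


asyncio.run(main())
```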
## Implementation Reference
- server.py:216-219 (handler): The handler function for the `index_cortex_api_docs` tool. It calls the `DocumentationIndexer.index_site` method with the site name `'cortex_api'` to crawl and cache API documentation pages.

  ```python
  async def index_cortex_api_docs(max_pages: int = 50) -> str:
      """Index Cortex Cloud API documentation. Call this first before searching."""
      pages_indexed = await indexer.index_site('cortex_api', max_pages)
      return f"Indexed {pages_indexed} pages from Cortex Cloud API documentation"
  ```
- server.py:215 (registration): Registers the `index_cortex_api_docs` tool using the `@mcp.tool()` decorator.

  ```python
  @mcp.tool()
  ```
- server.py:36-102 (helper): The core helper method `index_site` on the `DocumentationIndexer` class. It performs the web crawling, parsing, and caching of documentation pages using aiohttp and BeautifulSoup (a sketch of the surrounding class scaffolding follows this list).

  ```python
  async def index_site(self, site_name: str, max_pages: int = 100):
      """Index documentation from a specific site"""
      if site_name not in self.base_urls:
          raise ValueError(f"Unknown site: {site_name}")

      base_url = self.base_urls[site_name]
      visited_urls = set()
      urls_to_visit = [base_url]
      pages_indexed = 0

      async with aiohttp.ClientSession() as session:
          while urls_to_visit and pages_indexed < max_pages:
              url = urls_to_visit.pop(0)
              if url in visited_urls:
                  continue
              visited_urls.add(url)

              try:
                  async with session.get(url, timeout=10) as response:
                      if response.status == 200:
                          content = await response.text()
                          soup = BeautifulSoup(content, 'html.parser')

                          # Extract page content
                          title = soup.find('title')
                          title_text = title.text.strip() if title else url

                          # Remove script and style elements
                          for script in soup(["script", "style"]):
                              script.decompose()

                          # Get text content
                          text_content = soup.get_text()
                          lines = (line.strip() for line in text_content.splitlines())
                          chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
                          text = ' '.join(chunk for chunk in chunks if chunk)

                          # Store in cache
                          self.cached_pages[url] = CachedPage(
                              title=title_text,
                              content=text[:5000],  # Limit content length
                              url=url,
                              site=site_name,
                              timestamp=time.time()
                          )
                          pages_indexed += 1

                          # Find more links to index
                          if pages_indexed < max_pages:
                              links = soup.find_all('a', href=True)
                              for link in links:
                                  href = link['href']
                                  full_url = urljoin(url, href)
                                  # Only index URLs from the same domain
                                  if urlparse(full_url).netloc == urlparse(base_url).netloc:
                                      if full_url not in visited_urls and full_url not in urls_to_visit:
                                          urls_to_visit.append(full_url)
              except Exception as e:
                  print(f"Error indexing {url}: {e}")
                  continue

      return pages_indexed
  ```
- src/main.py:216-219 (handler): Identical handler function for the `index_cortex_api_docs` tool in the duplicate implementation (same code as the server.py listing above).
- src/main.py:34-100 (helper): Identical core `index_site` indexing helper in the duplicate file (same code as the server.py listing above).
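The quoted handler and helper rely on module-level scaffolding (`indexer`, `CachedPage`, `self.base_urls`, `self.cached_pages`) that sits outside the referenced line ranges. Below is a minimal sketch of what that scaffolding might look like; the `CachedPage` fields are inferred from the constructor call in `index_site`, while the FastMCP server name and the documentation URL are assumptions, not taken from the source.

```python
# Minimal sketch of the scaffolding the quoted code relies on; the server name
# and the documentation URL are placeholder assumptions.
from dataclasses import dataclass

from mcp.server.fastmcp import FastMCP


@dataclass
class CachedPage:
    # Fields inferred from the CachedPage(...) call in index_site.
    title: str
    content: str
    url: str
    site: str
    timestamp: float


class DocumentationIndexer:
    def __init__(self) -> None:
        # 'cortex_api' must exist here for index_site('cortex_api', ...) to work;
        # the URL itself is a placeholder assumption.
        self.base_urls: dict[str, str] = {
            "cortex_api": "https://docs.example.com/cortex-cloud/api",  # placeholder
        }
        self.cached_pages: dict[str, CachedPage] = {}

    # index_site(...) as quoted above would live here.


mcp = FastMCP("cortex-docs")        # assumed server name
indexer = DocumentationIndexer()    # shared instance used by the tool handlers
```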
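The indexing step exists only to populate `indexer.cached_pages` so that a companion search tool can query it, which is why this tool must be called first. That search tool is not shown here; the snippet below is a hypothetical illustration of a naive substring lookup over the cache, not the project's actual search implementation.

```python
# Hypothetical illustration only: a naive substring search over the cache that
# index_cortex_api_docs populates. Not the project's actual search tool.
def search_cached_pages(indexer: "DocumentationIndexer", query: str, limit: int = 5) -> list[str]:
    query_lower = query.lower()
    hits: list[str] = []
    for page in indexer.cached_pages.values():
        if query_lower in page.title.lower() or query_lower in page.content.lower():
            hits.append(f"{page.title} - {page.url}")
        if len(hits) >= limit:
            break
    return hits
```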