Skip to main content
Glama
clarkemn

cortex-cloud-docs-mcp-server

search_cortex_docs

Search Cortex Cloud documentation to find answers to technical questions and access relevant information.

Instructions

Search Cortex Cloud documentation

Input Schema

Table | JSON Schema
Name | Required | Description | Default
query | Yes | |

Output Schema

Table | JSON Schema
Name | Required | Description | Default
result | Yes | |

Implementation Reference

  • The primary handler function for the 'search_cortex_docs' MCP tool. It is registered using the @mcp.tool() decorator and delegates the search logic to the DocumentationIndexer instance for the 'cortex_cloud' site, returning JSON-formatted results.
    @mcp.tool()
    async def search_cortex_docs(query: str) -> str:
        """Search Cortex Cloud documentation"""
        # NOTE(review): the docstring above presumably doubles as the tool
        # description exposed by the decorator -- keep its wording stable.

        # Restrict the shared indexer's search to the 'cortex_cloud' site.
        matches = await indexer.search_docs(query, site='cortex_cloud')

        # Hand the matches back as pretty-printed JSON text.
        serialized = json.dumps(matches, indent=2)
        return serialized
  • The core helper method implementing the documentation search logic, including relevance scoring based on title and content matches, snippet extraction, and result ranking/sorting.
    async def search_docs(self, query: str, site: str = None) -> List[Dict]:
        """Search indexed documentation

        Every page in ``self.cached_pages`` is scored against ``query``
        (case-insensitively) and the best matches are returned.

        Scoring per page:
          * +10 if the whole query occurs in the title, and a further +20
            if the query equals the title;
          * +2 for each occurrence of the whole query in the content;
          * for each whitespace-separated query word, +5 if it occurs in
            the title and +1 if it occurs in the content.

        Args:
            query: Free-text search string. Surrounding whitespace is
                ignored; a blank query yields no results.
            site: If given (and truthy), only pages whose ``site`` equals
                this value are considered.

        Returns:
            At most 10 dicts with the keys ``title``, ``url``, ``site``,
            ``snippet`` and ``score``, ordered by descending score. Pages
            scoring 0 are omitted.
        """
        if not self.cached_pages:
            return []

        # An empty string is a substring of every title and str.count("")
        # returns len + 1, so a blank query would otherwise match every page
        # with an inflated score. Treat it as "nothing to search for".
        query = query.strip()
        if not query:
            return []

        query_lower = query.lower()
        # Loop-invariant: split the query into words once, not per page.
        query_words = query_lower.split()
        results = []

        for page in self.cached_pages.values():
            # Filter by site if specified
            if site and page.site != site:
                continue

            title_lower = page.title.lower()
            content_lower = page.content.lower()
            score = 0

            # Whole-query match in the title, with a bonus for an exact title.
            if query_lower in title_lower:
                score += 10
                if query_lower == title_lower:
                    score += 20

            # Whole-query occurrences in the content.
            score += content_lower.count(query_lower) * 2

            # Individual query words found in the title / content.
            for word in query_words:
                if word in title_lower:
                    score += 5
                if word in content_lower:
                    score += 1

            if score <= 0:
                continue

            results.append({
                'title': page.title,
                'url': page.url,
                'site': page.site,
                # Snippet extraction is delegated to a helper on this object.
                'snippet': self._extract_snippet(page.content, query, max_length=200),
                'score': score
            })

        # Sort by relevance score (highest first) and limit results
        results.sort(key=lambda item: item['score'], reverse=True)
        return results[:10]
  • Helper method for indexing (crawling and caching) documentation pages from a specified site, used to populate the cache before searching.
    async def index_site(self, site_name: str, max_pages: int = 100):
        """Index documentation from a specific site

        Performs a breadth-first crawl starting at ``self.base_urls[site_name]``,
        following only links whose network location matches the base URL's.
        Each page answering HTTP 200 is reduced to plain text and stored in
        ``self.cached_pages`` keyed by its URL.

        Args:
            site_name: Key into ``self.base_urls`` naming the site to crawl.
            max_pages: Upper bound on the number of pages to store.

        Returns:
            The number of pages indexed during this call.

        Raises:
            ValueError: If ``site_name`` is not a key of ``self.base_urls``.
        """
        # Function-scope imports keep this method self-contained.
        import sys
        from collections import deque
        from urllib.parse import urldefrag

        if site_name not in self.base_urls:
            raise ValueError(f"Unknown site: {site_name}")

        base_url = self.base_urls[site_name]
        # Loop-invariant: the host every followed link must share.
        base_netloc = urlparse(base_url).netloc

        # Fragments ("#section") address the same document, so URLs are
        # compared and stored without them to avoid re-indexing one page
        # under many keys and wasting the max_pages budget.
        start_url = urldefrag(base_url).url

        # `seen` holds every URL ever queued, so each one is fetched at most
        # once; the deque gives O(1) pops from the front of the crawl queue.
        seen = {start_url}
        pending = deque([start_url])
        pages_indexed = 0
        request_timeout = aiohttp.ClientTimeout(total=10)

        async with aiohttp.ClientSession() as session:
            while pending and pages_indexed < max_pages:
                url = pending.popleft()

                try:
                    async with session.get(url, timeout=request_timeout) as response:
                        if response.status != 200:
                            continue
                        html = await response.text()

                    soup = BeautifulSoup(html, 'html.parser')

                    # Fall back to the URL when <title> is missing or blank.
                    title_tag = soup.find('title')
                    title_text = (title_tag.text.strip() if title_tag else '') or url

                    # Remove script and style elements before extracting text.
                    for tag in soup(["script", "style"]):
                        tag.decompose()

                    # Collapse the page text into single-space-separated chunks.
                    lines = (line.strip() for line in soup.get_text().splitlines())
                    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
                    text = ' '.join(chunk for chunk in chunks if chunk)

                    self.cached_pages[url] = CachedPage(
                        title=title_text,
                        content=text[:5000],  # Limit content length
                        url=url,
                        site=site_name,
                        timestamp=time.time()
                    )
                    pages_indexed += 1

                    # No point collecting more links once the budget is spent.
                    if pages_indexed >= max_pages:
                        break

                    for link in soup.find_all('a', href=True):
                        try:
                            full_url = urldefrag(urljoin(url, link['href'])).url
                            same_site = urlparse(full_url).netloc == base_netloc
                        except ValueError:
                            # A single malformed href should not stop link
                            # discovery for the rest of the page.
                            continue

                        # Only index URLs from the same domain, once each.
                        if same_site and full_url not in seen:
                            seen.add(full_url)
                            pending.append(full_url)

                except Exception as e:
                    # Best-effort crawl: report the failure and move on.
                    # NOTE(review): diagnostics go to stderr; if this server
                    # speaks a protocol over stdout, printing there would
                    # presumably corrupt the stream -- confirm transport.
                    print(f"Error indexing {url}: {e}", file=sys.stderr)
                    continue

        return pages_indexed
  • Dataclass representing a cached documentation page with expiration logic, used by the indexer.
    @dataclass
    class CachedPage:
        """One stored documentation page plus the data needed to expire it."""

        # Page title as stored by whoever created the entry.
        title: str
        # Page body text.
        content: str
        # Address the page was stored under.
        url: str
        # Name of the documentation site the page belongs to.
        site: str
        # Creation time, compared against time.time() (seconds since epoch).
        timestamp: float
        # Lifetime in seconds; defaults to one hour.
        ttl: float = 3600

        @property
        def is_expired(self) -> bool:
            """Return True once the current time is past timestamp + ttl."""
            expires_at = self.timestamp + self.ttl
            return expires_at < time.time()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/clarkemn/cortex-cloud-docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.