Skip to main content
Glama
ryu1maniwa

OpenTelemetry Documentation MCP Server

by ryu1maniwa
util.py (6.5 kB)
# Copyright 2025 ryu1maniwa. All Rights Reserved.
#
# This file is derived from awslabs.aws-documentation-mcp-server, which is licensed as follows:
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
"""Utility functions for OpenTelemetry Documentation MCP Server."""

from typing import Any, Dict, List

import markdownify
from bs4 import BeautifulSoup

from .models import SearchResult


# CSS selectors tried, in order, to locate the main documentation container.
_CONTENT_SELECTORS = (
    '.td-content',  # opentelemetry.io uses this selector for main content
    'main',
    'article',
    '#content',
    '.content',
    '#body-content',
    "div[role='main']",
    '.td-main',
)

# CSS selectors for elements that are removed (together with their content)
# from the main container before conversion.
_REMOVE_SELECTORS = (
    # Non-document payloads: their text is code, not prose, and would
    # otherwise be emitted verbatim into the Markdown output.
    'script',
    'style',
    'noscript',
    # Navigation / page chrome that may sit inside the main container.
    '.prevNext',
    '.docsite-footer',
    '.feedback',
    '.td-sidebar',
    '.td-sidebar-nav',
    '.td-page-meta',
    '.td-search',
)

# Tag names handed to markdownify's ``strip`` option. ``strip`` matches tag
# names only (CSS selectors / class names are ignored) and keeps the inner
# text of the stripped tag, so only real tag names belong here.
_STRIP_TAGS = [
    'meta',
    'link',
    'footer',
    'nav',
    'aside',
    'header',
]


def extract_content_from_html(html: str) -> str:
    """Extract and convert HTML content to Markdown format.

    The main content container is located with a list of common selectors
    (falling back to ``<body>`` or the whole document), page chrome and
    script/style elements are removed, and the remainder is converted with
    markdownify.

    Args:
        html: Raw HTML content to process

    Returns:
        Simplified markdown version of the content, or an ``<e>...</e>``
        error marker string when the input is empty, the conversion yields
        nothing, or an exception occurs.
    """
    if not html:
        return '<e>Empty HTML content</e>'

    try:
        soup = BeautifulSoup(html, 'html.parser')

        # Pick the first selector that matches anything.
        main_content = None
        for selector in _CONTENT_SELECTORS:
            main_content = soup.select_one(selector)
            if main_content is not None:
                break

        # If no main content found, use the body (or the whole document).
        if main_content is None:
            main_content = soup.body if soup.body else soup

        # Drop unwanted elements entirely, including their content.
        for selector in _REMOVE_SELECTORS:
            for element in main_content.select(selector):
                element.decompose()

        content = markdownify.markdownify(
            str(main_content),
            heading_style='ATX',
            autolinks=True,
            default_title=True,
            escape_asterisks=True,
            escape_underscores=True,
            newline_style='SPACES',
            strip=_STRIP_TAGS,
        )

        if not content:
            return '<e>Page failed to be simplified from HTML</e>'

        return content
    except Exception as e:
        # Best-effort boundary: callers receive an error marker, not a raise.
        return f'<e>Error converting HTML to Markdown: {e}</e>'


def is_html_content(page_raw: str, content_type: str) -> bool:
    """Determine if content is HTML.

    Args:
        page_raw: Raw page content
        content_type: Content-Type header; may be empty or None

    Returns:
        True if content is HTML, False otherwise. A missing Content-Type is
        treated as HTML.
    """
    # Check the "no header" case first so a None header never reaches the
    # substring test below.
    if not content_type:
        return True

    # HTML tag names and media types are case-insensitive.
    if 'text/html' in content_type.lower():
        return True

    return '<html' in (page_raw or '')[:100].lower()


def format_documentation_result(url: str, content: str, start_index: int, max_length: int) -> str:
    """Format documentation result with pagination information.

    Args:
        url: Documentation URL
        content: Content to format
        start_index: Start index for pagination; negative values are treated as 0
        max_length: Maximum content length

    Returns:
        Formatted documentation result. When content remains after the
        returned slice, a hint with the next ``start_index`` is appended.
    """
    header = f'OpenTelemetry Documentation from {url}:\n\n'
    no_more = f'{header}<e>No more content available.</e>'

    # A negative index would slice from the end of the content and produce a
    # wrong continuation offset, so clamp it to the beginning.
    start_index = max(start_index, 0)

    original_length = len(content)
    if start_index >= original_length:
        return no_more

    # Calculate the end index, ensuring we don't go beyond the content length.
    end_index = min(start_index + max_length, original_length)
    truncated_content = content[start_index:end_index]
    if not truncated_content:
        return no_more

    result = f'{header}{truncated_content}'

    # Only add the prompt to continue fetching if there is still remaining content.
    next_start = start_index + len(truncated_content)
    if next_start < original_length:
        result += f'\n\n<e>Content truncated. Call the read_documentation tool with start_index={next_start} to get more content.</e>'

    return result


def parse_search_results(data: Dict[str, Any]) -> List[SearchResult]:
    """Parse Google Custom Search results into structured format.

    Args:
        data: Raw API response data from Google Custom Search

    Returns:
        List of SearchResult objects in a standard format, ranked from 1 in
        response order. Empty when the response has no ``items``.
    """
    # ``items`` may be absent or null; both mean "no results".
    items = data.get('items') or []
    return [
        SearchResult(
            rank_order=rank,
            url=item.get('link', ''),
            title=item.get('title', ''),
            context=item.get('snippet'),
        )
        for rank, item in enumerate(items, start=1)
    ]

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ryu1maniwa/opentelemetry-documentation-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.