OpenTelemetry Documentation MCP Server

Overview Schema Related Servers Score Discussions

opentelemetry-documentation-mcp-server
opentelemetry_documentation_mcp_server

util.py•6.34 KiB

# Copyright 2025 ryu1maniwa. All Rights Reserved.
#
# This file is derived from awslabs.aws-documentation-mcp-server, which is licensed as follows:
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
"""Utility functions for OpenTelemetry Documentation MCP Server."""

import markdownify
from .models import SearchResult
from bs4 import BeautifulSoup
from typing import Any, Dict, List


# CSS selectors tried, in order, to locate the main documentation container.
_CONTENT_SELECTORS = (
    '.td-content',  # opentelemetry.io uses this selector for main content
    'main',
    'article',
    '#content',
    '.content',
    '#body-content',
    "div[role='main']",
    '.td-main',
)

# CSS selectors whose matches are deleted (element AND contents) from the main content
# before conversion. 'script' and 'style' are listed here because markdownify's `strip`
# option only unwraps a tag and keeps its inner text, which would otherwise leak raw
# JavaScript/CSS into the Markdown output.
_REMOVE_SELECTORS = (
    'script',
    'style',
    'noscript',
    '.prevNext',
    '.docsite-footer',
    '.feedback',
    '.td-sidebar',
    '.td-sidebar-nav',
    '.td-page-meta',
    '.td-search',
)

# Tag names handed to markdownify's `strip` option: the tag itself is not converted,
# but its children still are. Class selectors such as '.td-sidebar' are intentionally
# absent: `strip` compares against tag names, so they could never match, and those
# elements are already deleted via _REMOVE_SELECTORS.
_TAGS_TO_STRIP = [
    'script',
    'style',
    'noscript',
    'meta',
    'link',
    'footer',
    'nav',
    'aside',
    'header',
    # Common unnecessary elements
    'js-show-more-buttons',
    'js-show-more-text',
    'feedback-container',
    'feedback-section',
    'doc-feedback-container',
    'doc-feedback-section',
    'warning-container',
    'warning-section',
    'cookie-banner',
    'cookie-notice',
    'copyright-section',
    'legal-section',
    'terms-section',
]


def extract_content_from_html(html: str) -> str:
    """Extract and convert HTML content to Markdown format.

    The main content container is located with a list of known selectors (falling
    back to ``<body>``, then the whole document), unwanted elements are deleted,
    and the remainder is converted with markdownify.

    Args:
        html: Raw HTML content to process

    Returns:
        Simplified markdown version of the content, or an ``<e>...</e>`` error
        marker when the input is empty, the conversion yields nothing, or any
        exception is raised while parsing/converting.
    """
    if not html:
        return '<e>Empty HTML content</e>'

    try:
        # Parse HTML with BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')

        # Use the first selector that matches as the main content area
        main_content = None
        for selector in _CONTENT_SELECTORS:
            main_content = soup.select_one(selector)
            if main_content is not None:
                break

        # If no main content found, use the body (or the whole document)
        if main_content is None:
            main_content = soup.body or soup

        # Delete navigation/boilerplate/script elements that might be in the main content
        for selector in _REMOVE_SELECTORS:
            for element in main_content.select(selector):
                element.decompose()

        # Use markdownify on the cleaned HTML content
        content = markdownify.markdownify(
            str(main_content),
            heading_style='ATX',
            autolinks=True,
            default_title=True,
            escape_asterisks=True,
            escape_underscores=True,
            newline_style='SPACES',
            strip=_TAGS_TO_STRIP,
        )

        if not content:
            return '<e>Page failed to be simplified from HTML</e>'

        return content
    except Exception as e:
        # Deliberate catch-all boundary: callers receive an error marker instead of a crash.
        return f'<e>Error converting HTML to Markdown: {str(e)}</e>'


def is_html_content(page_raw: str, content_type: str) -> bool:
    """Determine if content is HTML.

    Args:
        page_raw: Raw page content
        content_type: Content-Type header; empty or ``None`` is treated as HTML

    Returns:
        True if content is HTML, False otherwise
    """
    # Check the missing-header case first so a None content type cannot raise below.
    if not content_type:
        return True

    # Both the media type and the tag name are matched case-insensitively.
    leading = (page_raw or '')[:100].lower()
    return '<html' in leading or 'text/html' in content_type.lower()


def format_documentation_result(url: str, content: str, start_index: int, max_length: int) -> str:
    """Format documentation result with pagination information.

    Args:
        url: Documentation URL
        content: Content to format
        start_index: Start index for pagination (negative values are treated as 0)
        max_length: Maximum content length

    Returns:
        Formatted documentation result: a header followed by the requested slice and,
        when more content remains, a hint carrying the next ``start_index``. When the
        slice would be empty, the header is followed by a "no more content" marker.
    """
    header = f'OpenTelemetry Documentation from {url}:\n\n'
    no_more_content = f'{header}<e>No more content available.</e>'

    # A negative start would slice from the end of the content and produce a
    # misleading continuation hint, so clamp it to the beginning.
    start_index = max(start_index, 0)
    original_length = len(content)

    if start_index >= original_length:
        return no_more_content

    # Calculate the end index, ensuring we don't go beyond the content length
    end_index = min(start_index + max_length, original_length)
    truncated_content = content[start_index:end_index]

    # Reached when max_length <= 0
    if not truncated_content:
        return no_more_content

    result = f'{header}{truncated_content}'

    # Only add the prompt to continue fetching if there is still remaining content
    if end_index < original_length:
        result += (
            f'\n\n<e>Content truncated. Call the read_documentation tool with '
            f'start_index={end_index} to get more content.</e>'
        )

    return result


def parse_search_results(data: Dict[str, Any]) -> List[SearchResult]:
    """Parse Google Custom Search results into structured format.

    Args:
        data: Raw API response data from Google Custom Search

    Returns:
        List of SearchResult objects in a standard format, ranked from 1 in the
        order the items appear; empty when ``data`` has no (or a null) ``items`` entry.
    """
    items = data.get('items') or []
    return [
        SearchResult(
            rank_order=rank,
            url=item.get('link', ''),
            title=item.get('title', ''),
            context=item.get('snippet'),
        )
        for rank, item in enumerate(items, start=1)
    ]

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ryu1maniwa/opentelemetry-documentation-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

util.py•6.34 KiB