# Copyright 2025 ryu1maniwa. All Rights Reserved.
#
# This file is derived from awslabs.aws-documentation-mcp-server, which is licensed as follows:
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
"""opentelemetry-documentation-mcp-server implementation."""
import argparse
import httpx
import json
import os
import re
import sys
# Package-local data models (explicit relative import)
from .models import (
SearchResult,
)
# Package-local helper functions (explicit relative import)
from .util import (
extract_content_from_html,
format_documentation_result,
is_html_content,
parse_search_results,
)
from loguru import logger
from mcp.server.fastmcp import Context, FastMCP
from pydantic import AnyUrl, Field
from typing import List, Union
# Logging: drop loguru's default sink and log to stderr instead, at the level
# named by FASTMCP_LOG_LEVEL (falling back to WARNING when it is unset).
logger.remove()
logger.add(sys.stderr, level=os.environ.get('FASTMCP_LOG_LEVEL', 'WARNING'))

# User-Agent header sent with every outbound HTTP request made by the tools below.
DEFAULT_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/91.0.4472.124 Safari/537.36 '
    'ModelContextProtocol/1.0 (OpenTelemetry Documentation Server)'
)

# Google Custom Search: REST endpoint and the search-engine ID for opentelemetry.io.
GOOGLE_SEARCH_API_URL = 'https://customsearch.googleapis.com/customsearch/v1'
OPENTELEMETRY_SEARCH_CX = '015faf7de29c34606'

# The server instance; tools are attached to it with the @mcp.tool() decorator.
mcp = FastMCP(
    'opentelemetry-documentation-mcp-server',
    instructions="""
# OpenTelemetry Documentation MCP Server
This server provides tools to access public OpenTelemetry documentation, search for content, and discover related articles.
## Best Practices
- For long documentation pages, make multiple calls to `read_documentation` with different `start_index` values for pagination
- For very long documents (>30,000 characters), stop reading if you've found the needed information
- When searching, use specific technical terms rather than general phrases
- Include specific OpenTelemetry concepts in search terms (e.g., "tracing instrumentation" instead of just "tracing")
- Use terms that would appear in the documentation (e.g., "SDK configuration" instead of "how to configure")
- Always cite the documentation URL when providing information to users
## Tool Selection Guide
- Use `search_documentation` when: You need to find documentation about a specific OpenTelemetry feature, concept, or instrumentation
- Use `read_documentation` when: You have a specific documentation URL and need its content
""",
    dependencies=['pydantic', 'httpx', 'beautifulsoup4', 'markdownify'],
)
@mcp.tool()
async def read_documentation(
    ctx: Context,
    url: Union[AnyUrl, str] = Field(description='URL of the OpenTelemetry documentation page to read'),
    max_length: int = Field(
        default=5000,
        description='Maximum number of characters to return.',
        gt=0,
        lt=1000000,
    ),
    start_index: int = Field(
        default=0,
        description='On return output starting at this character index, useful if a previous fetch was truncated and more content is required.',
        ge=0,
    ),
) -> str:
    """Fetch and convert an OpenTelemetry documentation page to markdown format.

    ## Usage

    This tool retrieves the content of an OpenTelemetry documentation page and converts it
    to markdown format. For long documents, you can make multiple calls with different
    start_index values to retrieve the entire content in chunks.

    ## URL Requirements

    - Must be from the opentelemetry.io domain
    - Must be a documentation page

    ## Example URLs

    - https://opentelemetry.io/docs/concepts/observability-primer/
    - https://opentelemetry.io/docs/instrumentation/
    - https://opentelemetry.io/docs/collector/

    ## Output Format

    The output is formatted as markdown text with:
    - Preserved headings and structure
    - Code blocks for examples
    - Lists and tables converted to markdown format

    ## Handling Long Documents

    If the response indicates the document was truncated, you have several options:

    1. **Continue Reading**: Make another call with start_index set to the end of the previous response
    2. **Stop Early**: For very long documents (>30,000 characters), if you've already found the specific information needed, you can stop reading

    Args:
        ctx: MCP context for logging and error handling
        url: URL of the OpenTelemetry documentation page to read
        max_length: Maximum number of characters to return
        start_index: On return output starting at this character index

    Returns:
        Markdown content of the OpenTelemetry documentation. When the page cannot be
        fetched (transport error or HTTP status >= 400) the error message is returned
        as the result instead of raising.

    Raises:
        ValueError: If the URL does not start with http(s)://opentelemetry.io/
    """
    page_url = str(url)

    # Guard clause: only http/https URLs whose host is exactly opentelemetry.io
    # (followed by a path separator) are accepted.
    on_allowed_domain = re.match(r'^https?://opentelemetry\.io/', page_url) is not None
    if not on_allowed_domain:
        await ctx.error(f'Invalid URL: {page_url}. URL must be from the opentelemetry.io domain')
        raise ValueError('URL must be from the opentelemetry.io domain')

    logger.debug(f'Fetching documentation from {page_url}')

    request_headers = {'User-Agent': DEFAULT_USER_AGENT}
    async with httpx.AsyncClient() as client:
        # Both failure modes (transport error, HTTP error status) end up in
        # `failure` so they can share a single reporting path below.
        failure = None
        try:
            response = await client.get(
                page_url,
                follow_redirects=True,
                headers=request_headers,
                timeout=30,
            )
        except httpx.HTTPError as exc:
            failure = f'Failed to fetch {page_url}: {exc!s}'
        else:
            if response.status_code >= 400:
                failure = f'Failed to fetch {page_url} - status code {response.status_code}'

        if failure is not None:
            # Report to the server log and to the MCP client, then hand the
            # message back as the tool result.
            logger.error(failure)
            await ctx.error(failure)
            return failure

        raw_page = response.text
        declared_type = response.headers.get('content-type', '')

    # HTML pages go through the extraction helper; anything else is passed on verbatim.
    content = (
        extract_content_from_html(raw_page)
        if is_html_content(raw_page, declared_type)
        else raw_page
    )

    result = format_documentation_result(page_url, content, start_index, max_length)

    # Debug aid only: note when the requested window ends before the content does.
    window_end = start_index + max_length
    total_chars = len(content)
    if total_chars > window_end:
        logger.debug(f'Content truncated at {window_end} of {total_chars} characters')

    return result
@mcp.tool()
async def search_documentation(
    ctx: Context,
    search_phrase: str = Field(description='Search phrase to use'),
    limit: int = Field(
        default=10,
        description='Maximum number of results to return',
        ge=1,
        le=50,
    ),
) -> List[SearchResult]:
    """Search OpenTelemetry documentation using Google Custom Search.

    ## Usage

    This tool searches across OpenTelemetry documentation for pages matching your search phrase.
    Use it to find relevant documentation when you don't have a specific URL.

    ## Search Tips

    - Use specific technical terms rather than general phrases
    - Include OpenTelemetry concepts to narrow results (e.g., "tracing instrumentation" instead of just "tracing")
    - Use quotes for exact phrase matching (e.g., "SDK configuration")
    - Include abbreviations and alternative terms to improve results (e.g., "OTEL collector")

    ## API Limits

    The search uses Google's Custom Search API which has usage limits:
    - Free tier: 100 queries per day
    - Results are limited to 10 per page

    ## Result Interpretation

    Each result includes:
    - rank_order: The relevance ranking (lower is more relevant)
    - url: The documentation page URL
    - title: The page title
    - context: A brief excerpt or summary (if available)

    Args:
        ctx: MCP context for logging and error handling
        search_phrase: Search phrase to use
        limit: Maximum number of results to return (will be capped at 10 due to API limitations)

    Returns:
        List of search results with URLs, titles, and context snippets. On any failure
        (missing API key, transport error, HTTP status >= 400, undecodable JSON) the list
        holds a single placeholder result whose title carries the error message.
    """

    def _error_result(message: str) -> List[SearchResult]:
        # Failures are reported in-band: one placeholder result with an empty
        # URL and the error text as its title.
        return [SearchResult(rank_order=1, url='', title=message, context=None)]

    logger.debug(f'Searching OpenTelemetry documentation for: {search_phrase}')

    # The API key comes from the environment; without it no request is made.
    api_key = os.environ.get('GOOGLE_API_KEY')
    if not api_key:
        missing_key_msg = (
            'Google API key not found. To use the search_documentation tool, you need to set the '
            'GOOGLE_API_KEY environment variable. You can get an API key from the Google Cloud Console '
            '(https://console.cloud.google.com/apis/credentials) and add it to your MCP configuration.'
        )
        logger.error(missing_key_msg)
        await ctx.error(missing_key_msg)
        return _error_result(missing_key_msg)

    # Requests above 10 results are clamped (and the clamp is logged).
    if limit > 10:
        logger.warning(f'Limiting search results to 10 (API limit), requested: {limit}')
    page_size = min(limit, 10)

    query = {
        'key': api_key,
        'cx': OPENTELEMETRY_SEARCH_CX,
        'q': search_phrase,
        'num': page_size,
    }
    request_headers = {'User-Agent': DEFAULT_USER_AGENT}

    async with httpx.AsyncClient() as client:
        # Each stage either yields a value for the next one or sets `problem`;
        # every failure is then reported through the single branch below.
        problem = None
        payload = None
        try:
            response = await client.get(
                GOOGLE_SEARCH_API_URL,
                params=query,
                headers=request_headers,
                timeout=30,
            )
        except httpx.HTTPError as exc:
            problem = f'Error searching OpenTelemetry docs: {exc!s}'
        else:
            if response.status_code >= 400:
                problem = f'Error searching OpenTelemetry docs - status code {response.status_code}'
            else:
                try:
                    payload = response.json()
                except json.JSONDecodeError as exc:
                    problem = f'Error parsing search results: {exc!s}'

        if problem is not None:
            logger.error(problem)
            await ctx.error(problem)
            return _error_result(problem)

    hits = parse_search_results(payload)
    logger.debug(f'Found {len(hits)} search results for: {search_phrase}')
    return hits
def main():
    """Parse command-line options and start the MCP server.

    Options:
        --sse: serve over the SSE transport instead of the default stdio transport.
        --port: port applied to the server settings when --sse is given (default 8889).
    """
    cli = argparse.ArgumentParser(
        description='A Model Context Protocol (MCP) server for OpenTelemetry Documentation'
    )
    cli.add_argument('--sse', action='store_true', help='Use SSE transport')
    cli.add_argument('--port', type=int, default=8889, help='Port to run the server on')
    options = cli.parse_args()

    logger.info('Starting OpenTelemetry Documentation MCP Server')

    # Default path: stdio transport; --port is not consulted here.
    if not options.sse:
        logger.info('Using standard stdio transport')
        mcp.run()
        return

    # SSE path: the port must be set on the server settings before run().
    logger.info(f'Using SSE transport on port {options.port}')
    mcp.settings.port = options.port
    mcp.run(transport='sse')


# Start the server only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()