# Copyright 2025 ryu1maniwa. All Rights Reserved.
#
# This file is derived from awslabs.aws-documentation-mcp-server, which is licensed as follows:
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
# and limitations under the License.
"""Tests for utility functions in the OpenTelemetry Documentation MCP Server."""
from opentelemetry_documentation_mcp_server.util import (
extract_content_from_html,
format_documentation_result,
is_html_content,
parse_search_results,
)
from unittest.mock import patch
class TestIsHtmlContent:
    """Checks for the ``is_html_content`` detection helper."""

    def test_html_tag_in_content(self):
        """Markup containing an ``<html>`` tag is reported as HTML."""
        markup = '<html><body>Test content</body></html>'
        detected = is_html_content(markup, '')
        assert detected is True

    def test_html_content_type(self):
        """A ``text/html`` content type is reported as HTML."""
        body = 'Some content'
        detected = is_html_content(body, 'text/html; charset=utf-8')
        assert detected is True

    def test_empty_content_type(self):
        """An empty content type is expected to be reported as HTML."""
        # No tags in the body: the empty content type alone drives the result.
        body = 'Some content without HTML tags'
        detected = is_html_content(body, '')
        assert detected is True

    def test_non_html_content(self):
        """Plain text with a ``text/plain`` content type is not HTML."""
        body = 'Plain text content'
        detected = is_html_content(body, 'text/plain')
        assert detected is False
class TestFormatDocumentationResult:
    """Checks for the ``format_documentation_result`` formatter."""

    def test_normal_content(self):
        """Short content is returned whole, prefixed by the source header."""
        url = 'https://opentelemetry.io/docs/concepts/overview/'
        content = 'Test content'
        expected = f'OpenTelemetry Documentation from {url}:\n\n{content}'

        assert format_documentation_result(url, content, 0, 100) == expected

    def test_start_index_beyond_content(self):
        """A start index past the end yields the no-more-content marker."""
        url = 'https://opentelemetry.io/docs/concepts/overview/'
        content = 'Test content'

        formatted = format_documentation_result(url, content, 100, 100)

        assert '<e>No more content available.</e>' in formatted

    def test_empty_truncated_content(self):
        """A start index equal to the content length leaves nothing to return."""
        url = 'https://opentelemetry.io/docs/concepts/overview/'
        content = 'Test content'
        # Starting exactly at the end of the content gives an empty slice.
        start_at_end = len(content)

        formatted = format_documentation_result(url, content, start_at_end, 100)

        assert '<e>No more content available.</e>' in formatted

    def test_truncated_content_with_more_available(self):
        """Content longer than ``max_length`` is cut and a continuation hint added."""
        url = 'https://opentelemetry.io/docs/concepts/overview/'
        max_length = 100
        content = 'A' * (2 * max_length)  # 200 characters
        first_chunk = 'A' * max_length

        formatted = format_documentation_result(url, content, 0, max_length)

        assert first_chunk in formatted
        assert 'start_index=100' in formatted
        assert 'Content truncated' in formatted

    def test_truncated_content_exact_fit(self):
        """Content of exactly ``max_length`` characters is not marked truncated."""
        url = 'https://opentelemetry.io/docs/concepts/overview/'
        max_length = 100
        content = 'A' * max_length

        formatted = format_documentation_result(url, content, 0, max_length)

        assert 'Content truncated' not in formatted
class TestExtractContentFromHtml:
    """Checks for ``extract_content_from_html`` with its parsers mocked out."""

    def test_successful_extraction(self):
        """The converted markdown is returned and each parser is called once."""
        soup_target = 'opentelemetry_documentation_mcp_server.util.BeautifulSoup'
        markdownify_target = 'opentelemetry_documentation_mcp_server.util.markdownify.markdownify'

        with patch(soup_target) as mock_soup, patch(markdownify_target) as mock_markdownify:
            # The parsed document acts as its own body and no selector matches.
            parsed = mock_soup.return_value
            parsed.body = parsed
            parsed.select_one.return_value = None  # No main content found
            mock_markdownify.return_value = 'Test content'

            extracted = extract_content_from_html(
                '<html><body><p>Test content</p></body></html>'
            )

            assert 'Test content' in extracted
            mock_soup.assert_called_once()
            mock_markdownify.assert_called_once()

    def test_empty_content(self):
        """Empty input returns the error marker without invoking the HTML parser."""
        with patch('opentelemetry_documentation_mcp_server.util.BeautifulSoup') as mock_soup:
            extracted = extract_content_from_html('')

            assert extracted == '<e>Empty HTML content</e>'
            mock_soup.assert_not_called()

    def test_extract_content_with_code_blocks(self):
        """Heading and fenced code from the converter survive in the result."""
        soup_target = 'opentelemetry_documentation_mcp_server.util.BeautifulSoup'
        markdownify_target = 'opentelemetry_documentation_mcp_server.util.markdownify.markdownify'

        # Sample page containing a heading, a paragraph and a JavaScript snippet.
        html_content = """
<html>
<body>
<div class="content">
<h1>OpenTelemetry Documentation</h1>
<p>Basic paragraph</p>
<pre><code class="language-javascript">
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const sdk = new NodeSDK({
instrumentations: [getNodeAutoInstrumentations()]
});
sdk.start();
</code></pre>
</div>
</body>
</html>
"""
        # Markdown the mocked converter hands back for that page.
        converted_markdown = """# OpenTelemetry Documentation
Basic paragraph
```javascript
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const sdk = new NodeSDK({
instrumentations: [getNodeAutoInstrumentations()]
});
sdk.start();
```
"""

        with patch(soup_target) as mock_soup, patch(markdownify_target) as mock_markdownify:
            parsed = mock_soup.return_value
            parsed.body = parsed
            parsed.select_one.return_value = None
            mock_markdownify.return_value = converted_markdown

            extracted = extract_content_from_html(html_content)

            # The heading must be present in the output.
            assert '# OpenTelemetry Documentation' in extracted
            # The fenced code block and its identifiers must be present too.
            assert "```javascript" in extracted
            assert "NodeSDK" in extracted
            assert "getNodeAutoInstrumentations" in extracted
class TestParseSearchResults:
    """Checks for the ``parse_search_results`` response parser."""

    def test_empty_data(self):
        """An empty response dictionary produces an empty result list."""
        assert parse_search_results({}) == []

    def test_parse_search_results(self):
        """Each item in a Google Custom Search style payload becomes a ranked result."""
        # Mock response data from Google Custom Search API
        payload = {
            "items": [
                {
                    "title": "OpenTelemetry Overview",
                    "link": "https://opentelemetry.io/docs/concepts/observability-primer/",
                    "snippet": "OpenTelemetry is a collection of tools, APIs, and SDKs...",
                },
                {
                    "title": "Instrumentation",
                    "link": "https://opentelemetry.io/docs/instrumentation/",
                    "snippet": "Instrumentation is the act of adding observability code to an app...",
                },
            ]
        }
        # (rank_order, url, title, context) expected for each parsed entry, in order.
        expected_rows = [
            (
                1,
                "https://opentelemetry.io/docs/concepts/observability-primer/",
                "OpenTelemetry Overview",
                "OpenTelemetry is a collection of tools, APIs, and SDKs...",
            ),
            (
                2,
                "https://opentelemetry.io/docs/instrumentation/",
                "Instrumentation",
                "Instrumentation is the act of adding observability code to an app...",
            ),
        ]

        parsed = parse_search_results(payload)

        assert len(parsed) == 2
        for entry, (rank, url, title, context) in zip(parsed, expected_rows):
            assert entry.rank_order == rank
            assert entry.url == url
            assert entry.title == title
            assert entry.context == context