"""Tools for searching and querying MkDocs documentation."""
import logging
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import aiofiles
import frontmatter
from mcp.types import Tool, CallToolResult, TextContent
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class DocumentationToolManager:
    """Manages tools for searching and querying MkDocs documentation.

    Every tool walks the ``*.md`` files below ``docs_path``, separates the
    front matter from the Markdown body with ``frontmatter`` and reports its
    findings as one text block wrapped in a ``CallToolResult``.
    """

    # Upper bound advertised for ``max_results`` in the ``search_docs`` schema.
    _MAX_RESULTS_LIMIT = 50
    # Number of match contexts shown per file by ``search_docs``.
    _MATCHES_PER_FILE = 3
    # Characters of context kept on each side of a ``search_docs`` match.
    _CONTEXT_CHARS = 100
    # Length of the body preview produced by ``list_pages``.
    _PREVIEW_CHARS = 200
    # Length of the code snippet shown by ``search_code_blocks``.
    _SNIPPET_CHARS = 300

    # A fence line: optional indentation, >= 3 backticks or tildes, then the
    # info string (language plus optional attributes such as ``title="x"``).
    _FENCE_RE = re.compile(r'^[ \t]*(`{3,}|~{3,})(.*)$')
    # First language-like token of an info string ("python", "c++", "c#").
    _LANG_TOKEN_RE = re.compile(r'[\w+#-]+')
    # ATX heading on a single line: 1-6 hashes, blanks, non-blank title.
    _HEADING_RE = re.compile(r'^(#{1,6})[ \t]+(\S.*?)[ \t]*$')

    def __init__(self, docs_path: Path) -> None:
        """Initialize the tool manager.

        Args:
            docs_path: Path to the documentation directory
        """
        self.docs_path = docs_path

    async def list_tools(self) -> "List[Tool]":
        """List all available documentation tools.

        Returns:
            List of Tool objects
        """
        return [
            Tool(
                name="search_docs",
                description="Search through documentation content for specific terms or phrases",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query string"
                        },
                        "case_sensitive": {
                            "type": "boolean",
                            "description": "Whether search should be case sensitive",
                            "default": False
                        },
                        "max_results": {
                            "type": "integer",
                            "description": "Maximum number of results to return",
                            "default": 10,
                            "minimum": 1,
                            "maximum": 50
                        }
                    },
                    "required": ["query"]
                }
            ),
            Tool(
                name="find_by_title",
                description="Find documentation pages by title or heading",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "title": {
                            "type": "string",
                            "description": "Title or heading to search for"
                        },
                        "exact_match": {
                            "type": "boolean",
                            "description": "Whether to match exactly or use fuzzy matching",
                            "default": False
                        }
                    },
                    "required": ["title"]
                }
            ),
            Tool(
                name="list_pages",
                description="List all available documentation pages with their metadata",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "include_content": {
                            "type": "boolean",
                            "description": "Whether to include content preview",
                            "default": False
                        },
                        "pattern": {
                            "type": "string",
                            "description": "Optional filename pattern to filter pages"
                        }
                    },
                    "required": []
                }
            ),
            Tool(
                name="get_page_outline",
                description="Get the outline/structure of a documentation page",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "page_path": {
                            "type": "string",
                            "description": "Relative path to the documentation page"
                        }
                    },
                    "required": ["page_path"]
                }
            ),
            Tool(
                name="search_code_blocks",
                description="Search for code examples and snippets in documentation",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "language": {
                            "type": "string",
                            "description": "Programming language to filter by (optional)"
                        },
                        "query": {
                            "type": "string",
                            "description": "Search term within code blocks (optional)"
                        }
                    },
                    "required": []
                }
            )
        ]

    async def call_tool(self, name: str, arguments: Dict[str, Any]) -> "CallToolResult":
        """Call a documentation tool.

        Args:
            name: Tool name to call
            arguments: Tool arguments, passed to the tool as keyword arguments

        Returns:
            CallToolResult with the tool output. Arguments that do not fit
            the tool's signature produce an error result instead of an
            uncaught ``TypeError``.

        Raises:
            ValueError: If the tool name is not recognized
        """
        logger.debug("Calling tool %s with arguments: %s", name, arguments)
        handlers = {
            "search_docs": self._search_docs,
            "find_by_title": self._find_by_title,
            "list_pages": self._list_pages,
            "get_page_outline": self._get_page_outline,
            "search_code_blocks": self._search_code_blocks,
        }
        handler = handlers.get(name)
        if handler is None:
            raise ValueError(f"Unknown tool: {name}")
        try:
            # Calling an ``async def`` only binds the arguments; the body runs
            # on ``await``. A TypeError here is therefore always a bad call
            # signature (unknown or missing argument), never a tool failure.
            pending = handler(**(arguments or {}))
        except TypeError as err:
            return self._text_result(
                f"Invalid arguments for tool '{name}': {err}", is_error=True
            )
        return await pending

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text_result(text: str, is_error: bool = False) -> "CallToolResult":
        """Wrap ``text`` in a single-item ``CallToolResult``."""
        return CallToolResult(
            content=[TextContent(type="text", text=text)],
            isError=is_error
        )

    def _missing_docs_result(self) -> "CallToolResult":
        """Return the error result used when ``docs_path`` does not exist."""
        return self._text_result("Documentation directory not found.", is_error=True)

    def _markdown_files(self) -> List[Path]:
        """Return every ``*.md`` file below ``docs_path`` in sorted order.

        Sorting keeps truncated result lists stable between calls.
        """
        return sorted(self.docs_path.rglob("*.md"))

    def _display_path(self, md_file: Path) -> str:
        """Return ``md_file`` relative to ``docs_path`` with ``/`` separators."""
        return md_file.relative_to(self.docs_path).as_posix()

    @staticmethod
    def _title_from_stem(md_file: Path) -> str:
        """Derive a display title from the file name (``getting-started`` -> ``Getting Started``)."""
        return md_file.stem.replace('-', ' ').title()

    async def _read_page(self, md_file: Path) -> Tuple[Dict[str, Any], str, str]:
        """Read ``md_file`` and split it into front matter and body.

        Returns:
            Tuple ``(metadata, body, raw)`` where ``raw`` is the unparsed
            file content.
        """
        async with aiofiles.open(md_file, 'r', encoding='utf-8') as handle:
            raw = await handle.read()
        post = frontmatter.loads(raw)
        return post.metadata, post.content, raw

    def _resolve_page_path(self, page_path: str) -> Optional[Path]:
        """Resolve ``page_path`` below ``docs_path``.

        Returns:
            The resolved path, or ``None`` when it would fall outside the
            documentation directory (``..`` segments, absolute paths) or
            cannot be resolved at all.
        """
        try:
            root = self.docs_path.resolve()
            candidate = (root / page_path).resolve()
            # ``relative_to`` raises ValueError when candidate escapes root.
            candidate.relative_to(root)
        except (ValueError, OSError, RuntimeError):
            return None
        return candidate

    @classmethod
    def _split_fences(cls, body: str) -> Tuple[List[str], List[Tuple[str, str, int]]]:
        """Separate fenced code blocks from the surrounding prose.

        Returns:
            Tuple ``(prose_lines, blocks)``. ``prose_lines`` holds every line
            outside a fenced block. ``blocks`` holds one
            ``(language, code, line)`` tuple per closed block, where ``line``
            is the 1-based line of the opening fence within ``body`` and
            ``language`` falls back to ``"text"``. Lines after an unclosed
            fence count as code and are reported in neither list.
        """
        prose: List[str] = []
        blocks: List[Tuple[str, str, int]] = []
        marker = ""  # fence that opened the current block; empty outside one
        language = ""
        opened_at = 0
        code: List[str] = []
        for number, line in enumerate(body.splitlines(), start=1):
            fence = cls._FENCE_RE.match(line)
            if not marker:
                info = fence.group(2) if fence else ""
                # A backtick fence cannot carry backticks in its info string;
                # such a line is inline code rather than a block opener.
                if fence and not (fence.group(1)[0] == '`' and '`' in info):
                    marker = fence.group(1)
                    token = cls._LANG_TOKEN_RE.search(info)
                    language = token.group(0) if token else "text"
                    opened_at = number
                    code = []
                else:
                    prose.append(line)
                continue
            closes = (
                fence is not None
                and fence.group(1)[0] == marker[0]
                and len(fence.group(1)) >= len(marker)
                and not fence.group(2).strip()
            )
            if closes:
                blocks.append((language, "\n".join(code), opened_at))
                marker = ""
            else:
                code.append(line)
        return prose, blocks

    @classmethod
    def _extract_headings(cls, body: str) -> List[Tuple[int, str]]:
        """Return ``(level, title)`` for every heading outside fenced code.

        Skipping fenced blocks keeps ``# comment`` lines in shell or Python
        examples from being reported as headings.
        """
        prose, _ = cls._split_fences(body)
        found = (cls._HEADING_RE.match(line) for line in prose)
        return [(len(hit.group(1)), hit.group(2)) for hit in found if hit]

    @classmethod
    def _extract_code_blocks(cls, body: str) -> List[Tuple[str, str, int]]:
        """Return ``(language, code, line)`` for every closed fenced block."""
        return cls._split_fences(body)[1]

    @staticmethod
    def _compile_title_pattern(title: str, exact_match: bool) -> "re.Pattern":
        """Build the case-insensitive pattern used by ``find_by_title``.

        With ``exact_match`` the title is always taken literally. Otherwise
        it is interpreted as a regular expression, falling back to a literal
        search when it is not a valid expression (for example ``"C++ ("``).
        """
        if not exact_match:
            try:
                return re.compile(title, re.IGNORECASE)
            except re.error:
                # Not a valid regex: search for the text as typed instead.
                pass
        literal = title.strip() if exact_match else title
        return re.compile(re.escape(literal), re.IGNORECASE)

    @staticmethod
    def _title_matches(pattern: "re.Pattern", candidate: str, exact_match: bool) -> bool:
        """Tell whether ``candidate`` satisfies ``pattern``.

        ``exact_match`` requires the whole (stripped) candidate to match;
        otherwise a match anywhere in the candidate is enough.
        """
        if exact_match:
            return pattern.fullmatch(candidate.strip()) is not None
        return pattern.search(candidate) is not None

    # ------------------------------------------------------------------
    # Tools
    # ------------------------------------------------------------------

    async def _search_docs(
        self,
        query: str,
        case_sensitive: bool = False,
        max_results: int = 10
    ) -> "CallToolResult":
        """Search through documentation content.

        Args:
            query: Literal text to look for in page bodies
            case_sensitive: Whether letter case must match
            max_results: Maximum number of pages to report; clamped to the
                1-50 range advertised in the tool schema

        Returns:
            CallToolResult listing each matching page with its match count
            and up to three context excerpts. An empty query or a missing
            documentation directory yields an error result.
        """
        logger.info("Searching docs for: %s", query)
        if not query or not query.strip():
            # An empty pattern matches at every position of every file.
            return self._text_result("Search query must not be empty.", is_error=True)
        if not self.docs_path.exists():
            return self._missing_docs_result()

        limit = max(1, min(max_results, self._MAX_RESULTS_LIMIT))
        needle = re.compile(re.escape(query), 0 if case_sensitive else re.IGNORECASE)
        hits = []
        for md_file in self._markdown_files():
            try:
                metadata, body, _ = await self._read_page(md_file)
            except Exception as err:
                # Best effort: one unreadable or malformed page must not
                # abort the whole search.
                logger.error("Error searching %s: %s", md_file, err)
                continue
            matches = list(needle.finditer(body))
            if not matches:
                continue
            contexts = []
            for match in matches[:self._MATCHES_PER_FILE]:
                start = max(0, match.start() - self._CONTEXT_CHARS)
                end = min(len(body), match.end() + self._CONTEXT_CHARS)
                excerpt = body[start:end].replace('\n', ' ')
                contexts.append(f"...{excerpt}...")
            hits.append({
                'file': self._display_path(md_file),
                'title': str(metadata.get('title') or self._title_from_stem(md_file)),
                'matches': len(matches),
                'contexts': contexts
            })
            if len(hits) >= limit:
                break

        if not hits:
            return self._text_result(f"No results found for '{query}'")
        parts = [f"Found {len(hits)} results for '{query}':\n\n"]
        for hit in hits:
            parts.append(f"**{hit['title']}** ({hit['file']})\n")
            parts.append(f" - {hit['matches']} match(es)\n")
            parts.extend(f" - {context}\n" for context in hit['contexts'])
            parts.append("\n")
        return self._text_result("".join(parts))

    async def _find_by_title(self, title: str, exact_match: bool = False) -> "CallToolResult":
        """Find documentation pages by title.

        The front-matter ``title`` is checked first; if it does not match,
        the first matching heading of the page is reported instead.
        Comparison is case-insensitive.

        Args:
            title: Title or heading to look for
            exact_match: When true the whole title must equal ``title``;
                otherwise ``title`` is searched for anywhere in the title
                (as a regular expression when it is a valid one)

        Returns:
            CallToolResult listing at most one match per page. An empty
            title or a missing documentation directory yields an error
            result.
        """
        logger.info("Finding pages by title: %s", title)
        if not title or not title.strip():
            return self._text_result("Title must not be empty.", is_error=True)
        if not self.docs_path.exists():
            return self._missing_docs_result()

        pattern = self._compile_title_pattern(title, exact_match)
        found = []
        for md_file in self._markdown_files():
            try:
                metadata, body, _ = await self._read_page(md_file)
            except Exception as err:
                # Best effort: skip pages that cannot be read or parsed.
                logger.error("Error processing %s: %s", md_file, err)
                continue
            # Front matter may hold non-string titles (numbers, dates).
            page_title = str(metadata.get('title') or '')
            if page_title and self._title_matches(pattern, page_title, exact_match):
                found.append({
                    'file': self._display_path(md_file),
                    'title': page_title,
                    'match_type': 'frontmatter'
                })
                continue
            for _, heading in self._extract_headings(body):
                if self._title_matches(pattern, heading, exact_match):
                    found.append({
                        'file': self._display_path(md_file),
                        'title': heading,
                        'match_type': 'heading'
                    })
                    break  # Only first match per file

        if not found:
            return self._text_result(f"No pages found with title matching '{title}'")
        parts = [f"Found {len(found)} pages matching '{title}':\n\n"]
        for entry in found:
            parts.append(f"**{entry['title']}** ({entry['file']})\n")
            parts.append(f" - Match type: {entry['match_type']}\n\n")
        return self._text_result("".join(parts))

    async def _list_pages(
        self,
        include_content: bool = False,
        pattern: Optional[str] = None
    ) -> "CallToolResult":
        """List all documentation pages.

        Args:
            include_content: Whether to add a preview of each page body
            pattern: Optional case-insensitive regular expression that the
                page's relative path must contain

        Returns:
            CallToolResult with title, path, description and size of every
            page. The title comes from the front matter, else the first
            level-1 heading, else the file name. An invalid ``pattern`` or a
            missing documentation directory yields an error result.
        """
        logger.info("Listing documentation pages")
        if not self.docs_path.exists():
            return self._missing_docs_result()
        path_filter = None
        if pattern:
            try:
                path_filter = re.compile(pattern, re.IGNORECASE)
            except re.error as err:
                return self._text_result(f"Invalid pattern '{pattern}': {err}", is_error=True)

        pages = []
        for md_file in self._markdown_files():
            shown_path = self._display_path(md_file)
            if path_filter and not path_filter.search(shown_path):
                continue
            try:
                metadata, body, raw = await self._read_page(md_file)
            except Exception as err:
                # Best effort: skip pages that cannot be read or parsed.
                logger.error("Error processing %s: %s", md_file, err)
                continue
            page_title = metadata.get('title')
            if not page_title:
                top_level = [text for level, text in self._extract_headings(body) if level == 1]
                page_title = top_level[0] if top_level else self._title_from_stem(md_file)
            entry = {
                'file': shown_path,
                'title': str(page_title),
                'description': metadata.get('description') or '',
                'size': len(raw)
            }
            if include_content:
                if len(body) > self._PREVIEW_CHARS:
                    entry['preview'] = body[:self._PREVIEW_CHARS] + "..."
                else:
                    entry['preview'] = body
            pages.append(entry)

        if not pages:
            text = "No documentation pages found"
            if pattern:
                text += f" matching pattern '{pattern}'"
            return self._text_result(text)
        parts = [f"Found {len(pages)} documentation pages:\n\n"]
        for page in pages:
            parts.append(f"**{page['title']}** ({page['file']})\n")
            if page['description']:
                parts.append(f" - {page['description']}\n")
            parts.append(f" - Size: {page['size']} characters\n")
            if include_content and 'preview' in page:
                parts.append(f" - Preview: {page['preview']}\n")
            parts.append("\n")
        return self._text_result("".join(parts))

    async def _get_page_outline(self, page_path: str) -> "CallToolResult":
        """Get the outline of a documentation page.

        Args:
            page_path: Path of the page relative to the documentation
                directory

        Returns:
            CallToolResult with one list item per heading, indented by
            heading level. Paths that leave the documentation directory,
            missing files and directories are all reported as
            "Page not found"; read failures yield an error result.
        """
        logger.info("Getting outline for page: %s", page_path)
        file_path = self._resolve_page_path(page_path)
        if file_path is None or not file_path.is_file():
            return self._text_result(f"Page not found: {page_path}", is_error=True)
        try:
            _, body, _ = await self._read_page(file_path)
        except Exception as err:
            logger.error("Error reading %s: %s", file_path, err)
            return self._text_result(f"Error reading page: {err}", is_error=True)

        headings = self._extract_headings(body)
        if not headings:
            return self._text_result(f"No headings found in {page_path}")
        parts = [f"Outline for {page_path}:\n\n"]
        for level, heading in headings:
            indent = " " * (level - 1)
            parts.append(f"{indent}- {heading}\n")
        return self._text_result("".join(parts))

    async def _search_code_blocks(
        self,
        language: Optional[str] = None,
        query: Optional[str] = None
    ) -> "CallToolResult":
        """Search for code blocks in documentation.

        Args:
            language: Only report blocks whose fence language equals this
                value (case-insensitive); blocks without a language count
                as ``"text"``
            query: Only report blocks containing this literal text
                (case-insensitive)

        Returns:
            CallToolResult with file, approximate line (counted within the
            page body, after the front matter) and up to 300 characters of
            each matching block. A missing documentation directory yields an
            error result.
        """
        logger.info("Searching code blocks - language: %s, query: %s", language, query)
        if not self.docs_path.exists():
            return self._missing_docs_result()

        wanted_language = language.lower() if language else None
        needle = re.compile(re.escape(query), re.IGNORECASE) if query else None
        found = []
        for md_file in self._markdown_files():
            try:
                _, body, _ = await self._read_page(md_file)
            except Exception as err:
                # Best effort: skip pages that cannot be read or parsed.
                logger.error("Error processing %s: %s", md_file, err)
                continue
            for block_language, code, line in self._extract_code_blocks(body):
                if wanted_language and block_language.lower() != wanted_language:
                    continue
                if needle and not needle.search(code):
                    continue
                snippet = code
                if len(code) > self._SNIPPET_CHARS:
                    snippet = code[:self._SNIPPET_CHARS] + "..."
                found.append({
                    'file': self._display_path(md_file),
                    'language': block_language,
                    'content': snippet,
                    'line': line
                })

        if not found:
            filters = []
            if language:
                filters.append(f"language '{language}'")
            if query:
                filters.append(f"query '{query}'")
            text = "No code blocks found"
            if filters:
                text += f" matching {' and '.join(filters)}"
            return self._text_result(text)
        parts = [f"Found {len(found)} code blocks:\n\n"]
        for entry in found:
            parts.append(f"**{entry['file']}** (line ~{entry['line']})\n")
            parts.append(f"Language: {entry['language']}\n")
            parts.append(f"```{entry['language']}\n{entry['content']}\n```\n\n")
        return self._text_result("".join(parts))