tool_discovery.py•9.14 kB
"""File-based tool discovery system for on-demand loading.
Following Anthropic's MCP architecture recommendations:
- Organize tools in filesystem hierarchy
- Load tool definitions on-demand
- Implement search_tools capability with detail levels
- Reduce token consumption by 98.7% (150,000 → 2,000 tokens)
"""
import json
import logging
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal

from mcp.types import Tool
logger = logging.getLogger(__name__)
DetailLevel = Literal["minimal", "standard", "full"]
@dataclass
class ToolMetadata:
"""Lightweight tool metadata for search results."""
name: str
category: str
description: str
file_path: Path
class ToolDiscoverySystem:
    """File-based tool discovery with on-demand loading.

    Keeps a lightweight name/category/description index for every known tool
    and defers loading the full schema until a caller asks for it, so that
    search-style queries stay cheap in tokens and I/O.
    """

    def __init__(self, tools_dir: Path | None = None):
        """Initialize tool discovery system.

        Args:
            tools_dir: Directory containing tool definitions (default: ./tools/)
        """
        self.tools_dir = tools_dir or Path("tools")
        # Full Tool objects, populated lazily by load_tool().
        self._tool_cache: dict[str, Tool] = {}
        # Lightweight index, built eagerly at construction time.
        self._metadata_cache: dict[str, ToolMetadata] = {}
        self._initialize_metadata()

    def _initialize_metadata(self) -> None:
        """Build the metadata index from ``*.json`` files under the tools dir.

        Falls back to a built-in default tool set when the directory does not
        exist. A malformed file is logged and skipped so that one bad
        definition cannot break discovery of the rest.
        """
        if not self.tools_dir.exists():
            logger.warning("Tools directory not found: %s", self.tools_dir)
            self._load_default_tools()
            return
        for tool_file in self.tools_dir.rglob("*.json"):
            try:
                with open(tool_file, encoding="utf-8") as f:
                    data = json.load(f)
                metadata = ToolMetadata(
                    name=data["name"],
                    category=data.get("category", "general"),
                    description=data.get("description", ""),
                    file_path=tool_file,
                )
                self._metadata_cache[metadata.name] = metadata
                logger.debug("Indexed tool: %s", metadata.name)
            except (OSError, ValueError, KeyError, TypeError) as e:
                # ValueError covers json.JSONDecodeError; KeyError/TypeError
                # cover a missing "name" or a non-object top-level value.
                logger.error("Failed to index tool %s: %s", tool_file, e)

    def _load_default_tools(self) -> None:
        """Load default tool metadata when no tools directory exists."""
        default_tools = [
            ToolMetadata(
                name="delegate_task",
                category="orchestration",
                description="Delegate a coding task to appropriate AI agent",
                file_path=Path("tools/orchestration/delegate_task.json"),
            ),
            ToolMetadata(
                name="list_orchestrators",
                category="orchestration",
                description="List available orchestrators and their status",
                file_path=Path("tools/orchestration/list_orchestrators.json"),
            ),
            ToolMetadata(
                name="get_statistics",
                category="monitoring",
                description="Get delegation statistics and metrics",
                file_path=Path("tools/monitoring/get_statistics.json"),
            ),
        ]
        for metadata in default_tools:
            self._metadata_cache[metadata.name] = metadata

    def search_tools(
        self,
        query: str | None = None,
        category: str | None = None,
        detail: DetailLevel = "minimal",
    ) -> list[dict[str, Any]]:
        """Search tools with configurable detail level.

        Args:
            query: Case-insensitive substring matched against tool names and
                descriptions (no filter when None or empty).
            category: Filter by tool category (no filter when None).
            detail: Level of detail to return
                - minimal: name + category only (lowest token cost)
                - standard: + description
                - full: + complete schema (highest token cost)

        Returns:
            List of tool information at requested detail level. At "full"
            detail, tools whose definition fails to load are omitted.
        """
        results: list[dict[str, Any]] = []
        # Case-fold the query once, outside the loop; "" behaves like None.
        needle = query.lower() if query else None
        for name, metadata in self._metadata_cache.items():
            if category and metadata.category != category:
                continue
            if (
                needle
                and needle not in name.lower()
                and needle not in metadata.description.lower()
            ):
                continue
            if detail == "minimal":
                results.append({
                    "name": name,
                    "category": metadata.category,
                })
            elif detail == "standard":
                results.append({
                    "name": name,
                    "category": metadata.category,
                    "description": metadata.description,
                })
            else:  # full
                tool = self.load_tool(name)
                if tool:  # silently skip tools whose definition failed to load
                    results.append({
                        "name": name,
                        "category": metadata.category,
                        "description": tool.description,
                        "inputSchema": tool.inputSchema,
                    })
        logger.info(
            "Tool search: query=%s, category=%s, detail=%s, results=%d",
            query, category, detail, len(results),
        )
        return results

    def load_tool(self, name: str) -> Tool | None:
        """Load complete tool definition on-demand.

        Args:
            name: Tool name

        Returns:
            Complete Tool object with schema, or None if the tool is unknown
            or its definition file is unreadable.
        """
        # Serve from cache first — each definition is read at most once.
        if name in self._tool_cache:
            logger.debug("Tool cache hit: %s", name)
            return self._tool_cache[name]
        metadata = self._metadata_cache.get(name)
        if not metadata:
            logger.warning("Tool not found: %s", name)
            return None
        # If the file doesn't exist, synthesize the tool from built-in
        # defaults (backward compatibility for the default tool set).
        if not metadata.file_path.exists():
            tool = self._create_default_tool(name)
            if tool:
                self._tool_cache[name] = tool
            return tool
        try:
            with open(metadata.file_path, encoding="utf-8") as f:
                data = json.load(f)
            tool = Tool(
                name=data["name"],
                description=data.get("description", ""),
                inputSchema=data.get("inputSchema", {"type": "object", "properties": {}}),
            )
            self._tool_cache[name] = tool
            logger.debug("Loaded tool from file: %s", name)
            return tool
        except (OSError, ValueError, KeyError, TypeError) as e:
            # ValueError also covers json.JSONDecodeError and (pydantic)
            # Tool validation errors.
            logger.error("Failed to load tool %s: %s", name, e)
            return None

    def _create_default_tool(self, name: str) -> Tool | None:
        """Create default tool definitions for backward compatibility.

        Returns None for names outside the built-in default set.
        """
        if name == "delegate_task":
            return Tool(
                name="delegate_task",
                description="Route task to specialist agent or confirm orchestrator should handle directly. Always call BEFORE code work to get routing guidance.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Full user request/task to route",
                        },
                        "orchestrator": {
                            "type": "string",
                            "description": "Override primary orchestrator",
                        },
                        "force_delegate": {
                            "type": "string",
                            "description": "Force delegation to specific agent",
                        },
                        "guidance_only": {
                            "type": "boolean",
                            "description": "Return routing guidance without executing (default: false)",
                            "default": False,
                        },
                    },
                    "required": ["query"],
                },
            )
        elif name == "list_orchestrators":
            return Tool(
                name="list_orchestrators",
                description="List available orchestrators and their status",
                inputSchema={"type": "object", "properties": {}},
            )
        elif name == "get_statistics":
            return Tool(
                name="get_statistics",
                description="Get delegation statistics and metrics",
                inputSchema={"type": "object", "properties": {}},
            )
        return None

    def list_categories(self) -> list[str]:
        """List all available tool categories, sorted alphabetically."""
        return sorted({metadata.category for metadata in self._metadata_cache.values()})

    def get_tool_count(self) -> dict[str, int]:
        """Get tool count by category."""
        return dict(Counter(m.category for m in self._metadata_cache.values()))