"""MCP Server implementation for WebScout."""
import asyncio
import json
import logging
import sys
from typing import Any

from .constants import DEFAULT_MAX_LENGTH, DEEPSEEK_API_KEY
from .tools import fetch, search_ddgs
# Configure logging.
# Everything is routed to stderr so stdout stays reserved for the
# JSON-RPC message stream consumed by the MCP client.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
_stderr_handler = logging.StreamHandler(sys.stderr)
logging.basicConfig(
    format=_LOG_FORMAT,
    level=logging.INFO,
    handlers=[_stderr_handler],
)
def send_response(response: dict[str, Any]) -> None:
    """Serialize *response* as a single JSON line and write it to stdout.

    Non-ASCII characters are emitted as-is (UTF-8, not escaped) and the
    underlying binary stream is flushed immediately so the client never
    waits on a buffered reply.
    """
    payload = json.dumps(response, ensure_ascii=False)
    stream = sys.stdout.buffer
    stream.write(f"{payload}\n".encode("utf-8"))
    stream.flush()
# JSON-RPC 2.0 error codes used by this server.
_PARSE_ERROR = -32700
_METHOD_NOT_FOUND = -32601
_INTERNAL_ERROR = -32603

# Fallback for the fetch tool's max_length argument.
# NOTE(review): constants.DEFAULT_MAX_LENGTH is imported at module top but
# the original code hard-coded 10000 here; 10000 is kept to preserve
# behavior — confirm whether DEFAULT_MAX_LENGTH should be used instead.
_FALLBACK_MAX_LENGTH = 10000


def _reply(req_id: Any, result: dict[str, Any]) -> None:
    """Send a successful JSON-RPC response for request *req_id*."""
    send_response({"jsonrpc": "2.0", "id": req_id, "result": result})


def _reply_error(req_id: Any, code: int, message: str) -> None:
    """Send a JSON-RPC error response for request *req_id*."""
    send_response({
        "jsonrpc": "2.0",
        "id": req_id,
        "error": {"code": code, "message": message},
    })


def _handle_initialize(req_id: Any) -> None:
    """Answer the MCP initialize handshake with protocol/server info."""
    _reply(req_id, {
        "protocolVersion": "2024-11-05",
        "capabilities": {"tools": {}},
        "serverInfo": {
            "name": "mcp-webscout",
            "version": "0.1.0",
        },
    })


def _handle_tools_list(req_id: Any) -> None:
    """Advertise the two available tools (search, fetch) with their schemas."""
    _reply(req_id, {
        "tools": [
            {
                "name": "search",
                "description": "使用bing, brave, duckduckgo, google, grokipedia, mojeek, yandex, yahoo, wikipedia搜索引擎搜索网络信息",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "搜索关键词",
                        },
                        "max_results": {
                            "type": "integer",
                            "default": 5,
                            "minimum": 1,
                            "maximum": 10,
                            "description": "最大结果数量",
                        },
                    },
                    "required": ["query"],
                },
            },
            {
                "name": "fetch",
                "description": (
                    "使用 Crawl4AI 抓取网页内容,支持普通模式和 LLM 智能提取模式。"
                    "普通模式使用 Crawl4AI 直接抓取并生成 Markdown。"
                    "LLM 模式在抓取后使用 DeepSeek API 进行智能内容提取。"
                    "支持 JavaScript 渲染,适合抓取动态页面。"
                ),
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "要抓取的网页 URL",
                        },
                        "mode": {
                            "type": "string",
                            "enum": ["simple", "llm"],
                            "default": "simple",
                            "description": "抓取模式: simple=普通模式, llm=LLM智能提取",
                        },
                        "prompt": {
                            "type": "string",
                            "description": "LLM提取指令(仅mode=llm时有效)",
                        },
                        "model": {
                            "type": "string",
                            "default": "deepseek-chat",
                            "description": "LLM模型名称",
                        },
                        "api_key": {
                            "type": "string",
                            "description": "DeepSeek API密钥",
                        },
                        "max_length": {
                            "type": "integer",
                            "default": 10000,
                            "description": "最大返回字符数",
                        },
                        "use_proxy": {
                            "type": "boolean",
                            "default": True,
                            "description": "是否使用代理",
                        },
                        "js_render": {
                            "type": "boolean",
                            "default": True,
                            "description": "是否启用JS渲染",
                        },
                        "timeout": {
                            "type": "integer",
                            "default": 30,
                            "description": "超时时间(秒)",
                        },
                    },
                    "required": ["url"],
                },
            },
        ]
    })


def _handle_search(req_id: Any, args: dict[str, Any]) -> None:
    """Run the search tool and reply with formatted results.

    max_results is clamped to 10 to match the advertised schema maximum.
    """
    try:
        results = search_ddgs(
            args.get("query", ""),
            min(args.get("max_results", 5), 10)
        )
        if not results:
            text = "未找到相关搜索结果。"
        else:
            text = f"找到 {len(results)} 条结果:\n\n"
            for i, r in enumerate(results, 1):
                text += f"{i}. 【{r['title']}】\n"
                text += f"   🔗 {r['url']}\n"
                if r["snippet"]:
                    # Snippets are truncated to keep replies compact.
                    text += f"   📝 {r['snippet'][:200]}\n"
                text += "\n"
        _reply(req_id, {"content": [{"type": "text", "text": text}]})
    except Exception as e:
        logging.exception("Search tool execution failed")
        _reply_error(req_id, _INTERNAL_ERROR, f"搜索失败: {str(e)}")


def _format_fetch_result(args: dict[str, Any], mode: str,
                         result: dict[str, Any]) -> str:
    """Render a fetch() result dict as the Markdown text returned to the client."""
    parts = ["# Fetch v2 结果\n\n"]
    parts.append(f"**URL**: {args.get('url')}\n")
    parts.append(f"**模式**: {mode}\n")
    parts.append(f"**成功**: {'是' if result['success'] else '否'}\n")
    if result.get('title'):
        parts.append(f"**标题**: {result['title']}\n")
    if not result['success']:
        parts.append(f"**错误**: {result.get('error', 'Unknown error')}\n")
    else:
        # LLM extraction section (only present in llm mode).
        if mode == "llm" and "extracted" in result:
            parts.append("\n## LLM 提取结果\n\n")
            if result.get("model"):
                parts.append(f"**模型**: {result['model']}\n")
            if result.get("usage"):
                usage = result["usage"]
                parts.append(f"**Token 使用**: {usage['total_tokens']} ")
                parts.append(f"(提示词: {usage['prompt_tokens']}, ")
                parts.append(f"生成: {usage['completion_tokens']})\n")
            parts.append("\n```json\n")
            parts.append(json.dumps(result["extracted"], ensure_ascii=False, indent=2))
            parts.append("\n```\n")
        # Markdown content sections.
        if result.get("fit_markdown"):
            parts.append("\n## 精简内容\n\n")
            parts.append(result["fit_markdown"])
        if result.get("markdown"):
            max_length = args.get("max_length", _FALLBACK_MAX_LENGTH)
            parts.append("\n## 完整内容\n\n")
            parts.append(result["markdown"][:max_length])
            if len(result["markdown"]) > max_length:
                parts.append("\n\n[内容已截断,使用更大的 max_length 获取完整内容]")
    return "".join(parts)


def _handle_fetch(req_id: Any, args: dict[str, Any]) -> None:
    """Run the fetch tool: crawl a URL (optionally with LLM extraction) and reply."""
    try:
        # Explicit api_key argument wins over the environment-derived constant.
        api_key = args.get("api_key") or DEEPSEEK_API_KEY
        mode = args.get("mode", "simple")
        # LLM mode is unusable without an API key — fail fast with a clear message.
        if mode == "llm" and not api_key:
            _reply_error(req_id, _INTERNAL_ERROR, (
                "DeepSeek API key not configured. "
                "Please set DEEPSEEK_API_KEY environment variable "
                "or provide api_key in arguments."
            ))
            return
        # fetch() is a coroutine; the server loop is synchronous, so drive it here.
        result = asyncio.run(fetch(
            url=args.get("url", ""),
            mode=mode,
            prompt=args.get("prompt"),
            model=args.get("model", "deepseek-chat"),
            api_key=api_key,
            max_length=args.get("max_length", _FALLBACK_MAX_LENGTH),
            use_proxy=args.get("use_proxy", True),
            js_render=args.get("js_render", True),
            timeout=args.get("timeout", 30),
        ))
        text = _format_fetch_result(args, mode, result)
        _reply(req_id, {"content": [{"type": "text", "text": text}]})
    except Exception as e:
        logging.exception("fetch tool execution failed")
        _reply_error(req_id, _INTERNAL_ERROR, f"fetch 执行失败: {str(e)}")


def main() -> int:
    """Main entry point for the MCP server.

    Reads newline-delimited JSON-RPC requests from stdin and dispatches
    them to the tool handlers until EOF or interrupt.

    Returns:
        Process exit code: 0 on clean shutdown, 1 on an unexpected server error.
    """
    logging.info("MCP WebScout Server starting")
    try:
        for line_bytes in sys.stdin.buffer:
            line = line_bytes.decode("utf-8").strip()
            if not line:
                continue
            try:
                request = json.loads(line)
                method = request.get("method")
                req_id = request.get("id")
                params = request.get("params", {})
                if method == "initialize":
                    _handle_initialize(req_id)
                elif method == "tools/list":
                    _handle_tools_list(req_id)
                elif method == "tools/call":
                    tool = params.get("name")
                    args = params.get("arguments", {})
                    if tool == "search":
                        _handle_search(req_id, args)
                    elif tool == "fetch":
                        _handle_fetch(req_id, args)
                    else:
                        _reply_error(req_id, _METHOD_NOT_FOUND,
                                     f"Unknown tool: {tool}")
                elif req_id is not None:
                    # Fix: per JSON-RPC 2.0 every request carrying an id must
                    # receive a response; previously unknown methods were
                    # silently dropped, leaving the client waiting forever.
                    # Notifications (no id) are still ignored, as required.
                    _reply_error(req_id, _METHOD_NOT_FOUND,
                                 f"Method not found: {method}")
            except json.JSONDecodeError as e:
                logging.error(f"JSON parse failed: {e}")
                _reply_error(None, _PARSE_ERROR, f"JSON parse error: {e}")
            except Exception as e:
                # Last-resort guard: keep the server alive across bad requests.
                logging.exception("Request processing failed")
                _reply_error(None, _INTERNAL_ERROR, f"Internal error: {e}")
    except KeyboardInterrupt:
        logging.info("MCP server shutting down")
    except Exception:
        logging.exception("Server error")
        return 1
    return 0