"""WebDocx MCP Server.
An MCP server for web search, scraping, and documentation gathering.
"""
from fastmcp import FastMCP
from devlens.tools.search import search_web
from devlens.tools.scraper import scrape_url, crawl_docs
from devlens.tools.research import deep_dive, summarize_page
from devlens.tools.advanced import (
compare_sources,
find_related,
extract_links,
monitor_changes,
)
from devlens.utils.orchestrator import (
suggest_tools,
classify_intent,
ResearchContext,
)
# Create MCP server
mcp = FastMCP(
name="webdocx",
instructions="""Web search, scraping, and documentation gathering for LLMs.
Provides comprehensive web research capabilities including:
- Search: DuckDuckGo web search with filters
- Scraping: Extract clean Markdown from any URL
- Research: Multi-source topic research with aggregation
- Crawling: Follow links to build documentation collections
- Analysis: Compare sources, find related content, extract links
- Monitoring: Track page changes over time
""",
)
# Register tools
@mcp.tool()
async def tool_search_web(query: str, limit: int = 5) -> list[dict]:
"""Search the web using DuckDuckGo.
Args:
query: Search query string.
limit: Maximum results (1-20, default 5).
Returns:
List of results with title, url, snippet.
"""
return await search_web(query, limit)
@mcp.tool()
async def tool_scrape_url(url: str) -> str:
"""Scrape content from a URL as Markdown.
Args:
url: URL to scrape.
Returns:
Markdown content with source attribution.
"""
return await scrape_url(url)
@mcp.tool()
async def tool_crawl_docs(root_url: str, max_pages: int = 5) -> str:
"""Crawl multi-page documentation.
Follows same-domain links to build combined docs.
Args:
root_url: Starting URL.
max_pages: Max pages to crawl (1-20, default 5).
Returns:
Combined Markdown with table of contents.
"""
return await crawl_docs(root_url, max_pages)
@mcp.tool()
async def tool_deep_dive(topic: str, depth: int = 3) -> str:
"""Research a topic from multiple sources.
Searches and scrapes multiple pages to build a report.
Args:
topic: Topic to research.
depth: Number of sources (1-10, default 3).
Returns:
Aggregated research report.
"""
return await deep_dive(topic, depth)
@mcp.tool()
async def tool_summarize_page(url: str) -> str:
"""Get a quick overview of a page.
Extracts headings and key sections.
Args:
url: URL to summarize.
Returns:
Page summary with sections.
"""
return await summarize_page(url)
@mcp.tool()
async def tool_compare_sources(topic: str, sources: list[str]) -> str:
"""Compare information across multiple sources.
Analyzes differences and similarities between sources.
Args:
topic: Topic being compared.
sources: List of URLs (2-5) to compare.
Returns:
Comparison report with common topics and differences.
"""
return await compare_sources(topic, sources)
@mcp.tool()
async def tool_find_related(url: str, limit: int = 5) -> str:
"""Find pages related to a given URL.
Uses the page content to discover similar resources.
Args:
url: Base URL to find related content for.
limit: Max related pages (1-10, default 5).
Returns:
List of related pages with descriptions.
"""
return await find_related(url, limit)
@mcp.tool()
async def tool_extract_links(url: str, filter_external: bool = True) -> str:
"""Extract all links from a page.
Useful for discovering navigation structure and resources.
Args:
url: URL to extract links from.
filter_external: Only return same-domain links (default True).
Returns:
Organized list of internal and external links.
"""
return await extract_links(url, filter_external=filter_external)
@mcp.tool()
async def tool_monitor_changes(url: str, previous_hash: str | None = None) -> str:
"""Check if a page has changed.
Tracks content modifications over time.
Args:
url: URL to monitor.
previous_hash: Previous content hash to compare against.
Returns:
Change detection report with content hash.
"""
return await monitor_changes(url, previous_hash)
@mcp.tool()
def tool_suggest_workflow(query: str, known_urls: list[str] | None = None) -> dict:
"""Suggest optimal research workflow for a query.
Analyzes the query and recommends the best tools and workflow to answer it.
Uses smart intent classification and dynamic workflow generation.
Args:
query: Research question or task description.
known_urls: Optional list of already known URLs (default None).
Returns:
Dictionary with intent, workflow steps, and suggested parameters.
"""
# Build context from known URLs
context = ResearchContext()
if known_urls:
context.known_urls = known_urls
# Get workflow suggestions
result = suggest_tools(query, context)
return result
@mcp.tool()
def tool_classify_research_intent(query: str) -> dict:
"""Classify the research intent of a query.
Analyzes a query to determine the user's research goal (quick answer,
    deep research, documentation, comparison, discovery, monitoring, or validation).
Returns confidence scores for each detected intent.
Args:
query: Research question or task description.
Returns:
Dictionary with primary and secondary intents with confidence scores.
"""
intent_scores = classify_intent(query)
return {
"primary_intent": {
"type": intent_scores[0].intent.value,
"confidence": intent_scores[0].confidence,
"reasons": intent_scores[0].reasons,
"keywords": intent_scores[0].keywords_matched,
},
"secondary_intents": [
{
"type": score.intent.value,
"confidence": score.confidence,
"reasons": score.reasons,
"keywords": score.keywords_matched,
}
for score in intent_scores[1:3]
]
if len(intent_scores) > 1
else [],
}
@mcp.tool()
def get_server_docs(topic: str = "overview") -> str:
"""Get documentation about the WebDocx MCP server.
Provides guidance on server capabilities, tool usage, workflows, and best practices.
Args:
        topic: Documentation topic - 'overview', 'tools', 'workflows', 'orchestration', 'examples', or 'philosophy'
Returns:
Formatted documentation for the requested topic.
"""
docs = {
"overview": """
# WebDocx MCP Server
MCP server for intelligent web research. 12 tools in 3 layers.
## Tools
Primitives: search_web, scrape_url, crawl_docs, summarize_page, extract_links
Composed: deep_dive, compare_sources, find_related, monitor_changes
Meta: suggest_workflow, classify_research_intent, get_server_docs
## Design
- Composable: small tools combine powerfully
- Smart: auto-orchestration via suggest_workflow
- Efficient: Markdown output, token-optimized
- Context-aware: workflows adapt to research state
## Usage
search_web → scrape_url (simple)
suggest_workflow (auto-recommends)
deep_dive (multi-source aggregation)
## Topics
tools, philosophy, workflows, orchestration, examples
""",
"tools": """
# Tools
## Primitives (fast, focused)
search_web(query, limit=5) - DuckDuckGo search, returns [{title,url,snippet}]
scrape_url(url) - Extract clean Markdown with metadata
summarize_page(url) - Headings only, triage before full scrape
extract_links(url, filter_external=True) - Categorize internal/external links
crawl_docs(root_url, max_pages=5) - Follow links, aggregate docs with TOC
## Composed (workflows)
deep_dive(topic, depth=3) - Search + parallel scraping + aggregation
compare_sources(topic, sources) - Analyze consensus/differences across 2-5 URLs
find_related(url, limit=5) - Discover similar resources via content analysis
monitor_changes(url, previous_hash) - Track content changes via hashing
## Meta (intelligence)
suggest_workflow(query, known_urls=None) - Auto-recommend optimal tool sequence
classify_research_intent(query) - Detect research goal (7 patterns)
get_server_docs(topic) - This documentation
## Guidelines
- Simple: search_web → scrape_url
- Complex: suggest_workflow → follow steps
- Multi-source: deep_dive
- Provide known_urls to skip search
- summarize_page before expensive scrape
""",
"workflows": """
# Workflows
Quick: search_web → scrape_url
Deep: search_web(limit=10) → deep_dive(depth=5)
Docs: search_web → crawl_docs(max_pages=25)
Compare: search_web → scrape_url(parallel) → compare_sources
Discover: search_web → find_related → extract_links
Monitor: monitor_changes(url, prev_hash)
Smart: suggest_workflow → follow steps
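The quick workflow maps directly onto the async helpers this server imports. A minimal sketch (error handling omitted; assumes the result dicts carry a 'url' key as documented for search_web):
```python
from devlens.tools.search import search_web
from devlens.tools.scraper import scrape_url

async def quick_answer(query: str) -> str:
    # Step 1: search, keeping the default limit of 5 results
    results = await search_web(query, 5)
    if not results:
        return "No results for: " + query
    # Step 2: scrape the top hit and return its Markdown
    return await scrape_url(results[0]["url"])

# run with: asyncio.run(quick_answer("FastAPI dependency injection"))
```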
""",
"orchestration": """
# Orchestration
7 Intents: quick_answer, deep_research, documentation, comparison, discovery, monitoring, validation
Adapts to:
- Intent confidence
- Known URLs (skips search)
- Failed tools (fallbacks)
- Search history (adjusts limits)
Parameter optimization:
- Quick: limit=3, max_pages=5
- Deep: limit=10, depth=comprehensive, max_pages=100
- Docs: max_pages=25, filter_external=False
Context tracks: known_urls, failed_tools, search_attempts, previous_results
Example:
suggest_workflow("integrate API?") → quick_answer, [search_web(3), scrape_url]
suggest_workflow("API docs", ["url"]) → documentation, [crawl_docs(25)] [skips search]
""",
"examples": """
# Examples
API Integration: search_web("LidgiCash API") → scrape_url → deep_dive(depth=3)
Framework Compare: search_web("FastAPI vs Flask") → scrape_url(parallel) → compare_sources
Learn Docs: search_web("FastAPI docs") → crawl_docs(max_pages=50)
Find Alternatives: search_web("Stripe alternatives") → find_related(limit=10) → summarize_page
Smart Research: suggest_workflow("mobile payments Africa") → follow steps
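As a sketch, the Framework Compare example above can be driven directly from the underlying helpers (query and URL selection are illustrative):
```python
from devlens.tools.search import search_web
from devlens.tools.advanced import compare_sources

async def compare_top_sources(topic: str) -> str:
    # Find candidate sources, then hand 2-5 of their URLs to compare_sources
    results = await search_web(topic, 5)
    urls = [r["url"] for r in results[:3]]
    return await compare_sources(topic, urls)

# run with: asyncio.run(compare_top_sources("FastAPI vs Flask"))
```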
Best:
- suggest_workflow when uncertain
- Provide known_urls to skip search
- summarize_page before full scrape
- compare_sources for 2-5 sources
""",
"philosophy": """
# Design Philosophy & Developer Mindset
## Core Principles
### 1. Composability Over Complexity
Build small, focused tools that combine powerfully, rather than building monolithic solutions.
**Why it matters:**
- Easier to test, debug, maintain
- Users compose workflows, not forced into rigid patterns
- New capabilities emerge from combinations
**Example:**
```python
# Bad: One giant tool
research_everything(query, mode="deep", compare=True, monitor=True)
# Good: Composable primitives
search_web(query) → scrape_url(top_result) → find_related(url)
```
### 2. Intelligence at the Edges
Put the smarts in the orchestration layer; keep primitives simple and predictable.
**Why it matters:**
- Primitives remain reliable, testable, fast
- Intelligence adapts without breaking core tools
- Users choose: manual control or auto-orchestration
**Architecture:**
```
Meta Layer (suggest_workflow) ← Smart decisions here
↓
Composed Tools (deep_dive) ← Convenience combinations
↓
Primitives (search, scrape) ← Dumb, fast, reliable
```
### 3. Optimize for LLM Token Economy
Markdown output is not just formatting—it's an optimization strategy.
**Why it matters:**
- 50-70% smaller than HTML
- Preserves semantic structure (headings, lists, links)
- Directly consumable by LLMs without parsing
**Design choices:**
- Strip boilerplate (nav, footer, ads)
- Preserve code blocks with syntax hints
- Keep attribution (source URLs)
- Nested lists → flat hierarchy where possible
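A sketch of such a pipeline, not the scraper used here, assuming beautifulsoup4 and html2text are available:
```python
from bs4 import BeautifulSoup
import html2text

def html_to_markdown(html: str, source_url: str) -> str:
    soup = BeautifulSoup(html, "html.parser")
    # Strip boilerplate elements that only waste tokens
    for tag in soup.find_all(["nav", "footer", "aside", "script", "style"]):
        tag.decompose()
    converter = html2text.HTML2Text()
    converter.ignore_images = True  # images rarely help an LLM
    converter.body_width = 0        # no hard wrapping
    markdown = converter.handle(str(soup))
    # Keep attribution so downstream answers can cite the source
    return markdown.rstrip() + " [Source: " + source_url + "]"
```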
### 4. Fail Fast, Fail Clearly
Errors are data. Surface them immediately with actionable context.
**Why it matters:**
- Silent failures waste time and tokens
- Clear errors enable self-correction
- Partial success > complete failure
**Error handling:**
```python
# Bad: Silent degradation
results = [scrape(url) for url in urls] # Some might be None
# Good: Explicit failure reporting
results = []
for url in urls:
try:
results.append(scrape(url))
except ScraperError as e:
results.append({"error": str(e), "url": url})
```
### 5. Developer Velocity First
Ship fast, iterate based on real usage, avoid premature optimization.
**Decisions made:**
- DuckDuckGo (no API keys) over Google (auth complexity)
- Sync + threadpool over async (simpler, good enough)
- Content hashing over diff algorithms (fast, 80% solution)
- Python stdlib where possible (fewer deps)
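Content hashing really is the 80% solution: a stable hash of the normalized text answers "did this page change?". A sketch using only the standard library; the real monitor_changes may normalize content differently:
```python
import hashlib

def content_hash(markdown: str) -> str:
    # Normalize whitespace so trivial reflows do not register as changes
    normalized = " ".join(markdown.split())
    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()

def has_changed(markdown: str, previous_hash: str | None) -> bool:
    # No previous hash means there is nothing to compare against yet
    return previous_hash is not None and content_hash(markdown) != previous_hash
```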
**When to optimize:**
- After measuring actual bottlenecks
- When users hit real limits (not theoretical)
- If it doesn't add complexity tax
### 6. Context is King
Track research state, adapt workflows, avoid redundant work.
**Why it matters:**
- User provides URL → skip search
- Previous tool failed → use fallback
- Multiple searches → increase limit
**ResearchContext tracks:**
- known_urls: Don't search for what you have
- failed_tools: Don't retry what doesn't work
- search_attempts: Escalate if not finding results
- previous_results: Learn from success patterns
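A minimal sketch of what such a context could look like as a dataclass; the actual ResearchContext in devlens.utils.orchestrator may carry more state:
```python
from dataclasses import dataclass, field

@dataclass
class ResearchContext:
    known_urls: list[str] = field(default_factory=list)         # skip search when URLs are given
    failed_tools: list[str] = field(default_factory=list)       # avoid retrying what already failed
    search_attempts: int = 0                                     # escalate limits as this climbs
    previous_results: list[dict] = field(default_factory=list)  # reuse what already worked
```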
### 7. Batteries Included, Escape Hatches Available
Work out of the box for the 80% case, allow customization for power users.
**Defaults are opinionated:**
- limit=5 (fast, usually enough)
- depth=3 (balance speed vs thoroughness)
- filter_external=True (stay on topic)
- region="wt-wt" (global results)
**But everything is tunable:**
```python
# Beginner: Just works
search_web("FastAPI")
# Power user: Full control
search_web("FastAPI", limit=20, region="us-en", safesearch="off")
```
## Anti-Patterns Avoided
### ❌ Feature Bloat
Every tool must justify its existence. If two tools overlap 70%+, merge or remove one.
### ❌ Magic Configuration
No config files, environment variables, or setup scripts. Works immediately after install.
### ❌ Abstract Interfaces
No "Strategy Pattern" with 5 implementations. Concrete tools with clear purpose.
### ❌ Premature Generalization
Solve the problem at hand. Don't build for "future requirements" that may never exist.
### ❌ Hidden State
Tools are stateless. ResearchContext is explicit parameter, not global variable.
## Performance Philosophy
**"Fast enough" beats "theoretically optimal"**
- A 2s scrape is fine; shaving it to 200ms is not worth the complexity
- Parallelization when trivial (threadpool), not when hard (async rewrite)
- Cache when obvious (content hashing), not by default
**Measure before optimizing:**
- Actual bottleneck: network I/O (not our code)
- User tolerance: 30s for deep_dive is acceptable
- Token cost > compute cost in LLM context
## Success Metrics
Good design enables:
1. **New users productive in <5 minutes** (suggest_workflow)
2. **Power users not frustrated** (full parameter control)
3. **Composable for automation** (stateless, predictable)
4. **Maintainable codebase** (small tools, clear boundaries)
5. **Extensible without forking** (add tools, don't modify existing)
## When to Break Rules
Rules are guidelines, not laws. Break them when:
- User data at risk (add complexity for security)
- Silent failure would corrupt results (add validation)
- Performance blocks real usage (optimize hot paths)
- Alternative is genuinely simpler (challenge assumptions)
**But document why:**
```python
# Breaking "no config" rule here because:
# - 100+ page crawls OOM on default limits
# - Too expensive to make dynamic
# - Power user feature, not beginner concern
MAX_CRAWL_PAGES = int(os.getenv("WEBDOCX_MAX_PAGES", "100"))
```
## Evolution Strategy
**Add, don't modify:**
- New tool > changing existing tool behavior
- New parameter > changing default
- New workflow pattern > forcing migration
**Deprecate gracefully:**
- Warning for 2 versions before removal
- Provide migration path
- Document why (not just "use X instead")
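In Python that usually means a DeprecationWarning that spells out the migration path. An illustrative sketch with placeholder tool names (neither function exists in this server):
```python
import warnings

def find_similar(url: str, limit: int = 5) -> str:
    # Placeholder for the replacement tool
    return "related pages for " + url

def related_pages(url: str, limit: int = 5) -> str:
    # Deprecated alias kept for two releases; points users at the new name
    warnings.warn(
        "related_pages is deprecated and will be removed in two releases; "
        "use find_similar instead (same arguments).",
        DeprecationWarning,
        stacklevel=2,
    )
    return find_similar(url, limit)
```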
**Learn from usage:**
- Which tools are never used? (remove)
- Which combinations repeat? (compose)
- Which errors are common? (improve messages)
This isn't academic computer science—it's pragmatic engineering for real problems.
""",
}
topic_lower = topic.lower()
if topic_lower in docs:
return docs[topic_lower]
else:
available = ", ".join(sorted(docs.keys()))
return f"Unknown topic '{topic}'. Available topics: {available}\n\nRecommended reading order:\n1. overview - Start here for capabilities\n2. philosophy - Understand the design mindset\n3. tools - Deep dive into each tool\n4. workflows - Common usage patterns\n5. orchestration - Smart automation\n6. examples - Real-world scenarios"
def main():
"""Run the MCP server."""
mcp.run()
if __name__ == "__main__":
main()