# Glama
# DuckDuckGo Web Search MCP Server

from mcp.server.fastmcp import FastMCP
from dotenv import load_dotenv
import httpx
from bs4 import BeautifulSoup
import asyncio


# Initialize FastMCP and load environment variables.
# `mcp` is the server instance named "search"; the @mcp.tool() decorators
# further down register their functions on it.
mcp = FastMCP("search")
load_dotenv()

# Sent as the User-Agent header on DuckDuckGo requests.
USER_AGENT = "search-app/1.0"
# Endpoint queried by search_duckduckgo; its response is parsed as HTML.
DUCKDUCKGO_URL = "https://html.duckduckgo.com/html/"

async def search_duckduckgo(query: str, limit: int) -> list:
    """Fetch search results from DuckDuckGo's HTML endpoint.

    Args:
        query: Raw search string. It is sent as the ``q`` query parameter so
            that httpx percent-encodes it; characters such as ``&``, ``#``,
            ``+`` or non-ASCII text therefore reach the server intact.
        limit: Number of leading result elements to inspect.

    Returns:
        A list of dicts with the keys "title", "url" and "snippet", one per
        inspected result element that has both a title and a URL element.
        "url" is the visible text of the ``.result__url`` element. On failure
        a single-element list ``[{"error": <message>}]`` is returned instead
        of raising.
    """
    try:
        # Set headers to avoid blocking
        headers = {
            "User-Agent": USER_AGENT,
            "Content-Type": "application/json",
        }

        async with httpx.AsyncClient() as client:
            # Let httpx build the query string instead of splicing the raw
            # query into the URL, which broke on reserved characters.
            response = await client.get(
                DUCKDUCKGO_URL,
                params={"q": query},
                headers=headers,
                timeout=30.0,
            )
            response.raise_for_status()

            # Parse HTML response
            soup = BeautifulSoup(response.text, "html.parser")
            result_elements = soup.select('.result__body')

            # Extract results up to limit
            results = []
            for result in result_elements[:limit]:
                title_elem = result.select_one('.result__a')
                url_elem = result.select_one('.result__url')
                snippet_elem = result.select_one('.result__snippet')

                # Entries lacking a title or URL element are skipped.
                if title_elem and url_elem:
                    results.append({
                        "title": title_elem.get_text().strip(),
                        "url": url_elem.get_text().strip(),
                        "snippet": snippet_elem.get_text().strip() if snippet_elem else "",
                    })

            return results

    except httpx.TimeoutException:
        return [{"error": "Request timed out"}]
    except Exception as e:
        # Best-effort boundary: any other failure is reported as data.
        return [{"error": f"Search failed: {str(e)}"}]
    

async def fetch_url(url: str):
    """Fetch a page as text, preferring the Jina reader and falling back to raw HTML.

    The page is first requested through ``https://r.jina.ai/<url>``, which
    converts HTML to markdown. If that request times out, the page itself is
    requested and its visible text is extracted with BeautifulSoup.

    Args:
        url: Address of the page, with or without an ``http(s)://`` scheme.

    Returns:
        The response text from the Jina reader, or the extracted text of the
        raw page, or the string "Timeout error" when both requests time out.
    """
    # Local import keeps this block self-contained. Diagnostics go to stderr
    # because this server runs over the stdio transport (see mcp.run), where
    # stdout carries protocol messages.
    import sys

    jina_timeout = 15.0
    raw_html_timeout = 5.0
    # Keep the original address separate from the Jina address so the
    # fallback really requests the page rather than hitting Jina again.
    jina_url = f"https://r.jina.ai/{url}"
    if url.startswith(("http://", "https://")):
        raw_url = url
    else:
        # A direct request needs a scheme; default to https.
        raw_url = f"https://{url}"

    async with httpx.AsyncClient() as client:
        try:
            print(f"fetching result from\n{jina_url}", file=sys.stderr)
            # using jina api to convert html to markdown
            response = await client.get(jina_url, timeout=jina_timeout)
            return response.text
        except httpx.TimeoutException:
            try:
                print("Jina API timed out, fetching raw HTML...", file=sys.stderr)
                # using raw html
                response = await client.get(raw_url, timeout=raw_html_timeout)
                soup = BeautifulSoup(response.text, "html.parser")
                return soup.get_text()
            except httpx.TimeoutException:
                return "Timeout error"

@mcp.tool()
async def search_and_fetch(query: str, limit: int = 3):
    """
    Search the web using DuckDuckGo and fetch the content of each result.

    Args:
        query: The search query string
        limit: Maximum number of results to return (default: 3, maximum 10)

    Returns:
        List of dictionaries containing
        - title
        - url
        - snippet
        - summary markdown (empty if not available)

        If the search itself fails, the list holds a single dictionary with
        an "error" key; if nothing is found, a single dictionary with a
        "message" key.

    Raises:
        ValueError: If query is not a non-empty string or limit is not a
            positive integer.
    """
    if not isinstance(query, str) or not query.strip():
        raise ValueError("Query must be a non-empty string")

    if not isinstance(limit, int) or limit < 1:
        raise ValueError("Limit must be a positive integer")

    # Cap limit at reasonable maximum
    limit = min(limit, 10)

    results = await search_duckduckgo(query, limit)

    if not results:
        return [{"message": f"No results found for '{query}'"}]

    # search_duckduckgo reports failures as [{"error": ...}]; such entries
    # have no "url" and must not be fetched.
    fetchable = [item for item in results if "url" in item]
    if not fetchable:
        return results

    # Fetch all pages in parallel. return_exceptions=True keeps one failed
    # fetch from discarding every other result.
    summaries = await asyncio.gather(
        *(fetch_url(item["url"]) for item in fetchable),
        return_exceptions=True,
    )

    # Assign summaries to their respective result items; a failed fetch
    # yields the documented empty summary.
    for item, summary in zip(fetchable, summaries):
        item["summary"] = "" if isinstance(summary, BaseException) else summary

    return results

# @mcp.tool()
async def search(query: str, limit: int = 3):
    """
    Run a DuckDuckGo search and hand back the hits as-is (no page scraping).

    Args:
        query: Text to search for; must contain non-whitespace characters
        limit: Upper bound on the number of hits (default: 3, capped at 10)

    Returns:
        Whatever search_duckduckgo produced (per hit: title, url, snippet),
        or a one-element list carrying a "message" entry when it produced
        nothing.

    Raises:
        ValueError: On a blank/non-string query or a non-positive/non-int limit.
    """
    query_ok = isinstance(query, str) and query.strip()
    if not query_ok:
        raise ValueError("Query must be a non-empty string")

    limit_ok = isinstance(limit, int) and limit >= 1
    if not limit_ok:
        raise ValueError("Limit must be a positive integer")

    # Never ask for more than ten hits.
    capped = 10 if limit > 10 else limit

    hits = await search_duckduckgo(query, capped)
    return hits or [{"message": f"No results found for '{query}'"}]

@mcp.tool()
async def fetch(url: str):
    """
    Scrape the content of a web page and return it as text, converted to
    markdown by the Jina API when that service responds in time.

    Args:
        url: Address of the page to fetch

    Returns:
        text : the page content as returned by fetch_url

    Raises:
        ValueError: If url is not a non-empty string.
    """
    # The previous check only tested the type and reported the argument as a
    # "Query"; validate what the message promises and name the right argument.
    if not isinstance(url, str) or not url.strip():
        raise ValueError("URL must be a non-empty string")

    return await fetch_url(url)

def test_fetch_url():
    """Smoke-test fetch_url against a live page; any failure trips an assertion."""
    import asyncio

    target = "communityforums.atmeta.com/t5/Get-Help/Beat-saber-wont-load/td-p/1187498"

    async def _exercise():
        # No mocking here: this performs a real network request.
        page_text = await fetch_url(target)
        # Only the type is checked; there is no known-good content to compare.
        assert isinstance(page_text, str)
        for line in ("result recieved", page_text):
            print(line)

    try:
        asyncio.run(_exercise())
    except Exception as err:
        # Report the cause, then fail the test.
        print(f"Test failed: {err}")
        assert False

if __name__ == "__main__":
    # Required packages: pip install mcp httpx beautifulsoup4 python-dotenv
    # Serve the tools registered on `mcp` over the stdio transport.
    mcp.run(transport="stdio")
    # Manual smoke test, left disabled:
    # test_fetch_url()