mcp-local-rag
by nkapila6
src/mcp_local_rag/
from duckduckgo_search import DDGS
from mediapipe.tasks import python
from mediapipe.tasks.python import text
from bs4 import BeautifulSoup
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Optional
import sys
import time
from importlib.resources import files
# imports from mcp
# https://modelcontextprotocol.io/quickstart/server
from mcp.server.fastmcp import FastMCP
mcp = FastMCP(
    "RAG Web Search",
    dependencies=["duckduckgo-search", "mediapipe", "beautifulsoup4", "requests"],
)
# Dynamically locate embedder.tflite within the installed package
# PATH = "src/mcp_local_rag/embedder/embedder.tflite"
PATH = files('mcp_local_rag').joinpath('embedder/embedder.tflite')
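# e.g. in a typical install, PATH resolves to something like
#   <site-packages>/mcp_local_rag/embedder/embedder.tflite
# (illustrative location; the exact path depends on how the package is installed)
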
@mcp.tool()
def rag_search(query: str, num_results: int = 10, top_k: int = 5) -> Dict:
    """
    Search the web for a given query and return the most relevant page
    content as context for the LLM, ranked RAG-style by embedding
    similarity to the query.
    Args:
        query (str): The query to search for.
        num_results (int): Number of search results to retrieve.
        top_k (int): Number of top-scoring results whose pages are fetched.
    Returns:
        Dict with a "content" key holding text blocks extracted from the
        top-ranked pages.
    """
    ddgs = DDGS()
    results = ddgs.text(query, max_results=num_results)
    # embed, score, and sort results by similarity to the query
    scored_results = sort_by_score(add_score_to_dict(query, results))
    top_results = scored_results[:top_k]
    # fetch page content for the top results using a thread pool
    md_content = fetch_all_content(top_results)
    # formatted as a dict for the MCP client
    return {
        "content": md_content
    }
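
# Example (hypothetical query, kept as a comment so nothing runs at import):
#   rag_search("python asyncio tutorial", num_results=8, top_k=3)
# returns roughly:
#   {"content": [{"type": "text", "text": "asyncio is a library to write..."}, ...]}
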
def add_score_to_dict(query: str, results: List[Dict]) -> List[Dict]:
    """Add similarity scores to search results."""
    # MediaPipe text embedder backed by the bundled .tflite model
    base_options = python.BaseOptions(model_asset_path=PATH)
    options = text.TextEmbedderOptions(
        base_options=base_options, l2_normalize=True, quantize=False)
    embedder = text.TextEmbedder.create_from_options(options)
    query_embedding = embedder.embed(query)
    # score each result by cosine similarity between its snippet and the query
    for result in results:
        result['score'] = text.TextEmbedder.cosine_similarity(
            embedder.embed(result['body']).embeddings[0],
            query_embedding.embeddings[0])
    return results
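
# For reference, each DDGS text result is a dict shaped roughly like
# (field values illustrative, not real output):
#   {"title": "Example Domain", "href": "https://example.com/", "body": "snippet text"}
# add_score_to_dict attaches a float "score" to each of these dicts; with
# l2_normalize=True the cosine similarity falls in [-1, 1].
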
def sort_by_score(results: List[Dict]) -> List[Dict]:
    """Sort results by similarity score, highest first."""
    return sorted(results, key=lambda x: x['score'], reverse=True)

def fetch_content(url: str, timeout: int = 5) -> Optional[str]:
    """Fetch a URL and return its visible text, or None on failure."""
    try:
        start_time = time.time()
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        content = BeautifulSoup(response.text, "html.parser").get_text()
        # log to stderr: stdout is reserved for the MCP stdio transport
        print(f"Fetched {url} in {time.time() - start_time:.2f}s", file=sys.stderr)
        return content[:10000]  # limiting content to 10k characters
    except requests.RequestException as e:
        print(f"Error fetching {url}: {type(e).__name__} - {str(e)}", file=sys.stderr)
        return None

def fetch_all_content(results: List[Dict]) -> List[Dict]:
    """Fetch content from all result URLs using a thread pool."""
    urls = [site['href'] for site in results if site.get('href')]
    # parallelize requests across a small pool of worker threads
    with ThreadPoolExecutor(max_workers=5) as executor:
        # submit fetch tasks and keep a future -> url mapping for error reporting
        future_to_url = {executor.submit(fetch_content, url): url for url in urls}
        content_list = []
        # collect results as each fetch finishes, not in submission order
        for future in as_completed(future_to_url):
            try:
                content = future.result()
                if content:
                    content_list.append({
                        "type": "text",
                        "text": content
                    })
            except Exception as e:
                print(f"Request for {future_to_url[future]} failed: {e}", file=sys.stderr)
    return content_list
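
# Entry point: a minimal sketch, assuming the server is meant to run over
# stdio when executed directly (it can also be launched via the `mcp` CLI
# during development).
if __name__ == "__main__":
    mcp.run()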