"""
LangSearch MCP Server
Provides tools for web search and semantic reranking using the LangSearch API.
"""
import os
import re
from typing import Literal
from pydantic import BaseModel, Field
import httpx
from mcp.server.fastmcp import FastMCP

# Initialize the FastMCP server
mcp = FastMCP(
    "LangSearch",
    instructions="A server providing web search and semantic reranking capabilities using the LangSearch API",
)

# Constants
API_BASE_URL = "https://api.langsearch.com/v1"
DEFAULT_TIMEOUT = 30.0
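
# Example environment setup (illustrative): the tools below read the API key
# from the LANGSEARCH_API_KEY environment variable, e.g.
#   export LANGSEARCH_API_KEY="your-api-key"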

# ========== Helper Functions ==========

def is_english_text(text: str) -> bool:
    """Check whether text is primarily English (heuristic based on ASCII characters)."""
    if not text:
        return False
    # Count ASCII alphabetic characters vs. all alphabetic characters
    ascii_chars = sum(1 for c in text if ord(c) < 128 and c.isalpha())
    total_chars = sum(1 for c in text if c.isalpha())
    if total_chars == 0:
        return True
    # If more than 70% of the alphabetic characters are ASCII, consider it English
    return (ascii_chars / total_chars) > 0.7
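
# Illustrative behavior of the heuristic above:
#   is_english_text("Hello, world")   -> True   (all alphabetic characters are ASCII)
#   is_english_text("こんにちは世界")  -> False  (no ASCII alphabetic characters)
#   is_english_text("12345")          -> True   (no alphabetic characters at all)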

# ========== Data Models ==========

class WebPageValue(BaseModel):
    """Web page search result"""
    id: str = Field(description="Unique identifier")
    name: str = Field(description="Page title")
    url: str = Field(description="Page URL")
    displayUrl: str = Field(description="Display URL")
    snippet: str = Field(description="Brief snippet from the page")
    summary: str | None = Field(default=None, description="Full summary if requested")
    datePublished: str | None = Field(default=None, description="Publication date")
    dateLastCrawled: str | None = Field(default=None, description="Last crawl date")


class WebSearchResult(BaseModel):
    """Web search results with metadata"""
    total_results: int = Field(description="Total number of results")
    results: list[WebPageValue] = Field(description="List of web pages")
    query: str = Field(description="Original search query")


class RerankResult(BaseModel):
    """Reranked document with relevance score"""
    index: int = Field(description="Position in original document list")
    text: str = Field(description="Document content")
    relevance_score: float = Field(description="Semantic relevance score (0-1)")


class RerankResponse(BaseModel):
    """Semantic reranking results"""
    results: list[RerankResult] = Field(description="Reranked documents")
    model: str = Field(description="Reranker model used")
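
# Illustrative shape of a web_search result built from these models
# (field values are placeholders, not real API output):
#   WebSearchResult(
#       total_results=2,
#       query="what is retrieval-augmented generation",
#       results=[WebPageValue(id="1", name="Example", url="https://example.com",
#                             displayUrl="example.com", snippet="...")],
#   )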

# ========== Tools ==========

@mcp.tool()
async def web_search(
    query: str,
    count: int = 10,
    summary: bool = True,
    freshness: Literal["noLimit", "day", "week", "month"] = "noLimit",
    language: str = "en",
    filter_non_english: bool = True,
) -> WebSearchResult:
    """
    Search the web for information across billions of documents.

    Returns web pages with titles, URLs, snippets, and optional summaries.
    Optimized for AI applications with accurate, machine-readable results.
    By default, results are filtered to English-only content (filter_non_english=True).
    """
    api_key = os.getenv("LANGSEARCH_API_KEY")
    if not api_key:
        raise ValueError("LANGSEARCH_API_KEY environment variable is required")

    url = f"{API_BASE_URL}/web-search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "query": query,
        "count": count,
        "summary": summary,
        "freshness": freshness,
        "market": language,  # LangSearch uses 'market' for language filtering
    }

    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        try:
            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()

            # Parse the response envelope
            if data["code"] != 200:
                raise ValueError(f"API error: {data.get('msg', 'Unknown error')}")

            search_data = data["data"]
            web_pages = search_data.get("webPages", {})
            results = web_pages.get("value", [])
            total = web_pages.get("totalEstimatedMatches", len(results))

            # Filter out non-English results if requested
            if filter_non_english and language == "en":
                english_results = []
                for result in results:
                    # Keep the result only if both title and snippet look primarily English
                    title_check = is_english_text(result.get("name", ""))
                    snippet_check = is_english_text(result.get("snippet", ""))
                    if title_check and snippet_check:
                        english_results.append(result)
                results = english_results

            return WebSearchResult(
                total_results=total or len(results),
                results=[WebPageValue(**result) for result in results],
                query=search_data["queryContext"]["originalQuery"],
            )
        except httpx.HTTPStatusError as e:
            raise ValueError(f"HTTP error {e.response.status_code}: {e.response.text}") from e
        except Exception as e:
            raise ValueError(f"Search failed: {e}") from e

@mcp.tool()
async def semantic_rerank(
    query: str,
    documents: list[str],
    top_n: int | None = None,
    model: str = "langsearch-reranker-v1",
) -> RerankResponse:
    """
    Rerank documents by semantic relevance to a query.

    Uses deep semantic understanding to reorder search results,
    improving accuracy over traditional keyword or vector search.
    Returns documents with relevance scores (0-1, higher is better).
    """
    api_key = os.getenv("LANGSEARCH_API_KEY")
    if not api_key:
        raise ValueError("LANGSEARCH_API_KEY environment variable is required")
    if not documents:
        raise ValueError("At least one document is required")

    url = f"{API_BASE_URL}/rerank"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "query": query,
        "documents": documents,
        "return_documents": True,
    }
    if top_n is not None:
        payload["top_n"] = top_n

    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        try:
            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()

            # Parse the response envelope
            if data["code"] != 200:
                raise ValueError(f"API error: {data.get('msg', 'Unknown error')}")

            results = []
            for result in data["results"]:
                results.append(
                    RerankResult(
                        index=result["index"],
                        text=result["document"]["text"],
                        relevance_score=result["relevance_score"],
                    )
                )

            return RerankResponse(
                results=results,
                model=data["model"],
            )
        except httpx.HTTPStatusError as e:
            raise ValueError(f"HTTP error {e.response.status_code}: {e.response.text}") from e
        except Exception as e:
            raise ValueError(f"Rerank failed: {e}") from e

# ========== Main ==========

if __name__ == "__main__":
    # Run with the stdio transport for local use
    mcp.run()