"""
LangSearch MCP Server
Provides tools for web search and semantic reranking using the LangSearch API.
"""
import os
import re
from typing import Literal
from pydantic import BaseModel, Field
import httpx
from mcp.server.fastmcp import FastMCP

# Initialize the FastMCP server
mcp = FastMCP(
    "LangSearch",
    instructions="A server providing web search and semantic reranking capabilities using the LangSearch API",
)

# Constants
API_BASE_URL = "https://api.langsearch.com/v1"
DEFAULT_TIMEOUT = 30.0
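
# Example environment setup (illustrative): the tools below read the API key
# from the LANGSEARCH_API_KEY environment variable, e.g.
#   export LANGSEARCH_API_KEY="your-api-key"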

# ========== Helper Functions ==========

def is_english_text(text: str) -> bool:
    """Check whether text is primarily English (heuristic based on ASCII characters)."""
    if not text:
        return False
    # Count ASCII alphabetic characters vs. all alphabetic characters
    ascii_chars = sum(1 for c in text if ord(c) < 128 and c.isalpha())
    total_chars = sum(1 for c in text if c.isalpha())
    if total_chars == 0:
        return True
    # If more than 70% of the alphabetic characters are ASCII, consider it English
    return (ascii_chars / total_chars) > 0.7
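
# Illustrative behavior of the heuristic above:
#   is_english_text("Hello, world")   -> True   (all alphabetic characters are ASCII)
#   is_english_text("こんにちは世界")  -> False  (no ASCII alphabetic characters)
#   is_english_text("12345")          -> True   (no alphabetic characters at all)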

# ========== Data Models ==========

class WebPageValue(BaseModel):
    """Web page search result"""
    id: str = Field(description="Unique identifier")
    name: str = Field(description="Page title")
    url: str = Field(description="Page URL")
    displayUrl: str = Field(description="Display URL")
    snippet: str = Field(description="Brief snippet from the page")
    summary: str | None = Field(default=None, description="Full summary if requested")
    datePublished: str | None = Field(default=None, description="Publication date")
    dateLastCrawled: str | None = Field(default=None, description="Last crawl date")


class WebSearchResult(BaseModel):
    """Web search results with metadata"""
    total_results: int = Field(description="Total number of results")
    results: list[WebPageValue] = Field(description="List of web pages")
    query: str = Field(description="Original search query")


class RerankResult(BaseModel):
    """Reranked document with relevance score"""
    index: int = Field(description="Position in original document list")
    text: str = Field(description="Document content")
    relevance_score: float = Field(description="Semantic relevance score (0-1)")


class RerankResponse(BaseModel):
    """Semantic reranking results"""
    results: list[RerankResult] = Field(description="Reranked documents")
    model: str = Field(description="Reranker model used")
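
# Illustrative shape of a web_search result built from these models
# (field values are placeholders, not real API output):
#   WebSearchResult(
#       total_results=2,
#       query="what is retrieval-augmented generation",
#       results=[WebPageValue(id="1", name="Example", url="https://example.com",
#                             displayUrl="example.com", snippet="...")],
#   )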

# ========== Tools ==========

@mcp.tool()
async def web_search(
    query: str,
    count: int = 10,
    summary: bool = True,
    freshness: Literal["noLimit", "day", "week", "month"] = "noLimit",
    language: str = "en",
    filter_non_english: bool = True,
) -> WebSearchResult:
    """
    Search the web for information across billions of documents.

    Returns web pages with titles, URLs, snippets, and optional summaries.
    Optimized for AI applications with accurate, machine-readable results.
    By default, results are filtered to English-only content (filter_non_english=True).
    """
    api_key = os.getenv("LANGSEARCH_API_KEY")
    if not api_key:
        raise ValueError("LANGSEARCH_API_KEY environment variable is required")

    url = f"{API_BASE_URL}/web-search"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "query": query,
        "count": count,
        "summary": summary,
        "freshness": freshness,
        "market": language,  # LangSearch uses 'market' for language filtering
    }

    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        try:
            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()

            # Parse the response envelope
            if data["code"] != 200:
                raise ValueError(f"API error: {data.get('msg', 'Unknown error')}")

            search_data = data["data"]
            web_pages = search_data.get("webPages", {})
            results = web_pages.get("value", [])
            total = web_pages.get("totalEstimatedMatches", len(results))

            # Filter out non-English results if requested
            if filter_non_english and language == "en":
                english_results = []
                for result in results:
                    # Keep the result only if both title and snippet look primarily English
                    title_check = is_english_text(result.get("name", ""))
                    snippet_check = is_english_text(result.get("snippet", ""))
                    if title_check and snippet_check:
                        english_results.append(result)
                results = english_results

            return WebSearchResult(
                total_results=total or len(results),
                results=[WebPageValue(**result) for result in results],
                query=search_data["queryContext"]["originalQuery"],
            )
        except httpx.HTTPStatusError as e:
            raise ValueError(f"HTTP error {e.response.status_code}: {e.response.text}") from e
        except Exception as e:
            raise ValueError(f"Search failed: {e}") from e

@mcp.tool()
async def semantic_rerank(
    query: str,
    documents: list[str],
    top_n: int | None = None,
    model: str = "langsearch-reranker-v1",
) -> RerankResponse:
    """
    Rerank documents by semantic relevance to a query.

    Uses deep semantic understanding to reorder search results,
    improving accuracy over traditional keyword or vector search.
    Returns documents with relevance scores (0-1, higher is better).
    """
    api_key = os.getenv("LANGSEARCH_API_KEY")
    if not api_key:
        raise ValueError("LANGSEARCH_API_KEY environment variable is required")
    if not documents:
        raise ValueError("At least one document is required")

    url = f"{API_BASE_URL}/rerank"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "query": query,
        "documents": documents,
        "return_documents": True,
    }
    if top_n is not None:
        payload["top_n"] = top_n

    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        try:
            response = await client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()

            # Parse the response envelope
            if data["code"] != 200:
                raise ValueError(f"API error: {data.get('msg', 'Unknown error')}")

            results = []
            for result in data["results"]:
                results.append(
                    RerankResult(
                        index=result["index"],
                        text=result["document"]["text"],
                        relevance_score=result["relevance_score"],
                    )
                )

            return RerankResponse(
                results=results,
                model=data["model"],
            )
        except httpx.HTTPStatusError as e:
            raise ValueError(f"HTTP error {e.response.status_code}: {e.response.text}") from e
        except Exception as e:
            raise ValueError(f"Rerank failed: {e}") from e

# ========== Main ==========

if __name__ == "__main__":
    # Run with the stdio transport for local use
    mcp.run()