server.py•8.83 kB
#!/usr/bin/env python3
"""
=============================================================================
Jina Web Search MCP Server
=============================================================================
A Model Context Protocol (MCP) server that provides tools for fetching URL
content and performing web searches using the Jina AI API.
Features:
- fetch_url_content: Retrieve content from any URL via Jina's crawler
- web_search: Perform semantic web searches via Jina's search API
- Streamable HTTP transport on port 5003
- Docker-ready with health checks and logging
Author: hypersniper05
License: MIT
Repository: https://github.com/hypersniper05/JinaWebSearchMCP
=============================================================================
"""
# Standard library imports
import os
import logging
import urllib.parse
from typing import Dict, Any
# Third-party imports
import requests
from mcp.server.fastmcp import FastMCP
# =============================================================================
# Logging Configuration
# =============================================================================
# Root logging setup: INFO and above, each record stamped with time, logger
# name and level. Output goes to the default stream handler, which is what a
# container log driver collects.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-scoped logger shared by the tools and the startup code in this file.
logger = logging.getLogger(__name__)
# =============================================================================
# MCP Server Configuration
# =============================================================================
# The single FastMCP application object. The @mcp.tool decorators in this
# file register their functions on it, and the __main__ block runs it.
mcp = FastMCP(
    name="JinaWebCrawler",
    instructions=(
        "Provides tools for fetching URL content and performing "
        "web searches using Jina AI API"
    ),
    host="0.0.0.0",  # all interfaces, so the port is reachable from outside a container
    port=5003,
)
# =============================================================================
# API Configuration
# =============================================================================
# Jina AI API key, read from the process environment. When the variable is
# absent a placeholder string is used instead so the module still imports.
# NOTE: this file does not load a .env file itself; values from a .env file
# only apply if something else (e.g. the container runtime) exports them into
# the environment before startup.
JINA_API_KEY = os.environ.get("JINA_API_KEY", "your_jina_api_key_here")

# Make a missing key visible in the logs rather than failing at import time.
if JINA_API_KEY == "your_jina_api_key_here":
    logger.warning("Using placeholder API key. Set JINA_API_KEY environment variable for production use.")
# Request headers sent with every call to the Jina AI URL-fetching endpoint
# (used by fetch_url_content).
JINA_HEADERS = {
    "Accept": "application/json",  # Request JSON response format
    # Jina's API documents the "Bearer <key>" scheme. Accept a key configured
    # either with or without the prefix so both forms authenticate.
    "Authorization": (
        JINA_API_KEY
        if JINA_API_KEY.lower().startswith("bearer ")
        else f"Bearer {JINA_API_KEY}"
    ),
    "DNT": "1",  # Do Not Track header
    "X-Base": "final",  # Use final base URL resolution
    "X-Engine": "browser",  # Use browser-based crawling engine
    "X-Return-Format": "text",  # Return plain text content
    "X-With-Iframe": "true",  # Include iframe content
    "X-With-Links-Summary": "true",  # Include summary of links found
    "X-With-Shadow-Dom": "true",  # Include shadow DOM content
}
# Request headers sent with every call to the Jina AI search endpoint
# (used by web_search). Deliberately smaller than the URL-fetching set.
SEARCH_HEADERS = {
    # Jina's API documents the "Bearer <key>" scheme. Accept a key configured
    # either with or without the prefix so both forms authenticate.
    "Authorization": (
        JINA_API_KEY
        if JINA_API_KEY.lower().startswith("bearer ")
        else f"Bearer {JINA_API_KEY}"
    ),
    "X-Respond-With": "no-content",  # Optimize response size
}
# =============================================================================
# MCP Tool Definitions
# =============================================================================
# These functions are exposed as tools via the Model Context Protocol
# They can be called by MCP clients (like Claude Desktop, VS Code, etc.)
@mcp.tool(
    name="fetch_url_content",
    description="Fetches content from a given URL using Jina AI API"
)
def fetch_url_content(url: str) -> str:
    """
    Fetch a web page through Jina AI's r.jina.ai endpoint and return the body.

    The target URL is appended to ``https://r.jina.ai/`` and requested with
    ``JINA_HEADERS``. This function never raises: every failure is logged and
    reported back to the caller as a plain error string.

    Args:
        url (str): The URL to fetch content from (should include the
            protocol, http/https). Surrounding whitespace is ignored.

    Returns:
        str: The response body text on success, or an error message if the
        URL is blank, the request fails or times out, or the endpoint
        answers with an HTTP error status.

    Example:
        fetch_url_content("https://example.com") -> "Welcome to Example.com..."
    """
    try:
        # Reject blank input up front instead of requesting the bare endpoint.
        target = url.strip()
        if not target:
            error_msg = "Invalid URL: a non-empty URL (including http:// or https://) is required"
            logger.error(error_msg)
            return error_msg

        # The r.jina.ai service takes the target URL as the request path.
        jina_url = f"https://r.jina.ai/{target}"
        logger.info(f"Fetching content from: {jina_url}")

        # (connect, read) timeout in seconds. Without one, requests waits
        # indefinitely and a stalled upstream would hang the tool call.
        response = requests.get(jina_url, headers=JINA_HEADERS, timeout=(10, 60))
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions

        logger.info(f"Successfully fetched content, length: {len(response.text)} characters")
        return response.text
    except requests.exceptions.RequestException as e:
        # Connection failures, timeouts and HTTP error statuses land here.
        error_msg = f"Network error fetching URL content: {str(e)}"
        logger.error(error_msg)
        return error_msg
    except Exception as e:
        # Anything else (e.g. a non-string argument) is still reported as text.
        error_msg = f"Unexpected error fetching URL content: {str(e)}"
        logger.error(error_msg)
        return error_msg
@mcp.tool(
    name="web_search",
    description="Performs a web search using Jina AI API with the provided query"
)
def web_search(query: str) -> str:
    """
    Run a web search through Jina AI's s.jina.ai endpoint and return the body.

    The query is percent-encoded, appended to ``https://s.jina.ai/`` and
    requested with ``SEARCH_HEADERS``. This function never raises: every
    failure is logged and reported back to the caller as a plain error string.

    Args:
        query (str): The search query (can be natural language). Surrounding
            whitespace is ignored.

    Returns:
        str: The response body text on success, or an error message if the
        query is blank, the request fails or times out, or the endpoint
        answers with an HTTP error status.

    Example:
        web_search("latest Python frameworks 2025") -> "1. FastAPI continues to gain..."
    """
    try:
        # Reject blank input up front instead of requesting the bare endpoint.
        cleaned_query = query.strip()
        if not cleaned_query:
            error_msg = "Invalid query: a non-empty search query is required"
            logger.error(error_msg)
            return error_msg

        # The query travels as a single path segment, so "/" must be escaped
        # too. quote() leaves it untouched by default (safe="/"), which would
        # split a query such as "TCP/IP" into two path components.
        encoded_query = urllib.parse.quote(cleaned_query, safe="")
        url = f'https://s.jina.ai/{encoded_query}'
        logger.info(f"Performing search with query: {cleaned_query}")
        logger.info(f"Search URL: {url}")

        # (connect, read) timeout in seconds. Without one, requests waits
        # indefinitely and a stalled upstream would hang the tool call.
        response = requests.get(url, headers=SEARCH_HEADERS, timeout=(10, 60))
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions

        logger.info(f"Search completed, result length: {len(response.text)} characters")
        return response.text
    except requests.exceptions.RequestException as e:
        # Connection failures, timeouts and HTTP error statuses land here.
        error_msg = f"Network error performing web search: {str(e)}"
        logger.error(error_msg)
        return error_msg
    except Exception as e:
        # Anything else (e.g. a non-string argument) is still reported as text.
        error_msg = f"Unexpected error performing web search: {str(e)}"
        logger.error(error_msg)
        return error_msg
# =============================================================================
# Server Startup
# =============================================================================
if __name__ == "__main__":
    # Script entry point; skipped when this module is imported.
    # Logs a short startup banner, then hands control to the MCP server,
    # which serves over the streamable HTTP transport until it is stopped
    # (Ctrl+C or container shutdown).

    # Report only whether a real key is configured, never the key itself.
    key_state = "PLACEHOLDER" if JINA_API_KEY == "your_jina_api_key_here" else "SET"

    logger.info("Starting Jina Web Crawler MCP Server on port 5003...")
    logger.info(f"Using API key: {key_state}")
    logger.info("Server will be available at http://0.0.0.0:5003")
    logger.info("Press Ctrl+C to stop the server")

    try:
        # Blocks here for the lifetime of the server.
        mcp.run(transport="streamable-http")
    except KeyboardInterrupt:
        # Ctrl+C is a normal way to stop: log it and exit quietly.
        logger.info("Server stopped by user (Ctrl+C)")
    except Exception as exc:
        # Record the failure, then re-raise so the process exits with an error.
        logger.error(f"Server error: {exc!s}")
        raise