Wikidata MCP Server

server_sse.py•8.49 KiB

""" Wikidata MCP Server with FastMCP SSE Transport This module implements a Model Context Protocol (MCP) server using FastMCP's built-in SSE transport that connects Large Language Models to Wikidata's structured knowledge base. """ import os # Configure FastMCP environment variables BEFORE importing MCP modules # This ensures the configuration is applied when the modules are loaded host = '0.0.0.0' # Always bind to all interfaces for production deployment port = int(os.getenv('PORT', '10000')) # Use PORT from environment (Render default: 10000) # Set FastMCP environment variables for host and port binding os.environ['FASTMCP_HOST'] = host os.environ['FASTMCP_PORT'] = str(port) print(f"Pre-configuring FastMCP: HOST={host}, PORT={port}") from mcp.server.fastmcp import FastMCP from datetime import datetime # Import all the MCP tools and prompts from the existing modules from mcp.server.fastmcp.prompts import base from wikidata_api import ( search_entity, search_property, get_entity_metadata, get_entity_properties, execute_sparql ) # Try to import advanced orchestration (optional) try: from src.wikidata_mcp.orchestration.query_orchestrator import QueryOrchestrator # Initialize the orchestrator orchestrator = QueryOrchestrator() ORCHESTRATION_AVAILABLE = True print("Advanced orchestration available") except (ImportError, ValueError) as e: ORCHESTRATION_AVAILABLE = False orchestrator = None print(f"Warning: Advanced orchestration not available: {e}") # Initialize FastMCP mcp = FastMCP(name="Wikidata Knowledge") # ============= MCP TOOLS ============= @mcp.tool() def search_wikidata_entity(query: str, limit: int = 10) -> str: """ Search for Wikidata entities by name. Returns multiple candidates for disambiguation. 
Args: query: The search term (entity name) limit: Maximum candidates to return (default 10, max 50) Returns: JSON with total_candidates, ambiguous flag, and list of candidates with qid/label/description """ return search_entity(query, limit) @mcp.tool() def search_wikidata_property(query: str) -> str: """Search for Wikidata properties by name or description.""" return search_property(query) @mcp.tool() def get_wikidata_metadata(entity_id: str) -> dict: """Get detailed metadata for a Wikidata entity.""" return get_entity_metadata(entity_id) @mcp.tool() def get_wikidata_properties(entity_id: str) -> dict: """Get all properties and their values for a Wikidata entity.""" return get_entity_properties(entity_id) @mcp.tool() def execute_wikidata_sparql(query: str) -> dict: """Execute a SPARQL query against Wikidata.""" return execute_sparql(query) # Add advanced tool only if orchestration is available if ORCHESTRATION_AVAILABLE: @mcp.tool() def query_wikidata_complex(query: str) -> dict: """Advanced Wikidata query with vector database and LLM orchestration.""" return orchestrator.process_query(query) # ============= MCP PROMPTS ============= @mcp.prompt() def entity_search_template() -> str: """Template for searching Wikidata entities efficiently.""" return """ # Wikidata Entity Search Guide ## Performance-First Approach - **Basic tools** (search_wikidata_entity, get_wikidata_metadata): ~200ms ⚡ - **Advanced tool** (query_wikidata_complex): 1-11s 🐌 - **Speed difference**: Basic tools are 50x faster! ## Search Strategy 1. **Start with basic search**: Use `search_wikidata_entity` for initial discovery 2. **Get details**: Use `get_wikidata_metadata` for entity information 3. **Advanced only when needed**: Use `query_wikidata_complex` for complex relationships ## Example Workflow ``` User: "Find information about Marie Curie" 1. search_wikidata_entity("Marie Curie") → Q7186 2. get_wikidata_metadata("Q7186") → Full details ``` Always prefer basic tools for simple queries! 
""" @mcp.prompt() def reconciliation_guide() -> str: """Guide for disambiguating entities when multiple candidates match (the 'John Smith problem').""" return """ # Wikidata Reconciliation Guide ## The Problem When searching for common names like "John Smith", Wikidata returns many candidates. Your job is to help the user identify the CORRECT Q-ID for their specific entity. ## Workflow ### Step 1: Initial Search Use `search_wikidata_entity` - it returns ALL candidates with: - `total_candidates`: How many matches exist - `ambiguous`: true if multiple candidates - `candidates`: List with qid, label, description, aliases ### Step 2: Assess Ambiguity ``` If total_candidates == 0: No match found, try alternative spellings If total_candidates == 1: High confidence match ✓ If total_candidates > 1: DISAMBIGUATION NEEDED ``` ### Step 3: Disambiguation Strategy When ambiguous, ask the user for additional context: **For PEOPLE, ask about:** - Birth/death dates - Occupation or profession - Nationality or country - Notable works or achievements **For PLACES, ask about:** - Country or region - Type (city, river, mountain) - Population or size **For ORGANIZATIONS, ask about:** - Type (company, university, NGO) - Location/headquarters - Industry or field ### Step 4: Verify with Properties Use `get_wikidata_properties(qid)` to confirm the match: - P569 = date of birth - P570 = date of death - P106 = occupation - P27 = country of citizenship - P19 = place of birth ### Step 5: Report Confidence Always tell the user: - How many candidates were found - Why you selected a specific Q-ID - Confidence level (high/medium/low) ## Example Dialogue **User**: Find John Smith the explorer **You**: 1. Search returns 47 candidates for "John Smith" 2. Ask: "Which John Smith? I found 47 matches. Can you provide: - Approximate birth year? - Which country? - What did they explore?" 3. User says: "Born around 1580, English, explored Virginia" 4. 
Use SPARQL to filter: explorers, English, 16th-17th century 5. Return: Q327071 (John Smith, English explorer, 1580-1631) - HIGH confidence ## Key Principle **Never guess when ambiguous.** Always ask for clarifying information or present the top candidates for the user to choose. """ @mcp.prompt() def general_wikidata_guidance() -> str: """General guidance for using Wikidata MCP tools effectively.""" return """ # Wikidata MCP Server - Performance Guide ## 🚀 Tool Performance Hierarchy ### ⚡ FAST Tools (140-250ms) - Use First - `search_wikidata_entity`: Find entities by name - `search_wikidata_property`: Find properties by name - `get_wikidata_metadata`: Get entity details - `get_wikidata_properties`: Get all entity properties - `execute_wikidata_sparql`: Run SPARQL queries ### 🐌 SLOW Tool (1-11s) - Use Sparingly - `query_wikidata_complex`: Advanced reasoning with vector DB ## 📋 Usage Guidelines ### ✅ RIGHT Tool for the Job - **Simple lookups**: Basic tools (50x faster!) - **Known entities**: Basic tools - **Complex temporal queries**: Advanced tool - **Multi-step reasoning**: Advanced tool ### ❌ WRONG Usage Patterns - Using advanced tool for simple entity lookups - Using advanced tool when you know the entity ID - Using advanced tool for basic property searches ## 🎯 Performance Tips 1. **Always start with basic tools** 2. **Use advanced tool only for complex reasoning** 3. **Cache entity IDs when possible** 4. **Prefer SPARQL for structured queries** By following these guidelines, you'll provide accurate, up-to-date, and performant Wikidata interactions. 
""" # ============= MCP RESOURCES ============= @mcp.resource("wikidata://common-properties") def common_properties_resource() -> str: """Common Wikidata properties for reference.""" return """ # Common Wikidata Properties ## Basic Properties - P31: instance of - P279: subclass of - P106: occupation - P27: country of citizenship - P19: place of birth - P20: place of death - P569: date of birth - P570: date of death ## Relationships - P22: father - P25: mother - P26: spouse - P40: child - P3373: sibling ## Locations - P17: country - P131: located in administrative territorial entity - P625: coordinate location ## Works & Achievements - P800: notable work - P166: award received - P69: educated at - P108: employer """ # ============= SERVER EXECUTION ============= if __name__ == "__main__": print("Starting Wikidata MCP Server with FastMCP Streamable HTTP transport...") print(f"Server will bind to {host}:{port} (configured at module load)") # Start the server using MCP SDK with streamable HTTP transport # FastMCP configuration (host/port) was set at module import time mcp.run(transport="streamable-http")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ebaenamar/wikidata-mcp-mirror'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

server_sse.py•8.49 KiB