"""FastMCP server for Spark documentation search and retrieval."""
import json
import logging
from pathlib import Path
from fastmcp import FastMCP
from mcp_spark_documentation.database import DocumentDatabase
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Default database path
DEFAULT_DB_PATH = Path(__file__).parent.parent.parent / "data" / "spark_docs.db"
# Initialise FastMCP server
mcp = FastMCP(name="spark-documentation")
# Lazy database initialisation
_database: DocumentDatabase | None = None
def get_database() -> DocumentDatabase:
"""Get or initialise the database instance.
Returns:
DocumentDatabase instance.
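    Example (illustrative; shows only that the instance is cached):
        db = get_database()
        assert db is get_database()  # repeat calls return the same object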
"""
global _database
if _database is None:
db_path = Path(DEFAULT_DB_PATH)
db_path.parent.mkdir(parents=True, exist_ok=True)
_database = DocumentDatabase(db_path)
    return _database


def _search_documentation_impl(
query: str,
section: str | None = None,
limit: int = 10,
) -> str:
"""Core implementation of search_documentation.
Args:
query: Search terms to find in the documentation.
section: Optional section to filter results.
limit: Maximum number of results to return.
Returns:
JSON-formatted search results.
"""
db = get_database()
    # Clamp limit to the supported range of 1-50 results
limit = min(max(1, limit), 50)
results = db.search(query, section=section, limit=limit)
if not results:
return json.dumps(
{
"message": f"No results found for query: '{query}'",
"results": [],
}
)
output = {
"query": query,
"section_filter": section,
"result_count": len(results),
"results": [
{
"title": r.title,
"url": r.url,
"path": r.path,
"section": r.section,
"snippet": r.snippet,
"relevance_score": round(r.score, 4),
}
for r in results
],
}
    return json.dumps(output, indent=2)


def _read_documentation_impl(path: str) -> str:
"""Core implementation of read_documentation.
Args:
path: The relative path to the documentation file.
Returns:
JSON-formatted document content or error message.
"""
db = get_database()
document = db.get_document(path)
if not document:
return json.dumps(
{
"error": f"Document not found: {path}",
"suggestion": "Use search_documentation to find valid document paths.",
}
)
return json.dumps(
{
"path": document.path,
"title": document.title,
"description": document.description,
"section": document.section,
"url": document.url,
"content": document.content,
},
indent=2,
    )


@mcp.tool()
def search_documentation(
query: str,
section: str | None = None,
limit: int = 10,
) -> str:
"""Search Apache Spark documentation by keyword query.
Args:
query: Search terms to find in the documentation. Supports
full-text search with stemming (e.g., "stream" matches
"streaming", "streams").
section: Optional section to filter results. Common sections include:
'sql-ref', 'api', 'streaming', 'mllib', 'graphx',
'structured-streaming', etc.
limit: Maximum number of results to return (default: 10, max: 50).
Returns:
        JSON-formatted search results with title, URL, path, section, snippet,
        and relevance score.
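    Example (illustrative; actual titles, paths, and scores depend on the
    indexed documentation set):
        search_documentation("watermark", section="structured-streaming", limit=3)
        returns a JSON string shaped like:
            {"query": "watermark", "section_filter": "structured-streaming",
             "result_count": 3, "results": [{"title": "...", "url": "...",
             "path": "...", "section": "...", "snippet": "...",
             "relevance_score": ...}, ...]}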
"""
    return _search_documentation_impl(query, section, limit)


@mcp.tool()
def read_documentation(path: str) -> str:
"""Read the full content of a specific Spark documentation page.
Args:
path: The relative path to the documentation file (e.g.,
'sql-ref/sql-syntax.md' or 'api/python/index.md'). This path
is returned in search results.
Returns:
        JSON-formatted document metadata (path, title, description, section,
        URL) together with the full markdown content, or a JSON error message
        if the page is not found.
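    Example (illustrative; the path must exist in the local index):
        read_documentation("sql-ref/sql-syntax.md")
        returns a JSON string shaped like:
            {"path": "sql-ref/sql-syntax.md", "title": "...",
             "description": "...", "section": "...", "url": "...",
             "content": "..."}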
"""
    return _read_documentation_impl(path)


def run_server() -> None:
"""Run the MCP server with STDIO transport."""
mcp.run(transport="stdio")
if __name__ == "__main__":
run_server()