ChatSpatial

ChatSpatial
chatspatial

spatial_mcp_adapter.py•20 KiB

""" Spatial MCP Adapter for ChatSpatial This module provides a clean abstraction layer between MCP protocol requirements and ChatSpatial's spatial analysis functionality. """ import logging from dataclasses import dataclass, field from typing import Any, Optional from mcp.server.fastmcp import Context, FastMCP from mcp.types import ToolAnnotations from .utils.exceptions import DataNotFoundError, ParameterError logger = logging.getLogger(__name__) # ============================================================================= # TOOL ANNOTATIONS - Single Source of Truth # ============================================================================= # These annotations are passed to FastMCP's @mcp.tool() decorator to inform # LLM clients about tool behavior characteristics. # # Annotation meanings (from MCP spec): # - readOnlyHint: Tool only reads data, doesn't modify state # - idempotentHint: Repeated calls with same args have no additional effect # - openWorldHint: Tool may interact with external entities (network, files) # ============================================================================= TOOL_ANNOTATIONS: dict[str, ToolAnnotations] = { # ========================================================================== # Data I/O tools - interact with filesystem and memory state # ========================================================================== "load_data": ToolAnnotations( readOnlyHint=False, # Creates new dataset in memory (data_store) idempotentHint=False, # Each call generates new data_id openWorldHint=True, # Reads from filesystem ), "export_data": ToolAnnotations( readOnlyHint=False, # Writes to filesystem idempotentHint=True, # Same data to same path is idempotent openWorldHint=True, # Writes to filesystem ), "reload_data": ToolAnnotations( readOnlyHint=False, # Replaces in-memory adata idempotentHint=True, # Same file yields same result openWorldHint=True, # Reads from filesystem ), # 
========================================================================== # Preprocessing - modifies adata in-place # ========================================================================== "preprocess_data": ToolAnnotations( readOnlyHint=False, # Modifies adata in-place (filtering, normalization) idempotentHint=False, # Re-running may change state openWorldHint=False, # No external access ), "compute_embeddings": ToolAnnotations( readOnlyHint=False, # Adds PCA/UMAP/clustering to adata idempotentHint=False, # Has force param; UMAP has stochasticity openWorldHint=False, # No external access ), # ========================================================================== # Visualization - saves to filesystem # ========================================================================== "visualize_data": ToolAnnotations( readOnlyHint=False, # Saves image to filesystem idempotentHint=False, # Default behavior writes new file each call openWorldHint=True, # Writes to filesystem ), # ========================================================================== # Analysis tools - modify adata by adding results # ========================================================================== "annotate_cell_types": ToolAnnotations( readOnlyHint=False, # Adds cell type annotations to adata.obs idempotentHint=False, # Methods may have stochasticity openWorldHint=True, # May use external references/databases ), "analyze_spatial_statistics": ToolAnnotations( readOnlyHint=False, # Adds statistics to adata.uns idempotentHint=True, # Deterministic computation openWorldHint=False, # No external access ), "find_markers": ToolAnnotations( readOnlyHint=False, # Adds results to adata.uns['rank_genes_groups'] idempotentHint=True, # Deterministic (wilcoxon/t-test) openWorldHint=False, # No external access ), "compare_conditions": ToolAnnotations( readOnlyHint=False, # Adds results to adata.uns idempotentHint=True, # Deterministic (DESeq2) openWorldHint=False, # No external access ), 
"analyze_velocity_data": ToolAnnotations( readOnlyHint=False, # Adds velocity to adata idempotentHint=False, # Stochastic methods openWorldHint=False, # No external access ), "analyze_trajectory_data": ToolAnnotations( readOnlyHint=False, # Adds trajectory info to adata idempotentHint=False, # May have stochastic elements openWorldHint=False, # No external access ), "integrate_samples": ToolAnnotations( readOnlyHint=False, # Creates new integrated dataset in memory idempotentHint=False, # Each call creates new data_id openWorldHint=False, # No external access ), "deconvolve_data": ToolAnnotations( readOnlyHint=False, # Adds deconvolution results to adata idempotentHint=False, # Deep learning methods are stochastic openWorldHint=True, # May load external reference data ), "identify_spatial_domains": ToolAnnotations( readOnlyHint=False, # Adds domain labels to adata.obs idempotentHint=False, # Clustering algorithms can vary openWorldHint=False, # No external access ), "analyze_cell_communication": ToolAnnotations( readOnlyHint=False, # Adds communication results to adata.uns idempotentHint=True, # Deterministic given same inputs openWorldHint=True, # Uses LR databases (CellChat, CellPhoneDB) ), "analyze_enrichment": ToolAnnotations( readOnlyHint=False, # Adds enrichment scores to adata idempotentHint=True, # Deterministic openWorldHint=True, # Uses gene set databases (GO, KEGG, MSigDB) ), "find_spatial_genes": ToolAnnotations( readOnlyHint=False, # Adds spatial gene info to adata.var idempotentHint=True, # Deterministic methods (SPARK-X, SpatialDE) openWorldHint=False, # No external access ), "analyze_cnv": ToolAnnotations( readOnlyHint=False, # Adds CNV results to adata idempotentHint=True, # Deterministic (inferCNV) openWorldHint=False, # No external access (uses adata only) ), "register_spatial_data": ToolAnnotations( readOnlyHint=False, # Modifies spatial coordinates idempotentHint=False, # Registration algorithms can vary openWorldHint=False, # No external 
access ), } def get_tool_annotations(tool_name: str) -> ToolAnnotations: """Get annotations for a tool by name. Args: tool_name: Name of the tool (e.g., 'load_data', 'preprocess_data') Returns: ToolAnnotations object for the tool. Returns conservative defaults if tool is not in registry. Usage: @mcp.tool(annotations=get_tool_annotations("load_data")) async def load_data(...): ... """ return TOOL_ANNOTATIONS.get( tool_name, # Conservative defaults: assume tool modifies state and is not idempotent ToolAnnotations(readOnlyHint=False, idempotentHint=False), ) class SpatialMCPAdapter: """Main adapter class that bridges MCP and spatial analysis functionality.""" def __init__(self, mcp_server: FastMCP, data_manager: "DefaultSpatialDataManager"): self.mcp = mcp_server self.data_manager = data_manager class DefaultSpatialDataManager: """In-memory spatial data management with async interface. Design Note: Methods are async for interface consistency and future extensibility (e.g., remote storage, database backends), even though current implementation is synchronous. This is intentional - async overhead is negligible and changing the interface later would break 20+ call sites. 
""" def __init__(self): self.data_store: dict[str, Any] = {} self._next_id = 1 async def load_dataset( self, path: str, data_type: str, name: Optional[str] = None ) -> str: """Load a spatial dataset and return its ID""" from typing import cast from .models.data import SpatialPlatform from .utils.data_loader import load_spatial_data # Load data - cast to SpatialPlatform (validated at load_spatial_data) dataset_info = await load_spatial_data( path, cast(SpatialPlatform, data_type), name ) # Generate ID data_id = f"data_{self._next_id}" self._next_id += 1 # Store data self.data_store[data_id] = dataset_info return data_id async def get_dataset(self, data_id: str) -> Any: """Get a dataset by ID""" if data_id not in self.data_store: raise DataNotFoundError(f"Dataset {data_id} not found") return self.data_store[data_id] async def list_datasets(self) -> list[dict[str, Any]]: """List all loaded datasets""" return [ { "id": data_id, "name": info.get("name", f"Dataset {data_id}"), "type": info.get("type", "unknown"), "n_cells": info.get("n_cells", 0), "n_genes": info.get("n_genes", 0), } for data_id, info in self.data_store.items() ] async def save_result(self, data_id: str, result_type: str, result: Any) -> None: """Save analysis results""" if data_id not in self.data_store: raise DataNotFoundError(f"Dataset {data_id} not found") if "results" not in self.data_store[data_id]: self.data_store[data_id]["results"] = {} self.data_store[data_id]["results"][result_type] = result async def get_result(self, data_id: str, result_type: str) -> Any: """Get analysis results""" if data_id not in self.data_store: raise DataNotFoundError(f"Dataset {data_id} not found") results = self.data_store[data_id].get("results", {}) if result_type not in results: raise DataNotFoundError( f"No {result_type} results found for dataset {data_id}" ) return results[result_type] def dataset_exists(self, data_id: str) -> bool: """Check if a dataset exists. 
Args: data_id: Dataset identifier Returns: True if the dataset exists, False otherwise """ return data_id in self.data_store async def update_adata(self, data_id: str, adata: Any) -> None: """Update the adata object for an existing dataset. Use this when preprocessing creates a new adata object (e.g., copy, subsample, or format conversion). Args: data_id: Dataset identifier adata: New AnnData object to store Raises: DataNotFoundError: If dataset not found """ if data_id not in self.data_store: raise DataNotFoundError(f"Dataset {data_id} not found") self.data_store[data_id]["adata"] = adata async def create_dataset( self, data_id: str, adata: Any, name: Optional[str] = None, metadata: Optional[dict[str, Any]] = None, ) -> None: """Create a new dataset with specified ID. Use this when creating derived datasets (e.g., integration results, subset data). Args: data_id: Unique identifier for the new dataset adata: AnnData object to store name: Optional display name for the dataset metadata: Optional additional metadata dict Raises: ParameterError: If dataset with same ID already exists """ if data_id in self.data_store: raise ParameterError( f"Dataset {data_id} already exists. Use update_adata() to update." ) dataset_info: dict[str, Any] = {"adata": adata} if name: dataset_info["name"] = name if metadata: dataset_info.update(metadata) self.data_store[data_id] = dataset_info @dataclass class ToolContext: """Unified context for ChatSpatial tool execution. This class provides a clean interface for tools to access data and logging without the redundant data_store dict wrapping pattern. Design Rationale: - Python dict assignment is reference, not copy. The old pattern of wrapping dataset_info in a temp dict and "writing back" was completely unnecessary. - Tools should access adata directly via get_adata(), not through dict wrapping. - Logging methods fall back gracefully when MCP context is unavailable. 
Logging Strategy: - User-visible messages: await ctx.info(), await ctx.warning(), await ctx.error() These appear in Claude's conversation and provide user-friendly progress updates. - Developer debugging: ctx.debug() This writes to Python logger for debugging, not visible to users. Usage: async def my_tool(data_id: str, ctx: ToolContext, params: Params) -> Result: adata = await ctx.get_adata(data_id) await ctx.info(f"Processing {adata.n_obs} cells") # User sees this ctx.debug(f"Internal state: {some_detail}") # Developer log only # ... analysis logic ... return result """ _data_manager: "DefaultSpatialDataManager" _mcp_context: Optional[Context] = None _logger: Optional[logging.Logger] = field(default=None, repr=False) def __post_init__(self) -> None: """Initialize the logger for debug messages.""" if self._logger is None: self._logger = logging.getLogger("chatspatial.tools") def debug(self, msg: str) -> None: """Log debug message for developers (not visible to users). Use this for detailed technical information that helps with debugging but would be noise for end users. These messages go to Python logger. Args: msg: Debug message to log """ if self._logger: self._logger.debug(msg) def log_config(self, title: str, config: dict[str, Any]) -> None: """Log configuration details for developers. Convenience method for logging parameter configurations in a structured format. Goes to Python logger, not user-visible. Args: title: Configuration section title config: Dictionary of configuration key-value pairs """ if self._logger: self._logger.debug("=" * 50) self._logger.debug(f"{title}:") for key, value in config.items(): self._logger.debug(f" {key}: {value}") self._logger.debug("=" * 50) async def get_adata(self, data_id: str) -> Any: """Get AnnData object directly by ID. This is the primary data access method for tools. Returns the AnnData object directly without intermediate dict wrapping. 
Args: data_id: Dataset identifier Returns: AnnData object for the dataset Raises: ValueError: If dataset not found """ dataset_info = await self._data_manager.get_dataset(data_id) return dataset_info["adata"] async def get_dataset_info(self, data_id: str) -> dict[str, Any]: """Get full dataset info dict when metadata is needed. Use this only when you need access to metadata beyond adata, such as 'name', 'type', 'source_path', etc. """ return await self._data_manager.get_dataset(data_id) async def set_adata(self, data_id: str, adata: Any) -> None: """Update the AnnData object for a dataset. Use this when preprocessing creates a new adata object (e.g., copy, subsample, or format conversion). This updates the reference in the data manager's store. Args: data_id: Dataset identifier adata: New AnnData object to store Raises: ValueError: If dataset not found """ await self._data_manager.update_adata(data_id, adata) async def add_dataset( self, data_id: str, adata: Any, name: Optional[str] = None, metadata: Optional[dict[str, Any]] = None, ) -> None: """Add a new dataset to the data store. Use this when creating new datasets (e.g., integration results, subset data, or derived datasets). 
Args: data_id: Unique identifier for the new dataset adata: AnnData object to store name: Optional display name for the dataset metadata: Optional additional metadata dict Raises: ValueError: If dataset with same ID already exists """ await self._data_manager.create_dataset(data_id, adata, name, metadata) async def info(self, msg: str) -> None: """Log info message to MCP context if available.""" if self._mcp_context: await self._mcp_context.info(msg) async def warning(self, msg: str) -> None: """Log warning message to MCP context if available.""" if self._mcp_context: await self._mcp_context.warning(msg) async def error(self, msg: str) -> None: """Log error message to MCP context if available.""" if self._mcp_context: await self._mcp_context.error(msg) def create_spatial_mcp_server( server_name: str = "ChatSpatial", data_manager: Optional[DefaultSpatialDataManager] = None, ) -> tuple[FastMCP, SpatialMCPAdapter]: """ Create and configure a spatial MCP server with adapter Args: server_name: Name of the MCP server data_manager: Optional custom data manager (uses default if None) Returns: Tuple of (FastMCP server instance, SpatialMCPAdapter instance) """ # Server instructions for LLM guidance on tool usage instructions = """ChatSpatial provides spatial transcriptomics analysis through 60+ integrated methods across 15 analytical categories. CORE WORKFLOW PATTERN: 1. Always start with load_data() to import spatial transcriptomics data 2. Run preprocess_data() before most analytical tools (required for clustering, spatial analysis, etc.) 3. 
Use visualize_data() to inspect results after each analysis step CRITICAL OPERATIONAL CONSTRAINTS: - Preprocessing creates filtered gene sets for efficiency but preserves raw data in adata.raw - Cell communication analysis automatically uses adata.raw when available for comprehensive gene coverage - Species-specific parameters are critical: set species="mouse" or "human" and use appropriate resources (e.g., liana_resource="mouseconsensus" for mouse) - Reference data for annotation methods (tangram, scanvi) must be PREPROCESSED before use PLATFORM-SPECIFIC GUIDANCE: - Spot-based platforms (Visium, Slide-seq): Deconvolution is recommended to infer cell type compositions - Single-cell platforms (MERFISH, Xenium, CosMx): Skip deconvolution - native single-cell resolution provided - Visium with histology images: Use SpaGCN for spatial domain identification - High-resolution data without images: Use STAGATE or GraphST TOOL RELATIONSHIPS: - Spatial domain identification → Enables spatial statistics (neighborhood enrichment, co-occurrence) - Cell type annotation → Required for cell communication analysis - Deconvolution results → Can be used for downstream spatial statistics - Integration → Recommended before cross-sample comparative analyses PARAMETER GUIDANCE: All tools include comprehensive parameter documentation in their schemas. Refer to tool descriptions for default values, platform-specific optimizations, and method-specific requirements. For multi-step analyses, preserve data_id across operations to maintain analysis continuity.""" # Create MCP server with instructions mcp = FastMCP(server_name, instructions=instructions) # Create data manager if not provided if data_manager is None: data_manager = DefaultSpatialDataManager() # Create adapter adapter = SpatialMCPAdapter(mcp, data_manager) return mcp, adapter

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cafferychen777/ChatSpatial'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

spatial_mcp_adapter.py•20 KiB