ChatSpatial

analysis.py•16.2 KiB

""" Analysis result models for spatial transcriptomics data. """ from typing import Any, Optional from pydantic import BaseModel, ConfigDict, Field class BaseAnalysisResult(BaseModel): """Base class for all analysis results. Provides common configuration and optional shared fields. All analysis result models should inherit from this class. """ model_config = ConfigDict(arbitrary_types_allowed=True) class PreprocessingResult(BaseAnalysisResult): """Result of data preprocessing""" data_id: str n_cells: int n_genes: int n_hvgs: int clusters: int qc_metrics: Optional[dict[str, Any]] = None class DifferentialExpressionResult(BaseAnalysisResult): """Result of differential expression analysis Note on serialization: For consistency with other result models, the statistics dict is excluded from JSON serialization. Key summary info is in explicit fields. Fields included in MCP response: - data_id, comparison (basic info) - n_genes (count) - top_genes (top differentially expressed genes) Fields excluded from MCP response: - statistics (detailed DE metrics per group) """ data_id: str comparison: str n_genes: int top_genes: list[str] = Field(default_factory=list) # Detailed statistics - excluded from MCP response statistics: dict[str, Any] = Field( default_factory=dict, exclude=True, # Exclude from JSON serialization to LLM ) class AnnotationResult(BaseAnalysisResult): """Result of cell type annotation Attributes: data_id: Dataset identifier method: Annotation method used output_key: Column name in adata.obs where cell types are stored (e.g., "cell_type_tangram") confidence_key: Column name in adata.obs where confidence scores are stored (e.g., "confidence_tangram") cell_types: List of unique cell types identified counts: Number of cells per cell type confidence_scores: Confidence scores per cell type (when available). Empty dict or None indicates no confidence data available. Only contains real statistical measures, never arbitrary values. 
tangram_mapping_score: For Tangram method - overall mapping quality score """ data_id: str method: str output_key: str # Column name where cell types are stored confidence_key: Optional[str] = ( None # Column name where confidence scores are stored ) cell_types: list[str] counts: dict[str, int] confidence_scores: Optional[dict[str, float]] = None tangram_mapping_score: Optional[float] = None # For Tangram method - mapping score class SpatialStatisticsResult(BaseAnalysisResult): """Result of spatial analysis Note on serialization: To minimize MCP response size, detailed per-gene/per-spot statistics are excluded from JSON serialization using Field(exclude=True). Summary fields are always included. Fields included in MCP response: - data_id, analysis_type (basic info) - n_features_analyzed, n_significant (summary counts) - top_features (top significant genes/clusters) - summary_metrics (compact key metrics) - results_key (for accessing full results) Fields excluded from MCP response (stored in adata): - statistics (full detailed results dict) Visualization is handled separately via the visualize_data tool. 
""" data_id: str analysis_type: str # Summary fields - always included in MCP response n_features_analyzed: int = 0 n_significant: int = 0 top_features: list[str] = Field(default_factory=list) summary_metrics: dict[str, float] = Field(default_factory=dict) results_key: Optional[str] = None # Key in adata.uns for full results # Detailed statistics - excluded from MCP response statistics: Optional[dict[str, Any]] = Field( default=None, exclude=True, # Exclude from JSON serialization to LLM ) class RNAVelocityResult(BaseAnalysisResult): """Result of RNA velocity analysis""" data_id: str velocity_computed: bool velocity_graph_key: Optional[str] = None # Key for velocity graph in adata.uns mode: str # RNA velocity computation mode class TrajectoryResult(BaseAnalysisResult): """Result of trajectory analysis""" data_id: str pseudotime_computed: bool velocity_computed: bool pseudotime_key: str method: str # Trajectory analysis method used spatial_weight: float # Spatial kernel weight (CellRank only) class IntegrationResult(BaseAnalysisResult): """Result of sample integration""" data_id: str n_samples: int integration_method: str class DeconvolutionResult(BaseAnalysisResult): """Result of spatial deconvolution Note on serialization: To minimize MCP response size, detailed per-cell-type statistics are excluded from JSON serialization using Field(exclude=True). 
Fields included in MCP response: - data_id, method, n_cell_types, cell_types (basic info) - n_spots, genes_used (summary counts) - dominant_type_key, proportions_key (storage keys) Fields excluded from MCP response (stored in adata): - statistics (includes mean_proportions, dominant_types dicts) """ data_id: str method: str dominant_type_key: str # Column name where dominant cell type is stored cell_types: list[str] n_cell_types: int proportions_key: str # Key in adata.obsm where cell type proportions are stored # Summary fields - always included n_spots: int = 0 genes_used: int = 0 # Detailed statistics - excluded from MCP response statistics: dict[str, Any] = Field( default_factory=dict, exclude=True, # Exclude from JSON serialization to LLM ) class SpatialDomainResult(BaseAnalysisResult): """Result of spatial domain identification Note on serialization: For consistency with other result models, the detailed statistics dict is excluded from JSON serialization. Key summary info is in explicit fields. Fields included in MCP response: - data_id, method, n_domains (basic info) - domain_key, refined_domain_key, embeddings_key (storage keys) - domain_counts (number of spots per domain - typically compact) Fields excluded from MCP response: - statistics (method parameters, stored in adata.uns) """ data_id: str method: str n_domains: int domain_key: str # Key in adata.obs where domain labels are stored domain_counts: dict[str, int] # Number of spots in each domain refined_domain_key: Optional[str] = ( None # Key for refined domains if refinement was applied ) embeddings_key: Optional[str] = ( None # Key in adata.obsm where embeddings are stored ) # Detailed statistics - excluded from MCP response statistics: dict[str, Any] = Field( default_factory=dict, exclude=True, # Exclude from JSON serialization to LLM ) class SpatialVariableGenesResult(BaseAnalysisResult): """Result of spatial variable genes identification. 
Note on serialization: To minimize MCP response size, detailed statistics are excluded from JSON serialization using Field(exclude=True). These fields are still stored in the Python object and saved to adata.var for downstream visualization and export. Access complete statistics via: - adata.var['spatialde_pval'], adata.var['spatialde_qval'] (SpatialDE) - adata.var['sparkx_pval'], adata.var['sparkx_qval'] (SPARK-X) """ data_id: str method: str # Method used for analysis # Summary statistics - always returned to LLM n_genes_analyzed: int # Total number of genes analyzed n_significant_genes: int # Total significant genes found (q < 0.05) # Top spatial genes - returned to LLM (truncated for token efficiency) spatial_genes: list[str] # Storage key for accessing full results in adata results_key: str # ============================================================ # Fields excluded from MCP response (stored in adata.var) # ============================================================ gene_statistics: dict[str, float] = Field( default_factory=dict, exclude=True, # Exclude from JSON serialization to LLM ) p_values: dict[str, float] = Field( default_factory=dict, exclude=True, ) q_values: dict[str, float] = Field( default_factory=dict, exclude=True, ) spatialde_results: Optional[dict[str, Any]] = Field( default=None, exclude=True, ) sparkx_results: Optional[dict[str, Any]] = Field( default=None, exclude=True, ) class CellCommunicationResult(BaseAnalysisResult): """Result of cell-cell communication analysis. All CCC results are stored in a unified structure at adata.uns["ccc"]. This model provides a summary for MCP response while full data is in adata. Note on serialization: To minimize MCP response size, detailed statistics are excluded. Access full results via adata.uns["ccc"]. Autocrine loops: Autocrine signaling occurs when source == target cell type. Automatically detected for cluster-based methods (LIANA cluster, CellPhoneDB, CellChat R, FastCCC). 
Not supported for spatial analysis. """ data_id: str method: str # "liana", "cellphonedb", "cellchat_r", "fastccc" species: str database: str analysis_type: str # "cluster" or "spatial" # LR pairs summary n_lr_pairs: int # Total LR pairs tested n_significant_pairs: int # Significant LR pairs top_lr_pairs: list[str] = Field(default_factory=list) # Format: "LIGAND_RECEPTOR" # Autocrine analysis (source == target) n_autocrine_loops: int = 0 top_autocrine_loops: list[str] = Field(default_factory=list) # Storage key (unified location) results_key: str = "ccc" # adata.uns["ccc"] # Detailed statistics - excluded from MCP response statistics: dict[str, Any] = Field( default_factory=dict, exclude=True, ) class EnrichmentResult(BaseAnalysisResult): """Result from gene set enrichment analysis Note on serialization: To minimize MCP response size (~12k tokens -> ~0.5k tokens), large dictionaries are excluded from JSON serialization using Field(exclude=True). These fields are still stored in the Python object and saved to adata.uns for downstream visualization. Fields included in MCP response (sent to LLM): - method, n_gene_sets, n_significant (basic info) - top_gene_sets, top_depleted_sets (top 10 pathway names) - spatial_scores_key (for spatial methods) Fields excluded from MCP response (stored in adata.uns): - enrichment_scores, pvalues, adjusted_pvalues (full dicts) - gene_set_statistics (detailed stats per pathway) - spatial_metrics (spatial autocorrelation data) """ # Basic information - always included in MCP response method: str # Method used (pathway_gsea, pathway_ora, etc.) 
n_gene_sets: int # Number of gene sets analyzed n_significant: int # Number of significant gene sets # Top results - always included (compact, just pathway names) top_gene_sets: list[str] # Top enriched gene sets (max 10) top_depleted_sets: list[str] # Top depleted gene sets (max 10) # Spatial info key - included spatial_scores_key: Optional[str] = None # Key in adata.obsm # ============================================================ # EXCLUDED FROM MCP RESPONSE - stored in adata.uns for viz # Full data available via visualize_data() tool # ============================================================ enrichment_scores: dict[str, float] = Field( default_factory=dict, exclude=True, # Exclude from JSON serialization to LLM ) pvalues: Optional[dict[str, float]] = Field( default=None, exclude=True, ) adjusted_pvalues: Optional[dict[str, float]] = Field( default=None, exclude=True, ) gene_set_statistics: dict[str, dict[str, Any]] = Field( default_factory=dict, exclude=True, ) spatial_metrics: Optional[dict[str, Any]] = Field( default=None, exclude=True, ) class CNVResult(BaseAnalysisResult): """Result of Copy Number Variation (CNV) analysis Note on serialization: For consistency with other result models, the statistics dict is excluded from JSON serialization. Key summary info is in explicit fields. 
Fields included in MCP response: - data_id, method, reference_key, reference_categories (basic info) - n_chromosomes, n_genes_analyzed (summary counts) - cnv_score_key (storage key) - visualization_available (status flag) Fields excluded from MCP response: - statistics (detailed CNV metrics) """ data_id: str method: str # Method used (e.g., "infercnvpy") reference_key: str # Column used for reference cells reference_categories: list[str] # Categories used as reference n_chromosomes: int # Number of chromosomes analyzed n_genes_analyzed: int # Number of genes analyzed cnv_score_key: Optional[str] = None # Key in adata.obsm (e.g., "X_cnv") visualization_available: bool = False # Whether visualization is available # Detailed statistics - excluded from MCP response statistics: Optional[dict[str, Any]] = Field( default=None, exclude=True, # Exclude from JSON serialization to LLM ) class DEGene(BaseAnalysisResult): """A single differentially expressed gene with statistics""" gene: str log2fc: float pvalue: float padj: float mean_expr_condition1: Optional[float] = None mean_expr_condition2: Optional[float] = None class CellTypeComparisonResult(BaseAnalysisResult): """Differential expression result for a single cell type""" cell_type: str n_cells_condition1: int n_cells_condition2: int n_samples_condition1: int n_samples_condition2: int n_significant_genes: int top_upregulated: list[DEGene] # Upregulated in condition1 top_downregulated: list[DEGene] # Downregulated in condition1 all_de_genes: list[DEGene] = Field( default_factory=list, exclude=True, # Exclude from MCP response to reduce size ) class ConditionComparisonResult(BaseAnalysisResult): """Result of multi-sample condition comparison analysis. 
Attributes: data_id: Dataset identifier method: Method used for differential expression comparison: Human-readable comparison string (e.g., "Treatment vs Control") condition_key: Column used for condition grouping condition1: First condition (experimental group) condition2: Second condition (reference group) sample_key: Column used for sample identification cell_type_key: Column used for cell type stratification (if provided) n_samples_condition1: Number of samples in condition1 n_samples_condition2: Number of samples in condition2 global_results: Results when no cell type stratification (cell_type_key=None) cell_type_results: Results stratified by cell type (when cell_type_key provided) results_key: Key in adata.uns where full results are stored statistics: Overall statistics about the comparison """ data_id: str method: str comparison: str condition_key: str condition1: str condition2: str sample_key: str cell_type_key: Optional[str] = None # Sample counts n_samples_condition1: int n_samples_condition2: int # Global results (when cell_type_key is None) global_n_significant: Optional[int] = None global_top_upregulated: Optional[list[DEGene]] = None global_top_downregulated: Optional[list[DEGene]] = None # Cell type stratified results (when cell_type_key is provided) cell_type_results: Optional[list[CellTypeComparisonResult]] = None # Storage keys results_key: str # Key in adata.uns for full results # Summary statistics statistics: dict[str, Any]

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cafferychen777/ChatSpatial'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

analysis.py•16.2 KiB