Skip to main content
Glama

join_counts

Calculate spatial autocorrelation for binary variables in shapefiles to identify clustering patterns using join count statistics.

Instructions

Global Binary Join Counts.

Input Schema

TableJSON Schema
NameRequiredDescriptionDefault
shapefile_pathYes
dependent_varNoLAND_USE
target_crsNoEPSG:4326
distance_thresholdNo

Implementation Reference

  • Main handler for the 'join_counts' MCP tool. Loads shapefile, creates spatial weights, computes global binary join counts using esda.Join_Counts, and returns statistics including join counts, expected, variance, z-score, p-value, and data preview.
    @gis_mcp.tool()
    def join_counts(shapefile_path: str, dependent_var: str = "LAND_USE", target_crs: str = "EPSG:4326",
                    distance_threshold: float = 100000) -> Dict[str, Any]:
        """Global Binary Join Counts."""
        gdf, y, w, (threshold, unit), err = pysal_load_data(shapefile_path, dependent_var, target_crs, distance_threshold)
        if err:
            return {"status": "error", "message": err}
    
        # Join counts requires binary/categorical data - user must ensure y is binary (0/1 or True/False)
        import esda
        stat = esda.Join_Counts(y, w)
        preview = gdf[['geometry', dependent_var]].head(5).copy()
        preview['geometry'] = preview['geometry'].apply(lambda g: g.wkt)
    
        # Join_Counts attributes: J (total joins), bb, ww, bw, etc.
        join_count_val = None
        if hasattr(stat, "J"):
            join_count_val = float(stat.J)
        elif hasattr(stat, "jc"):
            join_count_val = float(stat.jc)
        elif hasattr(stat, "join_count"):
            join_count_val = float(stat.join_count)
        
        # Handle expected, variance, z_score - these might be DataFrames or scalars
        def safe_float(val):
            """Convert value to float, handling DataFrames and numpy types."""
            if val is None:
                return None
            if isinstance(val, pd.DataFrame):
                # If it's a DataFrame, extract the first value
                return float(val.iloc[0, 0]) if not val.empty else None
            if isinstance(val, (np.ndarray, list, tuple)):
                return float(val[0]) if len(val) > 0 else None
            try:
                return float(val)
            except (ValueError, TypeError):
                return None
        
        expected_val = getattr(stat, "expected", None)
        variance_val = getattr(stat, "variance", None)
        z_score_val = getattr(stat, "z_score", None)
        p_val = None
        if hasattr(stat, "p_value"):
            p_val = safe_float(stat.p_value)
        elif hasattr(stat, "p_sim"):
            p_val = safe_float(stat.p_sim)
        
        return {
            "status": "success",
            "message": f"Join Counts completed successfully (threshold: {threshold} {unit})",
            "result": {
                "join_counts": join_count_val,
                "expected": safe_float(expected_val),
                "variance": safe_float(variance_val),
                "z_score": safe_float(z_score_val),
                "p_value": p_val,
                "data_preview": preview.to_dict(orient="records")
            }
        }
  • Shared helper function called by 'join_counts' (and other ESDA tools) to load GeoDataFrame, validate inputs, reproject, compute effective distance threshold, create row-standardized DistanceBand weights, and handle islands by zeroing their weights.
    def pysal_load_data(shapefile_path: str, dependent_var: str, target_crs: str, distance_threshold: float):
        """Common loader and weight creation for esda statistics."""
        if not os.path.exists(shapefile_path):
            return None, None, None, None, f"Shapefile not found: {shapefile_path}"
    
        gdf = gpd.read_file(shapefile_path)
        if dependent_var not in gdf.columns:
            return None, None, None, None, f"Dependent variable '{dependent_var}' not found in shapefile columns"
    
        gdf = gdf.to_crs(target_crs)
    
        effective_threshold = distance_threshold
        unit = "meters"
        if target_crs.upper() == "EPSG:4326":
            effective_threshold = distance_threshold / 111000
            unit = "degrees"
    
        y = gdf[dependent_var].values.astype(np.float64)
        import libpysal
        w = libpysal.weights.DistanceBand.from_dataframe(gdf, threshold=effective_threshold, binary=False)
        w.transform = 'r'
    
        for island in w.islands:
            w.weights[island] = [0] * len(w.weights[island])
            w.cardinalities[island] = 0
    
        return gdf, y, w, (effective_threshold, unit), None
  • MCP resource that lists available ESDA spatial analysis tools, including 'join_counts', discoverable via gis://operations/esda.
    @gis_mcp.resource("gis://operations/esda")
    def get_spatial_operations() -> Dict[str, List[str]]:
        """List available spatial analysis operations. This is for esda library. They are using pysal library."""
        return {
            "operations": [
                "getis_ord_g",
                "morans_i",
                "gearys_c",
                "gamma_statistic",
                "moran_local",
                "getis_ord_g_local",
                "join_counts",
                "join_counts_local",
                "adbscan"
            ]
        }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mahdin75/gis-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server