ols_with_spatial_diagnostics_safe
Perform spatial diagnostics and run Ordinary Least Squares (OLS) regression on geospatial datasets. Input shapefiles or GeoPackages, specify dependent and independent variables, and optionally define spatial weights for analysis.
Instructions
Safe MCP pipeline: Read shapefile, build/load W, convert numeric, check NaNs, run OLS.
Parameters:
- data_path: path to shapefile or GeoPackage
- y_field: dependent variable column name
- x_fields: list of independent variable column names
- weights_path: optional path to existing weights file (.gal or .gwt)
- weights_method: 'queen', 'rook', 'distance_band', or 'knn' (used if weights_path not provided)
- id_field: optional attribute name to use as observation IDs
- threshold: distance threshold; required if weights_method='distance_band'
- k: number of nearest neighbors; required if weights_method='knn'
- binary: True for binary weights (DistanceBand only)
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| binary | No | | |
| data_path | Yes | | |
| id_field | No | | |
| k | No | | |
| threshold | No | | |
| weights_method | No | | queen |
| weights_path | No | | |
| x_fields | Yes | | |
| y_field | Yes | | |
Implementation Reference
# src/gis_mcp/pysal_functions.py:636-734 (handler)
# The core handler function 'ols_with_spatial_diagnostics_safe' decorated with
# @gis_mcp.tool(). It loads geospatial data, constructs or loads spatial
# weights, performs OLS spatial lag regression using libpysal.model.ML_Lag,
# computes diagnostics like Moran's I on residuals, and returns structured
# results with error handling.
@gis_mcp.tool()
def ols_with_spatial_diagnostics_safe(
    data_path: str,
    y_field: str,
    x_fields: List[str],
    weights_path: Optional[str] = None,
    weights_method: str = "queen",
    id_field: Optional[str] = None,
    threshold: Optional[float] = None,
    k: Optional[int] = None,
    binary: bool = True
) -> Dict[str, Any]:
    """
    Safe MCP pipeline: read the dataset, build/load spatial weights W,
    convert the variables to numeric, check for NaNs, and run the regression.

    Parameters:
    - data_path: path to shapefile or GeoPackage
    - y_field: dependent variable column name
    - x_fields: list of independent variable column names (must not be empty)
    - weights_path: optional path to existing weights file (.gal or .gwt)
    - weights_method: 'queen', 'rook', 'distance_band', or 'knn'
      (used if weights_path not provided)
    - id_field: optional attribute name to use as observation IDs
    - threshold: required if weights_method='distance_band'
    - k: required if weights_method='knn'
    - binary: True for binary weights (DistanceBand only)

    Returns:
    A dict with "status" ("success" or "error") and "message". On success it
    also carries "result" with the keys n_obs, r2, std_error, betas,
    moran_residual and moran_pvalue. Every failure, including unexpected
    exceptions, is reported as a status="error" dict rather than raised.
    """
    try:
        # --- Step 1: Read data ---
        if not os.path.exists(data_path):
            return {"status": "error", "message": f"Data file not found: {data_path}"}

        gdf = gpd.read_file(data_path)
        if gdf.empty:
            return {"status": "error", "message": "Input file contains no features"}

        # --- Step 2: Extract and convert y and X ---
        if y_field not in gdf.columns:
            return {"status": "error", "message": f"Dependent variable '{y_field}' not found in dataset"}
        if not x_fields:
            return {"status": "error", "message": "At least one independent variable is required"}
        # Report only the columns that are actually absent, not the whole list.
        missing_x = [xf for xf in x_fields if xf not in gdf.columns]
        if missing_x:
            return {"status": "error", "message": f"Independent variable(s) {missing_x} not found in dataset"}

        # These arrays are only used to validate the inputs; the model below
        # is fitted from the dataframe columns themselves.
        y = gdf[[y_field]].astype(float).values
        X = gdf[x_fields].astype(float).values

        # --- Step 3: Check for NaNs or infinite values ---
        if not np.all(np.isfinite(y)):
            return {"status": "error", "message": "Dependent variable contains NaN or infinite values"}
        if not np.all(np.isfinite(X)):
            return {"status": "error", "message": "Independent variables contain NaN or infinite values"}

        # --- Step 4: Load or build weights ---
        if weights_path:
            if not os.path.exists(weights_path):
                return {"status": "error", "message": f"Weights file not found: {weights_path}"}
            w = libpysal.open(weights_path).read()
        else:
            wm = weights_method.lower()
            if wm == "queen":
                w = libpysal.weights.Queen.from_dataframe(gdf, idVariable=id_field)
            elif wm == "rook":
                w = libpysal.weights.Rook.from_dataframe(gdf, idVariable=id_field)
            elif wm in ("distance_band", "knn"):
                if wm == "distance_band" and threshold is None:
                    return {"status": "error", "message": "Threshold is required for distance_band"}
                if wm == "knn" and k is None:
                    return {"status": "error", "message": "k is required for knn"}

                # Coordinates are only needed for the distance-based methods.
                # Building them up front broke queen/rook on polygon layers,
                # because polygons expose no .x/.y. Using each geometry's
                # centroid keeps point inputs unchanged (a point's centroid is
                # the point itself) and also supports lines and polygons.
                coords = [(geom.centroid.x, geom.centroid.y) for geom in gdf.geometry]
                ids = gdf[id_field].tolist() if id_field and id_field in gdf.columns else None

                if wm == "distance_band":
                    w = libpysal.weights.DistanceBand(coords, threshold=threshold, binary=binary, ids=ids)
                else:
                    w = libpysal.weights.KNN(coords, k=k, ids=ids)
            else:
                return {"status": "error", "message": f"Unsupported weights method: {weights_method}"}

        w.transform = "r"  # Row-standardize for regression

        # --- Step 5: Fit OLS with spatial diagnostics ---
        # NOTE(review): `libpysal.model` does not appear among libpysal's
        # documented subpackages; PySAL's regression models (OLS, ML_Lag, ...)
        # are published in the separate `spreg` package, and ML_Lag is a
        # maximum-likelihood spatial lag model rather than plain OLS. Confirm
        # this call resolves in the deployed environment; if it does not, every
        # request ends in the generic error branch below.
        ols_model = libpysal.model.ML_Lag.from_dataframe(
            gdf, y_field, x_fields, w=w, name_y=y_field, name_x=x_fields
        )

        # --- Step 6: Collect results ---
        # Look the Moran statistic up once; it is optional on the model object.
        moran_res = getattr(ols_model, "moran_res", None)
        beta_names = ols_model.name_x + [ols_model.name_y]
        results = {
            "n_obs": ols_model.n,
            "r2": float(ols_model.r2),
            "std_error": ols_model.std_err.tolist(),
            "betas": {
                name: float(beta)
                for name, beta in zip(beta_names, ols_model.betas.flatten())
            },
            "moran_residual": float(moran_res[0]) if moran_res is not None else None,
            "moran_pvalue": float(moran_res[1]) if moran_res is not None else None,
        }

        return {
            "status": "success",
            "message": "OLS regression with spatial diagnostics completed successfully",
            "result": results
        }

    except Exception as e:
        # Top-level tool boundary: report the failure to the caller as data
        # instead of letting the exception escape the MCP handler.
        logger.error("Error in ols_with_spatial_diagnostics_safe: %s", e)
        return {"status": "error", "message": f"Failed to run OLS regression: {str(e)}"}