# Statsource MCP Server
#
# by jamie7893
# Verified
"""MCP (Message Control Protocol) Server for Statistical Analysis.

This module implements an MCP server that acts as a middleware between
clients (like the Claude Desktop app) and our existing API. It runs
independently and forwards requests to the API whose URL is configurable
via environment variables.
"""
import os
import json
import logging
import requests
from typing import Dict, List, Any, Optional, Union
from pydantic import BaseModel, Field, field_validator
from mcp.server.fastmcp import FastMCP
from datetime import datetime

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("mcp_server_stats")

# Read API location from environment variable with a default fallback.
# BUGFIX: this was previously hard-coded, contradicting this comment and
# making the URL non-configurable.
API_URL = os.getenv("API_URL", "https://api.statsource.me")
API_KEY = os.getenv("API_KEY", None)  # Optional API key for authentication

# Database connection string from environment variable
DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING", None)
# Default to database if not specified
DB_SOURCE_TYPE = os.getenv("DB_SOURCE_TYPE", "database")

# Initialize MCP server
mcp = FastMCP("ai_mcp_server")


# Define input models for data validation
class StatisticsRequest(BaseModel):
    """Request model for statistical operations."""

    # Statistical operation to perform; validated against the whitelist below.
    operation: str = Field(..., description="Statistical operation to perform (mean, median, sum, etc.)")
    # Numeric data points to operate on; must be non-empty.
    data: List[float] = Field(..., description="List of numeric data points")

    @field_validator('operation')
    @classmethod
    def validate_operation(cls, v: str) -> str:
        """Normalize the operation to lowercase and reject unknown names."""
        valid_operations = ['mean', 'median', 'sum', 'min', 'max', 'std', 'var', 'count']
        if v.lower() not in valid_operations:
            raise ValueError(f"Operation must be one of {valid_operations}")
        return v.lower()

    @field_validator('data')
    @classmethod
    def validate_data(cls, v: List[float]) -> List[float]:
        """Reject empty data lists."""
        if not v:
            raise ValueError("Data list cannot be empty")
        return v


# Helper function to check if API is available
def is_api_available() -> bool:
    """Check if the API is available.

    Returns:
        bool: True if API is available, False otherwise
    """
    try:
        # Try to connect to the base URL
        response = requests.get(API_URL, timeout=5)
        # Consider 2xx, 3xx, 4xx as "available" — only server errors or
        # connection failures count as unavailable.
        return response.status_code < 500
    except requests.RequestException:
        return False


# Helper function to make API calls
def call_api(endpoint: str, data: Dict[str, Any], params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Make a request to the API.

    Args:
        endpoint: API endpoint path (without base URL)
        data: Request payload
        params: URL query parameters

    Returns:
        API response as dictionary; on request failure a dict with a single
        "error" key describing the failure.

    Raises:
        Exception: If the API is unreachable (availability pre-check fails).
    """
    # Check if API is available first
    if not is_api_available():
        raise Exception(f"API at {API_URL} is not available")

    headers = {"Content-Type": "application/json"}
    # Add authentication if API key is provided
    if API_KEY:
        headers["API-Key"] = API_KEY

    try:
        logger.info(f"Calling API endpoint: {endpoint}")
        response = requests.post(f"{API_URL}{endpoint}", json=data, headers=headers, params=params)
        response.raise_for_status()  # Raise exception for 4XX/5XX responses
        return response.json()
    except requests.RequestException as e:
        logger.error(f"API request failed: {str(e)}")
        # HTTP errors carry a response we can report; pure connection errors do not.
        if hasattr(e, 'response') and e.response is not None:
            error_text = e.response.text
            status_code = e.response.status_code
            return {"error": f"API request failed with status {status_code}: {error_text}"}
        else:
            error_text = str(e)
            return {"error": f"API request failed: {error_text}"}


# Define MCP tools
@mcp.tool()
def suggest_feature(description: str, use_case: str, priority: str = "medium") -> str:
    """
    Suggest a new feature or improvement for the StatSource analytics platform.

    ### What this tool does:
    This tool allows you to submit feature suggestions or enhancement requests
    for the StatSource platform. Suggestions are logged and reviewed by the
    development team.

    ### When to use this tool:
    - When a user asks for functionality that doesn't currently exist
    - When you identify gaps or limitations in the current analytics capabilities
    - When a user expresses frustration about missing capabilities
    - When you think of enhancements that would improve the user experience

    ### Required inputs:
    - description: A clear, detailed description of the suggested feature
    - use_case: Explanation of how and why users would use this feature

    ### Optional inputs:
    - priority: Suggested priority level ("low", "medium", "high")

    ### Returns:
    A confirmation message and reference ID for the feature suggestion.
    """
    try:
        # Format the request
        suggestion_data = {
            "description": description,
            "use_case": use_case,
            "priority": priority,
            "source": "ai_agent"
        }

        # Call the feature suggestion endpoint
        endpoint = "/api/v1/feature_suggestions"
        response = call_api(endpoint, suggestion_data)

        if "error" in response:
            return f"Error: {response['error']}"

        # Format the response
        suggestion_id = response.get("id", "unknown")
        return json.dumps({
            "status": "received",
            "message": "Thank you for your feature suggestion. Our team will review it.",
            "suggestion_id": f"FEAT-{suggestion_id}"
        }, indent=2)
    except Exception as e:
        return f"Error submitting feature suggestion: {str(e)}"


@mcp.tool()
def get_statistics(data_source: Optional[str] = None,
                   source_type: Optional[str] = None,
                   table_name: Optional[str] = None,
                   columns: Optional[List[str]] = None,
                   statistics: Optional[List[str]] = None,
                   query_type: str = "statistics",
                   periods: Optional[int] = None) -> str:
    """
    Analyze data and calculate statistics or generate ML predictions based on provided parameters.

    ### What this tool does:
    This tool connects to our analytics API and provides two main functionalities:
    1. Statistical Analysis: Calculate various statistical measures on specified data columns
    2. ML Predictions: Generate time-series forecasts for future periods based on historical data

    ### IMPORTANT INSTRUCTIONS FOR AI AGENTS:
    - DO NOT make up or guess any parameter values, especially data sources or column names
    - NEVER, UNDER ANY CIRCUMSTANCES, create or invent database connection strings - this is a severe security risk
    - ALWAYS ask the user explicitly for all required information
    - For CSV files: The user MUST first upload their file to statsource.me, then provide the filename
    - For database connections: Ask the user for their exact PostgreSQL connection string - DO NOT GUESS OR MODIFY IT
    - For database sources: You MUST provide the table_name parameter with the exact table name
    - Never suggest default values, sample data, or example parameters - request specific information from the user
    - If the user has configured a default database connection in their MCP config, inform them it will be used if they don't specify a data source
    - If no database connection is provided in the MCP config and the user doesn't provide one, DO NOT PROCEED - ask user to provide connection details

    ### When to use this tool:
    - When a user needs statistical analysis of their data (means, medians, etc.)
    - When a user wants to predict future values based on historical trends
    - When analyzing trends, patterns, or distributions in datasets
    - When generating forecasts for business planning or decision-making

    ### Required inputs:
    - columns: List of column names to analyze or predict (ask user for exact column names in their data)

    ### Optional inputs:
    - data_source: Path to data file, database connection string, or API endpoint
        * For CSV: Filename of a previously uploaded file on statsource.me (ask user to upload first)
        * For Database: Full connection string (ask user for exact string)
        * If not provided, will use the connection string from MCP config if available
    - source_type: Type of data source ("csv", "database", or "api")
        * If not provided, will use the source type from MCP config if available
    - table_name: Name of the database table to use (REQUIRED for database sources)
        * Must be provided when source_type is "database"
        * Ask user for the exact table name in their database
    - statistics: List of statistics to calculate (only required for statistical analysis)
    - query_type: Type of query ("statistics" or "ml_prediction")
    - periods: Number of future periods to predict (only used for ML predictions)

    ### Valid statistics options:
    - Basic (free tier): "mean", "median", "min", "max", "count", "sum", "std", "var"
    - Advanced (paid tier): "skewness", "kurtosis", "percentile", "histogram", "correlation"

    ### ML Prediction features:
    - Time series forecasting with customizable prediction periods
    - Trend direction analysis ("increasing", "decreasing", "stable")
    - Model quality metrics (r-squared, slope)
    - Works with numeric data columns from any supported data source

    ### Returns:
    For statistics queries:
    - Statistical measures for each requested column and statistic
    For ML prediction queries:
    - Predicted future values for specified columns
    - Trend direction and model quality metrics
    - R-squared value and slope indicators

    ### Examples of QUESTIONS to ask users (DO NOT use these as defaults):
    1. "Have you already uploaded your CSV file to statsource.me? What is the filename?"
    2. "What is your exact PostgreSQL connection string?" (if not configured in MCP config)
    3. "Which specific columns in your data would you like to analyze?"
    4. "Which statistics would you like to calculate for these columns?"
    5. "How many future periods would you like to predict?"
    6. "What is the exact name of the table in your database that contains this data?"

    ### Configuration:
    Users can set a default database connection string in their MCP config:
    ```json
    {
        "mcpServers": {
            "statsource": {
                "command": "python",
                "args": ["path/to/mcp_server.py"],
                "env": {
                    "API_KEY": "your_api_key",
                    "DB_CONNECTION_STRING": "postgresql://username:password@localhost:5432/your_db",
                    "DB_SOURCE_TYPE": "database"
                }
            }
        }
    }
    ```
    """
    try:
        # BUGFIX: default was a mutable list ([]); use a None sentinel instead.
        columns = columns if columns is not None else []

        # Use connection string from config if available and none was provided
        if data_source is None and DB_CONNECTION_STRING is not None:
            data_source = DB_CONNECTION_STRING
            if source_type is None:
                source_type = DB_SOURCE_TYPE

        # Effective source type falls back to the configured default.
        # BUGFIX: the statistics branch previously checked only the raw
        # source_type while the ml_prediction branch also honoured
        # DB_SOURCE_TYPE — the table_name requirement is now enforced
        # consistently for both query types.
        effective_source_type = source_type or DB_SOURCE_TYPE

        # Check if we have the minimum required data
        if not columns:
            return json.dumps({
                "error": "No columns specified. Please provide column names to analyze."
            }, indent=2)

        # Validate that table_name is provided for database sources
        if effective_source_type == "database" and not table_name:
            return json.dumps({
                "error": "Table name is required for database sources. Please specify the table_name parameter."
            }, indent=2)

        # query_params is only used for ML predictions
        query_params: Optional[Dict[str, Any]] = None

        # Format the request based on the query type
        if query_type == "statistics":
            if not statistics:
                return json.dumps({
                    "error": "No statistics specified. Please provide a list of statistics to calculate."
                }, indent=2)

            # Prepare statistics request (data_source was already defaulted above)
            request_data = {
                "data_source": data_source,
                "source_type": effective_source_type,
                "columns": columns,
                "statistics": statistics,
                "query_type": query_type
            }

            # Add table_name for database sources
            if effective_source_type == "database" and table_name:
                request_data["table_name"] = table_name

            # Call the statistics endpoint
            endpoint = "/api/v1/get_statistics"

        elif query_type == "ml_prediction":
            # Convert periods to int if it's a string
            if isinstance(periods, str):
                try:
                    periods = int(periods)
                except ValueError:
                    return json.dumps({
                        "error": "Invalid prediction periods. Must be a valid integer."
                    }, indent=2)

            if not periods or periods <= 0:
                return json.dumps({
                    "error": "Invalid prediction periods. Please provide a positive number of periods to predict."
                }, indent=2)

            # Prepare ML prediction request
            request_data = {
                "data_source": data_source,
                "source_type": effective_source_type,
                "columns": columns
            }

            # Add table_name for database sources
            if effective_source_type == "database" and table_name:
                request_data["table_name"] = table_name

            # Set up query parameters for ML prediction
            query_params = {
                "query_type": "ml_prediction",  # API expects lowercase
                "periods": periods
            }

            # Call the statistics endpoint for ML prediction as well
            endpoint = "/api/v1/get_statistics"

        else:
            return json.dumps({
                "error": f"Invalid query type: {query_type}. Must be 'statistics' or 'ml_prediction'."
            }, indent=2)

        # Call the API and return the response
        response = call_api(endpoint, request_data, query_params)

        if "error" in response:
            return json.dumps({"error": response["error"]}, indent=2)

        # Return formatted response
        return json.dumps(response, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Error getting statistics: {str(e)}"}, indent=2)


def run_server():
    """Run the MCP server."""
    logger.info("Starting MCP Server for statistics...")
    mcp.run()


if __name__ == "__main__":
    run_server()