@arizeai/phoenix-mcp

Official

Overview Schema Related Servers Score Discussions

phoenix
scripts
llm_token_pricing_tables

litellm_model_prices.py•4.91 KiB

"""Script to process and analyze LLM model pricing data from LiteLLM.

This script fetches model pricing information from LiteLLM's pricing JSON file,
processes it into a structured format, and creates two CSV files:
1. litellm_model_prices.csv - Contains the full model pricing information
2. litellm_model_prices_by_token_type.csv - Contains a token-type focused view of the pricing data

The script handles nested JSON data by flattening it into a tabular format,
making it easier to analyze and compare different model pricing structures.
"""

from typing import Any
from urllib.parse import urlparse

import pandas as pd
import requests


def flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = "_") -> dict[str, Any]:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``. Values that are not dictionaries (including lists) are copied
    through unchanged.

    Args:
        d: Dictionary to flatten
        parent_key: Prefix joined (via ``sep``) in front of every key of ``d``;
            an empty prefix leaves the keys of ``d`` as they are
        sep: Separator placed between key segments

    Returns:
        New dictionary without dict values. An empty nested dictionary
        contributes no keys, and when two paths produce the same flattened
        key the later value wins.
    """
    flat: dict[str, Any] = {}
    for key, value in d.items():
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the accumulated path as the prefix for the children.
        flat.update(flatten_dict(value, path, sep=sep))
    return flat


def process_model_prices(url: str, timeout: float = 30.0) -> pd.DataFrame:
    """Process the model prices JSON file from a URL into a pandas DataFrame.

    The payload is expected to be a JSON object mapping model names to
    (possibly nested) dictionaries of model attributes. Each model becomes one
    row; nested attributes are flattened into underscore-joined column names.

    Args:
        url: URL to fetch the JSON data from
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``

    Returns:
        DataFrame containing the processed model prices, with ``model_name`` as
        the first column, sorted by ``litellm_provider`` and ``model_name``
        (only by the columns that are present). An empty payload yields an
        empty DataFrame with ``model_name`` and ``litellm_provider`` columns.

    Raises:
        ValueError: If URL is invalid
        requests.RequestException: If request fails or returns an error status
        ValueError: If JSON parsing fails or the payload is not a JSON object
    """
    # Validate URL before touching the network. urlparse itself can raise on
    # malformed input (e.g. an unterminated IPv6 literal) or non-string values.
    try:
        parsed = urlparse(url)
    except (AttributeError, TypeError, ValueError) as e:
        raise ValueError(f"Invalid URL: {e}") from e
    if not (parsed.scheme and parsed.netloc):
        raise ValueError("Invalid URL: Invalid URL format")

    # Fetch the JSON data from URL. A timeout is required: without one,
    # requests waits indefinitely on an unresponsive server.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        # Keep the original request/response attached for callers that inspect them.
        raise requests.RequestException(
            f"Failed to fetch data: {e}",
            request=e.request,
            response=e.response,
        ) from e

    # Parse separately from the fetch: the JSON decode error raised by recent
    # requests versions is also a RequestException, so a shared try block
    # would report it as a fetch failure instead of a parse failure.
    try:
        data = response.json()
    except ValueError as e:
        raise ValueError(f"Failed to parse JSON: {e}") from e
    if not isinstance(data, dict):
        raise ValueError(
            f"Failed to parse JSON: expected an object of models, got {type(data).__name__}"
        )

    # Remove the sample_spec entry as it's just documentation
    data.pop("sample_spec", None)

    # Flatten each model's nested attributes into one row and record its name.
    # Entries whose value is not a dictionary cannot be flattened and are skipped.
    processed_data = [
        {**flatten_dict(model_info), "model_name": model_name}
        for model_name, model_info in data.items()
        if isinstance(model_info, dict)
    ]

    # Nothing to tabulate: return an empty frame that still carries the
    # identifying columns instead of failing on column selection below.
    if not processed_data:
        return pd.DataFrame(columns=["model_name", "litellm_provider"])

    df = pd.DataFrame(processed_data)

    # Reorder columns to put model_name first
    cols = ["model_name"] + [col for col in df.columns if col != "model_name"]
    df = df[cols]

    # Sort by litellm_provider and model_name; tolerate a payload in which no
    # model declares a provider (the column would then be absent).
    sort_keys = [key for key in ("litellm_provider", "model_name") if key in df.columns]
    return df.sort_values(sort_keys)


def create_token_type_df(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape the wide price table into a long, one-cost-per-row table.

    Every column whose name contains ``_cost_per_`` or ends with ``_cost`` is
    treated as a cost column. Each (model, cost column) pair that has a value
    becomes one output row.

    Args:
        df: Input DataFrame containing model prices; must include the
            ``model_name`` and ``litellm_provider`` columns

    Returns:
        DataFrame with the columns ``litellm_provider``, ``model_name``,
        ``unit_type`` (the source cost column name) and ``unit_cost``, sorted
        in that column order, with missing costs dropped
    """
    id_cols = ["model_name", "litellm_provider"]

    # Cost columns: names containing '_cost_per_' or ending in '_cost'.
    price_cols = [name for name in df.columns if "_cost_per_" in name or name.endswith("_cost")]

    long_df = (
        df[id_cols + price_cols]
        .copy()
        # Wide -> long: one row per model and cost column.
        .melt(
            id_vars=id_cols,
            value_vars=price_cols,
            var_name="unit_type",
            value_name="unit_cost",
        )
        # Models that do not define a given cost produce missing values; drop them.
        .dropna(subset=["unit_cost"])
        # Provider first, then model, then the cost name and its value.
        .loc[:, ["litellm_provider", "model_name", "unit_type", "unit_cost"]]
        .sort_values(["litellm_provider", "model_name", "unit_type"])
    )
    return long_df


if __name__ == "__main__":
    # URL for the model prices JSON file
    url = "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"

    # Process the file
    df = process_model_prices(url)

    # Display basic information about the DataFrame.
    # DataFrame.info() writes its report to stdout itself and returns None, so
    # it must not be wrapped in print() (that would emit a stray "None" line).
    print("\nDataFrame Info:")
    df.info()

    # Save to CSV for easy viewing
    df.to_csv("litellm_model_prices.csv", index=False)
    print("\nData has been saved to 'litellm_model_prices.csv'")

    # Create and save the token type version
    df_token_type = create_token_type_df(df)
    df_token_type.to_csv("litellm_model_prices_by_token_type.csv", index=False)
    print("\nToken type data has been saved to 'litellm_model_prices_by_token_type.csv'")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

litellm_model_prices.py•4.91 KiB

"""Script to process and analyze LLM model pricing data from LiteLLM.

This script fetches model pricing information from LiteLLM's pricing JSON file,
processes it into a structured format, and creates two CSV files:
1. litellm_model_prices.csv - Contains the full model pricing information
2. litellm_model_prices_by_token_type.csv - Contains a token-type focused view of the pricing data

The script handles nested JSON data by flattening it into a tabular format,
making it easier to analyze and compare different model pricing structures.
"""

from typing import Any
from urllib.parse import urlparse

import pandas as pd
import requests


def flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = "_") -> dict[str, Any]:
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``. Values that are not dictionaries (including lists) are copied
    through unchanged.

    Args:
        d: Dictionary to flatten
        parent_key: Prefix joined (via ``sep``) in front of every key of ``d``;
            an empty prefix leaves the keys of ``d`` as they are
        sep: Separator placed between key segments

    Returns:
        New dictionary without dict values. An empty nested dictionary
        contributes no keys, and when two paths produce the same flattened
        key the later value wins.
    """
    flat: dict[str, Any] = {}
    for key, value in d.items():
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the accumulated path as the prefix for the children.
        flat.update(flatten_dict(value, path, sep=sep))
    return flat


def process_model_prices(url: str, timeout: float = 30.0) -> pd.DataFrame:
    """Process the model prices JSON file from a URL into a pandas DataFrame.

    The payload is expected to be a JSON object mapping model names to
    (possibly nested) dictionaries of model attributes. Each model becomes one
    row; nested attributes are flattened into underscore-joined column names.

    Args:
        url: URL to fetch the JSON data from
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``

    Returns:
        DataFrame containing the processed model prices, with ``model_name`` as
        the first column, sorted by ``litellm_provider`` and ``model_name``
        (only by the columns that are present). An empty payload yields an
        empty DataFrame with ``model_name`` and ``litellm_provider`` columns.

    Raises:
        ValueError: If URL is invalid
        requests.RequestException: If request fails or returns an error status
        ValueError: If JSON parsing fails or the payload is not a JSON object
    """
    # Validate URL before touching the network. urlparse itself can raise on
    # malformed input (e.g. an unterminated IPv6 literal) or non-string values.
    try:
        parsed = urlparse(url)
    except (AttributeError, TypeError, ValueError) as e:
        raise ValueError(f"Invalid URL: {e}") from e
    if not (parsed.scheme and parsed.netloc):
        raise ValueError("Invalid URL: Invalid URL format")

    # Fetch the JSON data from URL. A timeout is required: without one,
    # requests waits indefinitely on an unresponsive server.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        # Keep the original request/response attached for callers that inspect them.
        raise requests.RequestException(
            f"Failed to fetch data: {e}",
            request=e.request,
            response=e.response,
        ) from e

    # Parse separately from the fetch: the JSON decode error raised by recent
    # requests versions is also a RequestException, so a shared try block
    # would report it as a fetch failure instead of a parse failure.
    try:
        data = response.json()
    except ValueError as e:
        raise ValueError(f"Failed to parse JSON: {e}") from e
    if not isinstance(data, dict):
        raise ValueError(
            f"Failed to parse JSON: expected an object of models, got {type(data).__name__}"
        )

    # Remove the sample_spec entry as it's just documentation
    data.pop("sample_spec", None)

    # Flatten each model's nested attributes into one row and record its name.
    # Entries whose value is not a dictionary cannot be flattened and are skipped.
    processed_data = [
        {**flatten_dict(model_info), "model_name": model_name}
        for model_name, model_info in data.items()
        if isinstance(model_info, dict)
    ]

    # Nothing to tabulate: return an empty frame that still carries the
    # identifying columns instead of failing on column selection below.
    if not processed_data:
        return pd.DataFrame(columns=["model_name", "litellm_provider"])

    df = pd.DataFrame(processed_data)

    # Reorder columns to put model_name first
    cols = ["model_name"] + [col for col in df.columns if col != "model_name"]
    df = df[cols]

    # Sort by litellm_provider and model_name; tolerate a payload in which no
    # model declares a provider (the column would then be absent).
    sort_keys = [key for key in ("litellm_provider", "model_name") if key in df.columns]
    return df.sort_values(sort_keys)


def create_token_type_df(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape the wide price table into a long, one-cost-per-row table.

    Every column whose name contains ``_cost_per_`` or ends with ``_cost`` is
    treated as a cost column. Each (model, cost column) pair that has a value
    becomes one output row.

    Args:
        df: Input DataFrame containing model prices; must include the
            ``model_name`` and ``litellm_provider`` columns

    Returns:
        DataFrame with the columns ``litellm_provider``, ``model_name``,
        ``unit_type`` (the source cost column name) and ``unit_cost``, sorted
        in that column order, with missing costs dropped
    """
    id_cols = ["model_name", "litellm_provider"]

    # Cost columns: names containing '_cost_per_' or ending in '_cost'.
    price_cols = [name for name in df.columns if "_cost_per_" in name or name.endswith("_cost")]

    long_df = (
        df[id_cols + price_cols]
        .copy()
        # Wide -> long: one row per model and cost column.
        .melt(
            id_vars=id_cols,
            value_vars=price_cols,
            var_name="unit_type",
            value_name="unit_cost",
        )
        # Models that do not define a given cost produce missing values; drop them.
        .dropna(subset=["unit_cost"])
        # Provider first, then model, then the cost name and its value.
        .loc[:, ["litellm_provider", "model_name", "unit_type", "unit_cost"]]
        .sort_values(["litellm_provider", "model_name", "unit_type"])
    )
    return long_df


if __name__ == "__main__":
    # URL for the model prices JSON file
    url = "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"

    # Process the file
    df = process_model_prices(url)

    # Display basic information about the DataFrame.
    # DataFrame.info() writes its report to stdout itself and returns None, so
    # it must not be wrapped in print() (that would emit a stray "None" line).
    print("\nDataFrame Info:")
    df.info()

    # Save to CSV for easy viewing
    df.to_csv("litellm_model_prices.csv", index=False)
    print("\nData has been saved to 'litellm_model_prices.csv'")

    # Create and save the token type version
    df_token_type = create_token_type_df(df)
    df_token_type.to_csv("litellm_model_prices_by_token_type.csv", index=False)
    print("\nToken type data has been saved to 'litellm_model_prices_by_token_type.csv'")