Skip to main content
Glama

@arizeai/phoenix-mcp

Official
by Arize-ai
plotresults.py (13.8 kB)
# type:ignore
import os
from typing import Callable, Dict, Tuple

import pandas as pd
from matplotlib import pyplot as plt  # type:ignore

# Nesting shared by every plotting function: chunk size -> method -> k -> frame.
_AllData = Dict[int, Dict[str, Dict[int, pd.DataFrame]]]

# Methods that the retrieval-quality plots (MRR, NDCG, MACP, precision) skip.
_SKIPPED_METHODS: Tuple[str, ...] = ("multistep",)


def remove_all_zeros_rows(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return the rows of ``df`` where at least one precision column is non-zero.

    The columns inspected are ``context_precision_at_1`` through
    ``context_precision_at_4``.

    :param df: Frame containing the four ``context_precision_at_*`` columns.
    :return: The subset of rows in which any of those columns differs from 0.
    """
    precision_columns = [
        "context_precision_at_1",
        "context_precision_at_2",
        "context_precision_at_3",
        "context_precision_at_4",
    ]
    return df[(df[precision_columns] != 0).any(axis=1)]


def _check_not_empty(all_data: _AllData) -> None:
    """Raise a descriptive ``ValueError`` when there is nothing to plot."""
    if not all_data:
        raise ValueError("all_data must contain at least one chunk size")


def _finish_figure(fig, output_path: str, show: bool) -> None:
    """Save ``fig`` to ``output_path`` and then either display or close it."""
    fig.savefig(output_path)
    if show:
        plt.show()
    else:
        plt.close(fig)


def _plot_metric_grid(
    all_data: _AllData,
    metric: Callable[[pd.DataFrame, int], float],
    series_name: str,
    y_label: str,
    y_max: float,
    output_path: str,
    show: bool,
    remove_zero: bool = False,
    skip_methods: Tuple[str, ...] = (),
) -> None:
    """
    Draw one bar chart per (chunk size, method) with one bar per ``k``.

    Rows of the subplot grid are the sorted chunk sizes and columns are the
    sorted methods of each chunk size.

    :param all_data: Data grouped by chunk size, then method, then ``k``.
    :param metric: Called as ``metric(df, k)``; returns the bar height.
    :param series_name: Column name of the plotted frame (used as legend label).
    :param y_label: Y-axis label for every subplot.
    :param y_max: Upper Y limit shared by all subplots; ignored when not positive.
    :param output_path: File the finished figure is written to.
    :param show: Display the figure when True, otherwise close it.
    :param remove_zero: Filter each frame with ``remove_all_zeros_rows`` first.
    :param skip_methods: Methods whose subplot is titled but left without bars.
    :raises ValueError: If ``all_data`` is empty.
    """
    _check_not_empty(all_data)
    n_rows = len(all_data)
    n_cols = max(len(method_data) for method_data in all_data.values())

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

    for i, (chunk_size, method_data) in enumerate(sorted(all_data.items())):
        for j, (method, k_data) in enumerate(sorted(method_data.items())):
            ax = axes[i][j]

            values = {}
            if method not in skip_methods:
                for k, df in k_data.items():
                    if remove_zero:
                        df = remove_all_zeros_rows(df)
                    values[k] = metric(df, k)

            # An empty frame cannot be bar-plotted by pandas, so a skipped
            # method (or a method without any k) keeps an empty, titled axis.
            if values:
                pd.Series(values).to_frame(name=series_name).plot(
                    kind="bar", ax=ax, legend=False
                )

            ax.set_title(f"Chunk Size = {chunk_size}, Method = {method}")
            ax.set_ylabel(y_label)
            if y_max > 0:  # identical low/high limits would be degenerate
                ax.set_ylim(0, y_max)

            # Add legend to the last plot in a row (only if it has bars)
            if values and j == n_cols - 1:
                ax.legend(title="K", bbox_to_anchor=(1, 1))

    fig.tight_layout()
    _finish_figure(fig, output_path, show)


def plot_mrr_graphs(
    all_data: Dict[int, Dict[str, Dict[int, pd.DataFrame]]],
    save_dir: str = "./",
    show: bool = True,
    remove_zero: bool = True,
) -> None:
    """
    Plot the mean of ``1 / rank_at_{k}`` for each chunk size, method, and k.

    The figure is saved as ``all_mrr.png`` inside ``save_dir``.

    :param all_data: Dictionary containing data grouped by 'chunk_size', 'method', and 'k'.
    :param save_dir: Directory where the output plot will be saved.
    :param show: Whether to display the plot.
    :param remove_zero: Whether to drop rows whose precision columns are all zero.
    """
    _plot_metric_grid(
        all_data,
        metric=lambda df, k: (1 / df[f"rank_at_{k}"]).mean(),
        series_name="MRR",
        y_label="MRR",
        y_max=1.1,
        output_path=os.path.join(save_dir, "all_mrr.png"),
        show=show,
        remove_zero=remove_zero,
        skip_methods=_SKIPPED_METHODS,
    )


def plot_ndcg_graphs(
    all_data: Dict[int, Dict[str, Dict[int, pd.DataFrame]]],
    save_dir: str = "./",
    show: bool = True,
    remove_zero: bool = True,
) -> None:
    """
    Plot the mean of ``ndcg_at_{k}`` for each chunk size, method, and k.

    The figure is saved as ``all_ndcg.png`` inside ``save_dir``.

    :param all_data: Dictionary containing data grouped by 'chunk_size', 'method', and 'k'.
    :param save_dir: Directory where the output plot will be saved.
    :param show: Whether to display the plot.
    :param remove_zero: Whether to drop rows whose precision columns are all zero.
    """
    max_average_ndcg = 1.1
    _plot_metric_grid(
        all_data,
        metric=lambda df, k: df[f"ndcg_at_{k}"].mean(),
        series_name="Average NDCG",
        y_label="Average NDCG",
        y_max=max_average_ndcg * 1.1,  # consistent Y-axis with a small margin
        output_path=os.path.join(save_dir, "all_ndcg.png"),
        show=show,
        remove_zero=remove_zero,
        skip_methods=_SKIPPED_METHODS,
    )


def plot_latency_graphs(
    all_data: Dict[int, Dict[str, Dict[int, pd.DataFrame]]],
    save_dir: str = "./",
    show: bool = True,
) -> None:
    """
    Plot the median of ``response_latency`` for each chunk size, method, and k.

    All subplots share a Y-axis scaled to 1.1 times the largest median found.
    The figure is saved as ``median_latency_all.png`` inside ``save_dir``.

    :param all_data: Dictionary containing data grouped by 'chunk_size', 'method', and 'k'.
    :param save_dir: Directory where the output plot will be saved.
    :param show: Whether to display the plot.
    """
    # Explicit comparison loop: a NaN median never compares greater, so it
    # cannot become the shared axis limit.
    max_median_latency = 0.0
    for method_data in all_data.values():
        for k_data in method_data.values():
            for df in k_data.values():
                current_median = df["response_latency"].median()
                if current_median > max_median_latency:
                    max_median_latency = current_median

    _plot_metric_grid(
        all_data,
        metric=lambda df, k: df["response_latency"].median(),
        series_name="Median Latency",
        y_label="Median Latency (seconds)",
        y_max=max_median_latency * 1.1,  # consistent Y-axis with a small margin
        output_path=os.path.join(save_dir, "median_latency_all.png"),
        show=show,
    )


def plot_mean_average_precision_graphs(
    all_data: Dict[int, Dict[str, Dict[int, pd.DataFrame]]],
    save_dir: str = "./",
    show: bool = True,
    remove_zero: bool = True,
) -> None:
    """
    Plot the mean of ``average_context_precision_at_{k}`` per chunk size, method, and k.

    The figure is saved as ``all_macp.png`` inside ``save_dir``.

    :param all_data: Dictionary containing data grouped by 'chunk_size', 'method', and 'k'.
    :param save_dir: Directory where the output plot will be saved.
    :param show: Whether to display the plot.
    :param remove_zero: Whether to drop rows whose precision columns are all zero.
    """
    max_macp = 1.1
    _plot_metric_grid(
        all_data,
        metric=lambda df, k: df[f"average_context_precision_at_{k}"].mean(),
        series_name="MACP",
        y_label="MACP",
        y_max=max_macp * 1.1,  # consistent Y-axis with a small margin
        output_path=os.path.join(save_dir, "all_macp.png"),
        show=show,
        remove_zero=remove_zero,
        skip_methods=_SKIPPED_METHODS,
    )


def plot_mean_precision_graphs(
    all_data: Dict[int, Dict[str, Dict[int, pd.DataFrame]]],
    save_dir: str = "./",
    show: bool = True,
    remove_zero: bool = True,
) -> None:
    """
    Plot the mean of ``context_precision_at_{k}`` for each chunk size, method, and k.

    The figure is saved as ``all_mean_precisions.png`` inside ``save_dir``.

    :param all_data: Dictionary containing data grouped by 'chunk_size', 'method', and 'k'.
    :param save_dir: Directory where the output plot will be saved.
    :param show: Whether to display the plot.
    :param remove_zero: Whether to drop rows whose precision columns are all zero.
    """
    _plot_metric_grid(
        all_data,
        metric=lambda df, k: df[f"context_precision_at_{k}"].mean(),
        series_name="Mean Precision",
        y_label="Mean Precision",
        y_max=1.1,  # equal y-height for all plots
        output_path=os.path.join(save_dir, "all_mean_precisions.png"),
        show=show,
        remove_zero=remove_zero,
        skip_methods=_SKIPPED_METHODS,
    )


def plot_percentage_incorrect(
    all_data: Dict[int, Dict[str, Dict[int, pd.DataFrame]]],
    save_dir: str = "./",
    show: bool = True,
    remove_zero: bool = True,
) -> None:
    """
    Plot the percentage of 'incorrect' values for each chunk, method, and k.

    Each chunk size gets one row of subplots, with one subplot per
    (method, k) pair. The figure is saved as ``percentage_incorrect_plot.png``
    inside ``save_dir``.

    :param all_data: Dictionary containing data grouped by 'chunk_size', 'method', and 'k'.
    :param save_dir: Directory where the output plot will be saved.
    :param show: Whether to display the plot.
    :param remove_zero: Whether to drop rows whose precision columns are all zero.
    :raises ValueError: If ``all_data`` is empty.
    """
    _check_not_empty(all_data)
    n_rows = len(all_data)
    # A row needs one column per (method, k) pair, so size the grid by the
    # largest such count; this also tolerates a chunk size with no methods.
    n_cols = max(
        sum(len(k_data) for k_data in method_data.values())
        for method_data in all_data.values()
    )

    # Create a figure with subplots
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)

    for i, (chunk_size, method_data) in enumerate(sorted(all_data.items())):
        col_counter = 0  # Reset column counter for each chunk_size
        for method, k_data in sorted(method_data.items()):
            for k, df in k_data.items():
                if remove_zero:
                    df = remove_all_zeros_rows(df)
                ax = axes[i][col_counter]

                # Calculate the percentage of "incorrect" values
                percent_incorrect = (
                    df["qa_evals"].value_counts(normalize=True).get("incorrect", 0)
                ) * 100

                # Plot the percentage
                bars = ax.bar(["incorrect"], [percent_incorrect], color=["red"])

                # Multi-line title, raised slightly so it clears the bar label
                title = f"Chunk Size: {chunk_size}\nMethod: {method}\nK: {k}"
                ax.set_title(title, fontsize=10, y=1.08)
                ax.set_ylim(0, 105)
                ax.set_ylabel("Percentage")
                ax.set_xlabel("qa_evals")

                # Adding the text label above the bar
                for bar in bars:
                    yval = bar.get_height()
                    ax.text(
                        bar.get_x() + bar.get_width() / 2,
                        yval + 2,
                        str(round(yval, 2)),
                        ha="center",
                        va="bottom",
                        color="black",
                        weight="bold",
                    )

                col_counter += 1  # Move to the next column for the next k value

    # Adjust layout spacing for titles
    fig.tight_layout(pad=3.0)

    # Save the plot to a file, then display or close it
    output_path = os.path.join(save_dir, "percentage_incorrect_plot.png")
    fig.savefig(output_path, dpi=300)
    if show:
        plt.show()
    else:
        plt.close(fig)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Arize-ai/phoenix'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.