We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/mckinsey/vizro'
If you have feedback or need assistance with the MCP directory API, please join our Discord server
"""Utility functions for the Vizro MCP."""
import base64
import gzip
import io
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Literal
from urllib.parse import quote, urlencode
import pandas as pd
import vizro
import vizro.models as vm
from pydantic.json_schema import GenerateJsonSchema
from vizro.models._base import _format_and_lint
from vizro_mcp._utils.configs import DFInfo, DFMetaData
if TYPE_CHECKING:
from vizro_mcp._schemas.schemas import ChartPlan
# PyCafe URL for Vizro snippets
PYCAFE_URL = "https://py.cafe"
@dataclass
class VizroCodeAndPreviewLink:
python_code: str
pycafe_url: str
def convert_github_url_to_raw(path_or_url: str) -> str:
"""Convert a GitHub URL to a raw URL if it's a GitHub URL, otherwise return the original path or URL."""
github_pattern = r"https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/(?:blob|raw)/([^/]+)/(.+)"
github_match = re.match(github_pattern, path_or_url)
if github_match:
user, repo, branch, file_path = github_match.groups()
return f"https://raw.githubusercontent.com/{user}/{repo}/{branch}/{file_path}"
return path_or_url
def load_dataframe_by_format(
path_or_url: str | Path, mime_type: str | None = None
) -> tuple[pd.DataFrame, Literal["pd.read_csv", "pd.read_json", "pd.read_html", "pd.read_excel", "pd.read_parquet"]]:
"""Load a dataframe based on file format determined by MIME type or file extension."""
file_path_str_lower = str(path_or_url).lower()
# Determine format
if mime_type == "text/csv" or file_path_str_lower.endswith(".csv"):
df = pd.read_csv(
path_or_url,
on_bad_lines="warn",
low_memory=False,
)
read_fn = "pd.read_csv"
elif mime_type == "application/json" or file_path_str_lower.endswith(".json"):
df = pd.read_json(path_or_url)
read_fn = "pd.read_json"
elif mime_type == "text/html" or file_path_str_lower.endswith((".html", ".htm")):
tables = pd.read_html(path_or_url)
if not tables:
raise ValueError("No HTML tables found in the provided file or URL")
df = tables[0] # Get the first table by default
read_fn = "pd.read_html"
elif mime_type in [
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.oasis.opendocument.spreadsheet",
] or any(file_path_str_lower.endswith(ext) for ext in [".xls", ".xlsx", ".ods"]):
df = pd.read_excel(path_or_url) # opens only sheet 0
read_fn = "pd.read_excel"
elif mime_type == "application/vnd.apache.parquet" or file_path_str_lower.endswith(
".parquet"
): # mime type exists but I did not manage to ever extract it
df = pd.read_parquet(path_or_url)
read_fn = "pd.read_parquet"
else:
raise ValueError("Could not determine file format")
# Check if the result is a Series and convert to DataFrame if needed
if isinstance(df, pd.Series):
df = df.to_frame()
return df, read_fn
def path_or_url_check(string: str) -> str:
"""Check if a string is a link or a file path."""
if string.startswith(("http://", "https://", "www.")):
return "remote"
if Path(string).is_file():
return "local"
return "invalid"
def get_dataframe_info(df: pd.DataFrame) -> DFInfo:
"""Get the info of a DataFrame."""
buffer = io.StringIO()
df.info(buf=buffer)
info_string = buffer.getvalue()
# Sample only as many rows as exist in the dataframe, up to 5
sample_size = min(5, len(df)) if not df.empty else 0
return DFInfo(general_info=info_string, sample=df.sample(sample_size).to_dict() if sample_size > 0 else {})
def create_pycafe_url(python_code: str) -> str:
"""Create a PyCafe URL for a given Python code."""
# Create JSON object for py.cafe
json_object = {
"code": python_code,
"requirements": f"vizro=={vizro.__version__}",
"files": [],
}
# Convert to compressed base64 URL
json_text = json.dumps(json_object)
compressed_json_text = gzip.compress(json_text.encode("utf8"))
base64_text = base64.b64encode(compressed_json_text).decode("utf8")
query = urlencode({"c": base64_text}, quote_via=quote)
pycafe_url = f"{PYCAFE_URL}/snippet/vizro/v1?{query}"
return pycafe_url
# TODO: is this still needed after 0.1.42
def remove_figure_quotes(code_string: str) -> str:
"""Remove quotes around all figure argument values."""
return _format_and_lint(re.sub(r'figure="([^"]*)"', r"figure=\1", code_string))
def get_python_code_and_preview_link(
model_object: vm.VizroBaseModel,
data_infos: list[DFMetaData],
custom_charts: list["ChartPlan"],
) -> VizroCodeAndPreviewLink:
"""Get the Python code and preview link for a Vizro model object."""
# Get the Python code
python_code = model_object._to_python(
extra_callable_defs={custom_chart.get_chart_code(vizro=True) for custom_chart in custom_charts}
)
# Gather all imports (static + custom), deduplicate, and insert at the first empty line
static_imports = [
"from vizro import Vizro",
"import pandas as pd",
"from vizro.managers import data_manager",
"import vizro.figures as vf",
# TODO: Temporary workaround for Figure model support; required until vizro>=0.1.46
# Remove this import once minimum supported vizro version is >=0.1.46.
]
custom_imports = [
imp for custom_chart in custom_charts for imp in custom_chart.get_imports(vizro=True).split("\n") if imp.strip()
]
all_imports = list(dict.fromkeys(static_imports + custom_imports))
lines = python_code.splitlines()
for i, line in enumerate(lines):
if not line.strip():
lines[i:i] = all_imports
break
python_code = "\n".join(lines)
# Prepare data loading code
data_loading_code = "\n".join(
f'data_manager["{info.file_name}"] = {info.read_function_string}("{info.file_path_or_url}")'
for info in data_infos
)
# Patterns to identify the data manager section
data_manager_start_marker = "####### Data Manager Settings #####"
data_manager_end_marker = "########### Model code ############"
# Replace everything between the markers with our data loading code
pattern = re.compile(f"{data_manager_start_marker}.*?{data_manager_end_marker}", re.DOTALL)
replacement = f"{data_manager_start_marker}\n{data_loading_code}\n\n{data_manager_end_marker}"
python_code = pattern.sub(replacement, python_code)
# Add final run line
python_code += "\n\nVizro().build(model).run()"
python_code = remove_figure_quotes(python_code)
pycafe_url = create_pycafe_url(python_code)
return VizroCodeAndPreviewLink(python_code=python_code, pycafe_url=pycafe_url)
class NoDefsGenerateJsonSchema(GenerateJsonSchema):
"""Custom schema generator that handles reference cases appropriately."""
def generate(self, schema, mode="validation"):
"""Generate schema and resolve references if needed."""
json_schema = super().generate(schema, mode=mode)
# If schema is a reference (has $ref but no properties)
if "$ref" in json_schema and "properties" not in json_schema:
# Extract the reference path - typically like "#/$defs/ModelName"
ref_path = json_schema["$ref"]
if ref_path.startswith("#/$defs/"):
model_name = ref_path.split("/")[-1]
# Get the referenced definition from $defs
# Simply copy the referenced definition content to the top level
json_schema.update(json_schema["$defs"][model_name])
# Remove the $ref since we've resolved it
json_schema.pop("$ref", None)
# Remove the $defs section if it exists
json_schema.pop("$defs", None)
return json_schema
# if __name__ == "__main__":
# print(vm.Dashboard.model_json_schema(schema_generator=NoDefsGenerateJsonSchema).keys())