"""Tests for dashboard using VizroAI."""
import csv
import os
import statistics
from collections import Counter
from datetime import datetime
import chromedriver_autoinstaller
import numpy as np
import pytest
import vizro.plotly.express as px
from prompts import complex_prompt, easy_prompt, medium_prompt
from vizro import Vizro
from vizro_ai import VizroAI
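# Sample plotly datasets used as the data context handed to VizroAI.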
df1 = px.data.gapminder()
df2 = px.data.stocks()
df3 = px.data.tips()
df4 = px.data.wind()


@pytest.fixture(scope="module", autouse=True)
def setup_test_environment():
    # We only need to install chromedriver outside CI.
    if not os.getenv("CI"):
        chromedriver_autoinstaller.install()


# An empty metrics list means that nothing was specified for this score in the
# config; in that case the score defaults to 1.
def score_calculator(metrics_score: list[int]) -> float:
    return statistics.mean(metrics_score) if metrics_score else 1
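# For instance, score_calculator([1, 0, 1]) returns 2/3, while score_calculator([])
# returns 1 because an empty list means the config requested nothing to check.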
def logic(  # noqa: PLR0912, PLR0913, PLR0915
    dashboard,
    model_name,
    dash_duo,
    prompt_tier,
    prompt_name,
    prompt_text,
    config: dict,
):
    """Calculates all separate scores and creates a csv report.

    Args:
        dashboard: VizroAI generated dashboard
        model_name: GenAI model name
        dash_duo: dash_duo fixture
        prompt_tier: complexity of the prompt
        prompt_name: short prompt description
        prompt_text: prompt text
        config: json config of the expected dashboard

    """
    # TODO: Add layout score
    report_dir = "tests/e2e/reports"
    os.makedirs(report_dir, exist_ok=True)

    try:
        app = Vizro().build(dashboard).dash
        dash_duo.start_server(app)
        app_started = 1
        app_started_report = "App started!"
    except Exception as e:
        app_started = 0
        app_started_report = "App didn't start!"
        print(f"App start exception: {e}")  # noqa: T201

    try:
        assert dash_duo.get_logs() == []
        no_browser_console_errors = 1
        no_browser_console_errors_report = "No error logs in browser console!"
    except AssertionError as e:
        no_browser_console_errors = 0
        no_browser_console_errors_report = "Error logs in browser console found!"
        print(f"Browser console exception: {e}")  # noqa: T201

    Vizro._reset()

    # Report metadata; falls back to local values when run outside CI.
    vizro_type = os.environ.get("VIZRO_TYPE", "local_env")
    branch = os.environ.get("BRANCH", "local")
    python_version = os.environ.get("PYTHON_VERSION", "local")
    pages_exist = 1 if dashboard.pages else 0
    pages_exist_report = bool(pages_exist)
    pages_num = [1 if len(dashboard.pages) == len(config["pages"]) else 0]
    pages_num_report = [f"{len(config['pages'])} page(s) for dashboard is {bool(pages_num[0])}"]

    components_num = []
    components_num_report = []
    for page in range(len(config["pages"])):
        try:
            components = (
                1 if len(dashboard.pages[page].components) == len(config["pages"][page]["components"]) else 0
            )
        except IndexError:
            components = 0
        components_num.append(components)
        components_num_report.append(
            f"{len(config['pages'][page]['components'])} component(s) for page {page} is {bool(components)}"
        )
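    # Sketch of the scoring: with two expected pages where only page 0 has the
    # expected component count, components_num == [1, 0], so
    # score_calculator(metrics_score=components_num) == 0.5 for that metric.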
    controls_num = []
    controls_num_report = []
    for page in range(len(config["pages"])):
        try:
            controls = 1 if len(dashboard.pages[page].controls) == len(config["pages"][page]["controls"]) else 0
        except IndexError:
            controls = 0
        controls_num.append(controls)
        controls_num_report.append(
            f"{len(config['pages'][page]['controls'])} control(s) for page {page} is {bool(controls)}"
        )
    components_types_names = []
    components_types_names_report = []
    try:
        for page in range(len(config["pages"])):
            components_dashboard = Counter(component.type for component in dashboard.pages[page].components)
            components_config = Counter(component.type for component in config["pages"][page]["components"])
            for component_name in components_config:
                components_types = (
                    1 if components_config[component_name] == components_dashboard[component_name] else 0
                )
                components_types_names.append(components_types)
                components_types_names_report.append(
                    f"{components_config[component_name]} components_type(s) {component_name} "
                    f"for page {page} is {bool(components_types)}"
                )
    except IndexError:
        components_types_names.append(0)
        components_types_names_report.append("page or component does not exist")
    controls_types_names = []
    controls_types_names_report = []
    try:
        for page in range(len(config["pages"])):
            controls_dashboard = Counter(control.type for control in dashboard.pages[page].controls)
            controls_config = Counter(control.type for control in config["pages"][page]["controls"])
            for control_name in controls_config:
                controls_types = 1 if controls_config[control_name] == controls_dashboard[control_name] else 0
                controls_types_names.append(controls_types)
                controls_types_names_report.append(
                    f"{controls_config[control_name]} controls_type(s) {control_name} "
                    f"for page {page} is {bool(controls_types)}"
                )
    except IndexError:
        controls_types_names.append(0)
        controls_types_names_report.append("page or control does not exist")
    # Every separate score has its own weight.
    scores = [
        {"score_name": "app_started_score", "weight": 0.4, "score": app_started},
        {"score_name": "no_browser_console_errors_score", "weight": 0.1, "score": no_browser_console_errors},
        {"score_name": "pages_score", "weight": 0.3, "score": pages_exist},
        {"score_name": "pages_number", "weight": 0.2, "score": score_calculator(metrics_score=pages_num)},
        {"score_name": "components_score", "weight": 0.2, "score": score_calculator(metrics_score=components_num)},
        {
            "score_name": "component_types_score",
            "weight": 0.2,
            "score": score_calculator(metrics_score=components_types_names),
        },
        {"score_name": "controls_score", "weight": 0.2, "score": score_calculator(metrics_score=controls_num)},
        {
            "score_name": "controls_types_score",
            "weight": 0.2,
            "score": score_calculator(metrics_score=controls_types_names),
        },
    ]
    scores_values = np.array([score["score"] for score in scores])
    weights = np.array([score["weight"] for score in scores])
    weighted_score = np.average(scores_values, weights=weights)
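    # np.average normalizes by the total weight (0.4 + 0.1 + 0.3 + 5 * 0.2 = 1.8),
    # so weighted_score always lands in [0, 1]; a run where every score is 1 yields exactly 1.0.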
    # csv report creation
    data_rows = [
        datetime.now(),
        vizro_type,
        branch,
        python_version,
        model_name,
        prompt_tier,
        prompt_name,
        weighted_score,
    ]
    data_rows.extend(score["score"] for score in scores)
    data_rows.append(prompt_text)

    report_path = f"{report_dir}/report_model_{model_name}_{vizro_type}.csv"
    # Write the header only once, when the report file is new or empty, then append
    # one row per test run. (Opening the file with "r+" and writing after readline()
    # would overwrite the first data row on subsequent runs.)
    write_header = not os.path.exists(report_path) or os.path.getsize(report_path) == 0
    with open(report_path, "a", newline="") as csvfile:
        writer = csv.writer(csvfile, delimiter=",")
        if write_header:
            header_rows = [
                "timestamp",
                "vizro_type",
                "branch",
                "python_version",
                "model",
                "prompt_tier",
                "prompt_name",
                "weighted_score",
            ]
            header_rows.extend(score["score_name"] for score in scores)
            header_rows.append("prompt_text")
            writer.writerow(header_rows)
        writer.writerow(data_rows)
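    # Resulting file layout (the second line is an illustrative row, values hypothetical):
    # timestamp,vizro_type,branch,python_version,model,prompt_tier,prompt_name,weighted_score,<one column per score>,prompt_text
    # 2024-01-01 12:00:00,local_env,local,local,gpt-4o,easy,<prompt_name>,0.85,...,<prompt text>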
    # Readable report for the console output
    print(f"App started: {app_started_report}")  # noqa: T201
    print(f"Console errors: {no_browser_console_errors_report}")  # noqa: T201
    print(f"Pages exist: {pages_exist_report}")  # noqa: T201
    print(f"Correct pages number: {pages_num_report}")  # noqa: T201
    print(f"Correct components number: {components_num_report}")  # noqa: T201
    print(f"Correct controls number: {controls_num_report}")  # noqa: T201
    print(f"Correct components types: {components_types_names_report}")  # noqa: T201
    print(f"Correct controls types: {controls_types_names_report}")  # noqa: T201
    print(f"Weighted score: {weighted_score}")  # noqa: T201
    print(f"Scores: {scores}")  # noqa: T201
@pytest.mark.parametrize(
    "model_name",
    [
        "gpt-4o-mini",
        "gpt-4o",
        "gpt-4-turbo",
    ],
    ids=[
        "gpt-4o-mini",
        "gpt-4o",
        "gpt-4-turbo",
    ],
)
@pytest.mark.parametrize(
    "tier_type, prompt_name, prompt_text, expected_config, dfs",
    [
        (
            easy_prompt["tier_type"],
            easy_prompt["prompt_name"],
            easy_prompt["prompt_text"],
            easy_prompt["expected_config"],
            [df1, df2],
        ),
        (
            medium_prompt["tier_type"],
            medium_prompt["prompt_name"],
            medium_prompt["prompt_text"],
            medium_prompt["expected_config"],
            [df1, df2, df3],
        ),
        (
            complex_prompt["tier_type"],
            complex_prompt["prompt_name"],
            complex_prompt["prompt_text"],
            complex_prompt["expected_config"],
            [df1, df2, df3, df4],
        ),
    ],
)
def test_dashboard_openai(dash_duo, model_name, tier_type, prompt_name, prompt_text, expected_config, dfs):  # noqa: PLR0913
    created_dashboard = VizroAI(model=model_name).dashboard(dfs, prompt_text)
    logic(
        dashboard=created_dashboard,
        model_name=model_name,
        dash_duo=dash_duo,
        prompt_tier=tier_type,
        prompt_name=prompt_name,
        prompt_text=prompt_text.replace("\n", " "),
        config=expected_config,
    )
@pytest.mark.parametrize(
    "model_name",
    [
        "claude-3-5-sonnet-latest",
        "claude-3-opus-latest",
        "claude-3-sonnet-20240229",
        "claude-3-haiku-20240307",
    ],
    ids=[
        "claude-3-5-sonnet-latest",
        "claude-3-opus-latest",
        "claude-3-sonnet-20240229",
        "claude-3-haiku-20240307",
    ],
)
@pytest.mark.parametrize(
    "tier_type, prompt_name, prompt_text, expected_config, dfs",
    [
        (
            easy_prompt["tier_type"],
            easy_prompt["prompt_name"],
            easy_prompt["prompt_text"],
            easy_prompt["expected_config"],
            [df1, df2],
        ),
    ],
)
def test_dashboard_anthropic(dash_duo, model_name, tier_type, prompt_name, prompt_text, expected_config, dfs):  # noqa: PLR0913
    created_dashboard = VizroAI(model=model_name).dashboard(dfs, prompt_text)
    logic(
        dashboard=created_dashboard,
        model_name=model_name,
        dash_duo=dash_duo,
        prompt_tier=tier_type,
        prompt_name=prompt_name,
        prompt_text=prompt_text.replace("\n", " "),
        config=expected_config,
    )