# test_openai_client.py
import pytest
from tests.conftest import assert_response_quality
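# The mcp_client and evaluator fixtures used below are expected to be provided by
# tests/conftest.py, alongside the imported assert_response_quality helper.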
# Mark all tests in this file as 'integration'
pytestmark = pytest.mark.integration
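# Run just this suite with `pytest -m integration`, or skip it with `-m "not integration"`.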
# Test data for parameterized testing
test_models = ["gpt-4.1"]
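# Add more model names here to run the parametrized query tests below against them as well.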
connection_queries = [
"Test the connection to Grafana.",
"Check Grafana connection status.",
"Verify connection to Grafana.",
]
dashboard_queries = [
"Fetch all available dashboards from Grafana.",
"Show me the dashboards.",
"List all dashboards available.",
"Get me information about Grafana dashboards.",
]
query_tests = [
"Run a simple PromQL query to check if services are up.",
"Execute a PromQL query: up",
"Query Grafana metrics using PromQL.",
]
datasource_queries = [
"What datasources are available in Grafana?",
"Show me all the datasources.",
"List Grafana datasources.",
]
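# Note: the flaky(max_runs=...) markers below assume the `flaky` pytest plugin is
# installed (pytest-rerunfailures spells the equivalent option `reruns=` instead).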


class TestOpenAIIntegration:
"""Test OpenAI integration with MCP server."""
def test_openai_client_initialization(self, mcp_client):
"""Test that OpenAI client initializes correctly."""
assert mcp_client is not None
        # The client has no model attribute; it exposes the discovered MCP tools via mcp_tools.
assert hasattr(mcp_client, "mcp_tools")
assert len(mcp_client.mcp_tools) > 0

    @pytest.mark.parametrize("model", test_models)
@pytest.mark.parametrize("query", connection_queries)
@pytest.mark.flaky(max_runs=3)
def test_connection_queries(self, mcp_client, evaluator, model, query):
"""Test connection-related queries using LLM evaluation."""
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages, model=model)
assert response is not None
assert len(response) > 0
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.7,
specific_checks=["connection_status"],
required_checks=["is_helpful"],
)
print(f"Connection query successful: {query}")

    @pytest.mark.parametrize("model", test_models)
@pytest.mark.parametrize("query", dashboard_queries)
@pytest.mark.flaky(max_runs=3)
def test_dashboard_queries(self, mcp_client, evaluator, model, query):
"""Test dashboard-related queries using LLM evaluation."""
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages, model=model)
assert response is not None
assert len(response) > 0
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.7,
specific_checks=["dashboard_info"],
required_checks=["is_helpful"],
)
print(f"Dashboard query successful: {query}")

    @pytest.mark.parametrize("model", test_models)
@pytest.mark.parametrize("query", query_tests)
@pytest.mark.flaky(max_runs=3)
def test_promql_queries(self, mcp_client, evaluator, model, query):
"""Test PromQL-related queries using LLM evaluation."""
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages, model=model)
assert response is not None
assert len(response) > 0
# Use LLM evaluation if available
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.6,
specific_checks=["promql_query_result"],
required_checks=["is_helpful"],
)
print(f"PromQL query successful: {query}")

    @pytest.mark.parametrize("model", test_models)
@pytest.mark.parametrize("query", datasource_queries)
@pytest.mark.flaky(max_runs=3)
def test_datasource_queries(self, mcp_client, evaluator, model, query):
"""Test datasource-related queries using LLM evaluation."""
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages, model=model)
assert response is not None
assert len(response) > 0
# Use LLM evaluation if available
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.7,
specific_checks=["datasource_info"],
required_checks=["is_helpful"],
)
print(f"Datasource query successful: {query}")


class TestComplexScenarios:
"""Test complex multi-step scenarios."""
def test_dashboard_exploration_workflow(self, mcp_client, evaluator):
"""Test a complex dashboard exploration workflow."""
query = (
"I want to explore Grafana dashboards. First, show me what dashboards are available, "
"then pick one and show me its configuration, and finally tell me about the datasources."
)
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
assert response is not None
assert len(response) > 0
# This should involve multiple tool calls
assert "dashboard" in response.lower()
# Use LLM evaluation if available
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.6,
specific_checks=["dashboard_info", "datasource_info"],
required_checks=["is_helpful"],
)
print("Complex dashboard exploration workflow completed successfully")

    def test_monitoring_setup_guidance(self, mcp_client, evaluator):
"""Test guidance for monitoring setup."""
query = (
"I'm setting up monitoring for my application. Can you help me understand "
"what's available in Grafana? Show me the datasources and some example dashboards."
)
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
assert response is not None
assert len(response) > 0
# Use LLM evaluation if available
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.6,
specific_checks=["dashboard_info", "datasource_info"],
required_checks=["is_helpful"],
)
print("Monitoring setup guidance completed successfully")

    def test_troubleshooting_scenario(self, mcp_client, evaluator):
"""Test troubleshooting scenario."""
query = (
"I'm trying to troubleshoot an issue with my application. "
"Can you help me run some basic queries to check if my services are up and running?"
)
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
assert response is not None
assert len(response) > 0
# Use LLM evaluation if available
if evaluator:
assert_response_quality(
prompt=query,
response=response,
evaluator=evaluator,
min_pass_rate=0.6,
specific_checks=["promql_query_result"],
required_checks=["is_helpful"],
)
print("Troubleshooting scenario completed successfully")


class TestErrorHandling:
"""Test error handling in OpenAI integration."""
def test_invalid_queries(self, mcp_client):
"""Test handling of invalid or nonsensical queries."""
invalid_queries = [
"Make me a sandwich",
"What's the weather like?",
"Solve world hunger",
]
for query in invalid_queries:
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
# Should still return a response, even if it's explaining limitations
assert response is not None
assert len(response) > 0
print(f"Handled invalid query appropriately: {query}")

    def test_partial_tool_failures(self, mcp_client):
"""Test handling when some tools might fail."""
query = "Show me all dashboards and then run a PromQL query for a metric that probably doesn't exist: non_existent_metric_12345"
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
# Should handle partial failures gracefully
assert response is not None
assert len(response) > 0
print("Handled partial tool failures appropriately")


class TestPerformance:
"""Test performance aspects of OpenAI integration."""
@pytest.mark.timeout(30) # Should complete within 30 seconds
def test_response_time(self, mcp_client):
"""Test that responses are generated within reasonable time."""
query = "What dashboards are available in Grafana?"
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
assert response is not None
assert len(response) > 0
print("Response generated within acceptable time limit")

    def test_multiple_sequential_queries(self, mcp_client):
"""Test handling multiple sequential queries."""
queries = [
"Test the Grafana connection.",
"Show me available dashboards.",
"What datasources are configured?",
]
responses = []
for query in queries:
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
responses.append(response)
# All queries should succeed
for i, response in enumerate(responses):
assert response is not None
assert len(response) > 0
print(f"Sequential query {i + 1} completed successfully")


# Utility function for manual testing
def manual_test_conversation(mcp_client, queries: list):
"""
Utility function for manual testing of conversation flows.
Args:
mcp_client: OpenAI MCP client instance
queries: List of queries to test
"""
for i, query in enumerate(queries, 1):
print(f"\n--- Query {i}: {query} ---")
messages = [{"role": "user", "content": query}]
response = mcp_client.chat(messages=messages)
print(f"Response: {response}")
print("-" * 50)