Skip to main content
Glama

BigQuery Analysis MCP Server

by gotalab
#!/usr/bin/env node /** * BigQuery Analysis MCP Server * * This server provides tools to: * - Perform a dry run of a BigQuery query to check if it's valid and estimate its size * - Run a BigQuery query if the dry run succeeds and is less than 1 TB */ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js"; import { BigQuery } from "@google-cloud/bigquery"; import { z } from "zod"; // Initialize BigQuery client // Note: This uses application default credentials // The user needs to have set up authentication via gcloud CLI or service account const bigquery = new BigQuery(); // Size limit for queries (1 TB in bytes) const SIZE_LIMIT_BYTES = 1_099_511_627_776; // 1 TB /** * Function to check if a query contains DML statements * @param query The SQL query to check * @returns An object with a boolean indicating if DML was detected and a message */ function containsDMLStatements(query: string): { isDML: boolean; message?: string } { // Normalize the query by removing comments and converting to uppercase for easier matching const normalizedQuery = query .replace(/--.*$/gm, '') // Remove single-line comments .replace(/\/\*[\s\S]*?\*\//g, '') // Remove multi-line comments .trim() .toUpperCase(); // Define patterns for common DML statements const dmlPatterns = [ { pattern: /\bCREATE\s+TABLE\b/i, name: "CREATE TABLE" }, { pattern: /\bCREATE\s+OR\s+REPLACE\s+TABLE\b/i, name: "CREATE OR REPLACE TABLE" }, { pattern: /\bDROP\s+TABLE\b/i, name: "DROP TABLE" }, { pattern: /\bALTER\s+TABLE\b/i, name: "ALTER TABLE" }, { pattern: /\bINSERT\s+INTO\b/i, name: "INSERT INTO" }, { pattern: /\bUPDATE\b/i, name: "UPDATE" }, { pattern: /\bDELETE\s+FROM\b/i, name: "DELETE FROM" }, { pattern: /\bMERGE\s+INTO\b/i, name: "MERGE INTO" }, { pattern: /\bTRUNCATE\s+TABLE\b/i, name: "TRUNCATE TABLE" }, { pattern: /\bCREATE\s+VIEW\b/i, name: "CREATE VIEW" }, { pattern: /\bCREATE\s+OR\s+REPLACE\s+VIEW\b/i, name: "CREATE OR REPLACE VIEW" }, { pattern: /\bDROP\s+VIEW\b/i, name: "DROP VIEW" }, { pattern: /\bCREATE\s+FUNCTION\b/i, name: "CREATE FUNCTION" }, { pattern: /\bDROP\s+FUNCTION\b/i, name: "DROP FUNCTION" }, { pattern: /\bCREATE\s+PROCEDURE\b/i, name: "CREATE PROCEDURE" }, { pattern: /\bDROP\s+PROCEDURE\b/i, name: "DROP PROCEDURE" }, { pattern: /\bGRANT\b/i, name: "GRANT" }, { pattern: /\bREVOKE\b/i, name: "REVOKE" }, { pattern: /\bBEGIN\s+TRANSACTION\b/i, name: "BEGIN TRANSACTION" }, { pattern: /\bCOMMIT\b/i, name: "COMMIT" }, { pattern: /\bROLLBACK\b/i, name: "ROLLBACK" } ]; // Check if the query contains any DML statements for (const { pattern, name } of dmlPatterns) { if (pattern.test(normalizedQuery)) { return { isDML: true, message: `DML statement detected: ${name}. Only SELECT queries are allowed.` }; } } return { isDML: false }; } /** * Create an MCP server with BigQuery tools */ const server = new McpServer({ name: "bigquery-analysis-server", version: "0.1.0", }); /** * Tool: Perform a dry run of a BigQuery query */ server.tool( "dry_run_query", "Perform a dry run of a BigQuery query to check if it's valid and estimate its size", { query: z.string().describe("The SQL query to dry run"), projectId: z.string().optional().describe("Google Cloud project ID (optional, uses default if not provided)") }, async ({ query, projectId }) => { if (!query) { throw new McpError(ErrorCode.InvalidParams, "Query is required"); } try { // Create query options with dryRun flag const options = { query, dryRun: true, ...(projectId && { projectId }) }; // Execute the dry run const [job] = await bigquery.createQueryJob(options); const metadata = job.metadata; // Get the total bytes processed const totalBytesProcessed = metadata.statistics.totalBytesProcessed; const isBelowLimit = BigInt(totalBytesProcessed) < BigInt(SIZE_LIMIT_BYTES); // Format size in a human-readable format const sizeInGB = Number(totalBytesProcessed) / (1024 * 1024 * 1024); const sizeInTB = sizeInGB / 1024; return { content: [{ type: "text", text: JSON.stringify({ success: true, bytesProcessed: totalBytesProcessed, formattedSize: `${sizeInGB.toFixed(2)} GB (${sizeInTB.toFixed(4)} TB)`, isBelowLimit, message: isBelowLimit ? `Dry run successful. Query will process ${sizeInGB.toFixed(2)} GB, which is below the 1 TB limit.` : `Dry run successful, but query will process ${sizeInGB.toFixed(2)} GB, which exceeds the 1 TB limit.` }, null, 2) }] }; } catch (error) { console.error("Dry run error:", error); return { content: [{ type: "text", text: JSON.stringify({ success: false, error: error instanceof Error ? error.message : String(error) }, null, 2) }] }; } } ); /** * Tool: Run a BigQuery query with validation */ server.tool( "run_query_with_validation", "Run a BigQuery query with dry run validation (fails if query exceeds 1 TB or contains DML statements)", { query: z.string().describe("The SQL query to run (only SELECT queries are allowed)"), projectId: z.string().optional().describe("Google Cloud project ID (optional, uses default if not provided)"), maxResults: z.number().optional().describe("Maximum number of results to return (default: 100)") }, async ({ query, projectId, maxResults = 100 }) => { if (!query) { throw new McpError(ErrorCode.InvalidParams, "Query is required"); } // Check if the query contains DML statements const dmlCheck = containsDMLStatements(query); if (dmlCheck.isDML) { return { content: [{ type: "text", text: JSON.stringify({ success: false, error: dmlCheck.message }, null, 2) }] }; } try { // First perform a dry run to check the query size const options = { query, dryRun: true, ...(projectId && { projectId }) }; const [job] = await bigquery.createQueryJob(options); const metadata = job.metadata; // Get the total bytes processed const totalBytesProcessed = metadata.statistics.totalBytesProcessed; const isBelowLimit = BigInt(totalBytesProcessed) < BigInt(SIZE_LIMIT_BYTES); // Format size in a human-readable format const sizeInGB = Number(totalBytesProcessed) / (1024 * 1024 * 1024); if (!isBelowLimit) { return { content: [{ type: "text", text: JSON.stringify({ success: false, bytesProcessed: totalBytesProcessed, formattedSize: `${sizeInGB.toFixed(2)} GB`, error: `Query would process ${sizeInGB.toFixed(2)} GB, which exceeds the 1 TB limit.` }, null, 2) }] }; } // If the query is below the size limit, run it const [rows] = await bigquery.query({ query, ...(projectId && { projectId }), maxResults }); return { content: [{ type: "text", text: JSON.stringify({ success: true, bytesProcessed: totalBytesProcessed, formattedSize: `${sizeInGB.toFixed(2)} GB`, rowCount: rows.length, results: rows }, null, 2) }] }; } catch (error) { console.error("Query error:", error); return { content: [{ type: "text", text: JSON.stringify({ success: false, error: error instanceof Error ? error.message : String(error) }, null, 2) }] }; } } ); /** * Start the server using stdio transport */ async function main() { console.log("Starting BigQuery Analysis MCP Server..."); const transport = new StdioServerTransport(); await server.connect(transport); console.log("Server connected to transport"); } main().catch((error) => { console.error("Server error:", error); process.exit(1); });

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gotalab/bigquery-analysis-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server