Dataproc MCP Server

job-tools.ts•8.88 KiB

/**
 * Job submission and management tool definitions.
 * Extracted from main server file for better organization.
 *
 * Each entry is a plain object with three fields:
 * - `name`: the tool identifier.
 * - `description`: Markdown-formatted help text shown to the caller.
 * - `inputSchema`: a JSON-Schema-shaped object (`type`, `properties`,
 *   `required`) describing the tool's arguments.
 *
 * The array only declares tools; no handler logic lives here.
 */
export const jobTools = [
  // New tool: submit Hive query
  {
    name: 'submit_hive_query',
    description:
      'Submit a Hive query to a Dataproc cluster with enhanced result discovery.\n\n' +
      '**🚀 QUICK START EXAMPLES:**\n' +
      '• `SHOW DATABASES` - List all databases\n' +
      '• `SELECT COUNT(*) FROM my_table` - Get row count\n' +
      '• `DESCRIBE my_table` - Show table schema\n' +
      '• `SHOW TABLES IN my_database` - List tables in database\n\n' +
      '**📊 RESULT ACCESS:**\n' +
      'After submission, use query_knowledge with jobId to get actual results:\n' +
      '`query_knowledge("jobId:YOUR_JOB_ID contentType:query_results")`\n\n' +
      '**⚡ ASYNC MODE:**\n' +
      'Set async:true for long-running queries, then monitor with get_job_status',
    inputSchema: {
      type: 'object',
      properties: {
        clusterName: { type: 'string', description: 'Name of the cluster to run the query on' },
        query: { type: 'string', description: 'Hive query to execute' },
        async: {
          type: 'boolean',
          description:
            'Optional: Whether to wait for query completion (false) or return immediately (true)',
        },
        verbose: {
          type: 'boolean',
          description: 'Optional: Return full response without filtering (default: false)',
        },
        queryOptions: {
          type: 'object',
          description: 'Optional: Query configuration options',
          properties: {
            timeoutMs: { type: 'number', description: 'Optional: Timeout in milliseconds' },
            parameters: { type: 'object', description: 'Optional: Query parameters' },
            // Note: this is an argument literally named "properties", nested
            // inside the schema's own `properties` map.
            properties: { type: 'object', description: 'Optional: Query properties' },
          },
        },
      },
      required: ['clusterName', 'query'],
    },
  },

  // New tool: get query status
  {
    name: 'get_query_status',
    description: 'Get the status of a Hive query job',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: { type: 'string', description: 'Job ID to check' },
      },
      required: ['jobId'],
    },
  },

  // Enhanced tool: get query results with async support and semantic search
  {
    name: 'get_query_results',
    description:
      'Get the results of a completed Hive query with enhanced async support and semantic search integration',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: { type: 'string', description: 'Job ID to get results for' },
        maxResults: {
          type: 'number',
          description: 'Optional: Maximum number of rows to display in the response (default: 10)',
        },
        pageToken: { type: 'string', description: 'Optional: Page token for pagination' },
      },
      required: ['jobId'],
    },
  },

  // New tool: submit Dataproc job (generic)
  {
    name: 'submit_dataproc_job',
    description:
      'Submit a Dataproc job (Hive, Spark, PySpark, Presto, etc.) to a cluster with enhanced monitoring.\n\n' +
      '**🔧 SUPPORTED JOB TYPES:**\n' +
      '• **hive** - SQL queries on Hadoop data\n' +
      '• **spark** - Scala/Java Spark applications\n' +
      '• **pyspark** - Python Spark jobs\n' +
      '• **presto** - Fast SQL analytics\n' +
      '• **hadoop** - MapReduce jobs\n\n' +
      '**📝 JOB CONFIG EXAMPLES:**\n' +
      '• Hive: `{"query": "SELECT COUNT(*) FROM table"}`\n' +
      '• PySpark: `{"mainPythonFileUri": "{@./test-spark-job.py}", "args": ["arg1"]}`\n' +
      '• Spark: `{"mainClass": "com.example.Main", "jarFileUris": ["{@./app.jar}"]}`\n\n' +
      '**🔧 LOCAL FILE STAGING:**\n' +
      '• Use `{@./relative/path}` for files relative to config directory\n' +
      '• Use `{@/absolute/path}` for absolute file paths\n' +
      '• Files are automatically staged to GCS and cleaned up after job completion\n' +
      '• Supports .py, .jar, .sql, .R file extensions\n\n' +
      '**🎯 RESULT WORKFLOW:**\n' +
      '1. Submit job → Get jobId\n' +
      '2. Monitor: get_job_status(jobId)\n' +
      '3. Results: query_knowledge("jobId:YOUR_ID contentType:query_results")',
    inputSchema: {
      type: 'object',
      properties: {
        clusterName: { type: 'string', description: 'Name of the cluster to run the job on' },
        jobType: {
          type: 'string',
          description: 'Type of job (hive, spark, pyspark, presto, etc.)',
        },
        jobConfig: { type: 'object', description: 'Job configuration object (type-specific)' },
        async: {
          type: 'boolean',
          description: 'Whether to submit asynchronously (default: false)',
        },
      },
      required: ['clusterName', 'jobType', 'jobConfig'],
    },
  },

  // New tool: get Dataproc job status
  {
    name: 'get_job_status',
    description:
      'Get the status of a Dataproc job by job ID with smart result discovery.\n\n' +
      '**📊 STATUS TYPES:**\n' +
      '• PENDING - Job queued for execution\n' +
      '• RUNNING - Job currently executing\n' +
      '• DONE - Job completed successfully ✅\n' +
      '• ERROR - Job failed with errors ❌\n' +
      '• CANCELLED - Job was cancelled\n\n' +
      '**🎯 WHEN STATUS = DONE:**\n' +
      'Automatically shows result discovery hints:\n' +
      '`query_knowledge("jobId:YOUR_ID contentType:query_results")`\n\n' +
      '**💡 MONITORING WORKFLOW:**\n' +
      '1. Submit job (async mode)\n' +
      '2. Check status periodically\n' +
      '3. When DONE, get actual results via query_knowledge',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: { type: 'string', description: 'Job ID to check' },
        verbose: {
          type: 'boolean',
          description: 'Optional: Return full response without filtering (default: false)',
        },
      },
      required: ['jobId'],
    },
  },

  // Enhanced tool: get Dataproc job results with smart discovery hints
  {
    name: 'get_job_results',
    description:
      'Get the results of a completed Dataproc job by job ID.\n\n' +
      '**🎯 FOR COMPLETE RESULTS INCLUDING ACTUAL DATA:**\n' +
      'Use query_knowledge with combined tags for better results:\n' +
      '• `jobId:YOUR_JOB_ID contentType:query_results` - Get actual query results\n' +
      '• `jobId:YOUR_JOB_ID type:query_result` - Alternative format\n' +
      '• `jobId:YOUR_JOB_ID` - Get job metadata and result hints\n\n' +
      '**💡 EXAMPLE:**\n' +
      'query_knowledge("jobId:89feded7-902b-4698-b076-12008a8929a7 contentType:query_results")\n' +
      '→ Returns actual data: ["220144"]\n\n' +
      '**Note:** This tool returns optimization metadata. For actual query output data, use the query_knowledge patterns above.',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: { type: 'string', description: 'Job ID to get results for' },
        maxResults: {
          type: 'number',
          description: 'Optional: Maximum number of rows to display in the response (default: 10)',
        },
      },
      required: ['jobId'],
    },
  },

  // New tool: quick status check for active jobs
  {
    name: 'check_active_jobs',
    description:
      "🚀 Quick status check for all active and recent jobs - perfect for seeing what's running!",
    inputSchema: {
      type: 'object',
      properties: {
        includeCompleted: {
          type: 'boolean',
          description: 'Include recently completed jobs (default: false)',
        },
      },
      // No mandatory arguments: the tool can be called with an empty object.
      required: [],
    },
  },

  // New tool: cancel Dataproc job
  {
    name: 'cancel_dataproc_job',
    description:
      'Cancel a running Dataproc job with intelligent status handling and job tracking integration.\n\n' +
      '**🛑 CANCELLATION WORKFLOW:**\n' +
      '• Attempts to cancel jobs in PENDING or RUNNING states\n' +
      '• Provides informative messages for jobs already in terminal states\n' +
      '• Updates internal job tracking when cancellation succeeds\n\n' +
      '**📊 STATUS HANDLING:**\n' +
      '• PENDING/RUNNING → Cancellation attempted\n' +
      '• DONE/ERROR/CANCELLED → Informative message returned\n' +
      '• Job not found → Clear error message\n\n' +
      '**💡 MONITORING:**\n' +
      'After cancellation, use get_job_status("jobId") to confirm the job reaches CANCELLED state.',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: { type: 'string', description: 'The ID of the Dataproc job to cancel' },
        projectId: {
          type: 'string',
          description: 'Optional: Google Cloud Project ID (uses defaults)',
        },
        region: { type: 'string', description: 'Optional: Google Cloud region (uses defaults)' },
        verbose: {
          type: 'boolean',
          description: 'Optional: Return full response without filtering (default: false)',
        },
      },
      required: ['jobId'],
    },
  },
];

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dipseth/dataproc-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

job-tools.ts•8.88 KiB