Dataproc MCP Server

enhanced-prompt-templates.ts•20.8 KiB

/**
 * Enhanced Prompt Templates
 * Defines the four sophisticated prompt templates that replace static prompts
 * with dynamic, context-aware generation capabilities
 */

import { PromptTemplate } from '../types/enhanced-prompts.js';

/**
 * Template 1: analyze-dataproc-query (Knowledge-Driven)
 * Analyzes queries with knowledge base examples and optimization insights
 */
export const analyzeDataprocQueryTemplate: PromptTemplate = {
  id: 'analyze-dataproc-query',
  category: 'knowledge-driven',
  description: 'Analyze Hive/Spark queries with knowledge base examples and optimization insights',

  template: `Analyze this {{queryType || 'SQL'}} query for optimization and best practices:

**Query to Analyze:**
\`\`\`sql
{{query}}
\`\`\`

**Context:**
- Target Cluster: {{clusterName || 'any'}}
- Optimization Level: {{optimizationLevel || 'basic'}}
- Query Type: {{queryType || 'auto-detect'}}
- Project: {{projectId || 'not specified'}}
- Region: {{region || 'not specified'}}

**Similar Query Examples from Knowledge Base:**
{{qdrant_query("similar queries " + queryType, "limit:3 type:job")}}

**Performance Insights from Cluster Data:**
{{qdrant_query("query optimization " + queryType + " performance", "limit:2 type:cluster")}}

**Historical Query Patterns:**
{{qdrant_query("successful " + queryType + " patterns", "limit:2 type:job")}}

**Analysis Requirements:**
Please provide a comprehensive analysis including:

1. **Query Performance Analysis**
   - Identify potential bottlenecks and performance issues
   - Analyze query complexity and resource requirements
   - Evaluate join strategies and data access patterns

2. **Optimization Suggestions**
   - Specific recommendations based on similar successful queries
   - Indexing and partitioning strategies
   - Query rewriting opportunities for better performance

3. **Best Practices Recommendations**
   - Industry best practices for {{queryType}} queries
   - Dataproc-specific optimizations
   - Resource allocation recommendations

4. **Potential Issues and Warnings**
   - Common pitfalls and anti-patterns detected
   - Resource consumption warnings
   - Compatibility considerations

5. **Alternative Query Approaches**
   - Different strategies to achieve the same result
   - Trade-offs between approaches
   - Performance vs. complexity considerations`,

  parameters: [
    {
      name: 'query',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'SQL/HiveQL query to analyze',
    },
    {
      name: 'queryType',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Type of query (hive, spark, presto, auto)',
      validation: {
        enum: ['hive', 'spark', 'presto', 'auto'],
      },
    },
    {
      name: 'clusterName',
      type: 'string',
      required: false,
      source: 'profile',
      description: 'Target cluster name for optimization context',
    },
    {
      name: 'optimizationLevel',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Level of optimization analysis',
      validation: {
        enum: ['basic', 'advanced', 'expert'],
      },
      defaultValue: 'basic',
    },
    {
      name: 'projectId',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP project ID for context',
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP region for context',
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'similar queries {{queryType}}',
      filters: { type: 'job' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: 'query optimization {{queryType}} performance',
      filters: { type: 'cluster' },
      limit: 2,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: 'successful {{queryType}} patterns',
      filters: { type: 'job' },
      limit: 2,
      minConfidence: 0.7,
    },
  ],

  dynamicFunctions: [
    {
      name: 'qdrant_query',
      args: ['similar queries {{queryType}}', 'limit:3 type:job'],
      cacheKey: 'similar_queries_{{queryType}}',
      ttl: 1800000, // 30 minutes
    },
    {
      name: 'qdrant_query',
      args: ['query optimization {{queryType}} performance', 'limit:2 type:cluster'],
      cacheKey: 'optimization_{{queryType}}',
      ttl: 3600000, // 1 hour
    },
    {
      name: 'qdrant_query',
      args: ['successful {{queryType}} patterns', 'limit:2 type:job'],
      cacheKey: 'patterns_{{queryType}}',
      ttl: 1800000, // 30 minutes
    },
  ],
};

/**
 * Template 2: design-dataproc-cluster (Resource-Integrated)
 * Generates cluster configurations with profile integration and regional best practices
 */
export const designDataprocClusterTemplate: PromptTemplate = {
  id: 'design-dataproc-cluster',
  category: 'resource-integrated',
  description: 'Generate cluster configuration recommendations with profile integration',

  template: `Design an optimal Dataproc cluster configuration for the following requirements:

**Workload Requirements:**
- **Type**: {{workloadType}}
- **Data Size**: {{dataSize}}
- **Budget**: {{budget}}
- **Region**: {{region || 'flexible'}}
- **Additional Requirements**: {{requirements || 'none specified'}}

**Profile-Based Configuration Templates:**
{{profile_config(profileId, "cluster-templates")}}

**Similar Cluster Configurations from Knowledge Base:**
{{qdrant_query("workload " + workloadType + " " + dataSize + " cluster", "limit:3 type:cluster")}}

**Regional Best Practices and Optimizations:**
{{qdrant_query("region " + region + " cluster configuration best practices", "limit:2")}}

**Cost Optimization Patterns:**
{{qdrant_query(budget + " budget cluster optimization", "limit:2 type:cluster")}}

**Design Requirements:**
Please provide a comprehensive cluster design including:

1. **Recommended Cluster Configuration**
   - Master node specifications (machine type, disk size, count)
   - Worker node specifications (machine type, disk size, count)
   - Preemptible worker configuration if applicable
   - Network and security group settings

2. **Software Components and Versions**
   - Recommended Dataproc image version for {{workloadType}}
   - Optional components based on workload requirements
   - Initialization scripts and custom configurations

3. **Networking and Security Recommendations**
   - VPC and subnet configuration for {{region}}
   - Firewall rules and security best practices
   - Service account and IAM permissions
   - Encryption settings (at-rest and in-transit)

4. **Cost Optimization Strategies**
   - Strategies specific to {{budget}} budget constraints
   - Preemptible instance recommendations
   - Auto-scaling policies for cost efficiency
   - Resource scheduling and lifecycle management

5. **Performance Tuning Suggestions**
   - Configuration optimizations for {{dataSize}} data volumes
   - Memory and CPU allocation strategies
   - Storage optimization (SSD vs HDD, local vs persistent)
   - Network performance considerations

6. **Complete YAML Configuration File**
   - Ready-to-use cluster configuration
   - Environment-specific parameter substitution
   - Validation and deployment instructions

7. **Scaling and Monitoring Recommendations**
   - Auto-scaling policies and thresholds
   - Monitoring and alerting setup
   - Performance metrics to track
   - Maintenance and upgrade strategies`,

  parameters: [
    {
      name: 'workloadType',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Type of workload the cluster will handle',
      validation: {
        enum: ['analytics', 'ml', 'streaming', 'batch', 'mixed'],
      },
    },
    {
      name: 'dataSize',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Expected data size to process',
      validation: {
        enum: ['small', 'medium', 'large', 'xlarge'],
      },
    },
    {
      name: 'budget',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Budget constraints for the cluster',
      validation: {
        enum: ['low', 'medium', 'high', 'unlimited'],
      },
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'Target GCP region for deployment',
    },
    {
      name: 'requirements',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Additional specific requirements or constraints',
    },
    {
      name: 'profileId',
      type: 'string',
      required: false,
      source: 'profile',
      description: 'Profile ID for configuration templates',
    },
  ],

  resourceAccess: [
    {
      uri: 'dataproc://profile/{{category}}/{{profileId}}/config',
      purpose: 'cluster-templates',
      cacheKey: 'profile_{{profileId}}_templates',
      ttl: 3600000, // 1 hour
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'workload {{workloadType}} {{dataSize}} cluster',
      filters: { type: 'cluster' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: 'region {{region}} cluster configuration best practices',
      limit: 2,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: '{{budget}} budget cluster optimization',
      filters: { type: 'cluster' },
      limit: 2,
      minConfidence: 0.6,
    },
  ],

  dynamicFunctions: [
    {
      name: 'profile_config',
      args: ['{{profileId}}', 'cluster-templates'],
      cacheKey: 'profile_config_{{profileId}}',
      ttl: 3600000, // 1 hour
    },
    {
      name: 'qdrant_query',
      args: ['workload {{workloadType}} {{dataSize}} cluster', 'limit:3 type:cluster'],
      cacheKey: 'workload_{{workloadType}}_{{dataSize}}',
      ttl: 1800000, // 30 minutes
    },
  ],
};

/**
 * Template 3: troubleshoot-dataproc-issue (Fully-Integrated)
 * Comprehensive troubleshooting with all system integrations
 */
export const troubleshootDataprocIssueTemplate: PromptTemplate = {
  id: 'troubleshoot-dataproc-issue',
  category: 'fully-integrated',
  description: 'Comprehensive troubleshooting with semantic error matching and job analysis',

  template: `Troubleshoot this Dataproc issue with comprehensive analysis:

**Issue Details:**
- **Type**: {{issueType}}
- **Error Message**: {{errorMessage || 'not provided'}}
- **Job ID**: {{jobId || 'not applicable'}}
- **Cluster**: {{clusterName || 'not specified'}}
- **Timeline**: {{timeline || 'not specified'}}
- **Context**: {{context || 'not provided'}}
- **Project**: {{projectId || 'not specified'}}
- **Region**: {{region || 'not specified'}}

**Job Output and Error Analysis:**
{{job_output(jobId, "error-analysis")}}

**Similar Error Patterns from Knowledge Base:**
{{qdrant_query("error " + issueType + " " + errorMessage, "limit:5 type:error")}}

**Cluster Configuration Analysis:**
{{qdrant_query("cluster " + clusterName + " configuration issues", "limit:3 type:cluster")}}

**Historical Resolution Patterns:**
{{qdrant_query("resolved " + issueType + " troubleshooting solutions", "limit:3")}}

**Related Job Failures:**
{{qdrant_query("job failure " + issueType + " " + clusterName, "limit:2 type:job")}}

**Troubleshooting Analysis:**
Please provide a systematic troubleshooting approach:

1. **Root Cause Analysis**
   - Primary cause identification based on error patterns
   - Secondary contributing factors
   - System state analysis at time of failure
   - Resource utilization assessment

2. **Step-by-Step Troubleshooting Guide**
   - Immediate diagnostic steps to take
   - Data collection and log analysis procedures
   - Progressive isolation techniques
   - Verification checkpoints

3. **Diagnostic Commands and Tools**
   - Specific gcloud commands for investigation
   - Log queries and monitoring dashboards
   - Performance analysis tools
   - Network and connectivity tests

4. **Resolution Strategies**
   - Primary resolution approach based on similar cases
   - Alternative solutions if primary approach fails
   - Rollback procedures if needed
   - Testing and validation steps

5. **Prevention Recommendations**
   - Configuration changes to prevent recurrence
   - Monitoring and alerting improvements
   - Best practices implementation
   - Proactive maintenance procedures

6. **Escalation Path**
   - When to involve Google Cloud Support
   - Information to gather before escalation
   - Internal team escalation procedures
   - Emergency response protocols`,

  parameters: [
    {
      name: 'issueType',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Type of issue being experienced',
      validation: {
        enum: ['job-failure', 'cluster-startup', 'performance', 'connectivity', 'other'],
      },
    },
    {
      name: 'errorMessage',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Specific error message or symptoms',
    },
    {
      name: 'jobId',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Job ID if the issue is related to a specific job',
    },
    {
      name: 'clusterName',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Name of the affected cluster',
    },
    {
      name: 'timeline',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'When the issue started or was first noticed',
    },
    {
      name: 'context',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Additional context about what was being attempted',
    },
    {
      name: 'projectId',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP project ID for context',
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP region for context',
    },
  ],

  dynamicFunctions: [
    {
      name: 'job_output',
      args: ['{{jobId}}', 'error-analysis'],
      cacheKey: 'job_output_{{jobId}}',
      ttl: 600000, // 10 minutes
    },
    {
      name: 'qdrant_query',
      args: ['error {{issueType}} {{errorMessage}}', 'limit:5 type:error'],
      cacheKey: 'error_{{issueType}}_patterns',
      ttl: 1800000, // 30 minutes
    },
    {
      name: 'qdrant_query',
      args: ['cluster {{clusterName}} configuration issues', 'limit:3 type:cluster'],
      cacheKey: 'cluster_{{clusterName}}_issues',
      ttl: 900000, // 15 minutes
    },
    {
      name: 'qdrant_query',
      args: ['resolved {{issueType}} troubleshooting solutions', 'limit:3'],
      cacheKey: 'resolved_{{issueType}}',
      ttl: 3600000, // 1 hour
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'error {{issueType}} {{errorMessage}}',
      filters: { type: 'error' },
      limit: 5,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: 'cluster {{clusterName}} configuration issues',
      filters: { type: 'cluster' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: 'resolved {{issueType}} troubleshooting solutions',
      limit: 3,
      minConfidence: 0.7,
    },
  ],
};

/**
 * Template 4: generate-dataproc-query (Template-Enhanced)
 * Generates optimized queries with template-driven optimization
 */
export const generateDataprocQueryTemplate: PromptTemplate = {
  id: 'generate-dataproc-query',
  category: 'template-enhanced',
  description: 'Generate optimized queries with template-driven optimization and examples',

  template: `Generate an optimized {{dialect}} query for the following requirements:

**Requirements:**
- **Purpose**: {{queryPurpose}}
- **Tables**: {{tableNames || 'to be determined'}}
- **Query Type**: {{queryType}}
- **Dialect**: {{dialect}}
- **Performance Level**: {{performanceLevel}}
- **Target Cluster**: {{clusterName || 'any'}}
- **Project**: {{projectId || 'not specified'}}
- **Region**: {{region || 'not specified'}}

**Cluster-Specific Optimization Hints:**
{{cluster_status(clusterName, "optimization-hints")}}

**Query Pattern Examples from Knowledge Base:**
{{qdrant_query("successful " + queryType + " " + dialect + " queries", "limit:3 type:job")}}

**Performance Best Practices:**
{{qdrant_query(dialect + " " + performanceLevel + " optimization patterns", "limit:2")}}

**Similar Query Implementations:**
{{qdrant_query("query " + queryPurpose + " " + dialect, "limit:2 type:job")}}

**Generation Requirements:**
Please provide a complete query solution including:

1. **Complete, Runnable Query**
   - Fully functional {{dialect}} query that addresses the requirements
   - Proper syntax and formatting for {{dialect}}
   - Comments explaining key sections and logic
   - Parameter placeholders where appropriate

2. **Query Logic and Approach Explanation**
   - Step-by-step breakdown of the query logic
   - Reasoning behind chosen approach
   - Data flow and transformation explanation
   - Join strategies and filtering logic

3. **Performance Optimizations**
   - Specific optimizations for {{performanceLevel}} performance level
   - Indexing recommendations for better performance
   - Partitioning strategies if applicable
   - Memory and resource optimization techniques

4. **Alternative Approaches**
   - Different strategies to achieve the same result
   - Trade-offs between different approaches
   - When to use each alternative approach
   - Performance implications of each option

5. **Execution Considerations**
   - Memory requirements and allocation strategies
   - Parallelism and concurrency considerations
   - Resource usage patterns and optimization
   - Estimated execution time and resource consumption

6. **Monitoring and Validation Recommendations**
   - Key metrics to monitor during execution
   - Performance benchmarks and thresholds
   - Data quality validation checks
   - Error handling and recovery strategies`,

  parameters: [
    {
      name: 'queryPurpose',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'What the query is intended to accomplish',
    },
    {
      name: 'tableNames',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Comma-separated list of table names to work with',
    },
    {
      name: 'queryType',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Type of query operation',
      validation: {
        enum: ['select', 'insert', 'update', 'create', 'analyze'],
      },
    },
    {
      name: 'dialect',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'SQL dialect to use',
      validation: {
        enum: ['hive', 'spark-sql', 'presto'],
      },
    },
    {
      name: 'performanceLevel',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Target performance level',
      validation: {
        enum: ['standard', 'optimized', 'high-performance'],
      },
    },
    {
      name: 'clusterName',
      type: 'string',
      required: false,
      source: 'profile',
      description: 'Target cluster for optimization context',
    },
    {
      name: 'projectId',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP project ID for context',
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP region for context',
    },
  ],

  templateResolution: {
    enableParameterInjection: true,
    enableDynamicFunctions: true,
    cacheResults: true,
    timeoutMs: 15000,
  },

  dynamicFunctions: [
    {
      name: 'cluster_status',
      args: ['{{clusterName}}', 'optimization-hints'],
      cacheKey: 'cluster_status_{{clusterName}}',
      ttl: 900000, // 15 minutes
    },
    {
      name: 'qdrant_query',
      args: ['successful {{queryType}} {{dialect}} queries', 'limit:3 type:job'],
      cacheKey: 'successful_{{queryType}}_{{dialect}}',
      ttl: 1800000, // 30 minutes
    },
    {
      name: 'qdrant_query',
      args: ['{{dialect}} {{performanceLevel}} optimization patterns', 'limit:2'],
      cacheKey: 'optimization_{{dialect}}_{{performanceLevel}}',
      ttl: 3600000, // 1 hour
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'successful {{queryType}} {{dialect}} queries',
      filters: { type: 'job' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: '{{dialect}} {{performanceLevel}} optimization patterns',
      limit: 2,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: 'query {{queryPurpose}} {{dialect}}',
      filters: { type: 'job' },
      limit: 2,
      minConfidence: 0.6,
    },
  ],
};

/**
 * All enhanced prompt templates
 */
export const ENHANCED_PROMPT_TEMPLATES: PromptTemplate[] = [
  analyzeDataprocQueryTemplate,
  designDataprocClusterTemplate,
  troubleshootDataprocIssueTemplate,
  generateDataprocQueryTemplate,
];

/**
 * Template registry for easy access
 */
export const TEMPLATE_REGISTRY = new Map<string, PromptTemplate>(
  ENHANCED_PROMPT_TEMPLATES.map((template) => [template.id, template])
);

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dipseth/dataproc-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

enhanced-prompt-templates.ts•20.8 KiB

/**
 * Enhanced Prompt Templates
 * Defines the four sophisticated prompt templates that replace static prompts
 * with dynamic, context-aware generation capabilities
 */

import { PromptTemplate } from '../types/enhanced-prompts.js';

/**
 * Template 1: analyze-dataproc-query (Knowledge-Driven)
 * Analyzes queries with knowledge base examples and optimization insights
 */
export const analyzeDataprocQueryTemplate: PromptTemplate = {
  id: 'analyze-dataproc-query',
  category: 'knowledge-driven',
  description: 'Analyze Hive/Spark queries with knowledge base examples and optimization insights',

  template: `Analyze this {{queryType || 'SQL'}} query for optimization and best practices:

**Query to Analyze:**
\`\`\`sql
{{query}}
\`\`\`

**Context:**
- Target Cluster: {{clusterName || 'any'}}
- Optimization Level: {{optimizationLevel || 'basic'}}
- Query Type: {{queryType || 'auto-detect'}}
- Project: {{projectId || 'not specified'}}
- Region: {{region || 'not specified'}}

**Similar Query Examples from Knowledge Base:**
{{qdrant_query("similar queries " + queryType, "limit:3 type:job")}}

**Performance Insights from Cluster Data:**
{{qdrant_query("query optimization " + queryType + " performance", "limit:2 type:cluster")}}

**Historical Query Patterns:**
{{qdrant_query("successful " + queryType + " patterns", "limit:2 type:job")}}

**Analysis Requirements:**
Please provide a comprehensive analysis including:

1. **Query Performance Analysis**
   - Identify potential bottlenecks and performance issues
   - Analyze query complexity and resource requirements
   - Evaluate join strategies and data access patterns

2. **Optimization Suggestions**
   - Specific recommendations based on similar successful queries
   - Indexing and partitioning strategies
   - Query rewriting opportunities for better performance

3. **Best Practices Recommendations**
   - Industry best practices for {{queryType}} queries
   - Dataproc-specific optimizations
   - Resource allocation recommendations

4. **Potential Issues and Warnings**
   - Common pitfalls and anti-patterns detected
   - Resource consumption warnings
   - Compatibility considerations

5. **Alternative Query Approaches**
   - Different strategies to achieve the same result
   - Trade-offs between approaches
   - Performance vs. complexity considerations`,

  parameters: [
    {
      name: 'query',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'SQL/HiveQL query to analyze',
    },
    {
      name: 'queryType',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Type of query (hive, spark, presto, auto)',
      validation: {
        enum: ['hive', 'spark', 'presto', 'auto'],
      },
    },
    {
      name: 'clusterName',
      type: 'string',
      required: false,
      source: 'profile',
      description: 'Target cluster name for optimization context',
    },
    {
      name: 'optimizationLevel',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Level of optimization analysis',
      validation: {
        enum: ['basic', 'advanced', 'expert'],
      },
      defaultValue: 'basic',
    },
    {
      name: 'projectId',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP project ID for context',
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP region for context',
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'similar queries {{queryType}}',
      filters: { type: 'job' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: 'query optimization {{queryType}} performance',
      filters: { type: 'cluster' },
      limit: 2,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: 'successful {{queryType}} patterns',
      filters: { type: 'job' },
      limit: 2,
      minConfidence: 0.7,
    },
  ],

  dynamicFunctions: [
    {
      name: 'qdrant_query',
      args: ['similar queries {{queryType}}', 'limit:3 type:job'],
      cacheKey: 'similar_queries_{{queryType}}',
      ttl: 1800000, // 30 minutes
    },
    {
      name: 'qdrant_query',
      args: ['query optimization {{queryType}} performance', 'limit:2 type:cluster'],
      cacheKey: 'optimization_{{queryType}}',
      ttl: 3600000, // 1 hour
    },
    {
      name: 'qdrant_query',
      args: ['successful {{queryType}} patterns', 'limit:2 type:job'],
      cacheKey: 'patterns_{{queryType}}',
      ttl: 1800000, // 30 minutes
    },
  ],
};

/**
 * Template 2: design-dataproc-cluster (Resource-Integrated)
 * Generates cluster configurations with profile integration and regional best practices
 */
export const designDataprocClusterTemplate: PromptTemplate = {
  id: 'design-dataproc-cluster',
  category: 'resource-integrated',
  description: 'Generate cluster configuration recommendations with profile integration',

  template: `Design an optimal Dataproc cluster configuration for the following requirements:

**Workload Requirements:**
- **Type**: {{workloadType}}
- **Data Size**: {{dataSize}}
- **Budget**: {{budget}}
- **Region**: {{region || 'flexible'}}
- **Additional Requirements**: {{requirements || 'none specified'}}

**Profile-Based Configuration Templates:**
{{profile_config(profileId, "cluster-templates")}}

**Similar Cluster Configurations from Knowledge Base:**
{{qdrant_query("workload " + workloadType + " " + dataSize + " cluster", "limit:3 type:cluster")}}

**Regional Best Practices and Optimizations:**
{{qdrant_query("region " + region + " cluster configuration best practices", "limit:2")}}

**Cost Optimization Patterns:**
{{qdrant_query(budget + " budget cluster optimization", "limit:2 type:cluster")}}

**Design Requirements:**
Please provide a comprehensive cluster design including:

1. **Recommended Cluster Configuration**
   - Master node specifications (machine type, disk size, count)
   - Worker node specifications (machine type, disk size, count)
   - Preemptible worker configuration if applicable
   - Network and security group settings

2. **Software Components and Versions**
   - Recommended Dataproc image version for {{workloadType}}
   - Optional components based on workload requirements
   - Initialization scripts and custom configurations

3. **Networking and Security Recommendations**
   - VPC and subnet configuration for {{region}}
   - Firewall rules and security best practices
   - Service account and IAM permissions
   - Encryption settings (at-rest and in-transit)

4. **Cost Optimization Strategies**
   - Strategies specific to {{budget}} budget constraints
   - Preemptible instance recommendations
   - Auto-scaling policies for cost efficiency
   - Resource scheduling and lifecycle management

5. **Performance Tuning Suggestions**
   - Configuration optimizations for {{dataSize}} data volumes
   - Memory and CPU allocation strategies
   - Storage optimization (SSD vs HDD, local vs persistent)
   - Network performance considerations

6. **Complete YAML Configuration File**
   - Ready-to-use cluster configuration
   - Environment-specific parameter substitution
   - Validation and deployment instructions

7. **Scaling and Monitoring Recommendations**
   - Auto-scaling policies and thresholds
   - Monitoring and alerting setup
   - Performance metrics to track
   - Maintenance and upgrade strategies`,

  parameters: [
    {
      name: 'workloadType',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Type of workload the cluster will handle',
      validation: {
        enum: ['analytics', 'ml', 'streaming', 'batch', 'mixed'],
      },
    },
    {
      name: 'dataSize',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Expected data size to process',
      validation: {
        enum: ['small', 'medium', 'large', 'xlarge'],
      },
    },
    {
      name: 'budget',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Budget constraints for the cluster',
      validation: {
        enum: ['low', 'medium', 'high', 'unlimited'],
      },
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'Target GCP region for deployment',
    },
    {
      name: 'requirements',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Additional specific requirements or constraints',
    },
    {
      name: 'profileId',
      type: 'string',
      required: false,
      source: 'profile',
      description: 'Profile ID for configuration templates',
    },
  ],

  resourceAccess: [
    {
      uri: 'dataproc://profile/{{category}}/{{profileId}}/config',
      purpose: 'cluster-templates',
      cacheKey: 'profile_{{profileId}}_templates',
      ttl: 3600000, // 1 hour
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'workload {{workloadType}} {{dataSize}} cluster',
      filters: { type: 'cluster' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: 'region {{region}} cluster configuration best practices',
      limit: 2,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: '{{budget}} budget cluster optimization',
      filters: { type: 'cluster' },
      limit: 2,
      minConfidence: 0.6,
    },
  ],

  dynamicFunctions: [
    {
      name: 'profile_config',
      args: ['{{profileId}}', 'cluster-templates'],
      cacheKey: 'profile_config_{{profileId}}',
      ttl: 3600000, // 1 hour
    },
    {
      name: 'qdrant_query',
      args: ['workload {{workloadType}} {{dataSize}} cluster', 'limit:3 type:cluster'],
      cacheKey: 'workload_{{workloadType}}_{{dataSize}}',
      ttl: 1800000, // 30 minutes
    },
  ],
};

/**
 * Template 3: troubleshoot-dataproc-issue (Fully-Integrated)
 * Comprehensive troubleshooting with all system integrations
 */
export const troubleshootDataprocIssueTemplate: PromptTemplate = {
  id: 'troubleshoot-dataproc-issue',
  category: 'fully-integrated',
  description: 'Comprehensive troubleshooting with semantic error matching and job analysis',

  template: `Troubleshoot this Dataproc issue with comprehensive analysis:

**Issue Details:**
- **Type**: {{issueType}}
- **Error Message**: {{errorMessage || 'not provided'}}
- **Job ID**: {{jobId || 'not applicable'}}
- **Cluster**: {{clusterName || 'not specified'}}
- **Timeline**: {{timeline || 'not specified'}}
- **Context**: {{context || 'not provided'}}
- **Project**: {{projectId || 'not specified'}}
- **Region**: {{region || 'not specified'}}

**Job Output and Error Analysis:**
{{job_output(jobId, "error-analysis")}}

**Similar Error Patterns from Knowledge Base:**
{{qdrant_query("error " + issueType + " " + errorMessage, "limit:5 type:error")}}

**Cluster Configuration Analysis:**
{{qdrant_query("cluster " + clusterName + " configuration issues", "limit:3 type:cluster")}}

**Historical Resolution Patterns:**
{{qdrant_query("resolved " + issueType + " troubleshooting solutions", "limit:3")}}

**Related Job Failures:**
{{qdrant_query("job failure " + issueType + " " + clusterName, "limit:2 type:job")}}

**Troubleshooting Analysis:**
Please provide a systematic troubleshooting approach:

1. **Root Cause Analysis**
   - Primary cause identification based on error patterns
   - Secondary contributing factors
   - System state analysis at time of failure
   - Resource utilization assessment

2. **Step-by-Step Troubleshooting Guide**
   - Immediate diagnostic steps to take
   - Data collection and log analysis procedures
   - Progressive isolation techniques
   - Verification checkpoints

3. **Diagnostic Commands and Tools**
   - Specific gcloud commands for investigation
   - Log queries and monitoring dashboards
   - Performance analysis tools
   - Network and connectivity tests

4. **Resolution Strategies**
   - Primary resolution approach based on similar cases
   - Alternative solutions if primary approach fails
   - Rollback procedures if needed
   - Testing and validation steps

5. **Prevention Recommendations**
   - Configuration changes to prevent recurrence
   - Monitoring and alerting improvements
   - Best practices implementation
   - Proactive maintenance procedures

6. **Escalation Path**
   - When to involve Google Cloud Support
   - Information to gather before escalation
   - Internal team escalation procedures
   - Emergency response protocols`,

  parameters: [
    {
      name: 'issueType',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Type of issue being experienced',
      validation: {
        enum: ['job-failure', 'cluster-startup', 'performance', 'connectivity', 'other'],
      },
    },
    {
      name: 'errorMessage',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Specific error message or symptoms',
    },
    {
      name: 'jobId',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Job ID if the issue is related to a specific job',
    },
    {
      name: 'clusterName',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Name of the affected cluster',
    },
    {
      name: 'timeline',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'When the issue started or was first noticed',
    },
    {
      name: 'context',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Additional context about what was being attempted',
    },
    {
      name: 'projectId',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP project ID for context',
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP region for context',
    },
  ],

  dynamicFunctions: [
    {
      name: 'job_output',
      args: ['{{jobId}}', 'error-analysis'],
      cacheKey: 'job_output_{{jobId}}',
      ttl: 600000, // 10 minutes
    },
    {
      name: 'qdrant_query',
      args: ['error {{issueType}} {{errorMessage}}', 'limit:5 type:error'],
      cacheKey: 'error_{{issueType}}_patterns',
      ttl: 1800000, // 30 minutes
    },
    {
      name: 'qdrant_query',
      args: ['cluster {{clusterName}} configuration issues', 'limit:3 type:cluster'],
      cacheKey: 'cluster_{{clusterName}}_issues',
      ttl: 900000, // 15 minutes
    },
    {
      name: 'qdrant_query',
      args: ['resolved {{issueType}} troubleshooting solutions', 'limit:3'],
      cacheKey: 'resolved_{{issueType}}',
      ttl: 3600000, // 1 hour
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'error {{issueType}} {{errorMessage}}',
      filters: { type: 'error' },
      limit: 5,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: 'cluster {{clusterName}} configuration issues',
      filters: { type: 'cluster' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: 'resolved {{issueType}} troubleshooting solutions',
      limit: 3,
      minConfidence: 0.7,
    },
  ],
};

/**
 * Template 4: generate-dataproc-query (Template-Enhanced)
 * Generates optimized queries with template-driven optimization
 */
export const generateDataprocQueryTemplate: PromptTemplate = {
  id: 'generate-dataproc-query',
  category: 'template-enhanced',
  description: 'Generate optimized queries with template-driven optimization and examples',

  template: `Generate an optimized {{dialect}} query for the following requirements:

**Requirements:**
- **Purpose**: {{queryPurpose}}
- **Tables**: {{tableNames || 'to be determined'}}
- **Query Type**: {{queryType}}
- **Dialect**: {{dialect}}
- **Performance Level**: {{performanceLevel}}
- **Target Cluster**: {{clusterName || 'any'}}
- **Project**: {{projectId || 'not specified'}}
- **Region**: {{region || 'not specified'}}

**Cluster-Specific Optimization Hints:**
{{cluster_status(clusterName, "optimization-hints")}}

**Query Pattern Examples from Knowledge Base:**
{{qdrant_query("successful " + queryType + " " + dialect + " queries", "limit:3 type:job")}}

**Performance Best Practices:**
{{qdrant_query(dialect + " " + performanceLevel + " optimization patterns", "limit:2")}}

**Similar Query Implementations:**
{{qdrant_query("query " + queryPurpose + " " + dialect, "limit:2 type:job")}}

**Generation Requirements:**
Please provide a complete query solution including:

1. **Complete, Runnable Query**
   - Fully functional {{dialect}} query that addresses the requirements
   - Proper syntax and formatting for {{dialect}}
   - Comments explaining key sections and logic
   - Parameter placeholders where appropriate

2. **Query Logic and Approach Explanation**
   - Step-by-step breakdown of the query logic
   - Reasoning behind chosen approach
   - Data flow and transformation explanation
   - Join strategies and filtering logic

3. **Performance Optimizations**
   - Specific optimizations for {{performanceLevel}} performance level
   - Indexing recommendations for better performance
   - Partitioning strategies if applicable
   - Memory and resource optimization techniques

4. **Alternative Approaches**
   - Different strategies to achieve the same result
   - Trade-offs between different approaches
   - When to use each alternative approach
   - Performance implications of each option

5. **Execution Considerations**
   - Memory requirements and allocation strategies
   - Parallelism and concurrency considerations
   - Resource usage patterns and optimization
   - Estimated execution time and resource consumption

6. **Monitoring and Validation Recommendations**
   - Key metrics to monitor during execution
   - Performance benchmarks and thresholds
   - Data quality validation checks
   - Error handling and recovery strategies`,

  parameters: [
    {
      name: 'queryPurpose',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'What the query is intended to accomplish',
    },
    {
      name: 'tableNames',
      type: 'string',
      required: false,
      source: 'tool',
      description: 'Comma-separated list of table names to work with',
    },
    {
      name: 'queryType',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Type of query operation',
      validation: {
        enum: ['select', 'insert', 'update', 'create', 'analyze'],
      },
    },
    {
      name: 'dialect',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'SQL dialect to use',
      validation: {
        enum: ['hive', 'spark-sql', 'presto'],
      },
    },
    {
      name: 'performanceLevel',
      type: 'string',
      required: true,
      source: 'tool',
      description: 'Target performance level',
      validation: {
        enum: ['standard', 'optimized', 'high-performance'],
      },
    },
    {
      name: 'clusterName',
      type: 'string',
      required: false,
      source: 'profile',
      description: 'Target cluster for optimization context',
    },
    {
      name: 'projectId',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP project ID for context',
    },
    {
      name: 'region',
      type: 'string',
      required: false,
      source: 'gcp',
      description: 'GCP region for context',
    },
  ],

  templateResolution: {
    enableParameterInjection: true,
    enableDynamicFunctions: true,
    cacheResults: true,
    timeoutMs: 15000,
  },

  dynamicFunctions: [
    {
      name: 'cluster_status',
      args: ['{{clusterName}}', 'optimization-hints'],
      cacheKey: 'cluster_status_{{clusterName}}',
      ttl: 900000, // 15 minutes
    },
    {
      name: 'qdrant_query',
      args: ['successful {{queryType}} {{dialect}} queries', 'limit:3 type:job'],
      cacheKey: 'successful_{{queryType}}_{{dialect}}',
      ttl: 1800000, // 30 minutes
    },
    {
      name: 'qdrant_query',
      args: ['{{dialect}} {{performanceLevel}} optimization patterns', 'limit:2'],
      cacheKey: 'optimization_{{dialect}}_{{performanceLevel}}',
      ttl: 3600000, // 1 hour
    },
  ],

  knowledgeQueries: [
    {
      type: 'semantic',
      query: 'successful {{queryType}} {{dialect}} queries',
      filters: { type: 'job' },
      limit: 3,
      minConfidence: 0.7,
    },
    {
      type: 'semantic',
      query: '{{dialect}} {{performanceLevel}} optimization patterns',
      limit: 2,
      minConfidence: 0.6,
    },
    {
      type: 'semantic',
      query: 'query {{queryPurpose}} {{dialect}}',
      filters: { type: 'job' },
      limit: 2,
      minConfidence: 0.6,
    },
  ],
};

/**
 * All enhanced prompt templates
 */
export const ENHANCED_PROMPT_TEMPLATES: PromptTemplate[] = [
  analyzeDataprocQueryTemplate,
  designDataprocClusterTemplate,
  troubleshootDataprocIssueTemplate,
  generateDataprocQueryTemplate,
];

/**
 * Template registry for easy access
 */
export const TEMPLATE_REGISTRY = new Map<string, PromptTemplate>(
  ENHANCED_PROMPT_TEMPLATES.map((template) => [template.id, template])
);