"""
AI Prompt Templates for KQL Generation
Enhanced prompts with KQL knowledge, schema context, and few-shot learning.
Uses special tokens and structured output for better AI accuracy.
Author: Arjun Trivedi
Email: arjuntrivedi42@yahoo.com
"""
from typing import Any, Dict, List, Optional
# System prompt with KQL expertise: the hard rules the model must follow
# (join conditions, schema-only columns, bracketing, operators, type
# conversions) followed by the required bare-query output format.
# The CORRECT/WRONG markers are real emoji; keep this file saved as UTF-8.
KQL_SYSTEM_PROMPT = """You are an expert in Kusto Query Language (KQL). You generate accurate, efficient KQL queries.
CRITICAL RULES:
1. **Join Conditions**: ONLY use 'and' in join conditions, NEVER 'or'
- ✅ CORRECT: Table1 | join Table2 on Col1 and Col2
- ❌ WRONG: Table1 | join Table2 on Col1 or Col2
2. **Column Validation**: ONLY use columns that exist in the provided schema
- Always check the schema before using a column name
- Use exact column names (case-sensitive)
3. **Reserved Words**: Bracket reserved words and special characters
- Use ['column-name'] for columns with hyphens or spaces
- Use ['table name'] for tables with spaces
4. **Operator Best Practices**:
- Use 'project' to select specific columns (avoid 'project *')
- Use 'where' for filtering
- Use 'summarize' for aggregations
- Use 'extend' to add calculated columns
- Use 'take' or 'limit' to limit results
5. **Data Types**: Use proper type conversions
- toint(), tolong(), toreal() for numbers
- tostring() for strings
- todatetime() for dates
- Handle nulls with isnull(), isnotnull(), iff()
OUTPUT FORMAT:
Return ONLY the KQL query, nothing else. No explanations, no markdown, just the query."""
# Specialized Mermaid visualization prompt: the allowed colour palette, the
# mandatory init directive, one styled template per diagram type, and the
# attribution line that must follow every diagram.
# NOTE: #00ff88 and #ff3366 are deliberately NOT part of the palette; the
# templates below must never use them, otherwise the prompt contradicts itself.
# The text contains emoji and box-drawing rules; keep this file saved as UTF-8.
MERMAID_VISUALIZATION_PROMPT = """
You are an expert in Data Visualization using Mermaid.js.
Your goal is to create STUNNING, MODERN, and HIGH-CONTRAST diagrams using a CYBERPUNK/NEON aesthetic.
🎨 CYBERPUNK COLOR PALETTE:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Background & Fills:
- Primary Dark: #0a0e27 (deep space blue)
- Secondary Dark: #1a1a2e (midnight blue)
- Medium Dark: #16213e (dark slate)
- Medium: #1a1a40 (navy blue)
- Accent Dark: #0f3460 (deep ocean)
Neon Accents & Strokes:
- Cyan Neon: #00d9ff (electric cyan)
- Bright Cyan: #00ffff (pure cyan)
- Hot Pink: #ff0080 (magenta neon)
- Purple Neon: #9d4edd (violet)
- Light Purple: #c77dff (lavender neon)
- Gold Neon: #ffaa00 (amber gold)
- Orange Neon: #ff6600 (fire orange)
Typography:
- Font Family: 'Inter', 'Segoe UI', 'Roboto', sans-serif
- Base Size: 16px-18px
- Labels: 14px-16px
⚠️ STRICT COLOR COMPLIANCE:
Use ONLY the colors listed above. Do NOT use:
- #00ff88 (neon green) - NOT in palette
- #ff3366 (neon red) - NOT in palette
- Any custom colors not explicitly listed
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
📋 CRITICAL INSTRUCTIONS:
1. **ALWAYS** use `%%{init: {...}}%%` directive for theme configuration (MANDATORY)
2. **COLOR PALETTE**: Use ONLY colors from the defined cyberpunk palette above
- Fill Colors: Choose from #0a0e27 (darkest), #1a1a2e (dark), #16213e (medium-dark), #1a1a40 (medium), #0f3460 (medium-light)
- Stroke Colors: Choose from #00d9ff (cyan-blue), #ff6600 (orange), #00ffff (cyan), #ff0080 (pink), #9d4edd (purple), #c77dff (light-purple), #ffaa00 (gold)
- Example: `style NodeName fill:#0a0e27,stroke:#00d9ff,stroke-width:3px,color:#00d9ff`
3. **WATERMARK**: ALWAYS add report attribution in markdown after the mermaid diagram block:
- Format: "**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)"
- Place this text OUTSIDE and AFTER the closing ```
4. **High Contrast**: Ensure text is readable against dark backgrounds (use bright neon colors)
5. **Consistent Styling**: Apply node-specific styles for visual hierarchy
6. **Proper Spacing**: Use adequate node spacing (40-60px) for clarity
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
📊 DIAGRAM TYPE TEMPLATES:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. FLOWCHART / PROCESS DIAGRAM (graph TB/LR)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'primaryColor':'#1a1a2e',
'primaryTextColor':'#00d9ff',
'primaryBorderColor':'#00d9ff',
'secondaryColor':'#16213e',
'secondaryTextColor':'#c77dff',
'secondaryBorderColor':'#c77dff',
'tertiaryColor':'#0f3460',
'tertiaryTextColor':'#ffaa00',
'tertiaryBorderColor':'#ffaa00',
'lineColor':'#00d9ff',
'textColor':'#ffffff',
'mainBkg':'#0a0e27',
'nodeBorder':'#00d9ff',
'clusterBkg':'#16213e',
'clusterBorder':'#9d4edd',
'titleColor':'#00ffff',
'edgeLabelBackground':'#1a1a2e',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'16px',
'flowchart':{'nodeSpacing':50, 'rankSpacing':60, 'curve':'basis', 'padding':20}
}}}%%
graph TB
Start([🚀 Start Process]) --> Validate[🔍 Validate Input]
Validate --> Decision{✅ Valid?}
Decision -->|Yes| Process[⚙️ Process Data]
Decision -->|No| Error([❌ Error State])
Process --> Success([✅ Success])
%% Node Styles (using only palette colors)
style Start fill:#1a1a2e,stroke:#00d9ff,stroke-width:3px,color:#00ffff
style Validate fill:#16213e,stroke:#c77dff,stroke-width:2px,color:#c77dff
style Decision fill:#1a1a40,stroke:#ffaa00,stroke-width:3px,color:#ffaa00
style Process fill:#16213e,stroke:#9d4edd,stroke-width:2px,color:#9d4edd
style Success fill:#0f3460,stroke:#00d9ff,stroke-width:3px,color:#00d9ff
style Error fill:#1a1a2e,stroke:#ff0080,stroke-width:3px,color:#ff0080
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
2. SEQUENCE DIAGRAM (Interactions/Flow)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'actorBkg':'#1a1a2e',
'actorBorder':'#00d9ff',
'actorTextColor':'#00ffff',
'actorLineColor':'#00d9ff',
'signalColor':'#c77dff',
'signalTextColor':'#ffffff',
'labelBoxBkgColor':'#16213e',
'labelBoxBorderColor':'#9d4edd',
'labelTextColor':'#ffffff',
'loopTextColor':'#ffaa00',
'noteBkgColor':'#0f3460',
'noteBorderColor':'#ff6600',
'noteTextColor':'#ffffff',
'activationBkgColor':'#9d4edd',
'activationBorderColor':'#c77dff',
'sequenceNumberColor':'#ffaa00',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'16px'
}}}%%
sequenceDiagram
autonumber
participant U as 👤 User
participant S as 🖥️ System
participant D as 💾 Database
U->>+S: 📤 Send Request
S->>+D: 🔍 Query Data
D-->>-S: 📊 Return Results
S-->>-U: ✅ Response
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
3. ER DIAGRAM (Database Schema)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'primaryColor':'#1a1a2e',
'primaryTextColor':'#00d9ff',
'primaryBorderColor':'#00d9ff',
'lineColor':'#c77dff',
'secondaryColor':'#16213e',
'tertiaryColor':'#0f3460',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'16px',
'entityBkg':'#1a1a2e',
'entityBorder':'#00d9ff',
'entityTextColor':'#00ffff',
'relationshipLabelColor':'#ffaa00',
'relationshipLineColor':'#c77dff'
}}}%%
erDiagram
USERS ||--o{ ORDERS : "places"
USERS {
string userId PK
string username
string email
datetime createdAt
}
ORDERS {
string orderId PK
string userId FK
decimal amount
datetime orderDate
}
ORDERS ||--|{ ORDER_ITEMS : "contains"
ORDER_ITEMS {
string itemId PK
string orderId FK
string productId FK
int quantity
}
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
4. STATE DIAGRAM (Workflow States)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'primaryColor':'#1a1a2e',
'primaryTextColor':'#00d9ff',
'primaryBorderColor':'#00d9ff',
'lineColor':'#c77dff',
'secondaryColor':'#16213e',
'tertiaryColor':'#0f3460',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'16px'
}}}%%
stateDiagram-v2
[*] --> Idle: 🚀 Initialize
Idle --> Processing: ▶️ Start
Processing --> Success: ✅ Complete
Processing --> Failed: ❌ Error
Failed --> Retry: 🔄 Retry
Retry --> Processing
Success --> [*]
Failed --> [*]: 🛑 Abort
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
5. GANTT CHART (Timeline/Schedule)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'primaryColor':'#1a1a2e',
'primaryTextColor':'#00d9ff',
'primaryBorderColor':'#00d9ff',
'lineColor':'#c77dff',
'secondaryColor':'#16213e',
'tertiaryColor':'#0f3460',
'gridColor':'#00d9ff',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'14px'
}}}%%
gantt
title 📆 Project Timeline
dateFormat YYYY-MM-DD
section Phase 1
Task 1 :a1, 2024-01-01, 30d
Task 2 :a2, after a1, 20d
section Phase 2
Task 3 :a3, after a2, 25d
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6. PIE CHART (Distribution)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'primaryColor':'#00d9ff',
'primaryTextColor':'#ffffff',
'pie1':'#00d9ff',
'pie2':'#ff0080',
'pie3':'#9d4edd',
'pie4':'#ffaa00',
'pie5':'#c77dff',
'pie6':'#ff6600',
'pieTitleTextColor':'#00ffff',
'pieSectionTextColor':'#ffffff',
'pieLegendTextColor':'#ffffff',
'pieStrokeColor':'#0a0e27',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'16px'
}}}%%
pie title 📊 Data Distribution
"Category A" : 45
"Category B" : 30
"Category C" : 15
"Category D" : 10
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
7. CLASS DIAGRAM (Object Structure)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```mermaid
%%{init: {'theme':'dark', 'themeVariables': {
'primaryColor':'#1a1a2e',
'primaryTextColor':'#00d9ff',
'primaryBorderColor':'#00d9ff',
'lineColor':'#c77dff',
'secondaryColor':'#16213e',
'tertiaryColor':'#0f3460',
'fontFamily':'Inter, Segoe UI, sans-serif',
'fontSize':'14px',
'classText':'#00ffff'
}}}%%
classDiagram
class KQLQuery {
+string query
+string database
+execute()
+validate()
}
class Result {
+array data
+int rowCount
+format()
}
KQLQuery --> Result : returns
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🎯 SELECTION GUIDE:
- **Process/Workflow**: Use Flowchart (graph TB/LR)
- **Interactions/API Calls**: Use Sequence Diagram
- **Data Relationships**: Use ER Diagram
- **Status/Lifecycle**: Use State Diagram
- **Timeline/Schedule**: Use Gantt Chart
- **Percentages/Parts**: Use Pie Chart
- **Code/Classes**: Use Class Diagram
⚡ BEST PRACTICES:
1. **MANDATORY**: Add markdown attribution AFTER closing ``` : "**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)"
2. **COLOR RESTRICTION**: Use ONLY the defined cyberpunk palette colors (no custom colors)
- Fills: #0a0e27, #1a1a2e, #16213e, #1a1a40, #0f3460
- Strokes: #00d9ff, #ff6600, #00ffff, #ff0080, #9d4edd, #c77dff, #ffaa00
3. Always include emojis for visual appeal and clarity
4. Keep node labels concise (max 3-4 words)
5. Test that text colors have sufficient contrast (AA standard minimum)
6. Use line thickness to indicate importance (3px=critical, 2px=normal, 1px=minor)
7. Add meaningful arrow labels when needed
8. Group related items in subgraphs when applicable
🚫 COMMON MISTAKES TO AVOID:
- ❌ Don't use colors outside the defined cyberpunk palette (#00ff88, #ff3366, etc.)
- ❌ Don't put watermarks INSIDE the mermaid diagram
- ❌ Don't forget the markdown attribution after the closing ```
- ❌ Don't forget the init directive
- ❌ Don't use low-contrast color combinations
- ❌ Don't overcrowd diagrams (max 10-15 nodes per graph)
🔄 DIAGRAM GENERATION WORKFLOW:
1. Analyze the data/query results structure
2. Choose the MOST APPROPRIATE diagram type from templates above
3. Apply the `%%{init: {...}}%%` directive with cyberpunk theme
4. Build the diagram using ONLY palette colors for fills and strokes
5. Add clear labels with emojis for visual interest
6. Close the mermaid code block with ```
7. **CRITICAL**: Add markdown attribution line: "**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)"
8. Verify color palette compliance and readability
✨ OUTPUT FORMAT EXAMPLE:
```mermaid
%%{init: {...}}%%
[your diagram here]
```
**Report Generated by MCP-KQL-Server** | [⭐ Star this repo on GitHub](https://github.com/4R9UN/mcp-kql-server)
Remember: The goal is STUNNING, PREMIUM, HIGH-CONTRAST visualizations using the strict cyberpunk color palette! 🎨✨
"""
# Few-shot learning examples.
# Each entry pairs a natural-language request ("description") with the schema
# it was written against ("schema") and the KQL answer ("query").
FEW_SHOT_EXAMPLES = [
    # Filter + projection over a single table.
    {
        "description": "Show recent failed login attempts",
        "schema": {
            "table": "SigninLogs",
            "columns": {
                "TimeGenerated": {"data_type": "datetime"},
                "UserPrincipalName": {"data_type": "string"},
                "ResultType": {"data_type": "string"},
                "ResultDescription": {"data_type": "string"},
                "IPAddress": {"data_type": "string"},
                "Location": {"data_type": "string"},
            },
        },
        "query": (
            "SigninLogs"
            " | where ResultType != '0'"
            " | where TimeGenerated > ago(1h)"
            " | project TimeGenerated, UserPrincipalName, ResultDescription, IPAddress, Location"
            " | take 100"
        ),
    },
    # Time-bounded aggregation.
    {
        "description": "Count events by severity in the last 24 hours",
        "schema": {
            "table": "SecurityEvent",
            "columns": {
                "TimeGenerated": {"data_type": "datetime"},
                "EventID": {"data_type": "int"},
                "Level": {"data_type": "string"},
                "Computer": {"data_type": "string"},
                "Account": {"data_type": "string"},
            },
        },
        "query": (
            "SecurityEvent"
            " | where TimeGenerated > ago(24h)"
            " | summarize Count=count() by Level"
            " | order by Count desc"
        ),
    },
    # Top-N ranking.
    {
        "description": "Find top 10 users by activity",
        "schema": {
            "table": "AuditLogs",
            "columns": {
                "TimeGenerated": {"data_type": "datetime"},
                "OperationName": {"data_type": "string"},
                "InitiatedBy": {"data_type": "string"},
                "TargetResources": {"data_type": "dynamic"},
                "Result": {"data_type": "string"},
            },
        },
        "query": (
            "AuditLogs"
            " | summarize ActivityCount=count() by InitiatedBy"
            " | top 10 by ActivityCount desc"
        ),
    },
    # Two-table join; note this schema uses table1/columns1 + table2/columns2
    # keys instead of the single table/columns pair used above.
    {
        "description": "Join two tables to correlate data",
        "schema": {
            "table1": "Alerts",
            "columns1": {
                "AlertId": {"data_type": "string"},
                "Severity": {"data_type": "string"},
                "DeviceId": {"data_type": "string"},
                "TimeGenerated": {"data_type": "datetime"},
            },
            "table2": "Devices",
            "columns2": {
                "DeviceId": {"data_type": "string"},
                "DeviceName": {"data_type": "string"},
                "OSPlatform": {"data_type": "string"},
            },
        },
        "query": (
            "Alerts"
            " | join kind=inner Devices on DeviceId"
            " | project TimeGenerated, AlertId, Severity, DeviceName, OSPlatform"
            " | take 50"
        ),
    },
]
# Caps that keep the schema section of the prompt short.
_SCHEMA_COLUMN_LIMIT = 20  # columns rendered with type/samples/description
_RULE_COLUMN_LIMIT = 15  # column names repeated in the CRITICAL RULES list
_GENERAL_EXAMPLE_COUNT = 2  # static few-shot examples appended to the prompt


def _format_column_line(col_name: str, col_info: Any) -> str:
    """Render one schema column as ``- name (type) [Samples: ...]: description``."""
    if isinstance(col_info, str):
        # Tolerate shorthand metadata such as {"Col": "string"}.
        col_info = {"data_type": col_info}
    elif not isinstance(col_info, dict):
        # None or any other shape: fall back to an untyped column entry.
        col_info = {}

    line = f" - {col_name} ({col_info.get('data_type', 'unknown')})"

    sample_values = col_info.get("sample_values") or []
    if sample_values:
        # Two samples are enough to hint at the value format.
        samples_str = ", ".join(str(v) for v in sample_values[:2])
        line += f" [Samples: {samples_str}]"

    description = col_info.get("description") or ""
    if description:
        line += f": {str(description)[:50]}"
    return line


def build_generation_prompt(
    nl_query: str,
    schema: Dict[str, Any],
    table_name: Optional[str] = None,
    include_examples: bool = True,
    include_visualization: bool = False,
    similar_queries: Optional[List[Dict[str, Any]]] = None
) -> str:
    """
    Build optimized prompt for KQL generation with schema context.

    The prompt is assembled in this order: schema context and column-name
    rules (only when ``schema`` has a non-empty ``"columns"`` mapping),
    examples, the user request with generation instructions, and finally the
    Mermaid visualization prompt when requested.

    Args:
        nl_query: Natural language query from user
        schema: Table schema; ``schema["columns"]`` maps column name to a
            metadata dict (``data_type``, optional ``sample_values`` and
            ``description``). A plain type string or ``None`` per column is
            tolerated.
        table_name: Target table name; falls back to ``schema["table"]`` and
            then to ``"Unknown"``
        include_examples: Whether to include few-shot examples
        include_visualization: Whether to append MERMAID_VISUALIZATION_PROMPT
            after the generation instructions
        similar_queries: Past queries (dicts with ``description`` and
            ``query``) listed before the general examples; only used when
            ``include_examples`` is true

    Returns:
        Formatted prompt string
    """
    prompt_parts: List[str] = []

    # Add schema context
    columns = schema.get("columns") if schema else None
    if columns:
        prompt_parts.append("SCHEMA CONTEXT:")
        prompt_parts.append(f"Table: {table_name or schema.get('table', 'Unknown')}")

        column_names = list(columns.keys())
        prompt_parts.append("Columns:")
        for col_name, col_info in list(columns.items())[:_SCHEMA_COLUMN_LIMIT]:
            prompt_parts.append(_format_column_line(col_name, col_info))
        if len(column_names) > _SCHEMA_COLUMN_LIMIT:
            prompt_parts.append(
                f" ... and {len(column_names) - _SCHEMA_COLUMN_LIMIT} more columns"
            )
        prompt_parts.append("")

        # Repeat the exact names as explicit rules so the model does not
        # invent or re-case column names.
        prompt_parts.append("⚠️ CRITICAL RULES - COLUMN NAMES:")
        prompt_parts.append("1. Use ONLY these EXACT column names (case-sensitive):")
        for i, col_name in enumerate(column_names[:_RULE_COLUMN_LIMIT], 1):
            prompt_parts.append(f" {i}. {col_name}")
        if len(column_names) > _RULE_COLUMN_LIMIT:
            prompt_parts.append(
                f" ... and {len(column_names) - _RULE_COLUMN_LIMIT} more columns listed above"
            )
        prompt_parts.append("2. DO NOT modify, abbreviate, or invent column names")
        prompt_parts.append("3. Column names are CASE-SENSITIVE - use exact capitalization")
        prompt_parts.append("4. If you need a column not in this list, the query cannot be fulfilled")
        prompt_parts.append("")

    # Add few-shot examples (optional)
    if include_examples:
        prompt_parts.append("EXAMPLES:")

        # Add dynamic similar queries if available
        if similar_queries:
            prompt_parts.append("--- RELEVANT PAST QUERIES (High Confidence) ---")
            for past in similar_queries:
                desc = past.get('description', 'Similar query')
                # `or ''` guards against an explicit None stored under "query".
                query = (past.get('query') or '').strip()
                prompt_parts.append(f"// {desc}")
                prompt_parts.append(query)
                prompt_parts.append("")
            prompt_parts.append("--- GENERAL EXAMPLES ---")

        for i, example in enumerate(FEW_SHOT_EXAMPLES[:_GENERAL_EXAMPLE_COUNT], 1):
            prompt_parts.append(f"\nExample {i}:")
            prompt_parts.append(f"User: {example['description']}")
            prompt_parts.append(f"KQL: {example['query']}")
        prompt_parts.append("")

    # Add user query
    prompt_parts.append("USER REQUEST:")
    prompt_parts.append(nl_query)
    prompt_parts.append("")
    prompt_parts.append("Generate a KQL query that:")
    prompt_parts.append("1. Uses ONLY columns from the schema above")
    prompt_parts.append("2. Follows all KQL syntax rules")
    prompt_parts.append("3. Is efficient, optimize and accurate")
    prompt_parts.append("")
    prompt_parts.append("KQL Query:")

    if include_visualization:
        prompt_parts.append("\n" + MERMAID_VISUALIZATION_PROMPT)

    return "\n".join(prompt_parts)
def build_schema_description_prompt(
    table_name: str,
    columns: Dict[str, Any]
) -> str:
    """
    Build prompt for LLM to generate table description.

    Only the first 15 column names are spelled out; any remainder is
    summarised as ", and N more".

    Args:
        table_name: Name of the table
        columns: Dictionary of columns with metadata (only the keys are used)

    Returns:
        Formatted prompt for description generation
    """
    preview_limit = 15
    column_summary = ", ".join(list(columns.keys())[:preview_limit])
    hidden_count = len(columns) - preview_limit
    if hidden_count > 0:
        column_summary = f"{column_summary}, and {hidden_count} more"

    sections = [
        "Generate a concise, natural language description for a database table.",
        f"Table Name: {table_name}",
        f"Columns: {column_summary}",
        "Description should:",
        "1. Be 1-2 sentences",
        "2. Explain what data this table likely contains",
        "3. Mention key columns if obvious (e.g., timestamps, IDs, names)",
        "4. Be helpful for accurate query generation",
        "Description:",
    ]
    return "\n".join(sections)
def build_error_feedback_prompt(
    original_query: str,
    error_message: str,
    schema: Dict[str, Any]
) -> str:
    """
    Build prompt for LLM to fix a failed query.

    Args:
        original_query: The query that failed
        error_message: Error message from Kusto (converted with ``str()``, so
            an exception object is accepted as well)
        schema: Table schema; when it has a non-empty ``"columns"`` mapping,
            the first 20 column names are listed and any remainder is
            reported as "... and N more columns"

    Returns:
        Formatted prompt for query correction
    """
    max_listed_columns = 20

    prompt_parts: List[str] = [
        "QUERY ERROR - PLEASE FIX:",
        "",
        "Original Query:",
        original_query,
        "",
        "Error Message:",
        str(error_message),
        "",
    ]

    columns = schema.get("columns") if schema else None
    if columns:
        column_names = list(columns.keys())
        prompt_parts.append("Available Columns:")
        prompt_parts.extend(f" - {name}" for name in column_names[:max_listed_columns])
        if len(column_names) > max_listed_columns:
            # Without this note the model would treat the truncated list as
            # the complete schema and could reject valid columns.
            prompt_parts.append(
                f" ... and {len(column_names) - max_listed_columns} more columns"
            )
        prompt_parts.append("")

    prompt_parts.extend([
        "Generate a CORRECTED KQL query that:",
        "1. Fixes the error",
        "2. Uses only valid columns from the schema",
        "3. Maintains the original intent",
        "",
        "Corrected KQL Query:",
    ])
    return "\n".join(prompt_parts)
def extract_kql_from_response(response: str) -> str:
    """
    Extract KQL query from LLM response.

    Handles various response formats:
    - Plain query
    - Query in code blocks (any language tag such as ``kql``/``kusto``/``KQL``,
      an unterminated fence, or an inline fence on a single line)
    - Query with explanations

    Args:
        response: LLM response text; ``None`` or empty yields ``""``

    Returns:
        Extracted KQL query. A multi-line query whose first line looks like
        KQL is collapsed onto one line, stopping at the first blank or
        comment line after the query started.
    """
    import re  # kept local so the block does not depend on file-level imports

    if not response:
        return ""

    text = response

    # Remove markdown code blocks
    if "```" in text:
        # Opening fence with an optional language tag, then everything up to
        # the closing fence -- or to end of text if the reply was cut off.
        fenced = re.search(r"```[\w+-]*[^\S\n]*\n(.*?)(?:```|\Z)", text, re.DOTALL)
        if fenced is None:
            # Inline form: ```query``` on a single line.
            fenced = re.search(r"```(.*?)```", text, re.DOTALL)
        if fenced is not None:
            text = fenced.group(1)
    text = text.strip()

    # Remove common prefixes
    prefixes = (
        "KQL Query:",
        "Query:",
        "Here's the query:",
        "Here is the query:",
        "The query is:",
    )
    for prefix in prefixes:
        if text.startswith(prefix):
            text = text[len(prefix):].strip()

    # Collapse a multi-line query and drop trailing explanations
    lines = text.split('\n')
    if len(lines) > 1:
        # Check if first line looks like a query
        first_line = lines[0].strip().lower()
        if any(kw in first_line for kw in ('|', 'where', 'project', 'summarize', 'take')):
            query_lines = []
            for raw_line in lines:
                candidate = raw_line.strip()
                if candidate and not candidate.startswith(('#', '//')):
                    query_lines.append(candidate)
                elif query_lines:
                    # Stop at first non-query line after query started
                    break
            # NOTE(review): joining with spaces would comment out the rest of
            # the query if a kept line ends with an inline `//` comment.
            text = ' '.join(query_lines)

    return text.strip()
# Special tokens for structured output.
# Opening/closing tag pairs, keyed as "<section>_start" / "<section>_end".
SPECIAL_TOKENS = {
    # KQL query section
    "query_start": "<KQL>",
    "query_end": "</KQL>",
    # Error section
    "error_start": "<ERROR>",
    "error_end": "</ERROR>",
    # Suggestion section
    "suggestion_start": "<SUGGESTION>",
    "suggestion_end": "</SUGGESTION>",
}
def build_structured_prompt(
    nl_query: str,
    schema: Dict[str, Any],
    use_special_tokens: bool = False
) -> str:
    """
    Build prompt with optional special tokens for structured output.

    Delegates to build_generation_prompt (few-shot examples always enabled)
    and, when requested, appends an instruction to wrap the answer in the
    query_start/query_end tags from SPECIAL_TOKENS.

    Args:
        nl_query: Natural language query
        schema: Table schema
        use_special_tokens: Whether to use special tokens

    Returns:
        Formatted prompt
    """
    prompt = build_generation_prompt(nl_query, schema, include_examples=True)
    if not use_special_tokens:
        return prompt

    open_tag = SPECIAL_TOKENS['query_start']
    close_tag = SPECIAL_TOKENS['query_end']
    return prompt + f"\n\nWrap your KQL query in {open_tag} and {close_tag} tags."