#!/usr/bin/env python3
"""
Setup script for Databricks MCP Proxy.

Creates a uv-compatible MCP server that:
- Uses stdio transport for Claude Desktop
- Has an authenticate tool that opens a browser for OAuth
- Proxies tools from a remote Databricks MCP server

Usage:
    python setup_databricks_mcp_proxy.py [--output-dir ./databricks-mcp-proxy]
"""
import argparse
import os
from pathlib import Path
# Template written to <output-dir>/pyproject.toml by main().
# Declares the package metadata, its runtime dependencies, the
# `databricks-mcp-proxy` console script (databricks_mcp_proxy.main:main),
# and a hatchling build that packages src/databricks_mcp_proxy.
PYPROJECT_TOML = '''[project]
name = "databricks-mcp-proxy"
version = "0.1.0"
description = "MCP proxy server for Databricks with OAuth U2M authentication"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"mcp>=1.0.0",
"requests>=2.28.0",
"databricks-sdk>=0.20.0",
"databricks-mcp",
"pyyaml>=6.0",
]
[project.scripts]
databricks-mcp-proxy = "databricks_mcp_proxy.main:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/databricks_mcp_proxy"]
'''
# Template written to src/databricks_mcp_proxy/__init__.py by main();
# it contains only the package docstring and the version string.
INIT_PY = '''"""Databricks MCP Proxy Server."""
__version__ = "0.1.0"
'''
# Template written to src/databricks_mcp_proxy/auth.py by main().
# The generated module runs the OAuth U2M authorization-code flow with PKCE:
# it opens the browser, captures the redirect on a one-shot local HTTP
# server, and exchanges the code for an access token.
# The template must stay valid, correctly indented Python because it is
# written to disk verbatim.
AUTH_PY = '''"""OAuth U2M authentication for Databricks."""
import base64
import hashlib
import secrets
import string
import sys
import webbrowser
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import parse_qs, urlencode, urlparse

import requests

CLIENT_ID = "databricks-cli"
DEFAULT_SCOPES = "all-apis offline_access"
DEFAULT_REDIRECT_URI = "http://localhost:8020"
# Seconds to wait for the browser redirect before giving up.
CALLBACK_TIMEOUT_SECONDS = 300
# Seconds to wait for the token endpoint to answer.
TOKEN_REQUEST_TIMEOUT_SECONDS = 30


class OAuthCallbackHandler(BaseHTTPRequestHandler):
    """HTTP handler to capture OAuth callback."""

    # Stored on the class so start_oauth_flow can read them after the
    # single request has been handled.
    authorization_code = None
    state_value = None

    def do_GET(self):
        query_components = parse_qs(urlparse(self.path).query)
        OAuthCallbackHandler.authorization_code = query_components.get("code", [None])[0]
        OAuthCallbackHandler.state_value = query_components.get("state", [None])[0]
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(b"""
        <html>
        <body style="font-family: system-ui; display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0; background: #f5f5f5;">
        <div style="text-align: center; padding: 40px; background: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1);">
        <h2 style="color: #22c55e;">Authorization Successful!</h2>
        <p style="color: #666;">You can close this window.</p>
        </div>
        </body>
        </html>
        """)

    def log_message(self, format, *args):
        # Silence the default per-request logging.
        pass


def generate_pkce_pair():
    """Generate PKCE code verifier and challenge."""
    allowed_chars = string.ascii_letters + string.digits + "-._~"
    code_verifier = "".join(secrets.choice(allowed_chars) for _ in range(64))
    sha256_hash = hashlib.sha256(code_verifier.encode()).digest()
    code_challenge = base64.urlsafe_b64encode(sha256_hash).decode().rstrip("=")
    return code_verifier, code_challenge


def start_oauth_flow(host: str, scopes: str = DEFAULT_SCOPES, redirect_uri: str = DEFAULT_REDIRECT_URI) -> str:
    """
    Start OAuth U2M flow and return access token.
    Opens browser for user authorization.

    Raises:
        ValueError: if no callback arrives, the state does not match,
            no authorization code is returned, or the token exchange fails.
    """
    host = host.rstrip("/")
    state = secrets.token_urlsafe(32)
    code_verifier, code_challenge = generate_pkce_pair()
    auth_params = {
        "client_id": CLIENT_ID,
        "redirect_uri": redirect_uri,
        "response_type": "code",
        "state": state,
        "code_challenge": code_challenge,
        "code_challenge_method": "S256",
        "scope": scopes,
    }
    auth_url = f"{host}/oidc/v1/authorize?{urlencode(auth_params)}"

    # Reset state left over from any previous flow
    OAuthCallbackHandler.authorization_code = None
    OAuthCallbackHandler.state_value = None

    # Start callback server
    redirect_port = int(urlparse(redirect_uri).port or 8020)
    server = HTTPServer(("localhost", redirect_port), OAuthCallbackHandler)
    server.timeout = CALLBACK_TIMEOUT_SECONDS
    try:
        # Open browser
        print("Opening browser for authorization...", file=sys.stderr)
        webbrowser.open(auth_url)
        # Wait for callback (handles exactly one request, or times out)
        print(f"Waiting for authorization callback on {redirect_uri}...", file=sys.stderr)
        server.handle_request()
    finally:
        # Release the port so a later authenticate call can bind it again.
        server.server_close()

    if OAuthCallbackHandler.state_value is None and OAuthCallbackHandler.authorization_code is None:
        raise ValueError("No authorization callback received (timed out or missing parameters).")
    if OAuthCallbackHandler.state_value != state:
        raise ValueError("State mismatch! Possible CSRF attack.")
    if not OAuthCallbackHandler.authorization_code:
        raise ValueError("No authorization code received.")

    # Exchange code for token
    print("Exchanging code for token...", file=sys.stderr)
    token_response = requests.post(
        f"{host}/oidc/v1/token",
        data={
            "client_id": CLIENT_ID,
            "grant_type": "authorization_code",
            "scope": scopes,
            "redirect_uri": redirect_uri,
            "code_verifier": code_verifier,
            "code": OAuthCallbackHandler.authorization_code,
        },
        timeout=TOKEN_REQUEST_TIMEOUT_SECONDS,
    )
    if token_response.status_code != 200:
        raise ValueError(f"Token exchange failed: {token_response.text}")
    print("Token obtained successfully!", file=sys.stderr)
    return token_response.json()["access_token"]
'''
# Template written to src/databricks_mcp_proxy/client.py by main().
# The generated module wraps DatabricksMCPClient: it connects with an access
# token, discovers the remote tools, and forwards tool calls.
# The template must stay valid, correctly indented Python because it is
# written to disk verbatim.
CLIENT_PY = '''"""Databricks MCP Client wrapper."""
import sys
from dataclasses import dataclass
from typing import Any, List, Optional
from concurrent.futures import ThreadPoolExecutor

from databricks.sdk import WorkspaceClient
from databricks_mcp import DatabricksMCPClient


@dataclass
class ToolInfo:
    """Information about a remote tool."""

    name: str
    description: str
    input_schema: dict


class DatabricksMCPProxy:
    """Proxy client for Databricks MCP server."""

    def __init__(self, host: str, app_url: str, access_token: str):
        self.host = host
        self.app_url = app_url.rstrip("/")
        self.mcp_url = f"{self.app_url}/mcp"
        self.access_token = access_token
        self._workspace_client: Optional[WorkspaceClient] = None
        self._mcp_client: Optional[DatabricksMCPClient] = None
        self._tools: List[ToolInfo] = []
        self._connected = False

    def connect(self):
        """Establish connection to the Databricks MCP server."""
        print(f"Creating WorkspaceClient for {self.host}...", file=sys.stderr)
        self._workspace_client = WorkspaceClient(host=self.host, token=self.access_token)
        print("WorkspaceClient created", file=sys.stderr)
        print(f"Connecting to MCP server at {self.mcp_url}...", file=sys.stderr)
        self._mcp_client = DatabricksMCPClient(server_url=self.mcp_url, workspace_client=self._workspace_client)
        print("MCP client connected", file=sys.stderr)
        self._connected = True

    @property
    def is_connected(self) -> bool:
        return self._connected

    def discover_tools(self) -> List[ToolInfo]:
        """Discover available tools from the remote MCP server."""
        if not self._mcp_client:
            raise RuntimeError("Not connected. Call connect() first.")
        print("Discovering remote tools...", file=sys.stderr)
        # The remote call is submitted to a worker thread and awaited here.
        with ThreadPoolExecutor() as executor:
            remote_tools = executor.submit(self._mcp_client.list_tools).result()
        self._tools = [
            ToolInfo(
                name=t.name,
                description=t.description or "",
                # Fall back to an empty object schema when the attribute is
                # missing or None, so callers can always use .get() on it.
                input_schema=getattr(t, "inputSchema", None) or {"type": "object", "properties": {}},
            )
            for t in remote_tools
        ]
        print(f"Discovered {len(self._tools)} tools", file=sys.stderr)
        for tool in self._tools:
            print(f" - {tool.name}", file=sys.stderr)
        return self._tools

    @property
    def tools(self) -> List[ToolInfo]:
        return self._tools

    def call_tool(self, name: str, arguments: dict) -> Any:
        """Call a tool on the remote MCP server."""
        if not self._mcp_client:
            raise RuntimeError("Not connected. Call connect() first.")
        with ThreadPoolExecutor() as executor:
            return executor.submit(self._mcp_client.call_tool, name, arguments or {}).result()
'''
# Template written to src/databricks_mcp_proxy/server.py by main().
# The generated module defines the FastMCP server and its three tools:
# authenticate, list_databricks_tools and call_databricks_tool.
# Backslash escapes are doubled here so the generated file contains the
# literal two-character newline escape inside its own string literals.
# The template must stay valid, correctly indented Python because it is
# written to disk verbatim.
SERVER_PY = '''"""MCP Server for Databricks Proxy."""
import os
import sys
from typing import Optional

from mcp.server.fastmcp import FastMCP

from .auth import start_oauth_flow, DEFAULT_SCOPES
from .client import DatabricksMCPProxy


class AppState:
    """Global application state."""

    proxy: Optional[DatabricksMCPProxy] = None
    host: Optional[str] = None
    app_url: Optional[str] = None
    scopes: str = DEFAULT_SCOPES
    authenticated: bool = False


state = AppState()

# Create MCP server
mcp = FastMCP("databricks-mcp-proxy")


@mcp.tool()
def authenticate() -> str:
    """
    Authenticate with Databricks using OAuth U2M flow.
    Opens a browser for authorization.
    Uses DATABRICKS_HOST and DATABRICKS_APP_URL from app.yaml or environment.
    """
    try:
        host = state.host or os.environ.get("DATABRICKS_HOST")
        app_url = state.app_url or os.environ.get("DATABRICKS_APP_URL")
        scopes = state.scopes or os.environ.get("DATABRICKS_SCOPES", DEFAULT_SCOPES)
        if not host:
            return "Error: DATABRICKS_HOST not configured. Set it in app.yaml or environment."
        if not app_url:
            return "Error: DATABRICKS_APP_URL not configured. Set it in app.yaml or environment."
        print(f"Starting OAuth flow for {host}...", file=sys.stderr)
        access_token = start_oauth_flow(host, scopes)
        state.proxy = DatabricksMCPProxy(host, app_url, access_token)
        state.proxy.connect()
        state.proxy.discover_tools()
        state.authenticated = True
        tool_names = [t.name for t in state.proxy.tools]
        return f"Authenticated successfully!\\n\\nAvailable tools ({len(tool_names)}):\\n" + "\\n".join(f" - {name}" for name in tool_names)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        state.authenticated = False
        return f"Authentication failed: {e}"


@mcp.tool()
def list_databricks_tools() -> str:
    """
    List all available tools on the remote Databricks MCP server.
    Must authenticate first.
    """
    if not state.authenticated or not state.proxy:
        return "Not authenticated. Call 'authenticate' first."
    if not state.proxy.tools:
        return "No tools available."
    lines = [f"Available tools ({len(state.proxy.tools)}):\\n"]
    for tool in state.proxy.tools:
        lines.append(f"**{tool.name}**")
        lines.append(f" {tool.description}")
        if tool.input_schema.get("properties"):
            lines.append(f" Parameters: {list(tool.input_schema['properties'].keys())}")
        lines.append("")
    return "\\n".join(lines)


@mcp.tool()
def call_databricks_tool(tool_name: str, arguments: Optional[dict] = None) -> str:
    """
    Call a tool on the remote Databricks MCP server.
    Args:
        tool_name: Name of the tool to call (use list_databricks_tools to see available tools)
        arguments: Arguments to pass to the tool
    """
    if not state.authenticated or not state.proxy:
        return "Not authenticated. Call 'authenticate' first."
    try:
        # None (argument omitted) is forwarded as an empty argument dict.
        result = state.proxy.call_tool(tool_name, arguments or {})
        if hasattr(result, 'content') and result.content:
            texts = [c.text for c in result.content if hasattr(c, 'text')]
            if texts:
                return "\\n".join(texts)
        return str(result)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error calling tool '{tool_name}': {e}"
'''
# Template written to src/databricks_mcp_proxy/main.py by main().
# The generated module is the console-script entry point: it loads app.yaml,
# fills the shared server state, prints a banner to stderr, and runs the MCP
# server.
# The template must stay valid, correctly indented Python because it is
# written to disk verbatim.
MAIN_PY = '''#!/usr/bin/env python3
"""Databricks MCP Proxy - Entry Point"""
import os
import sys
from pathlib import Path

import yaml

from .server import mcp, state
from .auth import DEFAULT_SCOPES


def load_app_yaml(path: str = "app.yaml") -> dict:
    """Load configuration from app.yaml (Databricks App format).

    Returns an empty dict when the file is missing or is not a mapping,
    otherwise a dict with the keys "command" and "env".
    """
    app_yaml = Path(path)
    if not app_yaml.exists():
        return {}
    with open(app_yaml, encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}
    if not isinstance(config, dict):
        return {}
    # Parse env list into dict; "env: null" and malformed entries are skipped
    env = {}
    for item in config.get("env") or []:
        if not isinstance(item, dict):
            continue
        name = item.get("name")
        if not name:
            continue
        if "value" in item:
            env[name] = item["value"]
        elif "valueFrom" in item:
            # Secrets are injected as env vars by Databricks
            env[name] = os.environ.get(name, "")
    return {"command": config.get("command"), "env": env}


def main():
    """Main entry point."""
    # Try app.yaml in the current directory, its parent, then the project
    # root relative to this file (src/databricks_mcp_proxy/main.py).
    candidates = [
        Path("app.yaml"),
        Path("../app.yaml"),
        Path(__file__).resolve().parents[2] / "app.yaml",
    ]
    config = {}
    for path in candidates:
        if path.exists():
            config = load_app_yaml(str(path))
            print(f"Loaded config from {path}", file=sys.stderr)
            break
    env = config.get("env", {})
    # Set state from config (app.yaml values take priority over env vars)
    state.host = env.get("DATABRICKS_HOST") or os.environ.get("DATABRICKS_HOST")
    state.app_url = env.get("DATABRICKS_APP_URL") or os.environ.get("DATABRICKS_APP_URL")
    state.scopes = env.get("DATABRICKS_SCOPES") or os.environ.get("DATABRICKS_SCOPES", DEFAULT_SCOPES)
    # All banner output goes to stderr
    print("=" * 50, file=sys.stderr)
    print("Databricks MCP Proxy", file=sys.stderr)
    print("=" * 50, file=sys.stderr)
    if state.host:
        print(f"Host: {state.host}", file=sys.stderr)
    if state.app_url:
        print(f"App URL: {state.app_url}", file=sys.stderr)
    print("", file=sys.stderr)
    print("Tools available:", file=sys.stderr)
    print(" - authenticate: Start OAuth flow (opens browser)", file=sys.stderr)
    print(" - list_databricks_tools: List remote tools", file=sys.stderr)
    print(" - call_databricks_tool: Call a remote tool", file=sys.stderr)
    print("=" * 50, file=sys.stderr)
    # Run MCP server (stdio transport)
    mcp.run()


if __name__ == "__main__":
    main()
'''
# Template written to <output-dir>/app.yaml by main().
# `env` must be a list of name/value mappings: the generated main.py reads
# each entry with item.get("name") and item["value"], so the nesting has to
# be expressed through indentation for the file to parse as intended.
APP_YAML = '''# Databricks MCP Proxy Configuration
command: ["uv", "run", "databricks-mcp-proxy"]
env:
  - name: DATABRICKS_HOST
    value: "https://dbc-XXXXX.cloud.databricks.com" # UPDATE THIS
  - name: DATABRICKS_APP_URL
    value: "https://your-mcp-app.databricksapps.com" # UPDATE THIS
  - name: DATABRICKS_SCOPES
    value: "all-apis offline_access"
'''
# Template written to <output-dir>/README.md by main().
# The YAML example is indented so it can be copied as-is; blank lines
# separate the Markdown sections and the JSON sample is pretty-printed.
README = '''# Databricks MCP Proxy

An MCP proxy server that authenticates with Databricks and exposes remote MCP tools.

## Installation

```bash
uv sync
```

## Configuration

Edit `app.yaml` with your Databricks settings:

```yaml
env:
  - name: DATABRICKS_HOST
    value: "https://dbc-XXXXX.cloud.databricks.com"
  - name: DATABRICKS_APP_URL
    value: "https://your-mcp-app.databricksapps.com"
```

## Usage

### Test locally

```bash
uv run databricks-mcp-proxy
```

### Claude Desktop Configuration

Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:

```json
{
  "mcpServers": {
    "databricks": {
      "command": "uv",
      "args": [
        "--directory", "/path/to/databricks-mcp-proxy",
        "run",
        "databricks-mcp-proxy"
      ]
    }
  }
}
```

## Tools

| Tool | Description |
|------|-------------|
| `authenticate` | Start OAuth flow (opens browser) |
| `list_databricks_tools` | List available remote tools |
| `call_databricks_tool` | Call a remote tool by name |

## Flow

1. Claude starts the proxy via stdio
2. Call `authenticate` tool
3. Browser opens for Databricks OAuth
4. After auth, remote tools are discovered
5. Use `call_databricks_tool` to invoke any remote tool
'''
# Template written to <output-dir>/.gitignore by main().
# NOTE(review): uv.lock is ignored here; lockfiles are commonly committed
# for applications — confirm that ignoring it is intentional.
GITIGNORE = '''__pycache__/
*.py[cod]
*.egg-info/
dist/
.venv/
.uv/
uv.lock
.DS_Store
'''
def main(argv=None):
    """Generate the Databricks MCP Proxy project skeleton on disk.

    Creates ``<output-dir>/src/databricks_mcp_proxy`` (including parents),
    writes every template into place, and prints next steps plus a Claude
    Desktop configuration snippet that points at the output directory.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``.
    """
    # Function-scope import: only needed for the config snippet below.
    import json

    parser = argparse.ArgumentParser(description="Setup Databricks MCP Proxy")
    parser.add_argument("--output-dir", default="./databricks-mcp-proxy")
    args = parser.parse_args(argv)

    output_dir = Path(args.output_dir)
    src_dir = output_dir / "src" / "databricks_mcp_proxy"
    src_dir.mkdir(parents=True, exist_ok=True)

    # Files placed at the project root.
    files = {
        "pyproject.toml": PYPROJECT_TOML,
        "README.md": README,
        ".gitignore": GITIGNORE,
        "app.yaml": APP_YAML,
    }
    # Files placed inside the Python package.
    src_files = {
        "__init__.py": INIT_PY,
        "auth.py": AUTH_PY,
        "client.py": CLIENT_PY,
        "server.py": SERVER_PY,
        "main.py": MAIN_PY,
    }

    print(f"Creating Databricks MCP Proxy in: {output_dir.absolute()}\n")
    # Explicit UTF-8 so the output does not depend on the platform's
    # default encoding.
    for name, content in files.items():
        (output_dir / name).write_text(content, encoding="utf-8")
        print(f" Created {name}")
    for name, content in src_files.items():
        (src_dir / name).write_text(content, encoding="utf-8")
        print(f" Created src/databricks_mcp_proxy/{name}")

    print(f"\n{'=' * 50}")
    print("Setup complete!")
    print(f"{'=' * 50}")
    print("\nNext steps:")
    print(f" 1. cd {output_dir}")
    print(" 2. Edit app.yaml with your Databricks settings")
    print(" 3. uv sync")
    print(" 4. uv run databricks-mcp-proxy")
    print("\nClaude Desktop config:")

    # Serialise with json.dumps so paths containing backslashes or quotes
    # (e.g. on Windows) still produce valid, copy-pasteable JSON.
    claude_config = {
        "mcpServers": {
            "databricks": {
                "command": "uv",
                "args": [
                    "--directory",
                    str(output_dir.absolute()),
                    "run",
                    "databricks-mcp-proxy",
                ],
            }
        }
    }
    print(f"\n{json.dumps(claude_config, indent=2)}\n")
# Run the generator only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()