export_databricks_bundle
Export ThumbGate logs and proof artifacts as a Databricks-ready analytics bundle.
Instructions
Export ThumbGate logs and proof artifacts as a Databricks-ready analytics bundle.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| outputPath | No | Destination directory for the exported bundle; resolved through resolveSafePath before use. | `<feedbackDir>/analytics/databricks-<timestamp>` |
Implementation Reference
- Main handler function that exports ThumbGate feedback data (feedback events, memory records, sequences, attributions, and proof reports) into a Databricks-compatible analytics bundle. Reads JSONL data from feedback directories, annotates rows with bundle metadata, writes table JSONL files, generates a manifest.json, and builds a SQL template for loading into Databricks.
```js
function exportDatabricksBundle(feedbackDir = getDefaultFeedbackDir(), outputPath, options = {}) {
  // Resolve inputs and derive a timestamped default bundle location.
  const resolvedFeedbackDir = path.resolve(feedbackDir || getDefaultFeedbackDir());
  const resolvedProofDir = path.resolve(options.proofDir || DEFAULT_PROOF_DIR);
  const exportedAt = new Date().toISOString();
  const bundlePath = path.resolve(outputPath || path.join(
    resolvedFeedbackDir,
    'analytics',
    `databricks-${timestampSlug()}`
  ));
  const tablesDir = path.join(bundlePath, 'tables');
  ensureDir(tablesDir);

  // Source JSONL datasets that are exported as individual tables.
  const datasets = [
    {
      tableName: 'feedback_events',
      sourcePath: path.join(resolvedFeedbackDir, 'feedback-log.jsonl'),
      description: 'Raw ThumbGate feedback events from feedback-log.jsonl',
    },
    {
      tableName: 'memory_records',
      sourcePath: path.join(resolvedFeedbackDir, 'memory-log.jsonl'),
      description: 'Promoted learning and mistake memories from memory-log.jsonl',
    },
    {
      tableName: 'feedback_sequences',
      sourcePath: path.join(resolvedFeedbackDir, 'feedback-sequences.jsonl'),
      description: 'Sequence-model training rows derived from accepted feedback',
    },
    {
      tableName: 'feedback_attributions',
      sourcePath: path.join(resolvedFeedbackDir, 'attributed-feedback.jsonl'),
      description: 'Tool-call attribution rows for negative feedback events',
    },
  ];

  // Read, annotate, and write each dataset as a bundle table.
  const tables = datasets.map((dataset) => {
    const rows = annotateRows(
      readJSONL(dataset.sourcePath),
      dataset.tableName,
      path.basename(dataset.sourcePath),
      exportedAt,
    );
    const fileName = `${dataset.tableName}.jsonl`;
    const relativePath = toBundleRelativePath('tables', fileName);
    writeJSONL(path.join(tablesDir, fileName), rows);
    return {
      tableName: dataset.tableName,
      relativePath,
      rowCount: rows.length,
      description: dataset.description,
    };
  });

  // Proof reports are collected separately from the proof directory.
  const proofRows = collectProofReports(resolvedProofDir, exportedAt);
  const proofRelativePath = toBundleRelativePath('tables', 'proof_reports.jsonl');
  writeJSONL(path.join(tablesDir, 'proof_reports.jsonl'), proofRows);
  tables.push({
    tableName: 'proof_reports',
    relativePath: proofRelativePath,
    rowCount: proofRows.length,
    description: 'Machine-readable proof artifacts discovered under proof/**/*.json',
  });

  // The manifest and SQL template describe how to load the bundle into Databricks.
  const manifest = {
    format: 'databricks-analytics-bundle',
    version: 1,
    exportedAt,
    bundlePath,
    feedbackDir: resolvedFeedbackDir,
    proofDir: resolvedProofDir,
    placeholders: {
      catalog: '__CATALOG__',
      schema: '__SCHEMA__',
      bundleRoot: '__BUNDLE_ROOT__',
    },
    tables,
  };

  const manifestPath = path.join(bundlePath, 'manifest.json');
  const sqlTemplatePath = path.join(bundlePath, 'load_databricks.sql');
  fs.writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n');
  fs.writeFileSync(sqlTemplatePath, buildSqlTemplate(manifest) + '\n');

  return {
    bundlePath,
    manifestPath,
    sqlTemplatePath,
    tableCount: tables.length,
    totalRows: tables.reduce((sum, table) => sum + table.rowCount, 0),
    tables,
  };
}
```
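For orientation, a direct invocation might look like the sketch below. The feedback and output paths are hypothetical (the real defaults come from getDefaultFeedbackDir() and the timestamped analytics path above); only the shape of the returned summary is taken from the code.

```js
// Hypothetical paths; the return shape mirrors exportDatabricksBundle above.
const result = exportDatabricksBundle('.claude/feedback', '/tmp/thumbgate-bundle');

console.log(result.tableCount); // 5: the four feedback tables plus proof_reports
console.log(result.totalRows);  // sum of rowCount across all exported tables
console.log(result.tables[0]);  // { tableName: 'feedback_events', relativePath: 'tables/feedback_events.jsonl', ... }
```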
- Builds a SQL bootstrap template with CREATE TABLE statements using read_files() for each table in the manifest, with placeholders for catalog, schema, and bundle root.

```js
function buildSqlTemplate(manifest) {
  const lines = [
    '-- Databricks bootstrap for the exported analytics bundle.',
    '-- Replace __CATALOG__, __SCHEMA__, and __BUNDLE_ROOT__ before running.',
    '',
    'CREATE SCHEMA IF NOT EXISTS __CATALOG__.__SCHEMA__;',
    '',
  ];
  for (const table of manifest.tables) {
    lines.push(`CREATE OR REPLACE TABLE __CATALOG__.__SCHEMA__.${table.tableName} AS`);
    lines.push('SELECT *, _metadata.file_path AS source_file');
    lines.push(`FROM read_files('__BUNDLE_ROOT__/${normalizeBundleRelativePath(table.relativePath)}', format => 'json');`);
    lines.push('');
  }
  return lines.join('\n');
}
```
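For a bundle containing only the feedback_events table, the rendered template comes out as below (derived directly from the string construction above; substitute the three placeholders before running it in Databricks):

```sql
-- Databricks bootstrap for the exported analytics bundle.
-- Replace __CATALOG__, __SCHEMA__, and __BUNDLE_ROOT__ before running.

CREATE SCHEMA IF NOT EXISTS __CATALOG__.__SCHEMA__;

CREATE OR REPLACE TABLE __CATALOG__.__SCHEMA__.feedback_events AS
SELECT *, _metadata.file_path AS source_file
FROM read_files('__BUNDLE_ROOT__/tables/feedback_events.jsonl', format => 'json');
```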
- Collects proof reports by recursively walking JSON files under the proof directory and annotating them with bundle metadata.

```js
function collectProofReports(proofDir, exportedAt) {
  return walkJsonFiles(proofDir)
    .map((filePath, index) => ({
      bundleDataset: 'proof_reports',
      bundleRowNumber: index + 1,
      bundleExportedAt: exportedAt,
      reportId: path.basename(filePath, '.json'),
      reportCategory: path.basename(path.dirname(filePath)),
      reportPath: normalizeBundleRelativePath(path.relative(proofDir, filePath)),
      report: readJSON(filePath),
    }))
    .filter((row) => row.report);
}
```
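Assuming a hypothetical report file at proof/gates/gate-42.json, a single exported row would take roughly this shape (timestamp and report contents illustrative):

```js
// Hypothetical row for proof/gates/gate-42.json; values are illustrative.
({
  bundleDataset: 'proof_reports',
  bundleRowNumber: 1,
  bundleExportedAt: '2025-01-01T00:00:00.000Z',
  reportId: 'gate-42',       // file name minus the .json extension
  reportCategory: 'gates',   // immediate parent directory
  reportPath: 'gates/gate-42.json',
  report: { /* parsed JSON body of the report file */ },
});
```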
- adapters/mcp/server-stdio.js:748-752 (registration): MCP tool registration case for 'export_databricks_bundle'. Calls exportDatabricksBundle with an optional outputPath, enforcing the 'export_databricks' rate limit.

```js
case 'export_databricks_bundle': {
  enforceLimit('export_databricks');
  const outputPath = args.outputPath ? resolveSafePath(args.outputPath) : undefined;
  return toTextResult(exportDatabricksBundle(undefined, outputPath));
}
```
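Over the wire this is an ordinary MCP tools/call request; a minimal sketch with a hypothetical output path follows (the server resolves the path through resolveSafePath before use):

```json
{
  "jsonrpc": "2.0",
  "id": 7,
  "method": "tools/call",
  "params": {
    "name": "export_databricks_bundle",
    "arguments": { "outputPath": "exports/databricks-bundle" }
  }
}
```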
- scripts/agent-readiness.js:13-28 (schema): Registration of 'export_databricks_bundle' in the WRITE_CAPABLE_TOOLS set, marking it as a write-capable tool for agent readiness/permission tiering.

```js
const WRITE_CAPABLE_TOOLS = new Set([
  'capture_feedback',
  'bootstrap_internal_agent',
  'prevention_rules',
  'export_dpo_pairs',
  'export_databricks_bundle',
  'construct_context_pack',
  'evaluate_context_pack',
  'generate_skill',
  'satisfy_gate',
  'set_task_scope',
  'approve_protected_action',
  'track_action',
  'register_claim_gate',
  'run_autoresearch',
]);
```
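A minimal sketch of how a set like this is typically consumed for permission tiering; the helper below is hypothetical, not taken from scripts/agent-readiness.js:

```js
// Hypothetical gate: write-capable tools require an elevated permission tier.
function requiresWriteTier(toolName) {
  return WRITE_CAPABLE_TOOLS.has(toolName);
}

requiresWriteTier('export_databricks_bundle'); // true
requiresWriteTier('some_read_only_tool');      // false (hypothetical tool name)
```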