docker-compose.yaml
# Docker Compose configuration for testing BERDL Datalake MCP Server
# This docker-compose is for developer convenience, not for running in production.
#
# INSTRUCTIONS:
# =============
#
# OPTION A: USING PRE-BUILT IMAGES (RECOMMENDED FOR MOST USERS)
# --------------------------------------------------------------
# If you want to use pre-built images from the registry:
# 1. Uncomment the 'image:' and 'platform:' lines in each service
# 2. Comment out the 'build:' sections
# 3. Run: docker compose up -d
#
# No build steps required!
#
# OPTION B: BUILDING FROM LOCAL SOURCE CODE (FOR DEVELOPERS)
# -----------------------------------------------------------
# If you're developing and want to build from local source:
#
# 1. CLONE REQUIRED REPOSITORIES:
#    You need to clone the following repos from https://github.com/BERDataLakehouse
#    at the same directory level as this datalake-mcp-server directory:
#
#    git clone https://github.com/BERDataLakehouse/spark_notebook_base.git
#    git clone https://github.com/BERDataLakehouse/kube_spark_manager_image.git
#
#    Your directory structure should look like:
#    parent/
#    ├── datalake-mcp-server/    (this directory)
#    ├── spark_notebook_base/
#    └── kube_spark_manager_image/
#
# 2. BUILD REQUIRED BASE IMAGES FIRST:
#    cd ../spark_notebook_base && \
#      docker build -t spark_notebook_base:local .
#
# 3. ENSURE BUILD SECTIONS ARE UNCOMMENTED:
#    The docker-compose.yaml should have 'build:' sections uncommented
#    and 'image:' lines commented out for services you want to build locally.
#
# 4. RUN DOCKER COMPOSE:
#    docker compose up -d --build
#
# NOTE: Base images must be rebuilt when Dockerfiles or dependencies change.
#
# AUTHENTICATION SETUP (REQUIRED FOR BOTH OPTIONS):
# --------------------------------------------------
# - Update KBASE_AUTH_TOKEN with your KBase CI auth token
# - Update NB_USER with your KBase CI username
#
# DEFAULT CREDENTIALS:
# --------------------
# - MinIO: minio/minio123
# - PostgreSQL: hive/hivepassword
#
# ARCHITECTURE NOTES:
# -------------------
# This stack includes:
# - Spark standalone cluster (master + 2 workers)
# - MinIO S3-compatible storage
# - PostgreSQL (Hive Metastore backend)
# - Redis (distributed caching)
# - Datalake MCP Server (main service under test)

services:
  datalake-mcp-server:
    # image: ghcr.io/berdatalakehouse/datalake-mcp-server:main
    # platform: linux/amd64
    build:
      context: .
      dockerfile: Dockerfile
      args:
        BASE_TAG: local
        BASE_REGISTRY: ""
    ports:
      - "8000:8000"  # MCP server API
    environment:
      # SPARK CONFIGURATION
      - SPARK_CONNECT_URL_TEMPLATE=sc://spark-master:15002
      - SPARK_MASTER_URL=spark://spark-master:7077
      - BERDL_HIVE_METASTORE_URI=thrift://hive-metastore:9083
      # MINIO CONFIGURATION
      - MINIO_ENDPOINT_URL=minio:9002
      # AUTHENTICATION CONFIGURATION
      - KBASE_AUTH_URL=https://ci.kbase.us/services/auth/
      - KBASE_ADMIN_ROLES=CDM_JUPYTERHUB_ADMIN
      - KBASE_REQUIRED_ROLES=BERDL_USER
      # POSTGRESQL CONFIGURATION (read-only user)
      - POSTGRES_URL=postgres:5432
      - POSTGRES_DB=hive
      - POSTGRES_USER=readonly_user
      - POSTGRES_PASSWORD=readonly_password
      # REDIS CONFIGURATION
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      # DATA LAKE CONFIGURATION
      - DELTALAKE_WAREHOUSE_DIR=s3a://cdm-lake/users-sql-warehouse
    volumes:
      # Mount the shared /home directory to access user credentials.
      # This allows the MCP server to dynamically read any user's MinIO credentials
      # from /home/{username}/.berdl_minio_credentials
      - users_home:/home:ro
    depends_on:
      - spark-master
      - hive-metastore
      - minio
      - postgres
      - redis

  spark-master:
    # image: ghcr.io/berdatalakehouse/kube_spark_manager_image:main
    # platform: linux/amd64
    build:
      context: ../kube_spark_manager_image
      dockerfile: Dockerfile
      args:
        BASE_TAG: local
        BASE_REGISTRY: ""
    ports:
      - "8090:8080"    # Spark Master Web UI
      - "7077:7077"    # Spark Master
      - "15002:15002"  # Spark Connect server
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_HOST=0.0.0.0
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_WEBUI_PORT=8080
      - BERDL_REDIS_HOST=redis
      - BERDL_REDIS_PORT=6379
      - BERDL_DELTALAKE_WAREHOUSE_DIRECTORY_PATH=s3a://cdm-lake/users-sql-warehouse
      - BERDL_HIVE_METASTORE_URI=thrift://hive-metastore:9083
    depends_on:
      - hive-metastore
      - minio
      - redis

  spark-worker-1:
    # image: ghcr.io/berdatalakehouse/kube_spark_manager_image:main
    # platform: linux/amd64
    build:
      context: ../kube_spark_manager_image
      dockerfile: Dockerfile
      args:
        BASE_TAG: local
        BASE_REGISTRY: ""
    ports:
      - "8081:8081"  # Spark Worker Web UI
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=10
      - SPARK_WORKER_MEMORY=2g
      - SPARK_WORKER_PORT=8081
      - SPARK_WORKER_WEBUI_PORT=8081
      - BERDL_REDIS_HOST=redis
      - BERDL_REDIS_PORT=6379
      - BERDL_DELTALAKE_WAREHOUSE_DIRECTORY_PATH=s3a://cdm-lake/users-sql-warehouse
      - BERDL_HIVE_METASTORE_URI=thrift://hive-metastore:9083
    depends_on:
      - spark-master

  spark-worker-2:
    # image: ghcr.io/berdatalakehouse/kube_spark_manager_image:main
    # platform: linux/amd64
    build:
      context: ../kube_spark_manager_image
      dockerfile: Dockerfile
      args:
        BASE_TAG: local
        BASE_REGISTRY: ""
    ports:
      - "8082:8082"  # Spark Worker Web UI
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=10
      - SPARK_WORKER_MEMORY=2g
      - SPARK_WORKER_PORT=8082
      - SPARK_WORKER_WEBUI_PORT=8082
      - BERDL_REDIS_HOST=redis
      - BERDL_REDIS_PORT=6379
      - BERDL_DELTALAKE_WAREHOUSE_DIRECTORY_PATH=s3a://cdm-lake/users-sql-warehouse
      - BERDL_HIVE_METASTORE_URI=thrift://hive-metastore:9083
    depends_on:
      - spark-master

  hive-metastore:
    # image: ghcr.io/berdatalakehouse/hive_metastore:main
    # platform: linux/amd64
    build:
      context: ../hive_metastore
      dockerfile: Dockerfile
      args:
        # IMPORTANT: Spark 4.0.0 is only officially compatible with Hive Metastore 4.0.0
        # Using newer Hive versions (4.1.0+) causes Thrift protocol incompatibilities
        HIVE_IMAGE_TAG: "4.0.0"
    ports:
      - "9083:9083"  # Hive Metastore Thrift
    environment:
      # PostgreSQL database configuration
      - POSTGRES_DB=hive
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_HOST=postgres
      - POSTGRES_PORT=5432
      # MinIO S3 configuration
      - S3_ENDPOINT=http://minio:9002
      - S3_ACCESS_KEY=minio
      - S3_SECRET_KEY=minio123
      # Delta Lake warehouse directory
      - DELTALAKE_WAREHOUSE_DIR=s3a://cdm-lake/users-sql-warehouse
    depends_on:
      - postgres
      - minio
    healthcheck:
      test: ["CMD", "nc", "-z", "localhost", "9083"]
      interval: 30s
      timeout: 10s
      retries: 3

  postgres:
    image: postgres:16.3
    ports:
      - "5432:5432"
    environment:
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./scripts/init-postgres-readonly.sh:/docker-entrypoint-initdb.d/init-postgres-readonly.sh:ro

  minio:
    image: minio/minio:RELEASE.2025-04-22T22-12-26Z-cpuv1
    ports:
      - "9002:9002"  # MinIO API
      - "9003:9003"  # MinIO Console
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: minio123
    healthcheck:
      test: timeout 5s bash -c ':> /dev/tcp/127.0.0.1/9002' || exit 1
      interval: 1s
      timeout: 10s
      retries: 5
    command: server --address 0.0.0.0:9002 --console-address 0.0.0.0:9003 /data
    volumes:
      - minio_data:/data

  minio-create-bucket:
    image: minio/mc
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: >
      /bin/sh -c "
      echo 'Configuring MinIO...';
      mc alias set local http://minio:9002 minio minio123;
      echo 'Creating buckets...';
      mc mb --ignore-existing local/cdm-lake;
      mc mb --ignore-existing local/cdm-spark-job-logs;
      echo 'Creating service accounts...';
      mc admin user svcacct add --access-key minio-readonly --secret-key minio123 local minio || true;
      mc admin user svcacct add --access-key minio-readwrite --secret-key minio123 local minio || true;
      mc admin user svcacct add --access-key minio-log-access --secret-key minio123 local minio || true;
      echo 'MinIO configuration complete.';
      "

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data

volumes:
  postgres_data:
  minio_data:
  redis_data:
  users_home:  # Shared volume for all user home directories
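For reference, the Option B (build-from-source) workflow described in the header comments can be run end to end roughly as the shell sketch below. The reachability checks at the end simply probe the ports published in this compose file (8000, 8090, 9003); they are generic stand-ins, not official health endpoints. Note also that the hive-metastore service builds from a ../hive_metastore context that the header's clone list does not mention; the repository URL shown for it is an assumption inferred from that context name.

    # Sketch of the Option B workflow (build from local source), assuming a POSIX shell
    # and that this file lives in datalake-mcp-server/.
    cd ..
    git clone https://github.com/BERDataLakehouse/spark_notebook_base.git
    git clone https://github.com/BERDataLakehouse/kube_spark_manager_image.git
    # Assumed repo name, inferred from the '../hive_metastore' build context above:
    git clone https://github.com/BERDataLakehouse/hive_metastore.git

    # Build the required base image first.
    cd spark_notebook_base
    docker build -t spark_notebook_base:local .

    # Build and start the full stack.
    cd ../datalake-mcp-server
    docker compose up -d --build

    # Generic reachability checks against the published ports
    # (8000 = MCP server API, 8090 = Spark Master UI, 9003 = MinIO console).
    nc -z localhost 8000 && echo "MCP server port open"
    curl -sf http://localhost:8090 > /dev/null && echo "Spark Master UI reachable"
    curl -sf http://localhost:9003 > /dev/null && echo "MinIO console reachable"

For Option A (pre-built images), the same bring-up reduces to uncommenting the 'image:'/'platform:' lines, commenting out the 'build:' sections, and running docker compose up -d, as described in the header comments.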
