
M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
build-llama-cpp-qwen-vl.sh (6.67 kB)
#!/bin/bash
# Build and tag llama.cpp server for ARM64 with Qwen2.5-VL models

set -e

# Configuration
MODEL_SIZE="${1:-7b}"  # Default to 7b, can be 2b, 7b, or 72b
IMAGE_NAME="timothyswt/llama-cpp-server-arm64-qwen2.5-vl-${MODEL_SIZE}"
VERSION="${2:-latest}"
DOCKER_USERNAME="${DOCKER_USERNAME:-timothyswt}"

echo "🔨 Building llama.cpp server for ARM64 with Qwen2.5-VL-${MODEL_SIZE}..."
echo "Image: $IMAGE_NAME:$VERSION"
echo ""

# Target directory for model
TARGET_DIR="docker/llama-cpp/models"
TARGET_PATH="$TARGET_DIR/qwen2.5-vl-${MODEL_SIZE}.gguf"

# Create models directory if it doesn't exist
mkdir -p "$TARGET_DIR"

echo "📥 Checking for Qwen2.5-VL-${MODEL_SIZE} GGUF model..."
echo ""

# Check if model already exists
if [ -f "$TARGET_PATH" ]; then
  echo "✅ Model already exists: $TARGET_PATH"
  MODEL_SIZE_ACTUAL=$(du -h "$TARGET_PATH" | cut -f1)
  echo "   Size: $MODEL_SIZE_ACTUAL"
  DOWNLOAD_SUCCESS=true
else
  echo "Model not found, attempting download..."
  echo ""
  DOWNLOAD_SUCCESS=false

  # Try to download from HuggingFace
  echo "Attempting to download Qwen2.5-VL-${MODEL_SIZE} from HuggingFace..."
  case "$MODEL_SIZE" in
    2b)
      HUGGINGFACE_URL="https://huggingface.co/Qwen/Qwen2.5-VL-2B-Instruct-GGUF/resolve/main/qwen2.5-vl-2b-instruct-q4_k_m.gguf"
      ;;
    7b)
      HUGGINGFACE_URL="https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-GGUF/resolve/main/qwen2.5-vl-7b-instruct-q4_k_m.gguf"
      ;;
    72b)
      HUGGINGFACE_URL="https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct-GGUF/resolve/main/qwen2.5-vl-72b-instruct-q4_k_m.gguf"
      ;;
    *)
      echo "❌ Invalid model size: $MODEL_SIZE"
      echo "   Valid options: 2b, 7b, 72b"
      exit 1
      ;;
  esac

  if curl -L -f -# -o "$TARGET_PATH" "$HUGGINGFACE_URL" 2>/dev/null; then
    DOWNLOAD_SUCCESS=true
    echo "✅ Downloaded from HuggingFace"
  else
    echo "⚠️ HuggingFace download failed"
  fi

  # If download failed, provide manual instructions
  if [ "$DOWNLOAD_SUCCESS" = false ]; then
    echo ""
    echo "❌ Automatic download failed. Manual download required."
    echo ""
    echo "📋 Please manually download the Qwen2.5-VL-${MODEL_SIZE} GGUF file:"
    echo ""
    echo "Option 1 - Download from HuggingFace:"
    case "$MODEL_SIZE" in
      2b)
        echo "  Visit: https://huggingface.co/Qwen/Qwen2.5-VL-2B-Instruct-GGUF"
        echo "  File: qwen2.5-vl-2b-instruct-q4_k_m.gguf (~1.5 GB)"
        ;;
      7b)
        echo "  Visit: https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-GGUF"
        echo "  File: qwen2.5-vl-7b-instruct-q4_k_m.gguf (~4.5 GB)"
        ;;
      72b)
        echo "  Visit: https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct-GGUF"
        echo "  File: qwen2.5-vl-72b-instruct-q4_k_m.gguf (~45 GB)"
        ;;
    esac
    echo "  Save to: $TARGET_PATH"
    echo ""
    echo "Option 2 - Use curl with direct link:"
    echo "  curl -L -o '$TARGET_PATH' '$HUGGINGFACE_URL'"
    echo ""
    echo "After downloading, run this script again."
    exit 1
  fi
fi

# Verify file exists
if [ ! -f "$TARGET_PATH" ]; then
  echo "❌ Model file not found: $TARGET_PATH"
  exit 1
fi

echo ""
echo "✅ Model file ready: $TARGET_PATH"
MODEL_SIZE_ACTUAL=$(du -h "$TARGET_PATH" | cut -f1)
echo "   Size: $MODEL_SIZE_ACTUAL"
echo ""

# Detect context size based on model
case "$MODEL_SIZE" in
  2b)
    CTX_SIZE="32768"   # 32K
    ;;
  7b|72b)
    CTX_SIZE="131072"  # 128K
    ;;
esac

echo "🔍 Model configuration:"
echo "   Model: Qwen2.5-VL-${MODEL_SIZE}"
echo "   Context: ${CTX_SIZE} tokens"
echo ""

# Build the Docker image
echo "🏗️ Building Docker image..."
echo ""

MODEL_FILE="qwen2.5-vl-${MODEL_SIZE}.gguf"
VISION_FILE="qwen2.5-vl-${MODEL_SIZE}-vision.gguf"

docker build \
  --platform linux/arm64 \
  --build-arg MODEL_FILE="$MODEL_FILE" \
  --build-arg VISION_FILE="$VISION_FILE" \
  -t "$IMAGE_NAME:latest" \
  -t "$IMAGE_NAME:$VERSION" \
  -f docker/Dockerfile.qwen-vl \
  .

echo ""
echo "✅ Build complete!"
echo ""
echo "🔍 Image details:"
docker images | grep "llama-cpp-server-arm64-qwen2.5-vl-${MODEL_SIZE}"
echo ""

echo "🧪 Testing the image locally..."
echo "   Starting container on port 8080..."

# Start container for testing
CONTAINER_ID=$(docker run -d -p 8080:8080 "$IMAGE_NAME:latest")
echo "   Container ID: $CONTAINER_ID"
echo "   Waiting for server to start..."

# Wait for health check
sleep 15

# Test health endpoint
if curl -f http://localhost:8080/health > /dev/null 2>&1; then
  echo "   ✅ Health check passed"

  # Test chat endpoint with vision
  echo "   Testing vision capabilities..."
  TEST_RESULT=$(curl -s -X POST http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "qwen2.5-vl", "messages": [{"role": "user", "content": "Hello"}]}' || echo "failed")

  if [ "$TEST_RESULT" != "failed" ] && [ -n "$TEST_RESULT" ]; then
    echo "   ✅ Chat test passed"
  else
    echo "   ⚠️ Chat test failed (this may be normal during startup)"
  fi
else
  echo "   ⚠️ Health check failed (server may need more time)"
fi

# Stop test container
echo "   Stopping test container..."
docker stop "$CONTAINER_ID" > /dev/null 2>&1
docker rm "$CONTAINER_ID" > /dev/null 2>&1

echo ""
echo "📦 Ready to push to Docker Hub..."
echo "   (Make sure you're logged in: docker login)"
echo ""

# Ask for confirmation
read -p "Push to Docker Hub? (y/N) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  docker push "$IMAGE_NAME:latest"
  docker push "$IMAGE_NAME:$VERSION"
  echo "✅ Published to Docker Hub!"
else
  echo "⏭️ Skipped push. To push manually:"
  echo "   docker push $IMAGE_NAME:latest"
  echo "   docker push $IMAGE_NAME:$VERSION"
fi

# Clean up downloaded model
echo ""
echo "🧹 Cleaning up..."
read -p "Remove temporary model file? (y/N) " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  rm -f "$TARGET_PATH"
  echo "✅ Removed temporary model copy"
else
  echo "⏭️ Keeping model file at: $TARGET_PATH"
fi

echo ""
echo "🎉 Done! To use this image in docker-compose.yml:"
echo "   llama-vl-server:"
echo "     image: $IMAGE_NAME:latest"
echo "     # ... rest of config ..."
echo ""
echo "   And update environment variables:"
echo "   - MIMIR_EMBEDDINGS_VL_MODEL=qwen2.5-vl"
echo ""


MCP directory API

We provide all the information about MCP servers via our MCP API. For example, the Mimir server's entry can be fetched with:

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'
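For quick inspection, the JSON response can be pretty-printed with jq (assuming jq is installed locally; the fields returned are whatever the API provides):

curl -s -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir' | jq .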

If you have feedback or need assistance with the MCP directory API, please join our Discord server