rlm_setup_ollama_direct

Install Ollama directly on macOS without Homebrew or sudo, enabling local inference for massive context processing. Supports downloading, starting the service, and pulling models like gemma3:12b for headless setups.

Instructions

Install Ollama via direct download (macOS).

Downloads from ollama.com to ~/Applications. PROS: No Homebrew needed, no sudo required, fully headless, works on locked-down machines. CONS: Manual PATH setup, no auto-updates, service runs as an unmanaged `ollama serve` process (not a launchd service).

Args:
- install: Download and install Ollama to ~/Applications (no sudo needed).
- start_service: Start the Ollama server (`ollama serve`) in the background.
- pull_model: Pull the model named by `model` (default: gemma3:12b).
- model: Model to pull (default: gemma3:12b). Use gemma3:4b or gemma3:1b for lower-RAM systems.

Input Schema

Table | JSON Schema

Name	Required	Default
`install`	No
`start_service`	No
`pull_model`	No
`model`	No	gemma3:12b

Output Schema

Table | JSON Schema

Name	Required	Description	Default
No arguments

Implementation Reference

src/rlm_mcp_server.py:454-681 (handler)

The async function `_setup_ollama_direct()` performs the actual direct-download setup: it downloads Ollama from ollama.com, extracts it to ~/Applications, starts the service, and pulls the model. This is the core implementation called by the tool handler.

async def _setup_ollama_direct(
    install: bool = False,
    start_service: bool = False,
    pull_model: bool = False,
    model: str = "gemma3:12b",
) -> dict:
    """Set up Ollama via direct download - no Homebrew, no sudo, fully headless.

    Runs up to three optional phases in order (install, start service, pull
    model). A later phase only runs while ``result["success"]`` is still true.

    Args:
        install: Download the macOS zip from ollama.com and move ``Ollama.app``
            into ``~/Applications`` (skipped when the app is already there).
        start_service: Launch ``ollama serve`` as a detached background process
            unless the server already answers.
        pull_model: Run ``ollama pull`` for ``model`` unless that exact model is
            already listed by the server.
        model: Name of the model to pull.

    Returns:
        A dict with ``method``, ``actions_taken``, ``actions_skipped``,
        ``errors``, ``warnings``, ``success`` and ``system_check``; plus
        ``path_setup`` after a fresh install and ``ollama_status`` when the
        run succeeded.
    """
    import shutil
    import tempfile

    # NOTE(review): the subprocess.run() calls below are synchronous, so the
    # event loop is blocked while a download or model pull is in progress.

    result = {
        "method": "direct_download",
        "actions_taken": [],
        "actions_skipped": [],
        "errors": [],
        "warnings": [],
        "success": True,
    }

    # Check basic system requirements (macOS, Apple Silicon, RAM)
    sys_check = _check_system_requirements()
    result["system_check"] = {
        "is_macos": sys_check["is_macos"],
        "is_apple_silicon": sys_check["is_apple_silicon"],
        "ram_gb": sys_check["ram_gb"],
        "ram_sufficient": sys_check["ram_sufficient"],
    }

    if not sys_check["is_macos"]:
        result["errors"].append("Direct download setup only supported on macOS")
        result["success"] = False
        return result

    # Define paths
    home = Path.home()
    install_dir = home / "Applications"
    app_path = install_dir / "Ollama.app"
    cli_path = app_path / "Contents" / "Resources" / "ollama"

    def _locate_cli():
        """Return the bundled CLI if present, else ``ollama`` from PATH, else None."""
        if cli_path.exists():
            return cli_path
        on_path = shutil.which("ollama")
        return Path(on_path) if on_path else None

    # Install Ollama via direct download
    if install:
        if app_path.exists():
            result["actions_skipped"].append(f"Ollama already installed at {app_path}")
        else:
            download_url = "https://ollama.com/download/Ollama-darwin.zip"
            # Tracks which step is running so a timeout is attributed correctly.
            stage = "Download"
            try:
                # Create ~/Applications if needed
                install_dir.mkdir(parents=True, exist_ok=True)

                # A private temporary directory avoids predictable /tmp paths and
                # is removed on every exit path, including early returns.
                with tempfile.TemporaryDirectory(prefix="ollama-install-") as work_dir:
                    zip_path = Path(work_dir) / "Ollama-darwin.zip"
                    extract_dir = Path(work_dir) / "extract"
                    extract_dir.mkdir()

                    result["actions_taken"].append(f"Downloading from {download_url}...")

                    # -f makes curl exit non-zero on HTTP errors instead of saving
                    # the error page as the zip; -sS drops the progress meter
                    # but keeps the error text on stderr.
                    download_proc = subprocess.run(
                        ["curl", "-fsSL", "-o", str(zip_path), download_url],
                        capture_output=True,
                        text=True,
                        timeout=600,  # 10 minute timeout for download
                    )

                    if download_proc.returncode != 0:
                        result["errors"].append(f"Download failed: {download_proc.stderr}")
                        result["success"] = False
                        return result

                    result["actions_taken"].append("Download complete")

                    # Extract
                    stage = "Extraction"
                    result["actions_taken"].append("Extracting...")
                    extract_proc = subprocess.run(
                        ["unzip", "-q", str(zip_path), "-d", str(extract_dir)],
                        capture_output=True,
                        text=True,
                        timeout=120,
                    )

                    if extract_proc.returncode != 0:
                        result["errors"].append(f"Extraction failed: {extract_proc.stderr}")
                        result["success"] = False
                        return result

                    # Locate the app bundle; fall back to any top-level *.app.
                    extracted_app = extract_dir / "Ollama.app"
                    if not extracted_app.exists():
                        extracted_app = next(
                            (item for item in extract_dir.iterdir() if item.suffix == ".app"),
                            extracted_app,
                        )

                    if not extracted_app.exists():
                        result["errors"].append("Could not find Ollama.app in extracted contents")
                        result["success"] = False
                        return result

                    # Move to ~/Applications
                    shutil.move(str(extracted_app), str(app_path))
                    result["actions_taken"].append(f"Installed to {app_path}")

                # Note about PATH
                result["path_setup"] = {
                    "cli_path": str(cli_path),
                    "add_to_path": f'export PATH="{cli_path.parent}:$PATH"',
                    "shell_config": "Add the above line to ~/.zshrc or ~/.bashrc",
                }

            except subprocess.TimeoutExpired as exc:
                result["errors"].append(f"{stage} timed out ({exc.timeout:.0f}s limit)")
                result["success"] = False
            except Exception as e:
                result["errors"].append(f"Installation error: {e}")
                result["success"] = False

    # Start Ollama service
    if start_service and result["success"]:
        effective_cli = _locate_cli()

        if effective_cli is None:
            result["errors"].append(
                f"Ollama CLI not found. Expected at {cli_path} or in PATH. You may need to add it to your PATH first."
            )
            result["success"] = False
        else:
            # Check if already running
            status = await _check_ollama_status(force_refresh=True)
            if status.get("running"):
                result["actions_skipped"].append("Ollama service already running")
            else:
                try:
                    # Start ollama serve in background; nohup plus a new session
                    # detach it from this process's terminal.
                    subprocess.Popen(
                        ["nohup", str(effective_cli), "serve"],
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL,
                        start_new_session=True,
                    )
                except Exception as e:
                    # A launch failure is a real failure: later phases need the
                    # server, and the caller's summary keys off "success".
                    result["errors"].append(f"Failed to start service: {e}")
                    result["success"] = False
                else:
                    result["actions_taken"].append("Started Ollama service (ollama serve)")

                    # Wait for service to be ready
                    await asyncio.sleep(3)

                    # Verify it started
                    status = await _check_ollama_status(force_refresh=True)
                    if status.get("running"):
                        result["actions_taken"].append("Service is running")
                    else:
                        result["warnings"].append(
                            "Service may still be starting. Check with rlm_ollama_status in a few seconds."
                        )

    # Pull model
    if pull_model and result["success"]:
        # Check RAM before pulling large model
        if model == "gemma3:12b" and not sys_check["ram_sufficient"]:
            result["errors"].append(
                f"Insufficient RAM ({sys_check['ram_gb']}GB) for {model}. "
                f"Need {MIN_RAM_GB}GB+. Consider: gemma3:4b or gemma3:1b"
            )
            result["success"] = False
        else:
            effective_cli = _locate_cli()

            if effective_cli is None:
                result["errors"].append("Ollama CLI not found. Cannot pull model.")
                result["success"] = False
            else:
                # Skip only when this exact model is listed. Matching on the
                # base name alone would treat e.g. an installed gemma3:12b as
                # satisfying a request for gemma3:4b. An untagged name is
                # compared as ":latest".
                status = await _check_ollama_status(force_refresh=True)
                has_tag = ":" in model.rsplit("/", 1)[-1]
                wanted = model if has_tag else f"{model}:latest"
                already_pulled = wanted in status.get("models", [])

                if already_pulled:
                    result["actions_skipped"].append(f"Model {model} already available")
                else:
                    try:
                        result["actions_taken"].append(f"Pulling model {model} (this may take several minutes)...")
                        pull_proc = subprocess.run(
                            [str(effective_cli), "pull", model],
                            capture_output=True,
                            text=True,
                            timeout=1800,  # 30 minute timeout
                        )
                        if pull_proc.returncode == 0:
                            result["actions_taken"].append(f"Successfully pulled {model}")
                        else:
                            result["errors"].append(f"Failed to pull {model}: {pull_proc.stderr}")
                            result["success"] = False
                    except subprocess.TimeoutExpired:
                        result["errors"].append("Model pull timed out (30 min limit)")
                        result["success"] = False
                    except Exception as e:
                        result["errors"].append(f"Pull error: {e}")
                        result["success"] = False

    # Final status check
    if result["success"]:
        final_status = await _check_ollama_status(force_refresh=True)
        result["ollama_status"] = final_status

    return result

src/rlm_mcp_server.py:1279-1336 (registration)

The FastMCP tool registration: the `@mcp.tool()` decorator applied to the async function `rlm_setup_ollama_direct()`. This is the public-facing tool definition; it carries the docstring and parameter schema, and calls `_setup_ollama_direct()` for the actual logic.

@mcp.tool()
async def rlm_setup_ollama_direct(
    install: bool = False,
    start_service: bool = False,
    pull_model: bool = False,
    model: str = "gemma3:12b",
) -> dict:
    """Install Ollama via direct download (macOS).

    Downloads from ollama.com to ~/Applications.
    PROS: No Homebrew needed, no sudo required, fully headless, works on locked-down machines.
    CONS: Manual PATH setup, no auto-updates, service runs as foreground process.

    Args:
        install: Download and install Ollama to ~/Applications (no sudo needed)
        start_service: Start Ollama server (ollama serve) in background
        pull_model: Pull the default model (gemma3:12b)
        model: Model to pull (default: gemma3:12b). Use gemma3:4b or gemma3:1b for lower RAM systems.
    """
    # NOTE(review): the docstring above is left untouched because it appears to
    # be what clients see as the tool description - confirm before rewording.

    # With no phase selected, return a usage/comparison payload instead of
    # running the setup.
    nothing_requested = not (install or start_service or pull_model)
    if nothing_requested:
        pros = [
            "No Homebrew required",
            "No sudo/admin permissions needed",
            "Fully headless automation",
            "Works on locked-down/managed machines",
        ]
        cons = [
            "Manual PATH setup needed (CLI at ~/Applications/Ollama.app/Contents/Resources/ollama)",
            "No automatic updates",
            "Service runs via 'ollama serve' (not a managed launchd service)",
        ]
        return {
            "message": "No actions specified. Use install=true, start_service=true, or pull_model=true.",
            "method": "direct_download",
            "advantages": pros,
            "disadvantages": cons,
            "example": "rlm_setup_ollama_direct(install=true, start_service=true, pull_model=true)",
            "alternative": "Use rlm_setup_ollama for Homebrew-based installation if you have Homebrew",
        }

    outcome = await _setup_ollama_direct(
        install=install,
        start_service=start_service,
        pull_model=pull_model,
        model=model,
    )

    # Failure: the summary is just the collected error messages.
    if not outcome["success"]:
        outcome["summary"] = f"Setup failed: {'; '.join(outcome['errors'])}"
        return outcome

    # Success: list what was done and what was skipped, then append the PATH
    # hint when the setup reported one.
    taken = ", ".join(outcome["actions_taken"]) or "none"
    skipped = ", ".join(outcome["actions_skipped"]) or "none"
    summary = f"Setup complete (direct download)! Actions: {taken}. Skipped: {skipped}."

    path_setup = outcome.get("path_setup")
    if path_setup:
        summary += f" NOTE: Add to PATH: {path_setup['add_to_path']}"

    outcome["summary"] = summary
    return outcome

src/rlm_mcp_server.py:330-451 (helper)

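The `_check_system_requirements()` helper, called by `_setup_ollama_direct()`, reports the platform (macOS), Apple Silicon, RAM sufficiency, and whether Homebrew and Ollama are on the PATH. The direct-download setup aborts only when the platform is not macOS, and consults the RAM result before pulling gemma3:12b.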

def _check_system_requirements() -> dict:
    """Check if the system meets requirements for running Ollama with gemma3:12b.

    Probes the platform, CPU architecture, installed RAM (macOS only, via
    ``sysctl``), and whether ``brew`` and ``ollama`` are on PATH. Probes are
    best-effort: a failing probe is recorded under ``issues`` (or skipped, for
    the optional Ollama version) rather than raised.

    Returns:
        A dict with ``platform``, ``machine``, ``is_macos``,
        ``is_apple_silicon``, ``ram_gb``, ``ram_sufficient``,
        ``homebrew_installed``, ``ollama_installed``, ``meets_requirements``,
        ``issues`` and ``recommendations``. ``chip``, ``homebrew_path``,
        ``ollama_path`` and ``ollama_version`` are added only when detected.
    """
    import platform
    import shutil

    system = platform.system()
    machine = platform.machine()

    result = {
        "platform": system,
        "machine": machine,
        "is_macos": False,
        "is_apple_silicon": False,
        "ram_gb": 0,
        "ram_sufficient": False,
        "homebrew_installed": False,
        "ollama_installed": False,
        "meets_requirements": False,
        "issues": [],
        "recommendations": [],
    }

    # Check macOS
    if system == "Darwin":
        result["is_macos"] = True
    else:
        result["issues"].append(f"Not macOS (detected: {system})")
        result["recommendations"].append("Ollama auto-setup is only supported on macOS")

    # Check Apple Silicon (M1, M2, M3, M4)
    # NOTE(review): presumably an x86_64 Python running under Rosetta reports
    # "x86_64" here and is treated as non-Apple-Silicon - confirm if that matters.
    if machine == "arm64":
        result["is_apple_silicon"] = True
        # Try to get specific chip info; use the generic label whenever the
        # probe raises, exits non-zero, or prints nothing.
        chip = ""
        try:
            chip_info = subprocess.run(
                ["sysctl", "-n", "machdep.cpu.brand_string"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if chip_info.returncode == 0:
                chip = chip_info.stdout.strip()
        except Exception:
            chip = ""
        result["chip"] = chip or "Apple Silicon (arm64)"
    else:
        result["issues"].append(f"Not Apple Silicon (detected: {machine})")
        result["recommendations"].append("Apple Silicon (M1/M2/M3/M4) recommended for optimal Ollama performance")

    # Check RAM
    if system == "Darwin":
        try:
            mem_info = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if mem_info.returncode == 0:
                ram_bytes = int(mem_info.stdout.strip())
                ram_gb = ram_bytes / (1024**3)
                result["ram_gb"] = round(ram_gb, 1)
                result["ram_sufficient"] = ram_gb >= MIN_RAM_GB

                if not result["ram_sufficient"]:
                    result["issues"].append(
                        f"Insufficient RAM: {result['ram_gb']}GB (need {MIN_RAM_GB}GB+ for gemma3:12b)"
                    )
                    result["recommendations"].append(
                        f"gemma3:12b requires ~{GEMMA3_12B_RAM_GB}GB RAM. "
                        f"With {result['ram_gb']}GB total, consider using a smaller model."
                    )
            else:
                # Record the failed probe so a 0GB reading is explained.
                result["issues"].append("Could not determine RAM: sysctl hw.memsize failed")
        except Exception as e:
            result["issues"].append(f"Could not determine RAM: {e}")

    # Check Homebrew (PATH lookup without spawning `which`)
    brew_path = shutil.which("brew")
    result["homebrew_installed"] = brew_path is not None
    if brew_path:
        result["homebrew_path"] = brew_path
    else:
        result["issues"].append("Homebrew not installed")
        result["recommendations"].append(
            'Install Homebrew first: /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
        )

    # Check if Ollama is already installed
    ollama_path = shutil.which("ollama")
    result["ollama_installed"] = ollama_path is not None
    if ollama_path:
        result["ollama_path"] = ollama_path
        # The version is optional extra detail, so a failure here is
        # deliberately ignored.
        try:
            version_check = subprocess.run(
                [ollama_path, "--version"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if version_check.returncode == 0:
                result["ollama_version"] = version_check.stdout.strip()
        except Exception:
            pass

    # Determine if all requirements are met
    result["meets_requirements"] = (
        result["is_macos"] and result["is_apple_silicon"] and result["ram_sufficient"] and result["homebrew_installed"]
    )

    return result

src/rlm_mcp_server.py:818-925 (helper)

The _check_ollama_status() helper function called by _setup_ollama_direct to check if Ollama is running and if models are available, used during start_service and pull_model phases.

async def _check_ollama_status(force_refresh: bool = False) -> dict:
    """Report whether the Ollama server is reachable and which models it lists.

    The outcome of each real check is stored in the module-level status cache.
    Unless ``force_refresh`` is set, a cached outcome younger than the cache's
    ``ttl_seconds`` is returned without contacting the server.

    Args:
        force_refresh: Skip the cache and query the server again.

    Returns:
        A dict that always has ``running``, ``models``,
        ``default_model_available`` and ``cached``. Fresh successful checks add
        ``url``, ``model_count``, ``default_model`` and ``checked_at``; failed
        checks add ``error`` (and ``url``/``message`` when the server was tried).
    """
    import time

    cache = _ollama_status_cache
    now = time.time()

    def _remember(running, models, default_available):
        # Record the outcome of this check in the shared cache.
        cache.update(
            {
                "checked_at": now,
                "running": running,
                "models": models,
                "default_model_available": default_available,
            }
        )

    # Serve from cache when allowed and still within the TTL.
    cache_is_fresh = (
        not force_refresh
        and cache["checked_at"] is not None
        and now - cache["checked_at"] < cache["ttl_seconds"]
    )
    if cache_is_fresh:
        return {
            "running": cache["running"],
            "models": cache["models"],
            "default_model_available": cache["default_model_available"],
            "cached": True,
            "checked_at": cache["checked_at"],
        }

    # Without httpx there is no way to query the server.
    if not HAS_HTTPX:
        _remember(False, [], False)
        return {
            "running": False,
            "error": "httpx not installed",
            "models": [],
            "default_model_available": False,
            "cached": False,
        }

    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            # A successful /api/tags response means the server is up.
            response = await client.get(f"{ollama_url}/api/tags")
            response.raise_for_status()

            payload = response.json()
            models = [entry.get("name", "") for entry in payload.get("models", [])]

            # The default model counts as available when any listed name
            # starts with its base name (the part before ":").
            # NOTE(review): this also matches other tags of the same family
            # (e.g. a different size) - confirm that is intended.
            default_model = DEFAULT_MODELS["ollama"]
            default_available = any(name.startswith(default_model.partition(":")[0]) for name in models)

            _remember(True, models, default_available)

            return {
                "running": True,
                "url": ollama_url,
                "models": models,
                "model_count": len(models),
                "default_model": default_model,
                "default_model_available": default_available,
                "cached": False,
                "checked_at": now,
            }

    except Exception as exc:
        _remember(False, [], False)

        # A refused connection gets a dedicated, actionable message; anything
        # else is reported with the exception text.
        if isinstance(exc, httpx.ConnectError):
            failure = {
                "error": "connection_refused",
                "message": "Ollama server not running. Start with: ollama serve",
            }
        else:
            failure = {
                "error": "check_failed",
                "message": str(exc),
            }

        return {
            "running": False,
            "url": ollama_url,
            **failure,
            "models": [],
            "default_model_available": False,
            "cached": False,
        }

Massive Context MCP

rlm_setup_ollama_direct

Instructions

Input Schema

Output Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API