rlm_setup_ollama_direct
Install Ollama directly on macOS without Homebrew or sudo, enabling local inference for massive context processing. Supports downloading, starting the service, and pulling models like gemma3:12b for headless setups.
Instructions
Install Ollama via direct download (macOS).
Downloads from ollama.com to ~/Applications. PROS: No Homebrew needed, no sudo required, fully headless, works on locked-down machines. CONS: Manual PATH setup, no auto-updates, and the service is started with `ollama serve` as a detached background process rather than as a managed launchd service.
Args:
- install: Download and install Ollama to ~/Applications (no sudo needed).
- start_service: Start the Ollama server (`ollama serve`) in the background.
- pull_model: Pull the model named by `model` (default: gemma3:12b).
- model: Model to pull (default: gemma3:12b). Use gemma3:4b or gemma3:1b for lower-RAM systems.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
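| install | No | Download and install Ollama to ~/Applications (no sudo needed) | false |
| start_service | No | Start the Ollama server (`ollama serve`) in the background | false |
| pull_model | No | Pull the model named by `model` | false |
| model | No | Model to pull; use gemma3:4b or gemma3:1b for lower-RAM systems | gemma3:12b |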
Output Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- src/rlm_mcp_server.py:454-681 (handler) The async function _setup_ollama_direct(), which performs the actual Ollama direct-download setup: it downloads from ollama.com, extracts to ~/Applications, starts the service, and pulls the model. This is the core implementation called by the tool handler.
def _find_direct_ollama_cli(cli_path: "Path") -> "Path | None":
    """Return a usable Ollama CLI path, or None when none can be found.

    The binary bundled inside the direct-download app bundle wins; otherwise
    fall back to whatever ``ollama`` executable is on PATH.
    """
    import shutil

    if cli_path.exists():
        return cli_path
    on_path = shutil.which("ollama")
    return Path(on_path) if on_path else None


def _ollama_has_model(model: str, installed: list) -> bool:
    """Report whether *model* is already among the *installed* model names.

    The tag is part of the comparison, so having ``gemma3:1b`` does not count
    as having ``gemma3:12b``. An untagged request is treated as ``:latest``,
    and quantization-style suffixes (``gemma3:12b-instruct-q4_0``) still match.
    """
    wanted = model if ":" in model else f"{model}:latest"
    return any(name == wanted or name.startswith(f"{wanted}-") for name in installed)


def _install_ollama_app_bundle(result: dict, install_dir: "Path", app_path: "Path", cli_path: "Path") -> None:
    """Download the macOS zip, unpack it and move Ollama.app to *app_path*.

    Progress is appended to ``result["actions_taken"]``. On failure an entry
    is appended to ``result["errors"]`` and ``result["success"]`` is cleared.
    All scratch files live in a private temporary directory that is removed
    on every exit path. This function blocks; callers in async code should
    run it in an executor.
    """
    import shutil
    import tempfile

    def fail(message: str) -> None:
        result["errors"].append(message)
        result["success"] = False

    download_url = "https://ollama.com/download/Ollama-darwin.zip"
    install_dir.mkdir(parents=True, exist_ok=True)

    # A private, unpredictable scratch directory instead of fixed /tmp names:
    # avoids clashes between concurrent runs and is always cleaned up.
    with tempfile.TemporaryDirectory(prefix="ollama-install-") as scratch:
        zip_path = Path(scratch) / "Ollama-darwin.zip"
        extract_dir = Path(scratch) / "extracted"
        extract_dir.mkdir()

        result["actions_taken"].append(f"Downloading from {download_url}...")
        try:
            # --fail turns HTTP errors into a non-zero exit instead of saving
            # the error page as the "zip"; --silent/--show-error keep stderr
            # limited to the actual error text.
            download_proc = subprocess.run(
                ["curl", "--fail", "--silent", "--show-error", "-L", "-o", str(zip_path), download_url],
                capture_output=True,
                text=True,
                timeout=600,  # 10 minute timeout for download
            )
        except subprocess.TimeoutExpired:
            fail("Download timed out (10 min limit)")
            return
        if download_proc.returncode != 0:
            fail(f"Download failed: {download_proc.stderr}")
            return
        result["actions_taken"].append("Download complete")

        result["actions_taken"].append("Extracting...")
        try:
            extract_proc = subprocess.run(
                ["unzip", "-q", str(zip_path), "-d", str(extract_dir)],
                capture_output=True,
                text=True,
                timeout=120,
            )
        except subprocess.TimeoutExpired:
            fail("Extraction timed out (2 min limit)")
            return
        if extract_proc.returncode != 0:
            fail(f"Extraction failed: {extract_proc.stderr}")
            return

        # Normally the archive holds Ollama.app at its root; otherwise accept
        # the first *.app bundle found at the top level.
        extracted_app = extract_dir / "Ollama.app"
        if not extracted_app.exists():
            extracted_app = next(
                (item for item in extract_dir.iterdir() if item.suffix == ".app"),
                None,
            )
        if extracted_app is None:
            fail("Could not find Ollama.app in extracted contents")
            return

        shutil.move(str(extracted_app), str(app_path))
        result["actions_taken"].append(f"Installed to {app_path}")

    # The CLI is not on PATH after a direct install; tell the caller how to add it.
    result["path_setup"] = {
        "cli_path": str(cli_path),
        "add_to_path": f'export PATH="{cli_path.parent}:$PATH"',
        "shell_config": "Add the above line to ~/.zshrc or ~/.bashrc",
    }


async def _setup_ollama_direct(
    install: bool = False,
    start_service: bool = False,
    pull_model: bool = False,
    model: str = "gemma3:12b",
) -> dict:
    """Setup Ollama via direct download - no Homebrew, no sudo, fully headless.

    The three phases run in order (install, start service, pull model) and a
    later phase is skipped as soon as an earlier one has failed.

    Args:
        install: Download Ollama.app and place it in ~/Applications.
        start_service: Launch ``ollama serve`` detached from this process.
        pull_model: Pull *model* with the Ollama CLI.
        model: Name (with tag) of the model to pull.

    Returns:
        A dict with ``method``, ``actions_taken``, ``actions_skipped``,
        ``errors``, ``warnings``, ``success`` and ``system_check``; plus
        ``path_setup`` after a fresh install and ``ollama_status`` when
        everything succeeded.
    """
    result = {
        "method": "direct_download",
        "actions_taken": [],
        "actions_skipped": [],
        "errors": [],
        "warnings": [],
        "success": True,
    }

    # Check basic system requirements (macOS, Apple Silicon, RAM)
    sys_check = _check_system_requirements()
    result["system_check"] = {
        "is_macos": sys_check["is_macos"],
        "is_apple_silicon": sys_check["is_apple_silicon"],
        "ram_gb": sys_check["ram_gb"],
        "ram_sufficient": sys_check["ram_sufficient"],
    }

    if not sys_check["is_macos"]:
        result["errors"].append("Direct download setup only supported on macOS")
        result["success"] = False
        return result

    install_dir = Path.home() / "Applications"
    app_path = install_dir / "Ollama.app"
    cli_path = app_path / "Contents" / "Resources" / "ollama"

    # Long-running subprocesses are pushed to the default executor so the
    # event loop (and therefore the server) stays responsive meanwhile.
    loop = asyncio.get_running_loop()

    # Phase 1: install via direct download
    if install:
        if app_path.exists():
            result["actions_skipped"].append(f"Ollama already installed at {app_path}")
        else:
            try:
                await loop.run_in_executor(
                    None, _install_ollama_app_bundle, result, install_dir, app_path, cli_path
                )
            except Exception as e:
                result["errors"].append(f"Installation error: {e}")
                result["success"] = False
        if not result["success"]:
            return result

    # Phase 2: start the Ollama service
    if start_service and result["success"]:
        effective_cli = _find_direct_ollama_cli(cli_path)
        if effective_cli is None:
            result["errors"].append(
                f"Ollama CLI not found. Expected at {cli_path} or in PATH. "
                "You may need to add it to your PATH first."
            )
            result["success"] = False
        else:
            status = await _check_ollama_status(force_refresh=True)
            if status.get("running"):
                result["actions_skipped"].append("Ollama service already running")
            else:
                try:
                    # nohup + a new session detach the server from this
                    # process so it survives after the tool call returns.
                    subprocess.Popen(
                        ["nohup", str(effective_cli), "serve"],
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL,
                        start_new_session=True,
                    )
                except Exception as e:
                    result["errors"].append(f"Failed to start service: {e}")
                    # A launch failure must fail the run; otherwise the
                    # caller would report success alongside an error.
                    result["success"] = False
                else:
                    result["actions_taken"].append("Started Ollama service (ollama serve)")

                    # Give the server a moment to bind its port, then verify.
                    await asyncio.sleep(3)
                    status = await _check_ollama_status(force_refresh=True)
                    if status.get("running"):
                        result["actions_taken"].append("Service is running")
                    else:
                        result["warnings"].append(
                            "Service may still be starting. Check with rlm_ollama_status in a few seconds."
                        )

    # Phase 3: pull the model
    if pull_model and result["success"]:
        # Check RAM before pulling large model
        if model == "gemma3:12b" and not sys_check["ram_sufficient"]:
            result["errors"].append(
                f"Insufficient RAM ({sys_check['ram_gb']}GB) for {model}. "
                f"Need {MIN_RAM_GB}GB+. Consider: gemma3:4b or gemma3:1b"
            )
            result["success"] = False
        else:
            effective_cli = _find_direct_ollama_cli(cli_path)
            if effective_cli is None:
                result["errors"].append("Ollama CLI not found. Cannot pull model.")
                result["success"] = False
            else:
                status = await _check_ollama_status(force_refresh=True)
                if _ollama_has_model(model, status.get("models", [])):
                    result["actions_skipped"].append(f"Model {model} already available")
                else:
                    try:
                        result["actions_taken"].append(
                            f"Pulling model {model} (this may take several minutes)..."
                        )
                        pull_proc = await loop.run_in_executor(
                            None,
                            lambda: subprocess.run(
                                [str(effective_cli), "pull", model],
                                capture_output=True,
                                text=True,
                                timeout=1800,  # 30 minute timeout
                            ),
                        )
                        if pull_proc.returncode == 0:
                            result["actions_taken"].append(f"Successfully pulled {model}")
                        else:
                            result["errors"].append(f"Failed to pull {model}: {pull_proc.stderr}")
                            result["success"] = False
                    except subprocess.TimeoutExpired:
                        result["errors"].append("Model pull timed out (30 min limit)")
                        result["success"] = False
                    except Exception as e:
                        result["errors"].append(f"Pull error: {e}")
                        result["success"] = False

    # Final status check
    if result["success"]:
        result["ollama_status"] = await _check_ollama_status(force_refresh=True)

    return result


# - src/rlm_mcp_server.py:1279-1336 (registration) The FastMCP tool registration via @mcp.tool() decorator on async function rlm_setup_ollama_direct(). This is the public-facing tool definition with docstring, parameter schema, and calls _setup_ollama_direct() for the actual logic.
@mcp.tool()
async def rlm_setup_ollama_direct(
    install: bool = False,
    start_service: bool = False,
    pull_model: bool = False,
    model: str = "gemma3:12b",
) -> dict:
    """Install Ollama via direct download (macOS).

    Downloads from ollama.com to ~/Applications.

    PROS: No Homebrew needed, no sudo required, fully headless, works on locked-down machines.
    CONS: Manual PATH setup, no auto-updates, service runs as foreground process.

    Args:
        install: Download and install Ollama to ~/Applications (no sudo needed)
        start_service: Start Ollama server (ollama serve) in background
        pull_model: Pull the default model (gemma3:12b)
        model: Model to pull (default: gemma3:12b). Use gemma3:4b or gemma3:1b for lower RAM systems.
    """
    # With nothing requested, describe the method instead of doing any work.
    if not (install or start_service or pull_model):
        pros = [
            "No Homebrew required",
            "No sudo/admin permissions needed",
            "Fully headless automation",
            "Works on locked-down/managed machines",
        ]
        cons = [
            "Manual PATH setup needed (CLI at ~/Applications/Ollama.app/Contents/Resources/ollama)",
            "No automatic updates",
            "Service runs via 'ollama serve' (not a managed launchd service)",
        ]
        return {
            "message": "No actions specified. Use install=true, start_service=true, or pull_model=true.",
            "method": "direct_download",
            "advantages": pros,
            "disadvantages": cons,
            "example": "rlm_setup_ollama_direct(install=true, start_service=true, pull_model=true)",
            "alternative": "Use rlm_setup_ollama for Homebrew-based installation if you have Homebrew",
        }

    outcome = await _setup_ollama_direct(
        install=install,
        start_service=start_service,
        pull_model=pull_model,
        model=model,
    )

    # Failure: the summary is just the collected error messages.
    if not outcome["success"]:
        outcome["summary"] = f"Setup failed: {'; '.join(outcome['errors'])}"
        return outcome

    # Success: list what was done and what was skipped, plus the PATH hint
    # when a fresh install produced one.
    taken = ", ".join(outcome["actions_taken"]) or "none"
    skipped = ", ".join(outcome["actions_skipped"]) or "none"
    summary = f"Setup complete (direct download)! Actions: {taken}. Skipped: {skipped}."
    path_setup = outcome.get("path_setup")
    if path_setup:
        summary += f" NOTE: Add to PATH: {path_setup['add_to_path']}"
    outcome["summary"] = summary
    return outcome


# - src/rlm_mcp_server.py:330-451 (helper) The _check_system_requirements() helper function called by _setup_ollama_direct to verify platform (macOS), Apple Silicon, RAM sufficiency, and Homebrew status before proceeding with installation.
def _check_system_requirements() -> dict:
    """Check if the system meets requirements for running Ollama with gemma3:12b.

    Returns:
        A dict with the detected ``platform`` and ``machine``, the boolean
        flags ``is_macos``, ``is_apple_silicon``, ``ram_sufficient``,
        ``homebrew_installed``, ``ollama_installed`` and
        ``meets_requirements``, the measured ``ram_gb``, and human-readable
        ``issues`` / ``recommendations`` lists. ``chip``, ``homebrew_path``,
        ``ollama_path`` and ``ollama_version`` are added when known.
    """
    import platform
    import shutil

    system = platform.system()
    machine = platform.machine()

    result = {
        "platform": system,
        "machine": machine,
        "is_macos": False,
        "is_apple_silicon": False,
        "ram_gb": 0,
        "ram_sufficient": False,
        "homebrew_installed": False,
        "ollama_installed": False,
        "meets_requirements": False,
        "issues": [],
        "recommendations": [],
    }

    # Check macOS
    if system == "Darwin":
        result["is_macos"] = True
    else:
        result["issues"].append(f"Not macOS (detected: {system})")
        result["recommendations"].append("Ollama auto-setup is only supported on macOS")

    # Check Apple Silicon (M1, M2, M3, M4)
    if machine == "arm64":
        result["is_apple_silicon"] = True
        # Start from the generic label so "chip" is always present on arm64,
        # then refine it when sysctl can name the exact chip.
        result["chip"] = "Apple Silicon (arm64)"
        try:
            chip_info = subprocess.run(
                ["sysctl", "-n", "machdep.cpu.brand_string"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if chip_info.returncode == 0 and chip_info.stdout.strip():
                result["chip"] = chip_info.stdout.strip()
        except Exception:
            pass  # best effort: the generic label above is kept
    else:
        result["issues"].append(f"Not Apple Silicon (detected: {machine})")
        result["recommendations"].append("Apple Silicon (M1/M2/M3/M4) recommended for optimal Ollama performance")

    # Check RAM (only measurable here through macOS sysctl)
    try:
        if system == "Darwin":
            mem_info = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if mem_info.returncode == 0:
                ram_gb = int(mem_info.stdout.strip()) / (1024**3)
                result["ram_gb"] = round(ram_gb, 1)
                result["ram_sufficient"] = ram_gb >= MIN_RAM_GB

                if not result["ram_sufficient"]:
                    result["issues"].append(
                        f"Insufficient RAM: {result['ram_gb']}GB (need {MIN_RAM_GB}GB+ for gemma3:12b)"
                    )
                    result["recommendations"].append(
                        f"gemma3:12b requires ~{GEMMA3_12B_RAM_GB}GB RAM. "
                        f"With {result['ram_gb']}GB total, consider using a smaller model."
                    )
            else:
                # Without this, a failed probe would look like "0GB of RAM"
                # with no explanation.
                result["issues"].append(
                    f"Could not determine RAM: sysctl exited with code {mem_info.returncode}"
                )
    except Exception as e:
        result["issues"].append(f"Could not determine RAM: {e}")

    # Check Homebrew
    brew_path = shutil.which("brew")
    result["homebrew_installed"] = brew_path is not None
    if brew_path:
        result["homebrew_path"] = brew_path
    else:
        result["issues"].append("Homebrew not installed")
        result["recommendations"].append(
            'Install Homebrew first: /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
        )

    # Check if Ollama is already installed
    ollama_path = shutil.which("ollama")
    result["ollama_installed"] = ollama_path is not None
    if ollama_path:
        result["ollama_path"] = ollama_path
        # The version is informational only, so a failure to read it is
        # tolerated rather than reported as an issue.
        try:
            version_check = subprocess.run(
                ["ollama", "--version"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if version_check.returncode == 0:
                result["ollama_version"] = version_check.stdout.strip()
        except (OSError, subprocess.SubprocessError):
            pass

    # Determine if all requirements are met
    result["meets_requirements"] = (
        result["is_macos"]
        and result["is_apple_silicon"]
        and result["ram_sufficient"]
        and result["homebrew_installed"]
    )

    return result


# - src/rlm_mcp_server.py:818-925 (helper) The _check_ollama_status() helper function called by _setup_ollama_direct to check if Ollama is running and if models are available, used during start_service and pull_model phases.
async def _check_ollama_status(force_refresh: bool = False) -> dict:
    """Check Ollama server status and available models. Cached with TTL.

    Args:
        force_refresh: When true, ignore any cached result and query the
            server again.

    Returns:
        A dict that always contains ``running``, ``models``,
        ``default_model_available`` and ``cached``. A live successful check
        adds ``url``, ``model_count``, ``default_model`` and ``checked_at``;
        a failed check adds ``error`` (and usually ``url`` / ``message``).
    """
    import time

    cache = _ollama_status_cache
    now = time.time()

    # Return cached result if still valid
    cache_is_fresh = (
        not force_refresh
        and cache["checked_at"] is not None
        and now - cache["checked_at"] < cache["ttl_seconds"]
    )
    if cache_is_fresh:
        return {
            "running": cache["running"],
            "models": cache["models"],
            "default_model_available": cache["default_model_available"],
            "cached": True,
            "checked_at": cache["checked_at"],
        }

    def remember_unavailable() -> None:
        # Every failure path stores the same "not running" snapshot.
        cache.update(
            {
                "checked_at": now,
                "running": False,
                "models": [],
                "default_model_available": False,
            }
        )

    if not HAS_HTTPX:
        remember_unavailable()
        return {
            "running": False,
            "error": "httpx not installed",
            "models": [],
            "default_model_available": False,
            "cached": False,
        }

    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            # Listing the tags doubles as the liveness probe.
            response = await client.get(f"{ollama_url}/api/tags")
            response.raise_for_status()

            data = response.json()
            # "models" may be missing or null when nothing is installed.
            models = [m.get("name", "") for m in data.get("models") or []]

            # The default model counts as available only when its tag matches
            # too: exact name, or a variation of that same tag
            # (gemma3:12b vs gemma3:12b-instruct-q4_0). A different size such
            # as gemma3:1b does not qualify. An untagged default means :latest.
            default_model = DEFAULT_MODELS["ollama"]
            wanted = default_model if ":" in default_model else f"{default_model}:latest"
            default_available = any(
                name == wanted or name.startswith(f"{wanted}-") for name in models
            )

            cache.update(
                {
                    "checked_at": now,
                    "running": True,
                    "models": models,
                    "default_model_available": default_available,
                }
            )

            return {
                "running": True,
                "url": ollama_url,
                "models": models,
                "model_count": len(models),
                "default_model": default_model,
                "default_model_available": default_available,
                "cached": False,
                "checked_at": now,
            }

    except httpx.ConnectError:
        remember_unavailable()
        return {
            "running": False,
            "url": ollama_url,
            "error": "connection_refused",
            "message": "Ollama server not running. Start with: ollama serve",
            "models": [],
            "default_model_available": False,
            "cached": False,
        }
    except Exception as e:
        remember_unavailable()
        return {
            "running": False,
            "url": ollama_url,
            "error": "check_failed",
            "message": str(e),
            "models": [],
            "default_model_available": False,
            "cached": False,
        }