# Farnsworth Model Configurations
# Updated January 2025 with latest efficient models
models:
  # ============================================
  # TIER 1: Ultra-Efficient (< 2GB VRAM/RAM)
  # Best for: Edge devices, CPU-only, speed
  # ============================================
  qwen3-0.6b:
    name: "Qwen3-0.6B"
    description: "Ultra-lightweight with 100+ language support"
    backend: "ollama"
    ollama_name: "qwen3:0.6b"
    llama_cpp_repo: "Qwen/Qwen3-0.6B-GGUF"
    llama_cpp_file: "qwen3-0.6b-q4_k_m.gguf"
    # Param counts use the canonical float form (digit, dot, signed exponent)
    # so YAML 1.1 loaders (e.g. PyYAML) resolve them as floats, not strings.
    params: 0.6e+9
    vram_gb: 1.0
    ram_gb: 2.0
    quantization: "Q4_K_M"
    context_length: 32768
    strengths:
      - "multilingual"
      - "lightweight"
      - "fast"
      - "edge"
    license: "Apache-2.0"
    recommended_for:
      - "lightweight"
      - "multilingual"
      - "edge"
    release_date: "2025-05"
  tinyllama-1.1b:
    name: "TinyLlama-1.1B"
    description: "Fastest inference, optimized for edge devices"
    backend: "ollama"
    ollama_name: "tinyllama:1.1b"
    llama_cpp_repo: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
    llama_cpp_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
    params: 1.1e+9
    vram_gb: 0.8
    ram_gb: 2.0
    quantization: "Q4_K_M"
    context_length: 2048
    strengths:
      - "speed"
      - "edge"
      - "mobile"
      - "fast"
    license: "Apache-2.0"
    recommended_for:
      - "speed"
      - "edge"
      - "mobile"
    release_date: "2024-01"
  bitnet-2b:
    name: "Microsoft BitNet b1.58-2B-4T"
    description: "Native 1-bit quantization, 5-7x faster on CPU"
    backend: "bitnet"
    bitnet_repo: "microsoft/BitNet-b1.58-2B-4T"
    params: 2.0e+9
    vram_gb: 0.5
    ram_gb: 1.0
    quantization: "1-bit"
    context_length: 4096
    strengths:
      - "speed"
      - "efficiency"
      - "cpu-optimized"
      - "low-power"
    license: "MIT"
    recommended_for:
      - "speed"
      - "cpu-only"
      - "low-power"
    release_date: "2024-10"
  # ============================================
  # TIER 2: Compact Quality (2-4GB VRAM/RAM)
  # Best for: General use, good quality/speed balance
  # ============================================
  deepseek-r1-1.5b:
    name: "DeepSeek-R1-Distill-Qwen-1.5B"
    description: "o1-style reasoning in 1.5B params - best small reasoning model"
    backend: "ollama"
    ollama_name: "deepseek-r1:1.5b"
    llama_cpp_repo: "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF"
    llama_cpp_file: "DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf"
    params: 1.5e+9
    vram_gb: 2.0
    ram_gb: 4.0
    quantization: "Q4_K_M"
    context_length: 32768
    strengths:
      - "reasoning"
      - "math"
      - "code"
      - "chain-of-thought"
    license: "MIT"
    recommended_for:
      - "default"
      - "reasoning"
      - "code"
    release_date: "2025-01"
  smollm2-1.7b:
    name: "SmolLM2-1.7B"
    description: "State-of-the-art compact model, beats Qwen2.5-1.5B"
    backend: "ollama"
    ollama_name: "smollm2:1.7b"
    llama_cpp_repo: "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF"
    llama_cpp_file: "smollm2-1.7b-instruct-q4_k_m.gguf"
    params: 1.7e+9
    vram_gb: 1.5
    ram_gb: 3.0
    quantization: "Q4_K_M"
    context_length: 8192
    strengths:
      - "instruction-following"
      - "quality"
      - "on-device"
    license: "Apache-2.0"
    recommended_for:
      - "quality"
      - "on-device"
    release_date: "2024-11"
  gemma-1b:
    name: "Gemma-1B"
    description: "Google's efficient small model"
    backend: "ollama"
    ollama_name: "gemma:1b"
    llama_cpp_repo: "google/gemma-1.1-2b-it-GGUF"
    llama_cpp_file: "gemma-1.1-2b-it.Q4_K_M.gguf"
    params: 1.0e+9
    vram_gb: 1.0
    ram_gb: 2.0
    quantization: "Q4_K_M"
    context_length: 8192
    strengths:
      - "instruction-following"
      - "efficient"
    license: "Gemma"
    recommended_for:
      - "general"
      - "efficient"
    release_date: "2024-02"
  qwen3-4b:
    name: "Qwen3-4B"
    description: "Best for 4GB budget - MMLU-Pro 74%, excellent reasoning"
    backend: "ollama"
    ollama_name: "qwen3:4b"
    llama_cpp_repo: "Qwen/Qwen3-4B-GGUF"
    llama_cpp_file: "qwen3-4b-q4_k_m.gguf"
    params: 4.0e+9
    vram_gb: 2.75
    ram_gb: 5.0
    quantization: "Q4_K_M"
    context_length: 32768
    strengths:
      - "reasoning"
      - "multilingual"
      - "code"
      - "math"
    license: "Apache-2.0"
    recommended_for:
      - "balanced"
      - "4gb-budget"
    release_date: "2025-07"
  # ============================================
  # TIER 3: High Quality (4-8GB VRAM/RAM)
  # Best for: Complex tasks, best quality
  # ============================================
  phi-4-mini:
    name: "Phi-4-mini"
    description: "GPT-3.5 class, beats 7B/8B in math - 128K context"
    backend: "ollama"
    ollama_name: "phi4:mini"
    llama_cpp_repo: "microsoft/Phi-4-mini-instruct-gguf"
    llama_cpp_file: "Phi-4-mini-instruct-Q4_K_M.gguf"
    params: 3.8e+9
    vram_gb: 3.0
    ram_gb: 6.0
    quantization: "Q4_K_M"
    context_length: 128000
    strengths:
      - "reasoning"
      - "math"
      - "code"
      - "instruction-following"
      - "long-context"
    license: "MIT"
    recommended_for:
      - "quality"
      - "reasoning"
      - "math"
    release_date: "2025-05"
  phi-4-mini-reasoning:
    name: "Phi-4-mini-reasoning"
    description: "Rivals o1-mini on Math-500 and GPQA Diamond"
    backend: "ollama"
    ollama_name: "phi4:mini-reasoning"
    llama_cpp_repo: "microsoft/Phi-4-mini-reasoning-gguf"
    llama_cpp_file: "Phi-4-mini-reasoning-Q4_K_M.gguf"
    params: 3.8e+9
    vram_gb: 3.0
    ram_gb: 6.0
    quantization: "Q4_K_M"
    context_length: 128000
    strengths:
      - "reasoning"
      - "math"
      - "proofs"
      - "chain-of-thought"
    license: "MIT"
    recommended_for:
      - "math"
      - "reasoning"
      - "proofs"
    release_date: "2025-05"
  deepseek-r1-7b:
    name: "DeepSeek-R1-Distill-Qwen-7B"
    description: "Strong reasoning, distilled from R1"
    backend: "ollama"
    ollama_name: "deepseek-r1:7b"
    llama_cpp_repo: "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF"
    llama_cpp_file: "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"
    params: 7.0e+9
    vram_gb: 5.0
    ram_gb: 8.0
    quantization: "Q4_K_M"
    context_length: 32768
    strengths:
      - "reasoning"
      - "math"
      - "code"
      - "chain-of-thought"
    license: "MIT"
    recommended_for:
      - "reasoning"
      - "quality"
    release_date: "2025-01"
  # Multimodal capability
  gemma-3n-e2b:
    name: "Gemma-3n-E2B"
    description: "Multimodal: text, image, audio, video"
    backend: "ollama"
    ollama_name: "gemma3:2b"
    llama_cpp_repo: "google/gemma-3n-E2B-it-GGUF"
    llama_cpp_file: "gemma-3n-E2B-it-Q4_K_M.gguf"
    params: 5.0e+9
    # Effective (active) parameter count; smaller than total params.
    effective_params: 2.0e+9
    vram_gb: 2.0
    ram_gb: 4.0
    quantization: "Q4_K_M"
    context_length: 8192
    multimodal: true
    modalities:
      - "text"
      - "image"
      - "audio"
      - "video"
    strengths:
      - "multimodal"
      - "vision"
      - "audio"
    license: "Apache-2.0"
    recommended_for:
      - "multimodal"
      - "vision"
    release_date: "2024-12"
  phi-4-multimodal:
    name: "Phi-4-multimodal"
    description: "Microsoft multimodal with vision and speech"
    backend: "ollama"
    ollama_name: "phi4:multimodal"
    llama_cpp_repo: "microsoft/Phi-4-multimodal-instruct-gguf"
    llama_cpp_file: "Phi-4-multimodal-instruct-Q4_K_M.gguf"
    params: 5.6e+9
    vram_gb: 4.0
    ram_gb: 8.0
    quantization: "Q4_K_M"
    context_length: 128000
    multimodal: true
    modalities:
      - "text"
      - "image"
      - "audio"
    strengths:
      - "multimodal"
      - "vision"
      - "speech"
      - "reasoning"
    license: "MIT"
    recommended_for:
      - "multimodal"
      - "vision"
    release_date: "2025-05"
  # ============================================
  # TIER 4: Cloud API Models (Requires API Key)
  # Best for: Complex coding, agentic workflows
  # ============================================
  minimax-m2:
    name: "MiniMax-M2"
    description: "SOTA coding model - 230B MoE, 10B active params, optimized for SWE tasks"
    backend: "openai_compatible"
    api_base: "https://api.deepinfra.com/v1/openai"
    api_model: "MiniMaxAI/MiniMax-M2"
    params: 230.0e+9
    active_params: 10.0e+9
    context_length: 128000
    interleaved_thinking: true
    strengths:
      - "code"
      - "agentic"
      - "multi-file-editing"
      - "debugging"
      - "swe-bench"
      - "tool-calling"
    license: "MiniMax"
    recommended_for:
      - "code"
      - "agentic"
      - "complex-coding"
    env_key: "DEEPINFRA_API_KEY"
    release_date: "2025-04"
    notes: "Interleaved thinking model - preserve <think>...</think> tags in history"
  minimax-m2-1:
    name: "MiniMax-M2.1"
    description: "Enhanced M2 with multi-language programming and office automation"
    backend: "openai_compatible"
    api_base: "https://api.deepinfra.com/v1/openai"
    api_model: "MiniMaxAI/MiniMax-M2.1"
    params: 230.0e+9
    active_params: 10.0e+9
    context_length: 128000
    interleaved_thinking: true
    strengths:
      - "code"
      - "agentic"
      - "multi-language-code"
      - "office-automation"
      - "real-world-tasks"
    license: "MiniMax"
    recommended_for:
      - "code"
      - "real-world-dev"
    env_key: "DEEPINFRA_API_KEY"
    release_date: "2025-06"
  # ============================================
  # TIER 5: Vision/OCR Models (Document Processing)
  # ============================================
  deepseek-ocr2:
    name: "DeepSeek-VL2"
    description: "Advanced vision-language model for OCR and document parsing"
    backend: "openai_compatible"
    api_base: "https://api.deepinfra.com/v1/openai"
    api_model: "deepseek-ai/deepseek-vl2"
    params: 27.0e+9
    context_length: 32768
    multimodal: true
    modalities:
      - "text"
      - "image"
    capabilities:
      - "ocr"
      - "document_parsing"
      - "nutrition_labels"
      - "lab_results"
      - "prescription_reading"
    strengths:
      - "ocr"
      - "vision"
      - "document-understanding"
      - "table-extraction"
      - "handwriting"
    license: "DeepSeek"
    recommended_for:
      - "document_parsing"
      - "ocr"
      - "health_documents"
    env_key: "DEEPINFRA_API_KEY"
    release_date: "2024-12"
    notes: "Primary model for health document OCR - lab results, prescriptions, nutrition labels"
  qwen-vl-max:
    name: "Qwen-VL-Max"
    description: "Alibaba's multimodal model for vision tasks"
    backend: "openai_compatible"
    api_base: "https://dashscope.aliyuncs.com/compatible-mode/v1"
    api_model: "qwen-vl-max"
    params: 72.0e+9
    context_length: 32768
    multimodal: true
    modalities:
      - "text"
      - "image"
    capabilities:
      - "ocr"
      - "visual_qa"
      - "document_analysis"
    strengths:
      - "vision"
      - "ocr"
      - "chinese"
      - "document-understanding"
    license: "Tongyi Qianwen"
    recommended_for:
      - "vision"
      - "multilingual_ocr"
    env_key: "DASHSCOPE_API_KEY"
    release_date: "2024-08"
# Embedding models
embeddings:
  all-minilm-l6-v2:
    name: "all-MiniLM-L6-v2"
    provider: "sentence-transformers"
    dimensions: 384
    max_sequence_length: 256
    ram_mb: 100
    recommended_for:
      - "default"
      - "speed"
  bge-small-en:
    name: "BAAI/bge-small-en-v1.5"
    provider: "sentence-transformers"
    dimensions: 384
    max_sequence_length: 512
    ram_mb: 150
    recommended_for:
      - "accuracy"
  nomic-embed-text:
    name: "nomic-embed-text"
    provider: "ollama"
    dimensions: 768
    max_sequence_length: 8192
    ram_mb: 300
    recommended_for:
      - "long-context"
  gte-small:
    name: "thenlper/gte-small"
    provider: "sentence-transformers"
    dimensions: 384
    max_sequence_length: 512
    ram_mb: 100
    recommended_for:
      - "retrieval"
# Hardware profiles for automatic model selection
hardware_profiles:
  # MAX EVERYTHING - For users with massive hardware
  max:
    description: "MAX MODE: Unlimited resources, best quality, all features"
    models:
      - "minimax-m2"
      - "phi-4-mini-reasoning"
      - "deepseek-r1-7b"
      - "phi-4-mini"
      - "phi-4-multimodal"
      - "qwen3-4b"
      - "deepseek-r1-1.5b"
    embedding: "nomic-embed-text"
    swarm_enabled: true
    swarm_strategy: "parallel_vote"
    max_parallel: 8
    enable_cloud: true
    enable_multimodal: true
    enable_speculation: true
    context_window: 128000
    notes: "Uses ALL available models including cloud APIs for maximum quality"
  # Under 4GB total RAM
  minimal:
    description: "Minimal hardware (< 4GB RAM)"
    models:
      - "tinyllama-1.1b"
      - "qwen3-0.6b"
    embedding: "all-minilm-l6-v2"
    swarm_enabled: false
  # CPU only, 8GB+ RAM
  cpu_only:
    description: "CPU-only with 8GB+ RAM"
    models:
      - "bitnet-2b"
      - "qwen3-0.6b"
      - "tinyllama-1.1b"
      - "smollm2-1.7b"
    embedding: "all-minilm-l6-v2"
    swarm_enabled: true
    swarm_strategy: "fastest_first"
  # 2-4 GB VRAM
  low_vram:
    description: "Low VRAM (2-4GB)"
    models:
      - "deepseek-r1-1.5b"
      - "smollm2-1.7b"
      - "qwen3-0.6b"
      - "gemma-3n-e2b"
    embedding: "all-minilm-l6-v2"
    swarm_enabled: true
    swarm_strategy: "mixture_of_experts"
  # 4-8 GB VRAM
  medium_vram:
    description: "Medium VRAM (4-8GB)"
    models:
      - "phi-4-mini"
      - "deepseek-r1-1.5b"
      - "qwen3-4b"
      - "smollm2-1.7b"
      - "gemma-3n-e2b"
    embedding: "bge-small-en"
    swarm_enabled: true
    swarm_strategy: "pso_collaborative"
  # 8+ GB VRAM
  high_vram:
    description: "High VRAM (8GB+)"
    models:
      - "phi-4-mini-reasoning"
      - "deepseek-r1-7b"
      - "phi-4-mini"
      - "qwen3-4b"
      - "gemma-3n-e2b"
    embedding: "nomic-embed-text"
    swarm_enabled: true
    swarm_strategy: "parallel_vote"
# Task-based model selection (Mixture of Experts routing)
task_routing:
  code:
    description: "Programming and debugging"
    primary: "deepseek-r1-1.5b"  # Local first - saves tokens
    secondary: "phi-4-mini"
    fallback: "qwen3-4b"
    # Cloud escalation only for complex multi-file tasks
    escalation_model: "minimax-m2"
    escalation_threshold: 0.5  # Low confidence triggers cloud
    swarm_models:  # All local for swarm operations
      - "deepseek-r1-1.5b"
      - "phi-4-mini"
      - "qwen3-4b"
    notes: "Local models for swarm/memory, MiniMax M2 only for complex escalation"
  reasoning:
    description: "Logic and analysis"
    primary: "phi-4-mini-reasoning"
    secondary: "deepseek-r1-1.5b"
    fallback: "qwen3-4b"
    swarm_models:
      - "phi-4-mini-reasoning"
      - "deepseek-r1-1.5b"
      - "phi-4-mini"
  math:
    description: "Mathematical computation"
    primary: "phi-4-mini-reasoning"
    secondary: "deepseek-r1-1.5b"
    fallback: "qwen3-4b"
    swarm_models:
      - "phi-4-mini-reasoning"
      - "deepseek-r1-1.5b"
  creative:
    description: "Writing and brainstorming"
    primary: "qwen3-4b"
    secondary: "smollm2-1.7b"
    fallback: "qwen3-0.6b"
    swarm_models:
      - "qwen3-4b"
      - "smollm2-1.7b"
  research:
    description: "Information gathering"
    primary: "phi-4-mini"
    secondary: "deepseek-r1-1.5b"
    fallback: "smollm2-1.7b"
  multilingual:
    description: "Non-English languages"
    primary: "qwen3-4b"
    secondary: "qwen3-0.6b"
    fallback: "gemma-3n-e2b"
  multimodal:
    description: "Vision, audio, video"
    primary: "phi-4-multimodal"
    secondary: "gemma-3n-e2b"
    # No text-only fallback can serve multimodal requests.
    fallback: null
  speed:
    description: "Low-latency responses"
    primary: "bitnet-2b"
    secondary: "tinyllama-1.1b"
    fallback: "qwen3-0.6b"
# Model swarm configurations
# ALL SWARMS USE LOCAL MODELS ONLY - saves tokens/context
swarm:
  # Default swarm for general use
  default:
    description: "Balanced local swarm - saves tokens"
    strategy: "mixture_of_experts"
    use_local_only: true
    models:
      - id: "deepseek-r1-1.5b"
        role: "reasoning"
        weight: 1.0
      - id: "phi-4-mini"
        role: "quality"
        weight: 0.8
      - id: "qwen3-0.6b"
        role: "speed"
        weight: 0.5
      - id: "smollm2-1.7b"
        role: "generalist"
        weight: 0.7
    enable_verification: true
    max_parallel: 3
    # Cloud escalation for low-confidence results
    # NOTE(review): nested under `default` based on its position among the
    # default swarm's settings - confirm the consumer expects it here.
    cloud_escalation:
      enabled: true
      threshold: 0.4
      model: "minimax-m2"
  # Memory/context operations - ultra lightweight
  memory:
    description: "For memory retrieval, context building, summarization"
    strategy: "fastest_first"
    use_local_only: true
    models:
      - id: "qwen3-0.6b"
        role: "summarize"
        weight: 1.0
      - id: "tinyllama-1.1b"
        role: "speed"
        weight: 0.9
    enable_verification: false
    max_parallel: 2
    notes: "Tiny models for memory ops - preserves context window"
  # Speed-optimized swarm
  fast:
    description: "Speed-optimized with fast escalation"
    strategy: "fastest_first"
    use_local_only: true
    models:
      - id: "bitnet-2b"
        role: "speed"
        weight: 1.0
      - id: "tinyllama-1.1b"
        role: "speed"
        weight: 0.9
      - id: "qwen3-0.6b"
        role: "speed"
        weight: 0.8
      - id: "deepseek-r1-1.5b"
        role: "reasoning"
        weight: 0.6
    enable_verification: false
    max_parallel: 2
  # Quality-optimized swarm
  quality:
    description: "Quality-first with verification"
    strategy: "parallel_vote"
    models:
      - id: "phi-4-mini-reasoning"
        role: "reasoning"
        weight: 1.0
      - id: "deepseek-r1-7b"
        role: "reasoning"
        weight: 0.9
      - id: "phi-4-mini"
        role: "quality"
        weight: 0.8
      - id: "qwen3-4b"
        role: "generalist"
        weight: 0.7
    enable_verification: true
    max_parallel: 4
  # Reasoning-focused swarm
  reasoning:
    description: "Chain-of-thought reasoning specialists"
    strategy: "speculative_ensemble"
    models:
      - id: "phi-4-mini-reasoning"
        role: "reasoning"
        weight: 1.0
      - id: "deepseek-r1-1.5b"
        role: "reasoning"
        weight: 0.9
      - id: "qwen3-0.6b"
        role: "speed"
        weight: 0.5
    enable_verification: true
    max_parallel: 2
  # PSO collaborative swarm
  collaborative:
    description: "PSO-optimized collaborative inference"
    strategy: "pso_collaborative"
    pso_config:
      inertia: 0.7
      cognitive: 1.5
      social: 1.5
    models:
      - id: "deepseek-r1-1.5b"
        role: "reasoning"
      - id: "phi-4-mini"
        role: "quality"
      - id: "smollm2-1.7b"
        role: "generalist"
      - id: "qwen3-0.6b"
        role: "speed"
      - id: "tinyllama-1.1b"
        role: "speed"
    enable_verification: true
    max_parallel: 5
  # MAX EVERYTHING - Ultimate swarm for massive hardware
  max:
    description: "MAXIMUM POWER - All models, parallel voting, cloud + local"
    strategy: "parallel_vote"
    use_local_only: false
    enable_cloud: true
    models:
      - id: "minimax-m2"
        role: "coding"
        weight: 1.0
        cloud: true
      - id: "phi-4-mini-reasoning"
        role: "reasoning"
        weight: 1.0
      - id: "deepseek-r1-7b"
        role: "reasoning"
        weight: 0.95
      - id: "phi-4-mini"
        role: "quality"
        weight: 0.9
      - id: "qwen3-4b"
        role: "generalist"
        weight: 0.85
      - id: "deepseek-r1-1.5b"
        role: "speed-reasoning"
        weight: 0.8
      - id: "phi-4-multimodal"
        role: "multimodal"
        weight: 0.8
    enable_verification: true
    verification_model: "phi-4-mini-reasoning"
    max_parallel: 8
    voting_threshold: 0.6
    context_window: 128000
    notes: "Run ALL models in parallel, vote on best response, verify with reasoning model"
# Speculative decoding pairs (draft + target)
speculative_decoding:
  pairs:
    - draft: "bitnet-2b"
      target: "phi-4-mini"
      speedup: 2.5
      description: "BitNet drafts for Phi-4"
    - draft: "tinyllama-1.1b"
      target: "deepseek-r1-1.5b"
      speedup: 1.8
      description: "TinyLlama drafts for DeepSeek"
    - draft: "qwen3-0.6b"
      target: "qwen3-4b"
      speedup: 2.0
      description: "Qwen family speculative"
    - draft: "smollm2-1.7b"
      target: "phi-4-mini-reasoning"
      speedup: 1.5
      description: "SmolLM drafts for reasoning"
# Benchmark reference (for model selection decisions)
benchmarks:
  # Scores are approximate, from various sources
  phi-4-mini-reasoning:
    math-500: 94.5
    gpqa-diamond: 68.0
    mmlu-pro: 72.0
  deepseek-r1-1.5b:
    math-500: 83.9
    gpqa-diamond: 33.8
    aime-2024: 28.3
  phi-4-mini:
    mmlu: 84.8
    humaneval: 82.0
    math: 80.4
  qwen3-4b:
    mmlu-pro: 74.0
    gpqa-diamond: 67.0
    livecode: 64.0