folder-mcp

Overview Schema Related Servers Score Discussions

model-evaluator.ts•14 KiB

import type { MachineCapabilities, GPUCapabilities } from './machine-capabilities.js';
import { readFileSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// Get current directory for ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/** MTEB score that maps to a full 100 points before weighting. */
const MTEB_REFERENCE_SCORE = 80;

/** Tokens/sec that maps to a full 100 points before weighting. */
const SPEED_REFERENCE_TOKENS_PER_SEC = 500;

/** Context window assumed when a catalog entry does not declare one. */
const DEFAULT_CONTEXT_WINDOW = 256;

/** Multiplier applied to a model's VRAM requirement (20% safety margin). */
const VRAM_SAFETY_FACTOR = 1.2;

/** Multiplier applied to a model's RAM requirement (25% safety margin). */
const RAM_SAFETY_FACTOR = 1.25;

/**
 * One entry of the curated model catalog.
 *
 * RAM/VRAM requirement fields are compared against `GB * 1024` by the
 * evaluator, i.e. they are treated as megabyte-scale values.
 */
export interface CuratedModel {
  id: string;
  displayName: string;
  description: string;
  huggingfaceId?: string;
  modelName?: string; // For Ollama models
  dimensions: number;
  modelSizeMB: number;
  /** Per-language quality keyed by lowercase language code; the evaluator treats values as 0-1. */
  languagePerformance: Record<string, number>;
  mtebScore?: number;
  contextWindow?: number;
  requirements?: {
    prefixes?: {
      query?: string;
      passage?: string;
    };
    cpu?: {
      minRAM?: number;
      recRAM?: number;
      expectedTokensPerSec?: number;
      optimalCpuFeatures?: string[];
      notes?: string;
    };
    gpu?: {
      minVRAM?: number;
      recommendedVRAM?: number;
      expectedTokensPerSec?: number;
      notes?: string;
    };
    vram?: number; // For Ollama models
    installCommand?: string; // For Ollama models
  };
  downloadInfo?: {
    url: string;
    architecture: string;
  };
  quantization?: string;
}

/** Shape of `config/curated-models.json` as consumed by the evaluator. */
export interface ModelCatalog {
  version: string;
  lastUpdated: string;
  dataSource: string;
  gpuModels: {
    description: string;
    provider: string;
    downloadMethod: string;
    models: CuratedModel[];
  };
  cpuModels: {
    description: string;
    provider: string;
    downloadMethod: string;
    baseUrl: string;
    models: CuratedModel[];
  };
  // Kept as `any` for backward compatibility with existing consumers.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  metadata: any;
}

/** Result of scoring a single model against a machine and a set of criteria. */
export interface ModelCompatibilityScore {
  model: CuratedModel;
  /** Weighted total, rounded to one decimal; 0 when hardware is insufficient. */
  score: number;
  /** Human-readable explanation lines, in the order they were evaluated. */
  reasons: string[];
  hardwareCompatible: boolean;
  /** Average language performance on the same scale as `languagePerformance`. */
  languageScore: number;
  recommendedUse: string;
}

/** Caller preferences that drive the scoring weights. */
export interface EvaluationCriteria {
  languages: string[];
  mode: 'assisted' | 'manual';
  prioritizeSpeed?: boolean;
  prioritizeAccuracy?: boolean;
}

/**
 * Scores the curated GPU and CPU models against detected machine
 * capabilities. Hardware compatibility is a binary gate; compatible models
 * are then ranked by MTEB score, context window, language fit and speed.
 */
export class ModelCompatibilityEvaluator {
  private catalog!: ModelCatalog;

  /**
   * @throws Error when the curated catalog cannot be read, parsed, or does
   *   not contain the expected model lists.
   */
  constructor() {
    // Load the catalog synchronously from disk so the instance is usable immediately.
    this.loadCatalogSync();
  }

  /**
   * Reads and parses `../../config/curated-models.json` relative to this module.
   *
   * @throws Error with a `Failed to load curated-models.json: ...` message on any failure.
   */
  private loadCatalogSync(): void {
    try {
      const catalogPath = join(__dirname, '../../config/curated-models.json');
      const jsonContent = readFileSync(catalogPath, 'utf-8');
      const parsed = JSON.parse(jsonContent) as ModelCatalog;

      // Fail here, with a clear message, rather than with a TypeError deep
      // inside scoring when a model list is missing.
      if (
        !Array.isArray(parsed?.gpuModels?.models) ||
        !Array.isArray(parsed?.cpuModels?.models)
      ) {
        throw new Error('catalog must contain gpuModels.models and cpuModels.models arrays');
      }

      this.catalog = parsed;
    } catch (error) {
      const errorMessage = `Failed to load curated-models.json: ${error instanceof Error ? error.message : String(error)}`;
      console.error(`[DAEMON] ${errorMessage}`);
      throw new Error(errorMessage);
    }
  }

  /**
   * Scores every curated model and returns the results best-first.
   *
   * @param capabilities Detected machine capabilities.
   * @param criteria Language selection and mode.
   * @returns One score per curated model, sorted by `score` descending.
   */
  evaluateModelCompatibility(
    capabilities: MachineCapabilities,
    criteria: EvaluationCriteria
  ): ModelCompatibilityScore[] {
    const scores = this.getAllAvailableModels(criteria.mode).map(model =>
      this.scoreModel(model, capabilities, criteria)
    );

    // Sort by score (highest first); `scores` is a fresh local array.
    return scores.sort((a, b) => b.score - a.score);
  }

  /**
   * Returns the GPU models followed by the CPU models from the curated catalog.
   * The mode is currently not used to filter the list.
   */
  private getAllAvailableModels(_mode: 'assisted' | 'manual'): CuratedModel[] {
    // Ollama models are handled separately via runtime detection
    // They are not included in the curated catalog anymore
    return [...this.catalog.gpuModels.models, ...this.catalog.cpuModels.models];
  }

  /**
   * Scores one model.
   *
   * Hardware compatibility is checked first; an incompatible model returns a
   * score of 0 with `recommendedUse` set to `'Hardware insufficient'`.
   * Otherwise the score is the weighted sum of language fit, MTEB score,
   * context window and speed, using the weights from `getOptimalWeights`.
   *
   * @returns The score rounded to one decimal plus the reasons behind it.
   */
  scoreModel(
    model: CuratedModel,
    capabilities: MachineCapabilities,
    criteria: EvaluationCriteria
  ): ModelCompatibilityScore {
    const reasons: string[] = [];

    // Check hardware compatibility (BINARY FILTER - not scored)
    const hardwareResult = this.evaluateHardwareCompatibility(model, capabilities);
    reasons.push(...hardwareResult.reasons);

    if (!hardwareResult.compatible) {
      return {
        model,
        score: 0,
        reasons,
        hardwareCompatible: false,
        languageScore: 0,
        recommendedUse: 'Hardware insufficient'
      };
    }

    let score = 0;

    // Get optimal weights based on use case
    const weights = this.getOptimalWeights(criteria.languages);
    reasons.push(`Scoring weights: MTEB ${weights.mteb}%, Context ${weights.context}%, Language ${weights.language}%, Speed ${weights.speed}%`);

    // Language performance: a 0-1 average times the percent weight.
    const languageScore = this.evaluateLanguagePerformance(model, criteria.languages).averageScore;
    score += languageScore * weights.language;
    reasons.push(`Language compatibility: ${(languageScore * 100).toFixed(0)}% (${weights.language}% weight)`);

    // MTEB performance: normalized to 0-100 and clamped so one term can
    // never contribute more than its weight.
    if (model.mtebScore) {
      const mtebNormalized = Math.min(model.mtebScore / MTEB_REFERENCE_SCORE, 1) * 100;
      score += mtebNormalized * (weights.mteb / 100);
      reasons.push(`MTEB performance: ${model.mtebScore}% (${weights.mteb}% weight)`);
    }

    // Context length: very important for document processing.
    // `||` is intentional: a declared window of 0 is treated as "not declared".
    const contextWindow = model.contextWindow || DEFAULT_CONTEXT_WINDOW;
    const contextScore = this.getContextLengthScore(contextWindow);
    score += contextScore * (weights.context / 100);
    reasons.push(`Context length: ${contextWindow} tokens = ${contextScore}/100 (${weights.context}% weight)`);

    // Speed: tie-breaker only.
    // NOTE(review): this takes the faster of the GPU and CPU figures even when
    // the machine has no GPU - confirm whether that is intended.
    const gpuSpeed = model.requirements?.gpu?.expectedTokensPerSec;
    const cpuSpeed = model.requirements?.cpu?.expectedTokensPerSec;
    const bestSpeed = Math.max(gpuSpeed || 0, cpuSpeed || 0);
    if (bestSpeed > 0) {
      const speedScore = Math.min(bestSpeed / SPEED_REFERENCE_TOKENS_PER_SEC, 1) * 100;
      score += speedScore * (weights.speed / 100);
      reasons.push(`Speed: ${bestSpeed} tokens/sec (${weights.speed}% weight)`);
    }

    // Hardware is known to be compatible here, so only language fit decides the label.
    let recommendedUse: string;
    if (languageScore > 0.7) {
      recommendedUse = 'Excellent choice';
    } else if (languageScore > 0.6) {
      recommendedUse = 'Good option';
    } else {
      recommendedUse = 'Usable with limitations';
    }

    return {
      model,
      score: Math.round(score * 10) / 10, // Round to 1 decimal
      reasons,
      hardwareCompatible: true,
      languageScore,
      recommendedUse
    };
  }

  /**
   * Decides whether the machine can run the model.
   *
   * - GPU-catalog models need enough VRAM (requirement × 1.2). On a machine
   *   whose GPU type is `'none'` they fall back to their CPU RAM requirement
   *   (requirement × 1.25).
   * - CPU-catalog models need enough RAM (requirement × 1.25); entries
   *   without CPU requirements are assumed to run anywhere.
   *
   * Every incompatible outcome carries at least one reason.
   *
   * @returns The compatibility flag, an informational hardware score, and reasons.
   */
  private evaluateHardwareCompatibility(
    model: CuratedModel,
    capabilities: MachineCapabilities
  ): { compatible: boolean; score: number; reasons: string[] } {
    const reasons: string[] = [];
    let score = 0;
    let compatible = false;

    // Catalog membership, not the model's own fields, decides GPU vs CPU handling.
    const isGpuModel = this.catalog.gpuModels.models.some(gpuModel => gpuModel.id === model.id);
    const isCpuModel = this.catalog.cpuModels.models.some(cpuModel => cpuModel.id === model.id);
    const availableRAMGB = capabilities.memory.availableRAMGB;

    if (isGpuModel) {
      const gpuReq = model.requirements?.gpu;

      if (capabilities.gpu.type === 'none') {
        // Fall back to CPU requirements
        const cpuReq = model.requirements?.cpu;
        if (cpuReq) {
          // `||` is intentional: 0 is treated as "not specified".
          const requiredRAM = cpuReq.minRAM || cpuReq.recRAM || 2048;
          const requiredWithMargin = requiredRAM * RAM_SAFETY_FACTOR;

          if (availableRAMGB * 1024 >= requiredWithMargin) {
            compatible = true;
            score += 8; // Lower score for CPU fallback
            reasons.push(`CPU fallback: ${availableRAMGB}GB RAM available (need ${(requiredWithMargin / 1024).toFixed(1)}GB with safety margin)`);
          } else {
            reasons.push(`Insufficient RAM: need ${(requiredWithMargin / 1024).toFixed(1)}GB with safety margin`);
          }
        } else {
          reasons.push('No GPU detected and model declares no CPU requirements for fallback');
        }
      } else if (gpuReq) {
        // Use recommended VRAM if available, otherwise fall back to minimum
        const requiredVRAM = gpuReq.recommendedVRAM || gpuReq.minVRAM || 4096;
        const availableVRAM = capabilities.gpu.vramGB || 0;
        const requiredWithMargin = requiredVRAM * VRAM_SAFETY_FACTOR;

        if (availableVRAM * 1024 >= requiredWithMargin) {
          compatible = true;
          score += 15; // High score for GPU compatibility
          reasons.push(`GPU compatible: ${availableVRAM}GB VRAM available (need ${(requiredWithMargin / 1024).toFixed(1)}GB with safety margin)`);
        } else {
          reasons.push(`Insufficient VRAM: need ${(requiredWithMargin / 1024).toFixed(1)}GB with safety margin, have ${availableVRAM}GB`);
        }
      } else {
        reasons.push('GPU model declares no GPU requirements; compatibility cannot be verified');
      }
    } else if (isCpuModel) {
      const cpuReq = model.requirements?.cpu;

      if (cpuReq) {
        const requiredRAM = cpuReq.minRAM || 512;
        const requiredWithMargin = requiredRAM * RAM_SAFETY_FACTOR;

        if (availableRAMGB * 1024 >= requiredWithMargin) {
          compatible = true;
          score += 12; // Good score for CPU compatibility
          reasons.push(`CPU optimized: ${availableRAMGB}GB RAM available (need ${(requiredWithMargin / 1024).toFixed(1)}GB with safety margin)`);

          // Bonus when any listed CPU feature matches a detected one
          // (case-insensitive substring match).
          const hasFeatures = (cpuReq.optimalCpuFeatures ?? []).some(feature =>
            capabilities.cpu.features.some(cpuFeature =>
              cpuFeature.toLowerCase().includes(feature.toLowerCase())
            )
          );
          if (hasFeatures) {
            score += 3;
            reasons.push('Optimized CPU features available (AVX2/FMA)');
          }
        } else {
          reasons.push(`Insufficient RAM: need ${(requiredWithMargin / 1024).toFixed(1)}GB with safety margin`);
        }
      } else {
        // Assume minimal requirements for ONNX models
        compatible = true;
        score += 10;
        reasons.push('CPU model with minimal requirements');
      }
    } else {
      // Ollama models are no longer in the curated catalog
      // They are handled separately via runtime detection
      reasons.push('Model not found in curated catalog');
    }

    return { compatible, score, reasons };
  }

  /**
   * Averages the model's performance over the requested languages.
   *
   * Language codes are matched case-insensitively. A language the model does
   * not list is scored at 70% of the model's English score (0.5 when English
   * is not listed either). An empty selection yields a neutral 0.5.
   *
   * @returns `score` and `averageScore`, both holding the same raw average.
   */
  private evaluateLanguagePerformance(
    model: CuratedModel,
    languages: string[]
  ): { score: number; averageScore: number } {
    if (languages.length === 0) {
      return { score: 0.5, averageScore: 0.5 }; // Neutral score
    }

    const languageScores = languages.map(lang => {
      const performance = model.languagePerformance[lang.toLowerCase()];
      if (performance !== undefined) {
        return performance;
      }
      // Fall back to English, reduced for the unsupported language. `??` keeps
      // an explicit English score of 0 instead of replacing it with 0.5.
      const fallbackScore = model.languagePerformance['en'] ?? 0.5;
      return fallbackScore * 0.7;
    });

    const averageScore =
      languageScores.reduce((sum, value) => sum + value, 0) / languageScores.length;

    // The raw average is multiplied by the language weight in scoreModel.
    return { score: averageScore, averageScore };
  }

  /** Looks a model up by ID across the curated GPU and CPU lists only. */
  getModelById(modelId: string): CuratedModel | undefined {
    const allModels = [
      ...this.catalog.gpuModels.models,
      ...this.catalog.cpuModels.models
    ];
    return allModels.find(model => model.id === modelId);
  }

  /** Returns the sorted, de-duplicated language codes listed by any curated model. */
  getSupportedLanguages(): string[] {
    const languages = new Set<string>();
    const allModels = [
      ...this.catalog.gpuModels.models,
      ...this.catalog.cpuModels.models
    ];

    for (const model of allModels) {
      for (const lang of Object.keys(model.languagePerformance)) {
        languages.add(lang);
      }
    }

    return Array.from(languages).sort();
  }

  /**
   * Get optimal scoring weights based on language selection and use case.
   *
   * The English check is case-insensitive so it agrees with the lowercase
   * lookup in `evaluateLanguagePerformance`. An empty selection uses the
   * single-non-English weights.
   *
   * @returns Percent weights that sum to 100.
   */
  private getOptimalWeights(languages: string[]): {
    mteb: number;
    context: number;
    language: number;
    speed: number;
  } {
    const isEnglishOnly = languages.length === 1 && languages[0]?.toLowerCase() === 'en';
    const isMultilingual = languages.length > 1;

    if (isEnglishOnly) {
      return {
        mteb: 50, // Most important - direct retrieval performance
        context: 40, // Critical for documents
        language: 5, // Minor (already filtered)
        speed: 5 // Tie-breaker
      };
    }

    if (isMultilingual) {
      return {
        mteb: 35, // Less reliable for non-English
        context: 45, // Even more critical for multilingual
        language: 15, // Cross-language stability matters
        speed: 5 // Still just tie-breaker
      };
    }

    // Single non-English
    return {
      mteb: 30, // Often English-biased
      context: 45, // Critical for non-English documents
      language: 20, // Important for quality
      speed: 5 // Tie-breaker
    };
  }

  /**
   * Score models based on context window size for document processing.
   *
   * @returns A bucketed score between 10 and 100.
   */
  private getContextLengthScore(contextWindow: number): number {
    // Scoring buckets based on real-world document sizes
    if (contextWindow >= 8192) return 100; // Full documents, PDFs
    if (contextWindow >= 2048) return 75; // Long chunks
    if (contextWindow >= 512) return 50; // Decent chunks
    if (contextWindow >= 256) return 25; // Minimal viable
    return 10; // 128 tokens = barely usable for documents
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/okets/folder-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

model-evaluator.ts•14 KiB