/**
* Model Handlers
*
* Handles model-related operations for the WebSocket protocol:
* model list retrieval, hardware-aware model recommendations
* (including local cache status), and setting the system-wide
* default embedding model.
*/
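//
// Illustrative wiring (a sketch only; the real dispatcher and message shapes live
// in the WebSocket protocol layer and message-types.ts, and the variable names
// below are placeholders):
//
//   const handlers = new ModelHandlers(logger, modelSelectionService, ollamaDetector, fmdmService, defaultModelService);
//   // Any client message that satisfies isModelListMessage() can be routed here:
//   const listResponse = await handlers.handleModelList(incomingMessage, clientId);
//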
import {
WSClientMessage,
ModelListResponseMessage,
ModelRecommendResponseMessage,
ModelCompatibilityScore,
DefaultModelSetResponseMessage,
isModelListMessage,
isModelRecommendMessage,
isDefaultModelSetMessage,
createModelListResponse,
createModelRecommendResponse,
createDefaultModelSetResponse
} from '../message-types.js';
import { ILoggingService } from '../../../di/interfaces.js';
import { RequestLogger } from '../../../domain/daemon/request-logger.js';
import { createStructuredLogger, CorrelationIdManager } from '../../../infrastructure/logging/message-formatting.js';
import { ModelSelectionService } from '../../../application/models/model-selection-service.js';
import { OllamaDetector } from '../../../infrastructure/ollama/ollama-detector.js';
import { IFMDMService } from '../../services/fmdm-service.js';
import { IDefaultModelService } from '../../services/default-model-service.js';
/**
* Model handlers for WebSocket protocol
*/
export class ModelHandlers {
private requestLogger: RequestLogger;
private structuredLogger: ReturnType<typeof createStructuredLogger>;
constructor(
private logger: ILoggingService,
private modelSelectionService: ModelSelectionService,
private ollamaDetector: OllamaDetector,
private fmdmService: IFMDMService,
private defaultModelService: IDefaultModelService
) {
this.requestLogger = new RequestLogger(this.logger);
this.structuredLogger = createStructuredLogger(this.logger, 'model-handler');
}
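// Note on logging: each request below is tracked twice, once through the structured
// logger (correlation-ID aware) and once through the legacy RequestLogger, so that
// existing log consumers keep working alongside the structured pipeline.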
/**
* Get machine capabilities - exposed for get_server_info endpoint
*/
async getMachineCapabilities() {
return await this.modelSelectionService.getMachineCapabilities();
}
/**
* Handle model list request
*/
async handleModelList(message: WSClientMessage, clientId?: string): Promise<ModelListResponseMessage> {
if (!isModelListMessage(message)) {
throw new Error('Invalid model list message');
}
const { id } = message;
// Generate correlation ID and start operation tracking
const correlationId = CorrelationIdManager.generateId('model-list');
return CorrelationIdManager.withId(correlationId, () => {
// Start structured operation logging
const operationRequestId = this.structuredLogger.logOperation('started', 'model_list_request', {
requestId: correlationId,
clientId: clientId || 'unknown',
metadata: {
messageId: id,
triggerType: 'user'
}
});
// Also use the existing request logger for compatibility
const requestId = this.requestLogger.startRequest(
'model_list',
{ messageId: id },
{
triggerType: 'user',
...(clientId && { clientId })
}
);
return this.executeModelListOperation(id, requestId, operationRequestId, clientId);
});
}
private async executeModelListOperation(
messageId: string,
requestId: string,
operationRequestId: string,
clientId?: string
): Promise<ModelListResponseMessage> {
const startTime = Date.now();
try {
// Get supported models from system configuration
const supportedModels = this.getSupportedModels();
// For now, we'll just return the models without cache status
// Cache status checking can be added later as an enhancement
const response = createModelListResponse(
messageId,
supportedModels,
'python' // Hardcoded to python for Phase 8 Task 10
);
const duration = Date.now() - startTime;
// Complete structured operation logging
this.structuredLogger.logOperation('completed', 'model_list_request', {
requestId: operationRequestId,
clientId: clientId || 'unknown',
duration,
metadata: {
modelCount: supportedModels.length,
responseSize: JSON.stringify(response).length,
provider: 'python'
}
});
// Complete legacy request tracking for compatibility
this.requestLogger.completeRequest(requestId, 'success', {
responseSize: JSON.stringify(response).length,
performanceMetrics: {
processingTime: duration
}
});
return response;
} catch (error) {
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : String(error);
// Complete structured operation logging with error
this.structuredLogger.logOperation('failed', 'model_list_request', {
requestId: operationRequestId,
clientId: clientId || 'unknown',
duration,
error: error instanceof Error ? error : new Error(String(error)),
recommendedAction: 'Check model configuration and system status'
});
// Complete legacy request tracking for compatibility
this.requestLogger.completeRequest(requestId, 'failure', {
errorCode: 'MODEL_LIST_ERROR',
errorMessage
});
// Degrade gracefully: return a model list response with no models rather than failing the request
return createModelListResponse(messageId, [], 'python');
}
}
/**
* Handle model recommendation request
*/
async handleModelRecommend(message: WSClientMessage, clientId?: string): Promise<ModelRecommendResponseMessage> {
if (!isModelRecommendMessage(message)) {
throw new Error('Invalid model recommendation message');
}
const { id, payload } = message;
const { languages, mode } = payload;
// Generate correlation ID and start operation tracking
const correlationId = CorrelationIdManager.generateId('model-recommend');
return CorrelationIdManager.withId(correlationId, () => {
// Start structured operation logging
const operationRequestId = this.structuredLogger.logOperation('started', 'model_recommend_request', {
requestId: correlationId,
clientId: clientId || 'unknown',
metadata: {
messageId: id,
triggerType: 'user',
mode,
languages: languages.join(',')
}
});
// Also use the existing request logger for compatibility
const requestId = this.requestLogger.startRequest(
'model_recommend',
{ messageId: id, mode, languages },
{
triggerType: 'user',
...(clientId && { clientId })
}
);
return this.executeModelRecommendOperation(id, requestId, operationRequestId, languages, mode, clientId);
});
}
private async executeModelRecommendOperation(
messageId: string,
requestId: string,
operationRequestId: string,
languages: string[],
mode: 'assisted' | 'manual',
clientId?: string
): Promise<ModelRecommendResponseMessage> {
const startTime = Date.now();
try {
// Get machine capabilities and model recommendations
const machineCapabilities = await this.modelSelectionService.getMachineCapabilities();
const recommendation = mode === 'assisted'
? await this.modelSelectionService.getAssistedModeRecommendation(languages)
: await this.modelSelectionService.getManualModeOptions(languages);
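// Both paths return a primary choice plus alternatives; assisted mode filters out
// hardware-incompatible models below, while manual mode keeps them (with a
// compatibilityReason) so the user can see why they are not recommended.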
// Convert compatibility scores to WebSocket format
const models: ModelCompatibilityScore[] = [];
// Add all scored models (compatible and incompatible for manual mode)
const allScores = [recommendation.primaryChoice, ...recommendation.alternatives];
const scoresToProcess = allScores.filter(score => {
if (!score) return false;
// Skip incompatible models in assisted mode
if (mode === 'assisted' && !score.hardwareCompatible) return false;
return true;
});
// Check local copy status for all models in parallel
const localCopyChecks = await Promise.all(
scoresToProcess.map(score => this.checkLocalCopy(score.model))
);
// Build model list with local copy status
for (let i = 0; i < scoresToProcess.length; i++) {
const score = scoresToProcess[i];
const isLocalCopy = localCopyChecks[i] || false; // Ensure boolean
if (!score) continue; // Extra safety check
models.push({
modelId: score.model.id,
displayName: score.model.displayName,
score: score.score,
compatibility: score.hardwareCompatible ? 'supported' : this.getCompatibilityStatus(score),
compatibilityReason: score.hardwareCompatible ? undefined : score.reasons.join(', '),
details: {
speed: this.formatSpeed(score.model),
accuracy: this.formatAccuracy(score.model),
languages: this.formatLanguageCount(score.model),
type: this.formatModelType(score.model),
size: this.formatModelSize(score.model),
localCopy: isLocalCopy,
...(mode === 'assisted' && score === recommendation.primaryChoice && { recommendation: 'Recommended' })
}
});
}
// Add Ollama models for manual mode
if (mode === 'manual') {
try {
const ollamaResult = await this.ollamaDetector.detectModels('manual');
for (const ollamaModel of ollamaResult.models) {
models.push({
modelId: ollamaModel.id,
displayName: ollamaModel.displayName,
score: 0, // No scoring for Ollama models
compatibility: 'user_managed',
details: {
speed: '-',
accuracy: '-',
languages: '-',
type: 'Ollama',
size: this.formatModelSize(ollamaModel),
localCopy: true // Ollama models are already downloaded if they're detected
}
});
}
} catch (error) {
// Ollama detection is optional; if it fails, continue without Ollama models
this.logger.warn('Ollama detection failed; continuing without Ollama models:', error);
}
}
// Add any additional locally cached models not already in the recommendations
await this.addCachedModelsNotInRecommendations(models, languages, mode);
// Build response
const response = createModelRecommendResponse(
messageId,
mode,
models,
{
hasGPU: machineCapabilities.gpu.type !== 'none',
...(machineCapabilities.gpu.vramGB !== undefined && { gpuMemoryGB: machineCapabilities.gpu.vramGB }),
cpuCores: machineCapabilities.cpu.cores,
ramGB: machineCapabilities.memory.totalRAMGB
},
mode === 'assisted' && recommendation.primaryChoice ? recommendation.primaryChoice.model.id : undefined
);
const duration = Date.now() - startTime;
// Complete structured operation logging
this.structuredLogger.logOperation('completed', 'model_recommend_request', {
requestId: operationRequestId,
clientId: clientId || 'unknown',
duration,
metadata: {
modelCount: models.length,
responseSize: JSON.stringify(response).length,
mode,
languageCount: languages.length
}
});
// Complete legacy request tracking for compatibility
this.requestLogger.completeRequest(requestId, 'success', {
responseSize: JSON.stringify(response).length,
performanceMetrics: {
processingTime: duration
}
});
return response;
} catch (error) {
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : String(error);
// Complete structured operation logging with error
this.structuredLogger.logOperation('failed', 'model_recommend_request', {
requestId: operationRequestId,
clientId: clientId || 'unknown',
duration,
error: error instanceof Error ? error : new Error(String(error)),
recommendedAction: 'Check model selection service and machine capabilities'
});
// Complete legacy request tracking for compatibility
this.requestLogger.completeRequest(requestId, 'failure', {
errorCode: 'MODEL_RECOMMEND_ERROR',
errorMessage
});
// Degrade gracefully: return an empty recommendation response with placeholder hardware info
return createModelRecommendResponse(
messageId,
mode,
[],
{ hasGPU: false, cpuCores: 1, ramGB: 1 }
);
}
}
/**
* Handle default model set request
* Allows clients to set the system-wide default embedding model
*/
async handleDefaultModelSet(message: WSClientMessage, clientId?: string): Promise<DefaultModelSetResponseMessage> {
if (!isDefaultModelSetMessage(message)) {
throw new Error('Invalid default model set message');
}
const { id, payload } = message;
const { modelId, languages } = payload;
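// Example payload (shape defined in message-types.ts; values here are illustrative):
//   { modelId: 'some-curated-model-id', languages: ['en', 'de'] }  // languages is optional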
// Generate correlation ID and start operation tracking
const correlationId = CorrelationIdManager.generateId('default-model-set');
return CorrelationIdManager.withId(correlationId, () => {
// Start structured operation logging
const operationRequestId = this.structuredLogger.logOperation('started', 'default_model_set_request', {
requestId: correlationId,
clientId: clientId || 'unknown',
metadata: {
messageId: id,
triggerType: 'user',
modelId,
languages: languages?.join(',')
}
});
// Also use the existing request logger for compatibility
const requestId = this.requestLogger.startRequest(
'default_model_set',
{ messageId: id, modelId, languages },
{
triggerType: 'user',
...(clientId && { clientId })
}
);
return this.executeDefaultModelSetOperation(id, requestId, operationRequestId, modelId, languages, clientId);
});
}
private async executeDefaultModelSetOperation(
messageId: string,
requestId: string,
operationRequestId: string,
modelId: string,
languages: string[] | undefined,
clientId?: string
): Promise<DefaultModelSetResponseMessage> {
const startTime = Date.now();
try {
// Use DefaultModelService to set the model (handles validation, persistence, and FMDM broadcast)
const newConfig = await this.defaultModelService.setDefaultModel(modelId, languages);
const duration = Date.now() - startTime;
// Complete structured operation logging
this.structuredLogger.logOperation('completed', 'default_model_set_request', {
requestId: operationRequestId,
clientId: clientId || 'unknown',
duration,
metadata: {
modelId: newConfig.modelId,
source: newConfig.source,
languages: newConfig.languages?.join(',')
}
});
// Complete legacy request tracking for compatibility
this.requestLogger.completeRequest(requestId, 'success', {
performanceMetrics: {
processingTime: duration
}
});
this.logger.info(`[ModelHandlers] Default model set to: ${newConfig.modelId}${newConfig.languages ? ` with languages: ${newConfig.languages.join(', ')}` : ''} by client ${clientId || 'unknown'}`);
return createDefaultModelSetResponse(messageId, true, {
modelId: newConfig.modelId,
source: newConfig.source,
...(newConfig.languages && { languages: newConfig.languages })
});
} catch (error) {
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : String(error);
// Complete structured operation logging with error
this.structuredLogger.logOperation('failed', 'default_model_set_request', {
requestId: operationRequestId,
clientId: clientId || 'unknown',
duration,
error: error instanceof Error ? error : new Error(String(error)),
recommendedAction: 'Check model ID is valid and model exists in curated models list'
});
// Complete legacy request tracking for compatibility
this.requestLogger.completeRequest(requestId, 'failure', {
errorCode: 'DEFAULT_MODEL_SET_ERROR',
errorMessage
});
this.logger.error(`[ModelHandlers] Failed to set default model: ${errorMessage}`);
return createDefaultModelSetResponse(messageId, false, undefined, errorMessage);
}
}
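/**
* Map a failing compatibility score to a coarse status by scanning its reasons:
* GPU-related reasons map to 'needs_gpu', VRAM-related reasons to 'needs_vram',
* and anything else to a generic 'incompatible'.
*/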
private getCompatibilityStatus(score: any): 'needs_gpu' | 'needs_vram' | 'incompatible' {
const reasons = score.reasons.join(' ');
if (reasons.includes('GPU required') || reasons.includes('No GPU')) {
return 'needs_gpu';
}
if (reasons.includes('VRAM')) {
return 'needs_vram';
}
return 'incompatible';
}
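/**
* Bucket a model's expected throughput into a coarse speed label, preferring the
* GPU figure and falling back to CPU: above 200 tok/s is 'High', above 100 is
* 'Medium', anything else (including unknown) is 'Low'.
*/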
private formatSpeed(model: any): string {
const tokensPerSec = model.requirements?.gpu?.expectedTokensPerSec ||
model.requirements?.cpu?.expectedTokensPerSec || 0;
if (tokensPerSec > 200) return 'High';
if (tokensPerSec > 100) return 'Medium';
return 'Low';
}
private formatAccuracy(model: any): string {
if (model.mtebScore) return `${model.mtebScore}%`;
return '-';
}
private formatLanguageCount(model: any): string {
const count = Object.keys(model.languagePerformance || {}).length;
if (count > 100) return '100+';
if (count > 50) return '50+';
return `${count}`;
}
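/**
* Classify a model for display: Xenova ONNX models run on CPU, models with GPU
* requirements are labelled 'GPU', and anything else falls back to 'Curated'.
*/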
private formatModelType(model: any): string {
if (model.huggingfaceId?.includes('Xenova')) return 'CPU';
if (model.requirements?.gpu) return 'GPU';
return 'Curated';
}
/**
* Check if a model is available locally - now uses FMDM cache!
*/
private async checkLocalCopy(model: any): Promise<boolean> {
const modelId = model.id;
// For Ollama models - they are installed if detected
if (model.source === 'ollama') {
return true; // Ollama models are by definition installed if they're detected
}
// For curated models - instant FMDM lookup (no Python spawning!)
const fmdm = this.fmdmService.getFMDM();
const modelInfo = fmdm.curatedModels.find(m => m.id === modelId);
if (modelInfo) {
return modelInfo.installed;
}
// Fallback for unknown models (shouldn't happen with our curated list)
if (modelId.includes('xenova')) {
// Quick filesystem check for ONNX models not in FMDM
try {
const { ONNXDownloader } = await import('../../../infrastructure/embeddings/onnx/onnx-downloader.js');
const downloader = new ONNXDownloader();
return await downloader.isModelAvailable(modelId);
} catch {
return false;
}
}
// For GPU models not in FMDM - assume not installed (avoids Python spawning)
return false;
}
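/**
* Format a model size for display. Curated models expose modelSizeMB
* (e.g. 1536 -> "1.5GB", 420 -> "420MB"); Ollama models expose a byte count,
* which is shown in GB once it reaches 1 GiB and in MB otherwise.
*/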
private formatModelSize(model: any): string {
// Handle curated models (modelSizeMB property)
if (model.modelSizeMB) {
const sizeInMB = model.modelSizeMB;
if (sizeInMB >= 1024) {
const sizeInGB = sizeInMB / 1024;
return `${sizeInGB.toFixed(1)}GB`;
}
return `${sizeInMB}MB`;
}
// Handle Ollama models (size property in bytes)
if (model.size) {
const sizeInBytes = model.size;
const sizeInGB = sizeInBytes / (1024 * 1024 * 1024);
const sizeInMB = sizeInBytes / (1024 * 1024);
if (sizeInGB >= 1) {
return `${sizeInGB.toFixed(1)}GB`;
}
return `${Math.round(sizeInMB)}MB`;
}
return '-';
}
/**
* Get supported models - single source of truth for all model lists
*/
getSupportedModels(): string[] {
// Use FMDM curated models as the single source of truth
const fmdm = this.fmdmService.getFMDM();
return fmdm.curatedModels.map(model => model.id);
}
/**
* Validate if a model is supported
*/
isModelSupported(model: string): boolean {
return this.getSupportedModels().includes(model);
}
/**
* Get model display name by ID for better error messages
*/
getModelDisplayName(modelId: string): string {
// Try getting from modelSelectionService first
const model = this.modelSelectionService.getModelById(modelId);
if (model) {
return model.displayName;
}
// Curated models would need their displayName loaded from curated-models.json via FMDM;
// until that lookup is wired up, fall back to the model ID as the display name.
return modelId;
}
/**
* Get supported models with display names for error messages
*/
getSupportedModelsWithNames(): { id: string; displayName: string }[] {
const supportedIds = this.getSupportedModels();
return supportedIds.map(id => ({
id,
displayName: this.getModelDisplayName(id)
}));
}
/**
* Add any locally cached models that aren't already in the recommendations
*/
private async addCachedModelsNotInRecommendations(
models: ModelCompatibilityScore[],
languages: string[],
mode: 'assisted' | 'manual'
): Promise<void> {
// Get all supported models
const allSupportedModelIds = this.getSupportedModels();
const alreadyIncludedIds = new Set(models.map(m => m.modelId));
// Find models not already in the recommendations
const notIncludedIds = allSupportedModelIds.filter(id => !alreadyIncludedIds.has(id));
if (notIncludedIds.length === 0) {
return; // No additional models to check
}
// Check which of these models are locally cached
const cachedModelChecks = await Promise.all(
notIncludedIds.map(async (modelId) => {
const model = this.modelSelectionService.getModelById(modelId);
if (!model) return null;
const isLocalCopy = await this.checkLocalCopy(model);
return isLocalCopy ? { modelId, model } : null;
})
);
// Machine capabilities and the evaluator are the same for every cached model, so fetch them once
const machineCapabilities = await this.modelSelectionService.getMachineCapabilities();
const evaluator = this.modelSelectionService.getEvaluator();
// Add cached models to the list, scoring each one for hardware compatibility
for (const cachedModel of cachedModelChecks) {
if (!cachedModel) continue;
const { model } = cachedModel;
const score = evaluator.scoreModel(model, machineCapabilities, {
languages,
mode,
prioritizeAccuracy: mode === 'assisted'
});
models.push({
modelId: model.id,
displayName: model.displayName,
score: score.score, // Use actual calculated score
compatibility: score.hardwareCompatible ? 'supported' : this.getCompatibilityStatus(score),
compatibilityReason: score.hardwareCompatible ? undefined : score.reasons.join(', '),
details: {
speed: this.formatSpeed(model),
accuracy: this.formatAccuracy(model),
languages: this.formatLanguageCount(model),
type: this.formatModelType(model),
size: this.formatModelSize(model),
localCopy: true, // These are all locally cached
recommendation: 'Previously Downloaded' // Special indicator
}
});
}
}
}