Skip to main content
Glama
by Coder-RL
model-registry.ts22.4 kB
import { EventEmitter } from 'events'; export interface ModelMetadata { id: string; name: string; version: string; description: string; type: 'llm' | 'embedding' | 'classification' | 'regression' | 'generation' | 'custom'; framework: 'pytorch' | 'tensorflow' | 'onnx' | 'huggingface' | 'openai' | 'anthropic'; size: number; parameters: number; architecture: string; inputSchema: ModelSchema; outputSchema: ModelSchema; requirements: ModelRequirements; performance: ModelPerformance; tags: string[]; author: string; license: string; createdAt: Date; updatedAt: Date; status: 'draft' | 'testing' | 'staging' | 'production' | 'deprecated'; } export interface ModelSchema { type: 'object' | 'array' | 'string' | 'number' | 'boolean'; properties?: Record<string, ModelSchemaProperty>; items?: ModelSchema; required?: string[]; example?: any; } export interface ModelSchemaProperty { type: string; description: string; format?: string; enum?: any[]; minimum?: number; maximum?: number; pattern?: string; } export interface ModelRequirements { cpu: { cores: number; architecture: string[]; }; memory: { ram: number; vram?: number; }; gpu?: { required: boolean; memory: number; compute: string[]; }; storage: { size: number; type: 'ssd' | 'hdd' | 'nvme'; }; runtime: { python?: string; cuda?: string; dependencies: string[]; }; } export interface ModelPerformance { latency: { p50: number; p95: number; p99: number; }; throughput: { requestsPerSecond: number; tokensPerSecond?: number; }; accuracy?: { metric: string; value: number; dataset: string; }; resourceUtilization: { cpu: number; memory: number; gpu?: number; }; benchmarks: ModelBenchmark[]; } export interface ModelBenchmark { name: string; dataset: string; metric: string; value: number; timestamp: Date; environment: string; } export interface ModelArtifact { id: string; modelId: string; type: 'weights' | 'config' | 'tokenizer' | 'onnx' | 'tensorrt' | 'openvino'; path: string; size: number; checksum: string; format: string; compression?: string; createdAt: Date; } export interface ModelVersion { version: string; modelId: string; changelog: string[]; artifacts: ModelArtifact[]; experiments: ModelExperiment[]; promotion: ModelPromotion; createdBy: string; createdAt: Date; } export interface ModelExperiment { id: string; name: string; description: string; parameters: Record<string, any>; metrics: Record<string, number>; dataset: string; duration: number; status: 'running' | 'completed' | 'failed'; createdAt: Date; completedAt?: Date; } export interface ModelPromotion { environment: string; promotedBy: string; promotedAt: Date; approvals: ModelApproval[]; rollbackPlan?: string; } export interface ModelApproval { approver: string; approved: boolean; comments?: string; timestamp: Date; } export interface ModelDeployment { id: string; modelId: string; version: string; environment: string; configuration: DeploymentConfiguration; endpoints: ModelEndpoint[]; status: 'deploying' | 'active' | 'inactive' | 'failed' | 'updating'; health: 'healthy' | 'degraded' | 'unhealthy'; metrics: DeploymentMetrics; createdAt: Date; updatedAt: Date; } export interface DeploymentConfiguration { replicas: number; resources: { cpu: string; memory: string; gpu?: string; }; scaling: { minReplicas: number; maxReplicas: number; targetCPU: number; targetMemory: number; }; networking: { port: number; protocol: 'http' | 'grpc'; loadBalancer: boolean; }; environment: Record<string, string>; secrets: string[]; } export interface ModelEndpoint { id: string; url: string; type: 'inference' | 'batch' | 'streaming'; authentication: 'none' | 'api_key' | 'oauth' | 'jwt'; rateLimit: { requests: number; window: number; }; timeout: number; retries: number; } export interface DeploymentMetrics { requests: { total: number; success: number; errors: number; rate: number; }; latency: { p50: number; p95: number; p99: number; mean: number; }; resources: { cpu: number; memory: number; gpu?: number; }; uptime: number; lastUpdated: Date; } export interface ModelInferenceRequest { modelId: string; version?: string; input: any; parameters?: Record<string, any>; metadata?: Record<string, any>; timeout?: number; streaming?: boolean; } export interface ModelInferenceResponse { requestId: string; modelId: string; version: string; output: any; confidence?: number; latency: number; tokens?: { input: number; output: number; total: number; }; metadata?: Record<string, any>; timestamp: Date; } export class ModelRegistry extends EventEmitter { private models = new Map<string, ModelMetadata>(); private versions = new Map<string, ModelVersion[]>(); private artifacts = new Map<string, ModelArtifact[]>(); private deployments = new Map<string, ModelDeployment>(); private experiments = new Map<string, ModelExperiment>(); constructor() { super(); this.initializeBuiltInModels(); } // Model Management async registerModel(metadata: Omit<ModelMetadata, 'id' | 'createdAt' | 'updatedAt'>): Promise<string> { try { const modelId = this.generateId(); const model: ModelMetadata = { id: modelId, createdAt: new Date(), updatedAt: new Date(), ...metadata }; this.models.set(modelId, model); this.versions.set(modelId, []); this.artifacts.set(modelId, []); this.emit('modelRegistered', { model }); return modelId; } catch (error) { this.emit('error', { operation: 'registerModel', error }); throw error; } } async updateModel(modelId: string, updates: Partial<ModelMetadata>): Promise<boolean> { try { const model = this.models.get(modelId); if (!model) { throw new Error(`Model ${modelId} not found`); } Object.assign(model, updates, { updatedAt: new Date() }); this.emit('modelUpdated', { model }); return true; } catch (error) { this.emit('error', { operation: 'updateModel', error }); return false; } } async getModel(modelId: string): Promise<ModelMetadata | undefined> { return this.models.get(modelId); } async getModels(filters?: { type?: string; framework?: string; status?: string; tags?: string[]; }): Promise<ModelMetadata[]> { let models = Array.from(this.models.values()); if (filters) { if (filters.type) { models = models.filter(m => m.type === filters.type); } if (filters.framework) { models = models.filter(m => m.framework === filters.framework); } if (filters.status) { models = models.filter(m => m.status === filters.status); } if (filters.tags && filters.tags.length > 0) { models = models.filter(m => filters.tags!.some(tag => m.tags.includes(tag)) ); } } return models.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime()); } async deleteModel(modelId: string): Promise<boolean> { try { const model = this.models.get(modelId); if (!model) { return false; } // Check if model has active deployments const activeDeployments = Array.from(this.deployments.values()) .filter(d => d.modelId === modelId && d.status === 'active'); if (activeDeployments.length > 0) { throw new Error('Cannot delete model with active deployments'); } this.models.delete(modelId); this.versions.delete(modelId); this.artifacts.delete(modelId); this.emit('modelDeleted', { modelId }); return true; } catch (error) { this.emit('error', { operation: 'deleteModel', error }); return false; } } // Version Management async createVersion(modelId: string, version: Omit<ModelVersion, 'modelId' | 'createdAt'>): Promise<boolean> { try { const model = this.models.get(modelId); if (!model) { throw new Error(`Model ${modelId} not found`); } const modelVersion: ModelVersion = { modelId, createdAt: new Date(), ...version }; const versions = this.versions.get(modelId) || []; versions.push(modelVersion); this.versions.set(modelId, versions); this.emit('versionCreated', { modelId, version: modelVersion }); return true; } catch (error) { this.emit('error', { operation: 'createVersion', error }); return false; } } async getVersions(modelId: string): Promise<ModelVersion[]> { return this.versions.get(modelId) || []; } async getLatestVersion(modelId: string): Promise<ModelVersion | undefined> { const versions = this.versions.get(modelId) || []; return versions.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime())[0]; } // Artifact Management async uploadArtifact(artifact: Omit<ModelArtifact, 'id' | 'createdAt'>): Promise<string> { try { const artifactId = this.generateId(); const modelArtifact: ModelArtifact = { id: artifactId, createdAt: new Date(), ...artifact }; const artifacts = this.artifacts.get(artifact.modelId) || []; artifacts.push(modelArtifact); this.artifacts.set(artifact.modelId, artifacts); this.emit('artifactUploaded', { artifact: modelArtifact }); return artifactId; } catch (error) { this.emit('error', { operation: 'uploadArtifact', error }); throw error; } } async getArtifacts(modelId: string, type?: string): Promise<ModelArtifact[]> { let artifacts = this.artifacts.get(modelId) || []; if (type) { artifacts = artifacts.filter(a => a.type === type); } return artifacts.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); } async downloadArtifact(artifactId: string): Promise<Buffer> { // In production, this would download from storage service const artifact = this.findArtifactById(artifactId); if (!artifact) { throw new Error(`Artifact ${artifactId} not found`); } // Simulate artifact download return Buffer.from(`Artifact content for ${artifact.id}`); } // Experiment Management async createExperiment(experiment: Omit<ModelExperiment, 'id' | 'createdAt'>): Promise<string> { try { const experimentId = this.generateId(); const modelExperiment: ModelExperiment = { id: experimentId, createdAt: new Date(), ...experiment }; this.experiments.set(experimentId, modelExperiment); this.emit('experimentCreated', { experiment: modelExperiment }); return experimentId; } catch (error) { this.emit('error', { operation: 'createExperiment', error }); throw error; } } async updateExperiment(experimentId: string, updates: Partial<ModelExperiment>): Promise<boolean> { try { const experiment = this.experiments.get(experimentId); if (!experiment) { throw new Error(`Experiment ${experimentId} not found`); } Object.assign(experiment, updates); this.emit('experimentUpdated', { experiment }); return true; } catch (error) { this.emit('error', { operation: 'updateExperiment', error }); return false; } } async getExperiments(modelId?: string): Promise<ModelExperiment[]> { let experiments = Array.from(this.experiments.values()); if (modelId) { // Filter experiments by model - would need to track this relationship experiments = experiments.filter(e => e.name.includes(modelId)); } return experiments.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); } // Model Promotion async promoteModel( modelId: string, version: string, environment: string, approvals: ModelApproval[] ): Promise<boolean> { try { const model = this.models.get(modelId); if (!model) { throw new Error(`Model ${modelId} not found`); } const versions = this.versions.get(modelId) || []; const targetVersion = versions.find(v => v.version === version); if (!targetVersion) { throw new Error(`Version ${version} not found for model ${modelId}`); } // Check if all required approvals are present const requiredApprovals = approvals.filter(a => a.approved); if (requiredApprovals.length === 0) { throw new Error('At least one approval is required for promotion'); } targetVersion.promotion = { environment, promotedBy: requiredApprovals[0].approver, promotedAt: new Date(), approvals }; this.emit('modelPromoted', { modelId, version, environment }); return true; } catch (error) { this.emit('error', { operation: 'promoteModel', error }); return false; } } // Model Comparison async compareModels(modelIds: string[]): Promise<{ models: ModelMetadata[]; comparison: ModelComparison; }> { const models = modelIds.map(id => this.models.get(id)).filter(Boolean) as ModelMetadata[]; if (models.length < 2) { throw new Error('At least 2 models are required for comparison'); } const comparison: ModelComparison = { parameters: models.map(m => ({ id: m.id, parameters: m.parameters })), performance: models.map(m => ({ id: m.id, latency: m.performance.latency.p95, throughput: m.performance.throughput.requestsPerSecond, accuracy: m.performance.accuracy?.value })), requirements: models.map(m => ({ id: m.id, memory: m.requirements.memory.ram, storage: m.requirements.storage.size, gpu: m.requirements.gpu?.required })), recommendations: this.generateComparisonRecommendations(models) }; return { models, comparison }; } // Model Search async searchModels(query: string, filters?: { type?: string; framework?: string; minAccuracy?: number; maxLatency?: number; }): Promise<ModelMetadata[]> { let models = Array.from(this.models.values()); // Text search if (query.trim()) { models = models.filter(m => m.name.toLowerCase().includes(query.toLowerCase()) || m.description.toLowerCase().includes(query.toLowerCase()) || m.tags.some(tag => tag.toLowerCase().includes(query.toLowerCase())) ); } // Apply filters if (filters) { if (filters.type) { models = models.filter(m => m.type === filters.type); } if (filters.framework) { models = models.filter(m => m.framework === filters.framework); } if (filters.minAccuracy !== undefined) { models = models.filter(m => m.performance.accuracy && m.performance.accuracy.value >= filters.minAccuracy! ); } if (filters.maxLatency !== undefined) { models = models.filter(m => m.performance.latency.p95 <= filters.maxLatency! ); } } return models; } // Analytics async getRegistryAnalytics(): Promise<{ totalModels: number; modelsByType: Record<string, number>; modelsByFramework: Record<string, number>; modelsByStatus: Record<string, number>; popularModels: { id: string; name: string; deployments: number }[]; recentActivity: RecentActivity[]; storageUsage: { totalSize: number; artifactCount: number; avgModelSize: number; }; }> { const models = Array.from(this.models.values()); const modelsByType: Record<string, number> = {}; const modelsByFramework: Record<string, number> = {}; const modelsByStatus: Record<string, number> = {}; let totalArtifacts = 0; let totalSize = 0; models.forEach(model => { modelsByType[model.type] = (modelsByType[model.type] || 0) + 1; modelsByFramework[model.framework] = (modelsByFramework[model.framework] || 0) + 1; modelsByStatus[model.status] = (modelsByStatus[model.status] || 0) + 1; const artifacts = this.artifacts.get(model.id) || []; totalArtifacts += artifacts.length; totalSize += artifacts.reduce((sum, a) => sum + a.size, 0); }); const popularModels = models .map(m => ({ id: m.id, name: m.name, deployments: Array.from(this.deployments.values()) .filter(d => d.modelId === m.id).length })) .sort((a, b) => b.deployments - a.deployments) .slice(0, 10); const recentActivity: RecentActivity[] = [ ...models.slice(0, 5).map(m => ({ type: 'model_registered' as const, modelId: m.id, modelName: m.name, timestamp: m.createdAt })), ...Array.from(this.experiments.values()).slice(0, 5).map(e => ({ type: 'experiment_created' as const, experimentId: e.id, experimentName: e.name, timestamp: e.createdAt })) ].sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime()).slice(0, 10); return { totalModels: models.length, modelsByType, modelsByFramework, modelsByStatus, popularModels, recentActivity, storageUsage: { totalSize, artifactCount: totalArtifacts, avgModelSize: totalSize / Math.max(models.length, 1) } }; } private findArtifactById(artifactId: string): ModelArtifact | undefined { for (const artifacts of this.artifacts.values()) { const artifact = artifacts.find(a => a.id === artifactId); if (artifact) return artifact; } return undefined; } private generateComparisonRecommendations(models: ModelMetadata[]): string[] { const recommendations: string[] = []; // Performance recommendations const bestPerformance = models.reduce((best, model) => model.performance.latency.p95 < best.performance.latency.p95 ? model : best ); recommendations.push(`${bestPerformance.name} has the best latency performance`); // Accuracy recommendations const bestAccuracy = models.reduce((best, model) => { if (!model.performance.accuracy || !best.performance.accuracy) return best; return model.performance.accuracy.value > best.performance.accuracy.value ? model : best; }); if (bestAccuracy.performance.accuracy) { recommendations.push(`${bestAccuracy.name} has the highest accuracy`); } // Resource recommendations const leastResource = models.reduce((least, model) => model.requirements.memory.ram < least.requirements.memory.ram ? model : least ); recommendations.push(`${leastResource.name} has the lowest memory requirements`); return recommendations; } private initializeBuiltInModels(): void { // Initialize some example models const builtInModels: Omit<ModelMetadata, 'id' | 'createdAt' | 'updatedAt'>[] = [ { name: 'Claude-3-Sonnet', version: '3.0.0', description: 'Advanced language model for complex reasoning and analysis', type: 'llm', framework: 'anthropic', size: 1024 * 1024 * 1024 * 100, // 100GB parameters: 175000000000, // 175B architecture: 'transformer', inputSchema: { type: 'object', properties: { messages: { type: 'array', description: 'Array of conversation messages' }, max_tokens: { type: 'number', description: 'Maximum tokens to generate' } }, required: ['messages'] }, outputSchema: { type: 'object', properties: { content: { type: 'string', description: 'Generated response' }, usage: { type: 'object', description: 'Token usage statistics' } } }, requirements: { cpu: { cores: 16, architecture: ['x86_64', 'arm64'] }, memory: { ram: 32 * 1024 * 1024 * 1024, // 32GB vram: 80 * 1024 * 1024 * 1024 // 80GB }, gpu: { required: true, memory: 80 * 1024 * 1024 * 1024, compute: ['7.0', '8.0', '8.6'] }, storage: { size: 200 * 1024 * 1024 * 1024, // 200GB type: 'nvme' }, runtime: { python: '3.9+', cuda: '11.8+', dependencies: ['torch>=2.0.0', 'transformers>=4.25.0'] } }, performance: { latency: { p50: 150, p95: 300, p99: 500 }, throughput: { requestsPerSecond: 10, tokensPerSecond: 50 }, accuracy: { metric: 'hellaswag', value: 0.95, dataset: 'hellaswag-validation' }, resourceUtilization: { cpu: 60, memory: 70, gpu: 85 }, benchmarks: [] }, tags: ['nlp', 'language-model', 'reasoning', 'anthropic'], author: 'Anthropic', license: 'Commercial', status: 'production' } ]; builtInModels.forEach(async (model) => { await this.registerModel(model); }); } private generateId(): string { return Math.random().toString(36).substring(2, 15); } } // Helper interfaces interface ModelComparison { parameters: { id: string; parameters: number }[]; performance: { id: string; latency: number; throughput: number; accuracy?: number }[]; requirements: { id: string; memory: number; storage: number; gpu?: boolean }[]; recommendations: string[]; } interface RecentActivity { type: 'model_registered' | 'experiment_created' | 'model_deployed' | 'version_created'; modelId?: string; modelName?: string; experimentId?: string; experimentName?: string; timestamp: Date; }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Coder-RL/Claude_MCPServer_Dev1'

If you have feedback or need assistance with the MCP directory API, please join our Discord server