session.ts (5.28 kB)
import {
  getLlama,
  LlamaModel,
  LlamaContext,
  LlamaChatSession,
} from "node-llama-cpp";
import { modelDownloader } from "./model-downloader.js";

export interface LLMConfig {
  modelPath?: string;
  gpuDevice?: string;
  gpuLayers?: number;
  contextSize?: number;
  temperature?: number;
  topP?: number;
}

export class ZigNetLLM {
  private model: LlamaModel | null = null;
  private context: LlamaContext | null = null;
  private session: LlamaChatSession | null = null;
  private config: Required<Omit<LLMConfig, "gpuDevice">> & {
    gpuDevice?: string;
  };

  constructor(config: LLMConfig = {}) {
    this.config = {
      modelPath: config.modelPath || "",
      gpuDevice: config.gpuDevice,
      gpuLayers: config.gpuLayers ?? 35, // RTX 3090 can handle all layers
      contextSize: config.contextSize ?? 4096,
      temperature: config.temperature ?? 0.7,
      topP: config.topP ?? 0.9,
    };
  }

  /**
   * Initialize the LLM model and session
   */
  async initialize(): Promise<void> {
    if (this.session) {
      console.log("ℹ️ LLM already initialized");
      return;
    }

    console.log("🚀 Initializing ZigNet LLM...");

    // Set GPU device if specified (applies CUDA_VISIBLE_DEVICES)
    if (this.config.gpuDevice !== undefined) {
      process.env.CUDA_VISIBLE_DEVICES = this.config.gpuDevice;
      console.log(`🎯 GPU device selection: ${this.config.gpuDevice}`);
    }

    // Get model path (download if needed)
    const modelPath =
      this.config.modelPath ||
      (await modelDownloader.ensureModel((progress) => {
        if (progress.percent % 10 < 1) {
          // Log every 10%
          console.log(`📥 Downloading model: ${progress.percent.toFixed(1)}%`);
        }
      }));

    console.log(`📦 Loading model: ${modelPath}`);
    console.log(`🎮 GPU layers: ${this.config.gpuLayers}`);

    // Initialize llama
    const llama = await getLlama();

    // Load model
    this.model = await llama.loadModel({
      modelPath,
      gpuLayers: this.config.gpuLayers,
    });

    // Create context
    this.context = await this.model.createContext({
      contextSize: this.config.contextSize,
    });

    // Create chat session with Zig-specific system prompt
    this.session = new LlamaChatSession({
      contextSequence: this.context.getSequence(),
      systemPrompt: `You are ZigNet, an AI assistant specialized in Zig programming language (v0.13-0.15).

Your expertise includes:
- Explaining Zig syntax, features, and idioms
- Understanding comptime, generics, and error handling
- Providing code examples and fixes
- Referencing official Zig documentation

Always:
- Generate idiomatic Zig code
- Explain Zig-specific concepts clearly
- Suggest best practices
- Validate syntax mentally before responding

When unsure, reference official Zig docs or suggest using 'zig ast-check'.`,
    });

    console.log("✅ LLM initialized successfully!");
  }

  /**
   * Query the LLM with a prompt
   */
  async query(prompt: string): Promise<string> {
    if (!this.session) {
      await this.initialize();
    }

    if (!this.session) {
      throw new Error("Failed to initialize LLM session");
    }

    console.log(`🤔 Querying LLM: ${prompt.substring(0, 50)}...`);

    const response = await this.session.prompt(prompt, {
      temperature: this.config.temperature,
      topP: this.config.topP,
    });

    return response;
  }

  /**
   * Dispose resources
   */
  dispose(): void {
    if (this.context) {
      void this.context.dispose();
      this.context = null;
    }
    if (this.model) {
      void this.model.dispose();
      this.model = null;
    }
    this.session = null;
    console.log("🗑️ LLM resources disposed");
  }
}

/**
 * Singleton instance
 */
let globalLLM: ZigNetLLM | null = null;

/**
 * Get or create the global LLM instance
 */
export async function getLLM(config?: LLMConfig): Promise<ZigNetLLM> {
  if (!globalLLM) {
    // If no config provided, load from environment
    if (!config) {
      const {
        MODEL_PATH,
        GPU_DEVICE,
        GPU_LAYERS,
        CONTEXT_SIZE,
        TEMPERATURE,
        TOP_P,
      } = await import("../config.js");

      config = {
        modelPath: MODEL_PATH,
        gpuDevice: GPU_DEVICE,
        gpuLayers: GPU_LAYERS,
        contextSize: CONTEXT_SIZE,
        temperature: TEMPERATURE,
        topP: TOP_P,
      };
    }

    globalLLM = new ZigNetLLM(config);
    await globalLLM.initialize();
  }

  return globalLLM;
}

/**
 * Dispose the global LLM instance
 */
export function disposeLLM(): void {
  if (globalLLM) {
    globalLLM.dispose();
    globalLLM = null;
  }
}
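
For context, a minimal usage sketch of this module follows. It is not part of the original file: the import path ("./session.js"), the prompt text, and the top-level main() wrapper are assumptions for illustration; getLLM() with no argument is shown falling back to the environment-driven values from ../config.js as the code above does.

// usage-sketch.ts — hypothetical example, not from the ZigNet repository
import { getLLM, disposeLLM } from "./session.js";

async function main(): Promise<void> {
  // No config passed: getLLM() loads MODEL_PATH, GPU_DEVICE, etc. from ../config.js
  const llm = await getLLM();

  // Query the Zig-specialized chat session; sampling uses the configured temperature/topP
  const answer = await llm.query(
    "Explain how errdefer differs from defer in Zig, with a short example.",
  );
  console.log(answer);

  // Free the model and context when finished
  disposeLLM();
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});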
