import { config } from '../config.js';
import { withRetry } from '../utils/retry.js';
import type { OllamaChatMessage, OllamaChatResponse, OllamaEmbedResponse } from '../types/index.js';
// Ollama connection settings, resolved once at module load.
// All requests issued by this module share these values.
const { baseUrl, primaryModel, fallbackModel, embeddingModel, chatTimeout, embedTimeout } =
config.ollama;
/**
 * POST a JSON body to the Ollama HTTP API and parse the JSON response.
 *
 * @param path - API path appended to the configured base URL (e.g. '/api/chat').
 * @param body - Request payload, serialized as JSON.
 * @param timeoutMs - Request is aborted after this many milliseconds.
 * @returns The parsed JSON response, cast to T (not validated at runtime).
 * @throws Error for non-2xx responses (includes status and body text) and
 *   for timeouts (includes path and timeout value).
 */
async function ollamaFetch<T>(
  path: string,
  body: Record<string, unknown>,
  timeoutMs: number
): Promise<T> {
  const controller = new AbortController();
  // Abort the in-flight request once the deadline passes.
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(`${baseUrl}${path}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    if (!res.ok) {
      // Best-effort capture of the error body for easier debugging.
      const text = await res.text().catch(() => '');
      throw new Error(`Ollama ${path} returned ${res.status}: ${text}`);
    }
    return (await res.json()) as T;
  } catch (err: unknown) {
    // A timeout abort otherwise surfaces as an opaque AbortError
    // ("This operation was aborted"); rethrow with endpoint + timeout context.
    if (typeof err === 'object' && err !== null && (err as { name?: unknown }).name === 'AbortError') {
      throw new Error(`Ollama ${path} timed out after ${timeoutMs}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
/**
* Generate embeddings for one or more texts.
* Returns Float32Array[] (one per input text).
*/
/**
 * Generate embeddings for one or more texts.
 *
 * @param input - A single text or an array of texts to embed.
 * @returns One Float32Array per input text, in the same order.
 */
export async function embed(input: string | string[]): Promise<Float32Array[]> {
  const texts = Array.isArray(input) ? input : [input];
  // Short-circuit an empty batch: no network round-trip (or retries) needed.
  if (texts.length === 0) return [];
  const result = await withRetry(
    () =>
      ollamaFetch<OllamaEmbedResponse>(
        '/api/embed',
        { model: embeddingModel, input: texts },
        embedTimeout
      ),
    { maxRetries: 2, baseDelayMs: 500 }
  );
  return result.embeddings.map((e) => new Float32Array(e));
}
/**
* Chat completion with automatic model fallback.
* Tries primaryModel first, falls back to fallbackModel on error/timeout.
*/
/**
 * Chat completion with automatic model fallback.
 * Tries primaryModel first, falls back to fallbackModel on error/timeout.
 *
 * @param messages - Conversation history sent to the model.
 * @param options - Optional sampling controls; temperature defaults to 0.3,
 *   maxTokens (num_predict) defaults to 2048.
 * @returns The assistant message content from the first model that succeeds.
 * @throws The last model's error if every model fails.
 */
export async function chat(
  messages: OllamaChatMessage[],
  options: { temperature?: number; maxTokens?: number } = {}
): Promise<string> {
  // Dedupe so a config with primaryModel === fallbackModel is not
  // attempted (and retried) twice for the same failure.
  const models = [...new Set([primaryModel, fallbackModel])];
  let lastError: unknown;
  for (const model of models) {
    try {
      const result = await withRetry(
        () =>
          ollamaFetch<OllamaChatResponse>(
            '/api/chat',
            {
              model,
              messages,
              stream: false,
              options: {
                temperature: options.temperature ?? 0.3,
                num_predict: options.maxTokens ?? 2048,
              },
            },
            chatTimeout
          ),
        { maxRetries: 1, baseDelayMs: 1000 }
      );
      return result.message.content;
    } catch (err) {
      // Remember the failure and fall through to the next model.
      lastError = err;
    }
  }
  if (lastError !== undefined) throw lastError;
  throw new Error('All models failed');
}
/**
* Check if Ollama is reachable and models are available.
*/
/**
 * Check if Ollama is reachable and models are available.
 *
 * Never rejects: connectivity problems are reported through the
 * `healthy`/`error` fields of the returned object.
 */
export async function healthCheck(): Promise<{
  healthy: boolean;
  models: string[];
  error?: string;
}> {
  try {
    // /api/tags lists locally available models; cap the probe at 5s.
    const response = await fetch(`${baseUrl}/api/tags`, {
      signal: AbortSignal.timeout(5000),
    });
    if (!response.ok) {
      throw new Error(`Status ${response.status}`);
    }
    const payload = (await response.json()) as { models: { name: string }[] };
    const names = payload.models.map(({ name }) => name);
    return { healthy: true, models: names };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return { healthy: false, models: [], error: message };
  }
}