import { readFileSync } from "node:fs";
import { z } from "zod";
// Schema for one chat message in the request payload.
// Role is restricted to the three standard chat roles; content is free-form text.
const MessageSchema = z.object({
role: z.enum(["system", "user", "assistant"]),
content: z.string(),
});
// Schema for the JSON request this script reads from stdin.
// Requires a versioned envelope, a non-empty model name, and at least one message.
const RequestSchema = z.object({
version: z.literal("v1"),
model: z.string().min(1),
// Sampling temperature; main() falls back to 0 when omitted.
temperature: z.number().optional(),
// Upper bound on generated tokens, forwarded to Ollama as num_predict.
maxOutputTokens: z.number().int().positive().optional(),
messages: z.array(MessageSchema).min(1),
// Optional caller-supplied bookkeeping; not forwarded to Ollama by this script.
metadata: z
.object({
queryId: z.string().optional(),
mode: z.string().optional(),
runKind: z.enum(["cold", "warm"]).optional(),
tokenBudget: z.number().int().positive().optional(),
})
.optional(),
});
// Schema for the non-streaming response body of Ollama's /api/chat endpoint.
// Only the fields this script consumes are validated; unknown fields are ignored.
const OllamaResponseSchema = z.object({
message: z.object({
role: z.string(),
content: z.string(),
}),
// Token counts reported by Ollama; marked optional since they may be absent.
prompt_eval_count: z.number().int().nonnegative().optional(),
eval_count: z.number().int().nonnegative().optional(),
});
/** Synchronously drains standard input and returns its contents as UTF-8 text. */
function readStdin(): string {
  // File descriptor 0 is the process's stdin; passing an encoding in the
  // options object makes readFileSync return a string instead of a Buffer.
  return readFileSync(0, { encoding: "utf-8" });
}
/**
 * Reads a chat request (JSON) from stdin, forwards it to a local Ollama
 * server's /api/chat endpoint, and writes the completion plus token usage to
 * stdout as a single JSON line.
 *
 * The endpoint defaults to http://localhost:11434 and can be overridden with
 * the DOCLEA_LIVE_LLM_OLLAMA_ENDPOINT environment variable.
 *
 * @throws If stdin is not valid JSON matching RequestSchema, the HTTP call
 *         returns a non-2xx status, the response body does not match
 *         OllamaResponseSchema, or the completion is empty/whitespace-only.
 */
async function main(): Promise<void> {
  const raw = readStdin();
  const request = RequestSchema.parse(JSON.parse(raw));
  // Normalize the endpoint: trim whitespace and strip trailing slashes so the
  // path concatenation below never yields "//api/chat".
  const endpoint = (
    process.env.DOCLEA_LIVE_LLM_OLLAMA_ENDPOINT ?? "http://localhost:11434"
  )
    .trim()
    .replace(/\/+$/, "");
  const response = await fetch(`${endpoint}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: request.model,
      // Disable streaming so the full completion arrives as one JSON body.
      stream: false,
      messages: request.messages,
      options: {
        // Default to 0 when no temperature is supplied.
        temperature: request.temperature ?? 0,
        // undefined is dropped by JSON.stringify, letting Ollama apply its default.
        num_predict: request.maxOutputTokens,
      },
    }),
  });
  if (!response.ok) {
    // Best-effort read of the error body; truncated to keep the message short.
    const text = await response.text().catch(() => "");
    throw new Error(
      `Ollama request failed: ${response.status} ${response.statusText}${text ? `: ${text.slice(0, 240)}` : ""}`,
    );
  }
  const parsed = OllamaResponseSchema.parse(await response.json());
  // OllamaResponseSchema guarantees message.content is a string, so the
  // original `?? ""` fallback was unreachable and has been removed.
  const outputText = parsed.message.content;
  if (!outputText.trim()) {
    throw new Error("Ollama returned an empty completion.");
  }
  const payload = {
    outputText,
    usage: {
      inputTokens: parsed.prompt_eval_count ?? 0,
      outputTokens: parsed.eval_count ?? 0,
    },
  };
  process.stdout.write(`${JSON.stringify(payload)}\n`);
}
// ESM top-level await: if main() rejects, Node reports the error and the
// process exits with a nonzero code, so no explicit .catch() is needed.
await main();