/**
* @module api/chat-api
* @description RAG-enhanced chat API with semantic search
*
* Provides OpenAI-compatible chat completion endpoints with automatic
* Graph-RAG semantic search integration. Queries are enriched with
* relevant context from the Neo4j graph database before being sent
* to the LLM.
*
* **Features:**
* - OpenAI-compatible `/v1/chat/completions` endpoint
* - Automatic semantic search for relevant context
* - Multi-provider LLM support (OpenAI, Anthropic, Ollama, etc.)
* - Streaming and non-streaming responses
* - Context injection from graph database
*
* **Endpoints:**
* - `POST /v1/chat/completions` - Chat completion with RAG (OpenAI-compatible)
* - `POST /v1/embeddings` - Generate embeddings (OpenAI-compatible)
* - `GET /v1/models` - List available LLM models (OpenAI-compatible)
* - `GET /models` - List available LLM models (alias)
* - `GET /api/preambles` - List available agent preambles
* - `GET /api/preambles/:name` - Get specific preamble
* - `GET /api/tools` - List available MCP tools
* - `GET /api/models` - List models (legacy endpoint)
*
* @example
* ```typescript
* // Chat with RAG context (OpenAI-compatible)
* fetch('/v1/chat/completions', {
* method: 'POST',
* headers: { 'Content-Type': 'application/json' },
* body: JSON.stringify({
* model: 'gpt-4',
* messages: [{ role: 'user', content: 'What did we decide about auth?' }],
* stream: false
* })
* });
* ```
*
* @since 1.0.0
*/
import express from "express";
import fs from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";
import type { IGraphManager } from "../types/index.js";
import { handleVectorSearchNodes } from "../tools/vectorSearch.tools.js";
import { CopilotAgentClient, LLMProvider } from "../orchestrator/llm-client.js";
import {
normalizeProvider,
fetchAvailableModels,
} from "../orchestrator/types.js";
import { consolidatedTools } from "../orchestrator/tools.js";
import { createSecureFetchOptions } from "../utils/fetch-helper.js";
// ES module equivalent of __dirname
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
* Configuration for chat API
*/
interface ChatConfig {
semanticSearchEnabled: boolean;
semanticSearchLimit: number;
minSimilarityThreshold: number;
llmProvider: "openai" | "ollama" | string;
llmApiUrl: string;
defaultModel: string;
embeddingModel: string;
}
/**
* Chat message structure
*/
interface ChatMessage {
role: "user" | "assistant" | "system";
content: string;
}
/**
* Chat completion request body
*
* NOTE: Preambles/chatmodes are now handled client-side!
* Clients should fetch preamble content from GET /api/preambles/:name
* and inject it as a system message: { role: 'system', content: preambleContent }
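 *
 * @example
 * A minimal client-side sketch of the flow above. Endpoint paths match this
 * module; the model name, message text, and tool parameters are illustrative.
 * ```typescript
 * const preamble = await fetch('/api/preambles/mimir-v2').then((r) => r.text());
 * const body: ChatCompletionRequest = {
 *   model: 'gpt-4o',
 *   stream: false,
 *   messages: [
 *     { role: 'system', content: preamble }, // preamble injected client-side
 *     { role: 'user', content: 'Summarize our auth decisions' },
 *   ],
 *   tool_parameters: { vector_search_nodes: { limit: 5, min_similarity: 0.8 } },
 * };
 * await fetch('/v1/chat/completions', {
 *   method: 'POST',
 *   headers: { 'Content-Type': 'application/json' },
 *   body: JSON.stringify(body),
 * });
 * ```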
*/
interface ChatCompletionRequest {
messages: ChatMessage[];
model?: string;
stream?: boolean;
enable_tools?: boolean; // Enable MCP tool calling (default: true)
tools?: string[]; // Specific tools to enable (optional)
max_tool_calls?: number; // Max tool calls per response (default: 3)
working_directory?: string; // Working directory for tool execution (VSCode workspace path)
tool_parameters?: {
vector_search_nodes?: {
limit?: number; // Max results (default: 10)
min_similarity?: number; // Similarity threshold 0-1 (default: 0.75)
depth?: number; // Graph traversal depth 1-3 (default: 1)
types?: string[]; // Filter by node types
};
memory_edge?: {
depth?: number; // Subgraph traversal depth (default: 1)
};
};
}
/**
* Default configuration
*
* Provider Switching:
* - Set MIMIR_DEFAULT_PROVIDER to:
* - 'ollama' (Native Ollama API - uses /api/chat endpoint)
* - 'openai', 'copilot', or 'llama.cpp' (OpenAI-compatible - uses /v1/chat/completions endpoint)
* - Configure MIMIR_LLM_API for the base URL (e.g., http://ollama:11434, http://copilot-api:4141, http://llama-server:11434)
*
* Provider aliases are normalized automatically:
* llama.cpp → openai (OpenAI-compatible)
* copilot → openai (OpenAI-compatible)
*/
const DEFAULT_CONFIG: ChatConfig = {
semanticSearchEnabled: true,
semanticSearchLimit: 10,
minSimilarityThreshold: 0.75,
llmProvider: normalizeProvider(
process.env.MIMIR_DEFAULT_PROVIDER || "ollama"
).toString(),
// Base URL only - LangChain clients add their own paths
llmApiUrl: process.env.MIMIR_LLM_API || "http://ollama:11434",
defaultModel: process.env.MIMIR_DEFAULT_MODEL || "qwen3:4b",
embeddingModel: process.env.MIMIR_EMBEDDINGS_MODEL || "mxbai-embed-large",
};
/**
* Get available preamble files
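 *
 * Scans ./docs/agents for files matching `claudette-*.md`. For example,
 * `claudette-mimir-v2.md` maps to
 * `{ name: 'mimir-v2', filename: 'claudette-mimir-v2.md', displayName: 'Mimir V2' }`.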
*/
async function getAvailablePreambles(): Promise<
{ name: string; filename: string; displayName: string }[]
> {
const preambleDir = "./docs/agents";
try {
const files = await fs.readdir(preambleDir);
const preambles = files
.filter((f) => f.startsWith("claudette-") && f.endsWith(".md"))
.map((filename) => {
const name = filename.replace("claudette-", "").replace(".md", "");
const displayName = name
.split("-")
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
.join(" ");
return { name, filename, displayName };
})
.sort((a, b) => a.displayName.localeCompare(b.displayName));
return preambles;
} catch (error) {
console.warn("⚠️ Could not read preambles directory:", error);
return [];
}
}
/**
* Load preamble by name (e.g., 'mimir-v2', 'debug', 'research')
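 *
 * Resolves to ./docs/agents/claudette-<name>.md (relative to the process working
 * directory) and returns a built-in fallback preamble if the file cannot be read.
 *
 * @example
 * const content = await loadPreamble('debug'); // reads ./docs/agents/claudette-debug.md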
*/
async function loadPreamble(preambleName?: string): Promise<string> {
const defaultPreamble = "mimir-v2";
const name = preambleName || defaultPreamble;
const preamblePath = `./docs/agents/claudette-${name}.md`;
try {
const content = await fs.readFile(preamblePath, "utf-8");
console.log(`✅ Loaded preamble: ${name} from ${preamblePath}`);
return content;
} catch (error) {
console.warn(`⚠️ Could not load preamble: ${name}, using fallback`);
// Fallback preamble
return `# Claudette Agent v5.2.1
You are an autonomous AI assistant that helps users accomplish their goals by:
- Providing accurate, relevant information
- Breaking down complex tasks into manageable steps
- Using context from the knowledge base when available
- Being concise, clear, and helpful
Always prioritize user needs and provide practical solutions.`;
}
}
/**
* Create chat API router (OpenAI-compatible)
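 *
 * The router registers absolute paths (/v1/..., /api/..., /models), so it is
 * typically mounted at the application root. A minimal mounting sketch, assuming
 * an IGraphManager instance and JSON body parsing are provided by the caller:
 * ```typescript
 * import express from "express";
 * const app = express();
 * app.use(express.json());
 * app.use(createChatRouter(graphManager)); // graphManager: IGraphManager (assumed)
 * app.listen(3000);
 * ```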
*/
export function createChatRouter(graphManager: IGraphManager): express.Router {
const router = express.Router();
const config = { ...DEFAULT_CONFIG };
let claudettePreamble = "";
// Load default preamble on startup
loadPreamble("mimir-v2").then((preamble) => {
claudettePreamble = preamble;
});
/**
* GET /api/preambles - List available agent preambles
* @returns JSON with preambles array
* @example fetch('/api/preambles').then(r => r.json());
*/
router.get("/api/preambles", async (req: any, res: any) => {
try {
const preambles = await getAvailablePreambles();
res.json({ preambles });
} catch (error: any) {
console.error("Error listing preambles:", error);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/preambles/:name - Get preamble content
   * @param name - Preamble name without the `claudette-` prefix (e.g. 'mimir-v2')
   * @returns Markdown content
   * @example fetch('/api/preambles/mimir-v2').then(r => r.text());
*/
  router.get("/api/preambles/:name", async (req: any, res: any) => {
    const { name } = req.params;
    try {
      const content = await loadPreamble(name);
      res.type("text/markdown").send(content);
    } catch (error: any) {
      console.error(`Error loading preamble '${name}':`, error);
      res.status(404).json({ error: `Preamble '${name}' not found` });
    }
  });
/**
* GET /api/tools - List available MCP tools
* @returns JSON with tools array
* @example fetch('/api/tools').then(r => r.json());
*/
router.get("/api/tools", async (req: any, res: any) => {
try {
// Return tool names and descriptions from consolidatedTools
const tools = consolidatedTools.map((tool) => ({
name: tool.name,
description: tool.description,
category:
tool.name.startsWith("memory_") ||
tool.name === "todo" ||
tool.name === "todo_list"
? "mcp"
: "filesystem",
}));
res.json({
tools,
count: tools.length,
description:
"Available tools for agents (consolidated API - 14 tools: 8 filesystem + 6 MCP)",
});
} catch (error: any) {
console.error("Error listing tools:", error);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/models - List available LLM models
* @returns JSON with models array
* @example fetch('/api/models').then(r => r.json());
*/
router.get("/api/models", async (req: any, res: any) => {
try {
const models = await fetchAvailableModels(config.llmApiUrl);
res.json({
models: models.map((m) => ({
id: m.id,
owned_by: m.owned_by || "unknown",
object: m.object || "model",
})),
count: models.length,
provider: config.llmProvider,
description: `Available models from configured LLM provider (${config.llmProvider})`,
});
} catch (error: any) {
console.error("Error listing models:", error);
res.status(500).json({ error: error.message });
}
});
/**
* POST /v1/chat/completions
* OpenAI-compatible RAG-enhanced chat completion with streaming & MCP tool support
*
* **Provider Switching:**
* Configure via environment variables:
* - MIMIR_DEFAULT_PROVIDER → 'openai' (OpenAI-compatible endpoint) or 'ollama' (local Ollama)
* - MIMIR_LLM_API → Base URL for the LLM endpoint (e.g., http://copilot-api:4141)
* - MIMIR_DEFAULT_MODEL → Model name (e.g., gpt-4o for OpenAI, qwen2.5-coder for Ollama)
* - MIMIR_EMBEDDINGS_MODEL → Embedding model (default: mxbai-embed-large)
*
* **MCP Tools:**
* All providers support full MCP tool calling through LangChain agents.
* Tools are automatically loaded from src/orchestrator/tools.ts
* Enable/disable with enable_tools parameter (default: true)
*
* **Examples:**
* ```bash
* # Use OpenAI-compatible endpoint (copilot-api)
* MIMIR_DEFAULT_PROVIDER=openai MIMIR_LLM_API=http://copilot-api:4141 MIMIR_DEFAULT_MODEL=gpt-4o
*
* # Use local Ollama
* MIMIR_DEFAULT_PROVIDER=ollama MIMIR_LLM_API=http://localhost:11434 MIMIR_DEFAULT_MODEL=qwen2.5-coder
*
* # Use actual OpenAI API
* MIMIR_DEFAULT_PROVIDER=openai MIMIR_LLM_API=https://api.openai.com MIMIR_DEFAULT_MODEL=gpt-4-turbo
* ```
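   *
   * **Streaming consumption (sketch):**
   * When `stream: true`, the endpoint emits OpenAI-style `chat.completion.chunk` SSE
   * events plus `: status` comment lines, terminated by `data: [DONE]`. A minimal
   * reader, assuming a Node 18+ (fetch/ReadableStream) runtime:
   * ```typescript
   * const res = await fetch('/v1/chat/completions', {
   *   method: 'POST',
   *   headers: { 'Content-Type': 'application/json' },
   *   body: JSON.stringify({
   *     model: 'gpt-4o',
   *     stream: true,
   *     messages: [{ role: 'user', content: 'What did we decide about auth?' }],
   *   }),
   * });
   * const reader = res.body!.getReader();
   * const decoder = new TextDecoder();
   * let buffer = '';
   * for (;;) {
   *   const { done, value } = await reader.read();
   *   if (done) break;
   *   buffer += decoder.decode(value, { stream: true });
   *   const lines = buffer.split('\n');
   *   buffer = lines.pop() ?? '';
   *   for (const line of lines) {
   *     if (!line.startsWith('data: ')) continue; // skip ": status" comment lines
   *     const payload = line.slice('data: '.length);
   *     if (payload === '[DONE]') break;
   *     const delta = JSON.parse(payload).choices[0]?.delta?.content;
   *     if (delta) process.stdout.write(delta);
   *   }
   * }
   * ```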
*/
router.post("/v1/chat/completions", async (req: any, res: any) => {
try {
const body: ChatCompletionRequest = req.body;
const {
messages,
model,
stream = true,
enable_tools = true,
tools: requestedTools,
max_tool_calls = 3,
working_directory,
tool_parameters,
} = body;
if (!messages || messages.length === 0) {
return res.status(400).json({ error: "No messages provided" });
}
// Get the latest user message for RAG search
const lastUserMessage = [...messages]
.reverse()
.find((m) => m.role === "user");
const userMessage = lastUserMessage?.content || "";
if (!userMessage) {
return res.status(400).json({ error: "No user message found" });
}
console.log(`\n💬 Chat request: ${userMessage.substring(0, 100)}...`);
console.log(
`📨 Incoming messages: ${messages.length} (${messages
.map((m) => m.role)
.join(", ")})`
);
// Check if system prompt provided
const hasSystemPrompt = messages.some((m) => m.role === "system");
console.log(
`🎭 System prompt: ${
hasSystemPrompt
? "Provided by client"
: "⚠️ None (client should send preamble as system message)"
}`
);
if (enable_tools) {
console.log(`🔧 Tools enabled (max calls: ${max_tool_calls})`);
if (tool_parameters) {
console.log(
`⚙️ Tool parameters:`,
JSON.stringify(tool_parameters, null, 2)
);
if (tool_parameters.vector_search_nodes?.depth) {
console.log(
` 📊 Vector search depth: ${tool_parameters.vector_search_nodes.depth} (multi-hop enabled)`
);
}
}
}
// Get model from request or use default
// Note: Do NOT split on '.' as gpt-4.1 is a version number, not a provider prefix
let selectedModel = model || config.defaultModel;
// Only clean up if it has a provider prefix (e.g., 'mimir:model-name')
if (selectedModel.startsWith("mimir:")) {
selectedModel = selectedModel.replace("mimir:", "");
}
console.log(
`📋 Using model: ${selectedModel} ${
model ? "(from request)" : "(default)"
}`
);
// Prepare tools for agent (filter if specific tools requested)
const agentTools = enable_tools
? requestedTools
? consolidatedTools.filter((t) => requestedTools.includes(t.name))
: consolidatedTools
: []; // Empty array disables agent mode
console.log(
`🔧 Tools enabled: ${enable_tools}, count: ${agentTools.length} (consolidated API)`
);
// Set up SSE if streaming
if (stream) {
res.setHeader("Content-Type", "text/event-stream");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
}
// Helper to send OpenAI-compatible SSE chunks
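      // Each call emits one SSE event in the OpenAI streaming wire format:
      //   data: {"id":"chatcmpl-...","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"..."},"finish_reason":null}],...}
      // followed by a blank line; the final chunk carries an empty delta and finish_reason "stop".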
const sendChunk = (
content: string,
finish_reason: string | null = null
) => {
if (stream) {
const chunk = {
id: `chatcmpl-${Date.now()}`,
object: "chat.completion.chunk",
created: Math.floor(Date.now() / 1000),
model: selectedModel,
choices: [
{
index: 0,
delta: finish_reason ? {} : { content },
finish_reason,
},
],
};
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
}
};
// Send initial status (as comment for debugging)
if (stream) {
res.write(`: 🔍 Retrieving relevant context...\n\n`);
}
// Perform semantic search if enabled
let relevantContext = "";
let contextCount = 0;
if (config.semanticSearchEnabled) {
try {
// Both NornicDB and Neo4j return cosine similarity (0-1 range)
// NornicDB uses 0.5 threshold (server-side embeddings are different)
// Neo4j uses 0.75 threshold (client-side mxbai embeddings)
const isNornicDB = graphManager.getIsNornicDB();
const effectiveMinSimilarity = isNornicDB
? 0.5
: config.minSimilarityThreshold;
console.log(
`🔍 Performing semantic search for: "${userMessage.substring(
0,
100
)}..."`
);
console.log(
` Min similarity: ${effectiveMinSimilarity}, Limit: ${
config.semanticSearchLimit
}${isNornicDB ? " (NornicDB)" : ""}`
);
// Use vector search tool
const searchResult = await handleVectorSearchNodes(
{
query: userMessage,
types: undefined, // search all types
limit: config.semanticSearchLimit,
min_similarity: effectiveMinSimilarity,
},
graphManager.getDriver()
);
if (
searchResult &&
searchResult.results &&
searchResult.results.length > 0
) {
const searchResults = searchResult.results;
contextCount = searchResults.length;
console.log(
`✅ Found ${contextCount} relevant documents:`,
searchResults.map(
(r: any) =>
`${r.title || r.id} (${r.similarity?.toFixed(3) || "N/A"})`
)
);
// Format context
const contextParts: string[] = [];
for (const result of searchResults) {
const sourceLabel = result.type === "memory" ? "Memory" : "File";
const quality =
result.similarity >= 0.9
? "🔥 Excellent"
: result.similarity >= 0.8
? "✅ High"
: result.similarity >= 0.75
? "📊 Good"
: "📉 Moderate";
// Get the actual content - try multiple fields
const contentText =
result.chunk_text ||
result.content ||
result.content_preview ||
result.description ||
"No content available";
// Include absolute path if available (for agent to access files directly)
const locationInfo = result.absolute_path
? `\n**Path:** ${result.absolute_path}`
: result.path
? `\n**Path:** ${result.path}`
: "";
contextParts.push(
`**${sourceLabel}:** ${
result.title || result.id
}${locationInfo}\n` +
            `**Quality:** ${quality} (score: ${
              result.similarity?.toFixed(3) ?? "N/A"
            })\n` +
`**Content:**\n\`\`\`\n${contentText}\n\`\`\`\n\n---\n\n`
);
}
relevantContext = contextParts.join("");
if (stream) {
res.write(`: ✅ Found ${contextCount} relevant document(s)\n\n`);
}
} else {
console.log("ℹ️ No relevant context found");
if (stream) {
res.write(`: ℹ️ No relevant context found\n\n`);
}
}
} catch (searchError: any) {
console.error("⚠️ Semantic search failed:", searchError);
if (stream) {
res.write(`: ⚠️ Search failed: ${searchError.message}\n\n`);
}
}
}
// Build context section
let contextSection = "";
if (relevantContext) {
console.log(`📝 Context length: ${relevantContext.length} characters`);
console.log(
`📝 Context preview (first 500 chars):\n${relevantContext.substring(
0,
500
)}...`
);
contextSection = `
## RELEVANT CONTEXT FROM KNOWLEDGE BASE
The following context was retrieved from the Mimir knowledge base based on semantic similarity to your request:
${relevantContext}
---
`;
} else {
console.log("⚠️ No context to inject - relevantContext is empty");
}
// Build message array - use incoming messages or construct new ones
let chatMessages: ChatMessage[];
if (hasSystemPrompt) {
// User provided system prompt - use their messages as-is
chatMessages = [...messages];
// If we have RAG context, inject it before the last user message
if (contextSection && relevantContext) {
const lastUserIdx = chatMessages
.map((m) => m.role)
.lastIndexOf("user");
if (lastUserIdx !== -1) {
chatMessages.splice(lastUserIdx, 0, {
role: "user",
content: `## RELEVANT CONTEXT FROM KNOWLEDGE BASE\n\n${relevantContext}`,
});
}
}
} else {
// No system prompt provided - will use Claudette preamble via agent
chatMessages = [...messages];
}
console.log(
`📋 Message count: ${messages.length} (${messages
.map((m) => m.role)
.join(", ")})`
);
      // Determine provider from config (with alias support)
      const normalizedProvider = normalizeProvider(config.llmProvider);
      // Anything that is not native Ollama is treated as an OpenAI-compatible
      // endpoint (copilot-api proxy, OpenAI direct, llama.cpp, etc.)
      const provider: LLMProvider =
        normalizedProvider === LLMProvider.OLLAMA
          ? LLMProvider.OLLAMA
          : LLMProvider.OPENAI;
      // ALWAYS pass ONLY the base URL - LangChain clients add their own paths
      // (the Ollama client appends /api/chat, the OpenAI client appends /v1/chat/completions)
      const baseUrl = config.llmApiUrl;
const providerDisplay =
provider === LLMProvider.OLLAMA
? "Ollama (native)"
: "OpenAI-compatible (Copilot/OpenAI/llama.cpp)";
console.log(
`🤖 Using provider: ${providerDisplay}, model: ${selectedModel}, base: ${baseUrl}`
);
// Build task for agent - include RAG context and conversation history
let task = "";
// Add RAG context if available
if (contextSection && relevantContext) {
task += contextSection + "\n\n";
}
// Add conversation history (user/assistant messages)
const conversationParts: string[] = [];
for (const msg of messages) {
if (msg.role === "user") {
conversationParts.push(`User: ${msg.content}`);
} else if (msg.role === "assistant") {
conversationParts.push(`Assistant: ${msg.content}`);
}
}
task += conversationParts.join("\n\n");
console.log(`📋 Task length: ${task.length} characters`);
// Initialize agent with appropriate preamble
// Note: CopilotAgentClient expects both copilotBaseUrl and openaiBaseUrl for OpenAI-compatible endpoints
const agent = new CopilotAgentClient({
preamblePath: "", // Will load content directly
model: selectedModel,
provider,
        copilotBaseUrl: provider === LLMProvider.OPENAI ? baseUrl : undefined, // Used for OpenAI-compatible endpoints (copilot-api, OpenAI, llama.cpp)
        ollamaBaseUrl: provider === LLMProvider.OLLAMA ? baseUrl : undefined, // Used for native Ollama only
tools: agentTools, // Use filtered/enabled tools
temperature: 0.7,
});
// Get system prompt from messages (client is responsible for providing preamble)
const systemMessage = messages.find((m) => m.role === "system");
const systemContent =
systemMessage?.content ||
"You are a helpful AI assistant with access to a graph-based knowledge system.";
if (!systemMessage) {
console.warn(
"⚠️ No system message provided by client. Using minimal default. Consider fetching preamble from /api/preambles/:name"
);
}
await agent.loadPreamble(systemContent, true); // true = load as content, not file path
if (stream) {
const providerDisplay =
provider === LLMProvider.OLLAMA ? "Ollama" : "OpenAI-compatible";
res.write(
`: 🤖 Processing with ${selectedModel} (${providerDisplay})...\n\n`
);
}
// Execute agent
console.log(`🚀 Executing agent...`);
if (working_directory) {
console.log(`📁 Working directory: ${working_directory}`);
}
const result = await agent.execute(
task,
0,
max_tool_calls,
undefined,
working_directory
);
// Stream response in OpenAI-compatible format
if (stream) {
// Split output into chunks for streaming effect
const output = result.output;
const chunkSize = 50; // characters per chunk
for (let i = 0; i < output.length; i += chunkSize) {
const chunk = output.slice(i, Math.min(i + chunkSize, output.length));
sendChunk(chunk, null);
}
// Send finish
sendChunk("", "stop");
res.write(
`: ✅ Response complete (${result.toolCalls} tool calls)\n\n`
);
res.write("data: [DONE]\n\n");
res.end();
} else {
// Non-streaming response
res.json({
id: `chatcmpl-${Date.now()}`,
object: "chat.completion",
created: Math.floor(Date.now() / 1000),
model: selectedModel,
choices: [
{
index: 0,
message: {
role: "assistant",
content: result.output,
},
finish_reason: "stop",
},
],
usage: {
prompt_tokens: result.tokens.input,
completion_tokens: result.tokens.output,
total_tokens: result.tokens.input + result.tokens.output,
},
});
}
} catch (error: any) {
console.error("❌ Chat completion error:", error);
// Check if this is a recursion limit error
const isRecursionError =
error.message?.includes("Recursion limit") ||
error.lc_error_code === "GRAPH_RECURSION_LIMIT";
let userMessage = error.message;
let errorType = "Chat completion failed";
if (isRecursionError) {
errorType = "Task too complex";
userMessage =
"I'm sorry, but this task is too complex for me to complete in one go. " +
"The task exceeded the maximum number of steps I can take. " +
"Please try breaking it down into smaller, more focused subtasks, or " +
"increase the MIMIR_AGENT_RECURSION_LIMIT environment variable if you need more steps.";
console.error(
"💡 Suggestion: Break task into smaller subtasks or increase MIMIR_AGENT_RECURSION_LIMIT"
);
}
if (res.headersSent) {
res.write(
`event: error\ndata: ${JSON.stringify({
error: userMessage,
type: errorType,
})}\n\n`
);
res.end();
} else {
res.status(isRecursionError ? 400 : 500).json({
error: errorType,
details: userMessage,
suggestion: isRecursionError
? "Break task into smaller subtasks or increase recursion limit"
: undefined,
});
}
}
});
/**
* POST /v1/embeddings - Generate text embeddings
* @example
   * fetch('/v1/embeddings', {
   *   method: 'POST',
   *   headers: { 'Content-Type': 'application/json' },
   *   body: JSON.stringify({ input: 'text' })
   * }).then(r => r.json());
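   * // Response (OpenAI-compatible): { object: 'list', data: [{ object: 'embedding', embedding: number[], index }], model, usage }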
*/
router.post("/v1/embeddings", async (req: any, res: any) => {
try {
const { input, model = config.embeddingModel } = req.body;
if (!input) {
return res.status(400).json({
error: {
message: "Input is required",
type: "invalid_request_error",
param: "input",
code: null,
},
});
}
console.log(`🔢 Embeddings request for model: ${model}`);
// Normalize input to array
const inputs = Array.isArray(input) ? input : [input];
// Use split URL configuration for embeddings
const baseUrl =
process.env.MIMIR_EMBEDDINGS_API || "http://llama-server:11434";
const embeddingsPath =
process.env.MIMIR_EMBEDDINGS_API_PATH || "/v1/embeddings";
const embeddingsUrl = `${baseUrl}${embeddingsPath}`;
console.log(`🔗 Embeddings URL: ${embeddingsUrl}`);
const embeddings: number[][] = [];
for (const text of inputs) {
// Configure fetch options with SSL handling and authentication
const headers: Record<string, string> = {
"Content-Type": "application/json",
};
// Add authorization header if API key is configured
if (process.env.MIMIR_EMBEDDINGS_API_KEY) {
headers[
"Authorization"
] = `Bearer ${process.env.MIMIR_EMBEDDINGS_API_KEY}`;
}
const fetchOptions = createSecureFetchOptions(embeddingsUrl, {
method: "POST",
headers,
body: JSON.stringify({
model,
input: text, // OpenAI-compatible format
}),
});
const response = await fetch(embeddingsUrl, fetchOptions);
if (!response.ok) {
const errorText = await response.text();
throw new Error(
`Embeddings error: ${response.status} - ${errorText}`
);
}
const data = (await response.json()) as any;
// Handle both OpenAI-compatible and Ollama native response formats
let embedding: number[];
if (data.data && Array.isArray(data.data) && data.data[0]?.embedding) {
// OpenAI-compatible format: { data: [{ embedding: [...] }] }
const embeddingData = data.data[0].embedding;
// Handle space-separated string (llama.cpp format)
embedding =
typeof embeddingData === "string"
? embeddingData.split(" ").map(parseFloat)
: embeddingData;
} else if (data.embedding) {
// Ollama native format: { embedding: [...] }
embedding = data.embedding;
} else {
throw new Error(
`Unexpected embeddings response format: ${JSON.stringify(
data
).substring(0, 200)}`
);
}
embeddings.push(embedding);
}
// Return OpenAI-compatible response
res.json({
object: "list",
data: embeddings.map((embedding, index) => ({
object: "embedding",
embedding,
index,
})),
model,
        usage: {
          // Rough estimate: ~4 characters per token, rounded up to whole tokens
          prompt_tokens: inputs.reduce(
            (sum: number, text: string) => sum + Math.ceil(text.length / 4),
            0
          ),
          total_tokens: inputs.reduce(
            (sum: number, text: string) => sum + Math.ceil(text.length / 4),
            0
          ),
        },
});
console.log(`✅ Embeddings generated: ${embeddings.length} vectors`);
} catch (error: any) {
console.error("❌ Embeddings error:", error);
res.status(500).json({
error: {
message: error.message || "Failed to generate embeddings",
type: "api_error",
param: null,
code: null,
},
});
}
});
/**
* Shared handler for models endpoints
* Proxies to configured chat provider for both /models and /v1/models
*/
const handleModelsRequest = async (req: any, res: any) => {
try {
// Simple concatenation: base URL + models path
const baseUrl = process.env.MIMIR_LLM_API || "http://localhost:11434";
const modelsPath = process.env.MIMIR_LLM_API_MODELS_PATH || "/v1/models";
const modelsUrl = `${baseUrl}${modelsPath}`;
console.log(
`🔗 Proxying ${req.path} request to chat provider: ${modelsUrl}`
);
// Configure fetch options with SSL handling and authentication
const headers: Record<string, string> = {
Accept: "application/json",
};
// Add authorization header if API key is configured
if (process.env.MIMIR_LLM_API_KEY) {
headers["Authorization"] = `Bearer ${process.env.MIMIR_LLM_API_KEY}`;
}
const fetchOptions = createSecureFetchOptions(modelsUrl, {
method: "GET",
headers,
});
if (
modelsUrl.startsWith("https://") &&
process.env.NODE_TLS_REJECT_UNAUTHORIZED === "0"
) {
console.log(
`🔍 SSL verification disabled for proxy request (NODE_TLS_REJECT_UNAUTHORIZED=0)`
);
}
const response = await fetch(modelsUrl, fetchOptions);
if (!response.ok) {
throw new Error(
`Provider returned ${response.status}: ${response.statusText}`
);
}
const data = await response.json();
res.json(data);
} catch (error: any) {
console.error(
"❌ Error fetching models from chat provider:",
error.message
);
// Fallback to static models list
res.json({
object: "list",
data: [
{
id: config.defaultModel,
object: "model",
            created: Math.floor(Date.now() / 1000),
owned_by: "mimir",
},
{
id: config.embeddingModel,
object: "model",
            created: Math.floor(Date.now() / 1000),
owned_by: "mimir",
},
],
});
}
};
/**
* GET /v1/models - List models (OpenAI-compatible)
* @example fetch('/v1/models').then(r => r.json());
*/
router.get("/v1/models", handleModelsRequest);
/**
* GET /models
* Models list - proxies to configured chat provider (same as /v1/models)
*/
router.get("/models", handleModelsRequest);
return router;
}