Mistral MCP Server

import { z } from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; const VISION_MODELS = ["pixtral-large-latest", "pixtral-12b-2409"] as const; const TEXT_MODELS = ["mistral-large-latest", "mistral-small-latest"] as const; const TextChunk = z.object({ type: z.literal("text"), text: z.string(), }); const ImageUrl = z.object({ url: z.string(), detail: z.string().nullable().optional(), }); const ImageUrlChunk = z.object({ type: z.literal("image_url"), imageUrl: z.union([z.string(), ImageUrl]), }).describe( `An image URL to be displayed in the chat. Only valid with vision models: ${ VISION_MODELS.join(", ") }`, ); const SystemMessage = z.object({ role: z.literal("system"), content: z.union([ z.string(), z.array(TextChunk).nonempty(), ]), }).describe( "A system message is an optional message that sets the behavior and context for Mistral in a conversation, such as modifying its personality or providing specific instructions. It is always text-only.", ); const TextModelUserMessage = z.object({ role: z.literal("user"), content: z.union([ z.string(), z.array(TextChunk).nonempty(), ]), }).describe("User message for text-only models - no image content allowed"); const TextModelAssistantMessage = z.object({ role: z.literal("assistant"), content: z.union([ z.string(), z.array(TextChunk).nonempty(), ]), }).describe( "Assistant message for text-only models - no image content allowed", ); const VisionModelUserMessage = z.object({ role: z.literal("user"), content: z.union([ z.string(), z.array(z.union([TextChunk, ImageUrlChunk])).nonempty(), ]), }).describe( "User message for vision models - can include text, images, or both", ); const VisionModelAssistantMessage = z.object({ role: z.literal("assistant"), content: z.union([ z.string(), z.array(z.union([TextChunk, ImageUrlChunk])).nonempty(), ]), }).describe( "Assistant message for vision models - can include text, images, or both", ); export const TextModelRequest = z.object({ model: z.enum(TEXT_MODELS), messages: z.array( z.discriminatedUnion("role", [ SystemMessage, TextModelUserMessage, TextModelAssistantMessage, ]), ).nonempty().refine( (messages) => { return messages.every((msg) => { if (typeof msg.content === "string") return true; return msg.content.every((chunk) => chunk.type === "text"); }); }, "Text-only models cannot process image content in any message", ), }).describe("Request for text-only models"); export const VisionModelRequest = z.object({ model: z.enum(VISION_MODELS), messages: z.array( z.discriminatedUnion("role", [ SystemMessage, VisionModelUserMessage, VisionModelAssistantMessage, ]), ).nonempty(), }).describe("Request for vision-capable models"); const ChatCompletionRequestSchema = z.discriminatedUnion("model", [ TextModelRequest, VisionModelRequest, ]); export type ChatCompletionRequest = z.infer<typeof ChatCompletionRequestSchema>; export const TextChatCompletionRequestJsonSchema = zodToJsonSchema( TextModelRequest, { $refStrategy: "none", }, ); export const VisionChatCompletionRequestJsonSchema = zodToJsonSchema( VisionModelRequest, { $refStrategy: "none", }, );