chat_completion
Generate chat responses using DeepSeek V4 models with support for multi-turn conversations, thinking modes, and customizable parameters for tailored interactions.
Instructions
Primary DeepSeek V4 chat tool for single-turn and multi-turn generation. Defaults to deepseek-v4-flash; use deepseek-v4-pro for higher-capability reasoning. Provide either message (simple single user turn) or messages (full chat history); if both are provided, messages is used. Thinking mode is enabled by DeepSeek by default; pass thinking:{type:"disabled"} for non-thinking mode, and use reasoning_effort:"high"|"max" when thinking is enabled. Use conversation_id to persist context across calls and clear_conversation=true to reset stored state before sending the next turn. Set include_raw_response=true only for debugging because it returns the full provider payload.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| message | No | Single user-turn message text (non-empty string); ignored when `messages` is also provided. | |
| messages | No | Full chat history as an array of chat messages; takes precedence over `message`. | |
| model | No | Model ID, e.g. `deepseek-v4-flash` or `deepseek-v4-pro`. | deepseek-v4-flash |
| conversation_id | No | ID used to persist and retrieve conversation context across calls. | |
| clear_conversation | No | When true, clears stored conversation state before sending this turn. | false |
| frequency_penalty | No | Number between -2 and 2. | |
| max_tokens | No | Positive integer cap on generated tokens. | |
| presence_penalty | No | Number between -2 and 2. | |
| response_format | No | Object whose `type` is `"text"` or `"json_object"`. | |
| stop | No | Stop sequence(s) at which generation halts. | |
| stream | No | Whether to stream the response. | false |
| stream_options | No | Options applied when `stream=true`. | |
| temperature | No | Number between 0 and 2. | |
| top_p | No | Number between 0 and 1. | |
| tools | No | Tool definitions the model may call. | |
| tool_choice | No | Tool-choice directive for the request. | |
| logprobs | No | Whether to return log probabilities. | |
| top_logprobs | No | Integer between 0 and 20; requires `logprobs=true`. | |
| thinking | No | Thinking-mode configuration; pass `{type:"disabled"}` for non-thinking mode. | |
| reasoning_effort | No | `"high"` or `"max"`; applies when thinking is enabled. | |
| include_raw_response | No | When true, includes the full provider payload in the result (debugging only). | false |
| extra_body | No | Extra key/value fields merged into the request body. | |
Implementation Reference
- src/mcp-server.ts:225-291 (handler) — The main handler function for the chat_completion tool. Normalizes input messages, manages conversation history (retrieve/clear/save), builds the API request, calls createChatCompletion on the DeepSeek client, and returns a structured response with text, reasoning, tool calls, usage, and optionally the raw provider response.
async (input) => { try { const normalizedInput = input as ChatCompletionToolInput; const conversationId = normalizedInput.conversation_id; if (conversationId && normalizedInput.clear_conversation) { options.conversations.clear(conversationId); } const newMessages = normalizeInputMessages(normalizedInput); const existingHistory = conversationId ? options.conversations.get(conversationId) : []; const outboundMessages = conversationId ? [...existingHistory, ...newMessages] : newMessages; const request = buildChatCompletionRequest(normalizedInput, outboundMessages, options.defaultModel); const result = await options.client.createChatCompletion(request); const choice = result.response.choices[0]; const assistantMessage = choice?.message; if (conversationId && assistantMessage) { options.conversations.set(conversationId, [ ...outboundMessages, { role: "assistant", content: assistantMessage.content, reasoning_content: assistantMessage.reasoning_content, tool_calls: assistantMessage.tool_calls, }, ]); } const responseText = assistantMessage?.content ?? ""; const reasoning = assistantMessage?.reasoning_content; const toolCalls = assistantMessage?.tool_calls ?? []; const includeRawResponse = normalizedInput.include_raw_response; const summary = [ responseText || "(no assistant content returned)", reasoning ? "\nReasoning:\n" + reasoning : undefined, toolCalls.length > 0 ? "\nTool calls returned by model: " + JSON.stringify(toolCalls, null, 2) : undefined, ] .filter(Boolean) .join("\n"); const structuredContent: Record<string, unknown> = { model: result.response.model, conversation_id: conversationId ?? null, response_text: responseText, reasoning_content: reasoning ?? null, tool_calls: toolCalls, finish_reason: choice?.finish_reason ?? null, usage: result.response.usage ?? null, stream_chunk_count: result.streamChunkCount ?? 
null, }; if (includeRawResponse) { structuredContent.raw_response = result.response; } return { content: [{ type: "text", text: summary }], structuredContent, }; } catch (error) { return makeToolErrorResult(error); } }, - src/deepseek/schemas.ts:61-105 (schema)Zod input schema for the chat_completion tool. Defines all accepted parameters: message/messages, model, conversation_id, clear_conversation, frequency_penalty, max_tokens, presence_penalty, response_format, stop, stream, stream_options, temperature, top_p, tools, tool_choice, logprobs, top_logprobs, thinking, reasoning_effort, include_raw_response, extra_body. Includes superRefine validation.
export const chatCompletionToolInputSchema = z .object({ message: z.string().min(1).optional(), messages: z.array(chatMessageSchema).min(1).optional(), model: z.string().default("deepseek-v4-flash"), conversation_id: z.string().min(1).optional(), clear_conversation: z.boolean().default(false), frequency_penalty: z.number().min(-2).max(2).optional(), max_tokens: z.number().int().positive().optional(), presence_penalty: z.number().min(-2).max(2).optional(), response_format: z .object({ type: z.enum(["text", "json_object"]), }) .passthrough() .optional(), stop: stopSchema.optional(), stream: z.boolean().default(false), stream_options: streamOptionsSchema.optional(), temperature: z.number().min(0).max(2).optional(), top_p: z.number().min(0).max(1).optional(), tools: z.array(toolDefinitionSchema).optional(), tool_choice: toolChoiceSchema.optional(), logprobs: z.boolean().optional(), top_logprobs: z.number().int().min(0).max(20).optional(), thinking: thinkingSchema.optional(), reasoning_effort: z.enum(["high", "max"]).optional(), include_raw_response: z.boolean().default(false), extra_body: z.record(z.string(), z.unknown()).optional(), }) .superRefine((value, context) => { if (!value.message && !value.messages) { context.addIssue({ code: z.ZodIssueCode.custom, message: "Either `message` or `messages` must be provided", }); } if (value.top_logprobs !== undefined && !value.logprobs) { context.addIssue({ code: z.ZodIssueCode.custom, message: "`top_logprobs` requires `logprobs=true`", }); } }); - src/mcp-server.ts:217-292 (registration)Registration of the 'chat_completion' tool via server.registerTool(). Binds the tool name, description, input schema, and async handler function.
function registerTools(server: McpServer, options: DeepSeekMcpServerOptions): void { server.registerTool( "chat_completion", { description: "Primary DeepSeek V4 chat tool for single-turn and multi-turn generation. Defaults to `deepseek-v4-flash`; use `deepseek-v4-pro` for higher-capability reasoning. Provide either `message` (simple single user turn) or `messages` (full chat history); if both are provided, `messages` is used. Thinking mode is enabled by DeepSeek by default; pass `thinking:{type:\"disabled\"}` for non-thinking mode, and use `reasoning_effort:\"high\"|\"max\"` when thinking is enabled. Use `conversation_id` to persist context across calls and `clear_conversation=true` to reset stored state before sending the next turn. Set `include_raw_response=true` only for debugging because it returns the full provider payload.", inputSchema: chatCompletionToolInputSchema, }, async (input) => { try { const normalizedInput = input as ChatCompletionToolInput; const conversationId = normalizedInput.conversation_id; if (conversationId && normalizedInput.clear_conversation) { options.conversations.clear(conversationId); } const newMessages = normalizeInputMessages(normalizedInput); const existingHistory = conversationId ? options.conversations.get(conversationId) : []; const outboundMessages = conversationId ? [...existingHistory, ...newMessages] : newMessages; const request = buildChatCompletionRequest(normalizedInput, outboundMessages, options.defaultModel); const result = await options.client.createChatCompletion(request); const choice = result.response.choices[0]; const assistantMessage = choice?.message; if (conversationId && assistantMessage) { options.conversations.set(conversationId, [ ...outboundMessages, { role: "assistant", content: assistantMessage.content, reasoning_content: assistantMessage.reasoning_content, tool_calls: assistantMessage.tool_calls, }, ]); } const responseText = assistantMessage?.content ?? 
""; const reasoning = assistantMessage?.reasoning_content; const toolCalls = assistantMessage?.tool_calls ?? []; const includeRawResponse = normalizedInput.include_raw_response; const summary = [ responseText || "(no assistant content returned)", reasoning ? "\nReasoning:\n" + reasoning : undefined, toolCalls.length > 0 ? "\nTool calls returned by model: " + JSON.stringify(toolCalls, null, 2) : undefined, ] .filter(Boolean) .join("\n"); const structuredContent: Record<string, unknown> = { model: result.response.model, conversation_id: conversationId ?? null, response_text: responseText, reasoning_content: reasoning ?? null, tool_calls: toolCalls, finish_reason: choice?.finish_reason ?? null, usage: result.response.usage ?? null, stream_chunk_count: result.streamChunkCount ?? null, }; if (includeRawResponse) { structuredContent.raw_response = result.response; } return { content: [{ type: "text", text: summary }], structuredContent, }; } catch (error) { return makeToolErrorResult(error); } }, ); - src/mcp-server.ts:449-459 (helper)Helper function normalizeInputMessages() that converts a ChatCompletionToolInput into an array of DeepSeekChatMessage. If 'messages' array is provided, it's used directly; otherwise a single 'message' string is wrapped as a user message.
function normalizeInputMessages(input: ChatCompletionToolInput): DeepSeekChatMessage[] { if (input.messages && input.messages.length > 0) { return input.messages as DeepSeekChatMessage[]; } if (input.message) { return [{ role: "user", content: input.message }]; } throw new Error("Either `message` or `messages` must be provided"); } - src/mcp-server.ts:461-502 (helper)Helper function buildChatCompletionRequest() that constructs a DeepSeekChatCompletionRequest from tool input and conversation messages. Sets model, messages, and optional fields (frequency_penalty, max_tokens, temperature, tools, thinking, etc.), plus extra_body passthrough.
function buildChatCompletionRequest( input: ChatCompletionToolInput, messages: DeepSeekChatMessage[], defaultModel: string, ): DeepSeekChatCompletionRequest { const request: DeepSeekChatCompletionRequest = { model: input.model ?? defaultModel, messages, }; const optionalFields: (keyof ChatCompletionToolInput)[] = [ "frequency_penalty", "max_tokens", "presence_penalty", "response_format", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "logprobs", "top_logprobs", "thinking", "reasoning_effort", ]; const requestRecord = request as Record<string, unknown>; for (const field of optionalFields) { const value = input[field]; if (value !== undefined) { requestRecord[field] = value; } } if (input.extra_body) { Object.assign(request, input.extra_body); } return request; }