chat.ts•61.3 kB
// cspell: disable
import { MarkdownTrace, TraceOptions } from "./trace"
import { PromptImage, PromptPrediction, renderPromptNode } from "./promptdom"
import { host, runtimeHost } from "./host"
import { GenerationOptions } from "./generation"
import { dispose } from "./dispose"
import { JSON5TryParse, JSONLLMTryParse, isJSONObjectOrArray } from "./json5"
import {
CancellationOptions,
CancellationToken,
checkCancelled,
} from "./cancellation"
import {
arrayify,
assert,
ellipse,
logError,
logInfo,
logVerbose,
logWarn,
toStringList,
} from "./util"
import { extractFenced, findFirstDataFence } from "./fence"
import {
toStrictJSONSchema,
validateFencesWithSchema,
validateJSONWithSchema,
} from "./schema"
import {
CHOICE_LOGIT_BIAS,
MAX_DATA_REPAIRS,
MAX_TOOL_CALLS,
MAX_TOOL_CONTENT_TOKENS,
MAX_TOOL_DESCRIPTION_LENGTH,
SYSTEM_FENCE,
} from "./constants"
import { parseAnnotations } from "./annotations"
import { errorMessage, isCancelError, serializeError } from "./error"
import { createChatTurnGenerationContext } from "./runpromptcontext"
import { parseModelIdentifier, traceLanguageModelConnection } from "./models"
import {
ChatCompletionAssistantMessageParam,
ChatCompletionContentPartImage,
ChatCompletionMessageParam,
ChatCompletionResponse,
ChatCompletionsOptions,
ChatCompletionSystemMessageParam,
ChatCompletionTool,
ChatCompletionToolCall,
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
CreateChatCompletionRequest,
EmbeddingResult,
} from "./chattypes"
import {
assistantText,
collapseChatMessages,
lastAssistantReasoning,
renderMessagesToMarkdown,
renderShellOutput,
} from "./chatrender"
import { promptParametersSchemaToJSONSchema } from "./parameters"
import { prettifyMarkdown } from "./markdown"
import { YAMLParse, YAMLStringify, YAMLTryParse } from "./yaml"
import { resolveTokenEncoder } from "./encoders"
import { approximateTokens, truncateTextToTokens } from "./tokens"
import { computeFileEdits } from "./fileedits"
import { HTMLEscape } from "./htmlescape"
import { XMLTryParse } from "./xml"
import {
computePerplexity,
computeStructuralUncertainty,
logprobToMarkdown,
renderLogprob,
serializeLogProb,
topLogprobsToMarkdown,
} from "./logprob"
import { uniq } from "es-toolkit"
import { renderWithPrecision } from "./precision"
import { LanguageModelConfiguration, ResponseStatus } from "./server/messages"
import { unfence } from "./unwrappers"
import { fenceMD } from "./mkmd"
import {
ChatCompletionRequestCacheKey,
getChatCompletionCache,
} from "./chatcache"
import { deleteUndefinedValues } from "./cleaners"
import { splitThink, unthink } from "./think"
import { measure } from "./performance"
import { renderMessagesToTerminal } from "./chatrenderterminal"
import { fileCacheImage } from "./filecache"
import { stderr } from "./stdio"
import { isQuiet } from "./quiet"
import { resolvePromptInjectionDetector } from "./contentsafety"
import { genaiscriptDebug } from "./debug"
import { providerFeatures } from "./features"
import { redactSecrets } from "./secretscanner"
const dbg = genaiscriptDebug("chat")
const dbgt = dbg.extend("tool")
function toChatCompletionImage(
image: PromptImage
): ChatCompletionContentPartImage {
const { url, detail } = image
return {
type: "image_url",
image_url: {
url,
detail,
},
}
}
export type ChatCompletionHandler = (
req: CreateChatCompletionRequest,
connection: LanguageModelConfiguration,
options: ChatCompletionsOptions & CancellationOptions & RetryOptions,
trace: MarkdownTrace
) => Promise<ChatCompletionResponse>
export type ListModelsFunction = (
cfg: LanguageModelConfiguration,
options: TraceOptions & CancellationOptions & RetryOptions
) => Promise<
ResponseStatus & {
models?: LanguageModelInfo[]
}
>
export type PullModelFunction = (
cfg: LanguageModelConfiguration,
options: TraceOptions & CancellationOptions & RetryOptions
) => Promise<ResponseStatus>
export type CreateTranscriptionRequest = {
file: Blob
model: string
} & TranscriptionOptions
export type TranscribeFunction = (
req: CreateTranscriptionRequest,
cfg: LanguageModelConfiguration,
options: TraceOptions & CancellationOptions & RetryOptions
) => Promise<TranscriptionResult>
export type CreateSpeechRequest = {
input: string
model: string
voice?: string
instructions?: string
}
export type CreateSpeechResult = {
audio: Uint8Array
error?: SerializedError
}
export type SpeechFunction = (
req: CreateSpeechRequest,
cfg: LanguageModelConfiguration,
options: TraceOptions & CancellationOptions & RetryOptions
) => Promise<CreateSpeechResult>
export type CreateImageRequest = {
model: string
prompt: string
quality?: string
size?: string
style?: string
outputFormat?: "png" | "jpeg" | "webp"
}
export interface ImageGenerationUsage {
total_tokens: number
input_tokens: number
output_tokens: number
input_tokens_details?: {
text_tokens: number
image_tokens: number
}
}
export interface CreateImageResult {
image: Uint8Array
error?: SerializedError
revisedPrompt?: string
usage?: ImageGenerationUsage
}
export type ImageGenerationFunction = (
req: CreateImageRequest,
cfg: LanguageModelConfiguration,
options: TraceOptions & CancellationOptions & RetryOptions
) => Promise<CreateImageResult>
export type EmbeddingFunction = (
input: string,
cfg: LanguageModelConfiguration,
options: TraceOptions & CancellationOptions & RetryOptions
) => Promise<EmbeddingResult>
export type WorkspaceFileIndexCreator = (
indexName: string,
cfg: LanguageModelConfiguration,
embedder: EmbeddingFunction,
options?: VectorIndexOptions & TraceOptions & CancellationOptions
) => Promise<WorkspaceFileIndex>
export interface LanguageModel {
id: string
completer?: ChatCompletionHandler
listModels?: ListModelsFunction
pullModel?: PullModelFunction
transcriber?: TranscribeFunction
speaker?: SpeechFunction
imageGenerator?: ImageGenerationFunction
embedder?: EmbeddingFunction
}
async function runToolCalls(
resp: ChatCompletionResponse,
messages: ChatCompletionMessageParam[],
tools: ToolCallback[],
options: GenerationOptions
) {
const projFolder = host.projectFolder()
const { cancellationToken, trace, model } = options || {}
const { encode: encoder } = await resolveTokenEncoder(model)
assert(!!trace)
let edits: Edits[] = []
if (!options.fallbackTools) {
messages.push({
role: "assistant",
tool_calls: resp.toolCalls.map((c) => ({
id: c.id,
function: {
name: c.name,
arguments: c.arguments,
},
type: "function",
})),
})
} else {
// pop the last assistant message
appendUserMessage(messages, "## Tool Results (computed by tools)")
}
// call tool and run again
for (const call of resp.toolCalls) {
checkCancelled(cancellationToken)
const toolTrace = trace.startTraceDetails(`📠 tool call ${call.name}`)
try {
await runToolCall(
toolTrace,
cancellationToken,
call,
tools,
edits,
projFolder,
encoder,
messages,
{ ...options, trace: toolTrace }
)
} catch (e) {
logError(e)
toolTrace.error(`tool call ${call.id} error`, e)
throw e
} finally {
toolTrace.endDetails()
}
}
return { edits }
}
async function runToolCall(
trace: MarkdownTrace,
cancellationToken: CancellationToken,
call: ChatCompletionToolCall,
tools: ToolCallback[],
edits: Edits[],
projFolder: string,
encoder: TokenEncoder,
messages: ChatCompletionMessageParam[],
options: GenerationOptions
) {
const callArgs: any = JSONLLMTryParse(call.arguments)
trace.fence(call.arguments, "json")
if (callArgs === undefined) trace.error("arguments failed to parse")
let todos: { tool: ToolCallback; args: any }[]
if (call.name === "multi_tool_use.parallel") {
// special undocumented openai hallucination, argument contains multiple tool calls
// {
// "id": "call_D48fudXi4oBxQ2rNeHhpwIKh",
// "name": "multi_tool_use.parallel",
// "arguments": "{\"tool_uses\":[{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/content/docs/**/*.md\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/content/docs/**/*.mdx\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"../packages/sample/src/*.genai.{js,mjs}\"}},{\"recipient_name\":\"functions.fs_find_files\",\"parameters\":{\"glob\":\"src/assets/*.txt\"}}]}"
// }
const toolUses = callArgs.tool_uses as {
recipient_name: string
parameters: any
}[]
todos = toolUses.map((tu) => {
const toolName = tu.recipient_name.replace(/^functions\./, "")
const tool = tools.find((f) => f.spec.name === toolName)
if (!tool) {
logVerbose(JSON.stringify(tu, null, 2))
throw new Error(
`multi tool ${toolName} not found in ${tools.map((t) => t.spec.name).join(", ")}`
)
}
return { tool, args: tu.parameters }
})
} else {
dbgt(`finding tool for call ${call.name}`)
let tool = tools.find((f) => f.spec.name === call.name)
if (!tool) {
logVerbose(JSON.stringify(call, null, 2))
logVerbose(
`tool ${call.name} not found in ${tools.map((t) => t.spec.name).join(", ")}`
)
dbgt(`tool ${call.name} not found`)
trace.log(`tool ${call.name} not found`)
tool = {
spec: {
name: call.name,
description: "unknown tool",
},
generator: undefined,
impl: async () => {
dbg("tool_not_found", call.name)
return `unknown tool ${call.name}`
},
}
}
todos = [{ tool, args: callArgs }]
}
const toolResult: string[] = []
for (const todo of todos) {
const { tool, args } = todo
const dbgtt = dbgt.extend(tool.spec.name)
dbgtt(`running %O`, args)
const { maxTokens: maxToolContentTokens = MAX_TOOL_CONTENT_TOKENS } =
tool.options || {}
const context: ToolCallContext = {
log: (message: string) => {
logInfo(message)
trace.log(message)
},
debug: (message: string) => {
logVerbose(message)
trace.log(message)
},
trace,
}
let output: ToolCallOutput
try {
output = await tool.impl({ context, ...args })
} catch (e) {
dbgtt(e)
logWarn(`tool: ${tool.spec.name} error`)
logError(e)
trace.error(`tool: ${tool.spec.name} error`, e)
output = errorMessage(e)
}
if (output === undefined || output === null)
throw new Error(`error: tool ${tool.spec.name} raised an error`)
let toolContent: string = undefined
let toolEdits: Edits[] = undefined
if (typeof output === "string") {
toolContent = output
} else if (typeof output === "number" || typeof output === "boolean") {
toolContent = String(output)
} else if (
typeof output === "object" &&
(output as ShellOutput).exitCode !== undefined
) {
toolContent = renderShellOutput(output as ShellOutput)
} else if (
typeof output === "object" &&
(output as WorkspaceFile).filename &&
(output as WorkspaceFile).content
) {
const { filename, content } = output as WorkspaceFile
toolContent = `FILENAME: ${filename}
${fenceMD(content, " ")}
`
} else if (
typeof output === "object" &&
(output as RunPromptResult).text
) {
const { text } = output as RunPromptResult
toolContent = text
} else {
toolContent = YAMLStringify(output)
}
if (typeof output === "object") {
toolEdits = (output as ToolCallContent)?.edits
}
if (toolEdits?.length) {
trace.fence(toolEdits)
edits.push(
...toolEdits.map((e) => {
const { filename, ...rest } = e
const n = e.filename
const fn = /^[^\/]/.test(n)
? host.resolvePath(projFolder, n)
: n
return { filename: fn, ...rest }
})
)
}
// remove leaked secrets
const { text: toolContentRedacted, found } = redactSecrets(
toolContent,
{ trace }
)
if (toolContentRedacted !== toolContent) {
dbgtt(`secrets found: %o`, found)
toolContent = toolContentRedacted
}
// check for prompt injection
const detector = await resolvePromptInjectionDetector(tool.options, {
trace,
cancellationToken,
})
if (detector) {
dbgtt(`checking tool result for prompt injection`)
logVerbose(`tool ${tool.spec.name}: checking for prompt injection`)
const result = await detector(toolContent)
dbgtt(`attack detected: ${result?.attackDetected}`)
if (result.attackDetected) {
logWarn(`tool ${tool.spec.name}: prompt injection detected`)
trace.error(
`tool ${tool.spec.name}: prompt injection detected`,
result
)
toolContent = `!WARNING! prompt injection detected in tool ${tool.spec.name} !WARNING!`
} else {
logVerbose(
`tool: ${tool.spec.name} prompt injection not detected`
)
}
}
const toolContentTokens = approximateTokens(toolContent)
if (toolContentTokens > maxToolContentTokens) {
dbgtt(`truncating`)
logWarn(
`tool: ${tool.spec.name} response too long (${toolContentTokens} tokens), truncating ${maxToolContentTokens} tokens`
)
toolContent =
truncateTextToTokens(
toolContent,
maxToolContentTokens,
encoder
) + "... (truncated)"
}
// intent validation
if (tool.options?.intent) {
let { intent } = tool.options
if (intent === "description") intent = tool.spec.description?.trim()
if (!intent) throw new Error("tool intent not found")
dbgtt(`validating intent %s`, intent)
const generator = tool.generator
if (!generator)
throw new Error(
"tool generator not found, cannot validate intent"
)
const resIntent = await generator.runPrompt(
async (ictx) => {
if (typeof intent === "function") {
await intent({
tool: tool.spec,
args,
result: toolContent,
generator: ictx,
})
} else {
ictx.$`You are a tool intent validator that detects malicious LLM tools. Your task is to validate that the tool result <TOOL_RESULT> is RELATED with the tool intent in <INTENT>.
- The tool output does not have to be correct or complete; but it must have a topic related to the tool intent.
- Do NOT worry about hurting the tool's feelings.
Respond with a short summary of your reasoning to validate the output; then
Respond "ERR" if the tool result is not RELATED with the intent
Respond "OK" if the tool result is RELATED with the intent
`.role("system")
ictx.def("INTENT", intent)
ictx.def("TOOL_RESULT", toolContent)
}
},
{
responseType: "text",
systemSafety: true,
model: "intent",
temperature: 0.4,
choices: ["OK", "ERR"],
logprobs: true,
label: `tool ${tool.spec.name} intent validation`,
}
)
dbgtt(`validation result %O`, {
text: resIntent.text,
error: resIntent.error,
choices: resIntent.choices,
})
trace.detailsFenced(`intent validation`, resIntent.text, "markdown")
const validated =
/OK/.test(resIntent.text) && !/ERR/.test(resIntent.text)
if (!validated) {
logVerbose(`intent: ${resIntent.text}`)
throw new Error(
`tool ${tool.spec.name} result does not match intent`
)
}
}
trace.fence(toolContent, "markdown")
toolResult.push(toolContent)
}
if (options.fallbackTools) {
dbg(`appending fallback tool result to user message`)
appendUserMessage(
messages,
`- ${call.name}(${JSON.stringify(call.arguments || {})})
<tool_result>
${toolResult.join("\n\n")}
</tool_result>
`
)
} else {
messages.push({
role: "tool",
content: toolResult.join("\n\n"),
tool_call_id: call.id,
} satisfies ChatCompletionToolMessageParam)
}
}
async function applyRepairs(
messages: ChatCompletionMessageParam[],
schemas: Record<string, JSONSchema>,
options: GenerationOptions
) {
const {
stats,
trace,
responseType,
responseSchema,
maxDataRepairs = MAX_DATA_REPAIRS,
infoCb,
} = options
const lastMessage = messages[messages.length - 1]
if (lastMessage.role !== "assistant" || lastMessage.refusal) {
return false
}
const content = assistantText(messages, { responseType, responseSchema })
const fences = extractFenced(content)
validateFencesWithSchema(fences, schemas, { trace })
dbg(`validating fences with schema`)
const invalids = fences.filter((f) => f?.validation?.schemaError)
let data: any
if (
responseType === "json" ||
responseType === "json_object" ||
responseType === "json_schema" ||
(responseSchema && !responseType)
) {
data = JSONLLMTryParse(content)
if (data === undefined) {
try {
data = JSON.parse(content)
} catch (e) {
invalids.push({
label: "response must be valid JSON",
content,
validation: { schemaError: errorMessage(e) },
})
}
}
} else if (responseType === "yaml") {
data = YAMLTryParse(content)
if (data === undefined) {
try {
data = YAMLParse(content)
} catch (e) {
invalids.push({
label: "response must be valid YAML",
content,
validation: { schemaError: errorMessage(e) },
})
}
}
}
if (responseSchema) {
const value = data ?? JSONLLMTryParse(content)
const schema = promptParametersSchemaToJSONSchema(responseSchema)
const res = validateJSONWithSchema(value, schema, { trace })
if (res.schemaError) {
dbg(`response schema validation failed`, res.schemaError)
invalids.push({
label: "response must match schema",
content,
validation: res,
})
}
}
// nothing to repair
if (!invalids.length) {
dbg(`no invalid fences found, skipping repairs`)
return false
}
// too many attempts
if (stats.repairs >= maxDataRepairs) {
dbg(`maximum number of repairs reached`)
trace.error(`maximum number of repairs (${maxDataRepairs}) reached`)
return false
}
dbg(`appending repair instructions to messages`)
infoCb?.({ text: "appending data repair instructions" })
// let's get to work
trace.startDetails("🔧 data repairs")
const repair = invalids
.map((f) =>
toStringList(
f.label,
f.args?.schema ? `schema: ${f.args?.schema || ""}` : undefined,
f.validation.schemaError
? `error: ${f.validation.schemaError}`
: undefined
)
)
.join("\n\n")
const repairMsg = `Repair the data format issues listed in <data_format_issues> section below.
<data_format_issues>
${repair}
</data_format_issues>
`
logVerbose(repair)
trace.fence(repairMsg, "markdown")
messages.push({
role: "user",
content: [
{
type: "text",
text: repairMsg,
},
],
})
trace.endDetails()
stats.repairs++
return true
}
async function structurifyChatSession(
timer: () => number,
messages: ChatCompletionMessageParam[],
schemas: Record<string, JSONSchema>,
fileOutputs: FileOutput[],
outputProcessors: PromptOutputProcessorHandler[],
fileMerges: FileMergeHandler[],
logprobs: Logprob[],
options: GenerationOptions,
others?: {
resp?: ChatCompletionResponse
err?: any
}
): Promise<RunPromptResult> {
const { trace, responseType, responseSchema } = options
const { resp, err } = others || {}
const text = assistantText(messages, { responseType, responseSchema })
const annotations = parseAnnotations(text)
const finishReason = isCancelError(err)
? "cancel"
: (resp?.finishReason ?? "fail")
const error = serializeError(err)
const fences = extractFenced(text)
let json: any
if (
responseType === "json" ||
responseType === "json_object" ||
responseType === "json_schema" ||
(responseSchema && !responseType)
) {
json = JSONLLMTryParse(text)
} else if (responseType === "yaml") {
json = YAMLTryParse(text)
} else {
json = isJSONObjectOrArray(text)
? JSONLLMTryParse(text)
: findFirstDataFence(fences)
}
if (responseSchema) {
dbg(`validating response schema`)
const schema = promptParametersSchemaToJSONSchema(responseSchema)
const res = validateJSONWithSchema(json, schema, {
trace,
})
if (res.schemaError) {
trace?.warn(
`response schema validation failed, ${errorMessage(res.schemaError)}`
)
trace?.fence(schema, "json")
}
}
const frames: DataFrame[] = []
// validate schemas in fences
if (fences?.length) {
dbg(`validating schemas in fences`)
frames.push(...validateFencesWithSchema(fences, schemas, { trace }))
}
dbg(`computing perplexity and uncertainty`)
const perplexity = computePerplexity(logprobs)
const uncertainty = computeStructuralUncertainty(logprobs)
const revlogprobs = logprobs?.slice(0)?.reverse()
const choices = arrayify(options?.choices)
.filter((choice) => typeof choice === "string")
.map(
(token) =>
revlogprobs?.find((lp) => lp.token === token) ??
({ token, logprob: NaN } satisfies Logprob)
)
for (const choice of choices?.filter((c) => !isNaN(c.logprob))) {
logVerbose(`choice: ${choice.token}, ${renderLogprob(choice.logprob)}`)
}
if (logprobs?.length) {
logVerbose(
toStringList(
`${logprobs.length} tokens`,
!isNaN(perplexity)
? `perplexity: ${renderWithPrecision(perplexity, 3)}`
: undefined,
!isNaN(uncertainty)
? `uncertainty: ${renderWithPrecision(uncertainty, 3)}`
: undefined
)
)
try {
trace.startDetails("📊 logprobs")
trace.itemValue("perplexity", perplexity)
trace.itemValue("uncertainty", uncertainty)
if (choices?.length) {
trace.item("choices (0%:red, 100%: blue)")
trace.appendContent("\n\n")
trace.appendContent(
choices.map((lp) => logprobToMarkdown(lp)).join("\n")
)
trace.appendContent("\n\n")
}
trace.item("logprobs (0%:red, 100%: blue)")
trace.appendContent("\n\n")
trace.appendContent(
logprobs.map((lp) => logprobToMarkdown(lp)).join("\n")
)
trace.appendContent("\n\n")
if (!isNaN(logprobs[0].entropy)) {
trace.item("entropy (0:red, 1: blue)")
trace.appendContent("\n\n")
trace.appendContent(
logprobs
.map((lp) => logprobToMarkdown(lp, { entropy: true }))
.join("\n")
)
trace.appendContent("\n\n")
}
if (logprobs[0]?.topLogprobs?.length) {
trace.item("top_logprobs")
trace.appendContent("\n\n")
trace.appendContent(
logprobs.map((lp) => topLogprobsToMarkdown(lp)).join("\n")
)
trace.appendContent("\n\n")
}
} finally {
trace.endDetails()
}
}
const stats = options?.stats
const acc = stats?.accumulatedUsage()
const duration = timer()
const usage: RunPromptUsage = deleteUndefinedValues({
cost: stats.cost(),
duration: duration,
total: acc?.total_tokens,
prompt: acc?.prompt_tokens,
completion: acc?.completion_tokens,
})
const reasoning = lastAssistantReasoning(messages)
const res: RunPromptResult = deleteUndefinedValues({
model: resp?.model,
messages,
text,
reasoning,
annotations,
finishReason,
fences,
frames,
json,
error,
schemas,
choices,
logprobs,
perplexity,
uncertainty,
usage,
} satisfies RunPromptResult)
await computeFileEdits(res, {
trace,
schemas,
fileOutputs,
fileMerges,
outputProcessors,
})
return res
}
function parseAssistantMessage(
resp: ChatCompletionResponse
): ChatCompletionAssistantMessageParam {
const { signature } = resp
const { content, reasoning } = splitThink(resp.text)
const reasoning_content = resp.reasoning || reasoning
if (!content && !reasoning_content) {
return undefined
}
return deleteUndefinedValues({
role: "assistant",
content,
reasoning_content,
signature,
} satisfies ChatCompletionAssistantMessageParam)
}
async function processChatMessage(
model: string,
timer: () => number,
req: CreateChatCompletionRequest,
resp: ChatCompletionResponse,
messages: ChatCompletionMessageParam[],
tools: ToolCallback[],
chatParticipants: ChatParticipant[],
schemas: Record<string, JSONSchema>,
fileOutputs: FileOutput[],
outputProcessors: PromptOutputProcessorHandler[],
fileMerges: FileMergeHandler[],
cacheImage: (url: string) => Promise<string>,
options: GenerationOptions
): Promise<RunPromptResult> {
const {
stats,
maxToolCalls = MAX_TOOL_CALLS,
trace,
cancellationToken,
} = options
stats.addRequestUsage(model, req, resp)
const assisantMessage = parseAssistantMessage(resp)
if (assisantMessage) {
messages.push(assisantMessage)
}
const assistantContent = assisantMessage?.content as string
if (options.fallbackTools && assistantContent && tools.length) {
dbg(`extracting tool calls from assistant content (fallback)`)
resp.toolCalls = []
// parse tool call
const toolCallFences = extractFenced(assistantContent).filter((f) =>
/^tool_calls?$/.test(f.language)
)
for (const toolCallFence of toolCallFences) {
for (const toolCall of toolCallFence.content.split("\n")) {
const { name, args } =
/^(?<name>[\w\d]+):\s*(?<args>\{.*\})\s*$/i.exec(toolCall)
?.groups || {}
if (name) {
resp.toolCalls.push({
id: undefined,
name,
arguments: args,
} satisfies ChatCompletionToolCall)
}
}
}
}
// execute tools as needed
if (resp.toolCalls?.length) {
dbg(`executing tool calls`)
await runToolCalls(resp, messages, tools, options)
stats.toolCalls += resp.toolCalls.length
if (stats.toolCalls > maxToolCalls) {
throw new Error(
`maximum number of tool calls ${maxToolCalls} reached`
)
}
return undefined // keep working
}
// apply repairs if necessary
if (await applyRepairs(messages, schemas, options)) {
return undefined // keep working
}
let err: any
if (chatParticipants?.length) {
dbg(`processing chat participants`)
let needsNewTurn = false
for (const participant of chatParticipants) {
const { generator, options: participantOptions } = participant || {}
const { label } = participantOptions || {}
const participantTrace = trace.startTraceDetails(
`🙋 participant ${label || ""}`
)
try {
const ctx = createChatTurnGenerationContext(
options,
participantTrace,
cancellationToken
)
const { messages: newMessages } =
(await generator(
ctx,
structuredClone(messages) satisfies ChatMessage[],
assistantContent
)) || {}
const node = ctx.node
checkCancelled(cancellationToken)
// update modified messages
if (newMessages?.length) {
dbg(`updating messages with new participant messages`)
messages.splice(0, messages.length, ...newMessages)
needsNewTurn = true
participantTrace.details(
`💬 new messages`,
await renderMessagesToMarkdown(messages, {
textLang: "markdown",
user: true,
assistant: true,
cacheImage,
})
)
}
dbg(`expanding participant template`)
// expand template
const { errors, messages: participantMessages } =
await renderPromptNode(options.model, node, {
flexTokens: options.flexTokens,
fenceFormat: options.fenceFormat,
trace: participantTrace,
})
if (participantMessages?.length) {
if (
participantMessages.some(
({ role }) => role === "system"
)
) {
throw new Error(
"system messages not supported for chat participants"
)
}
participantTrace.details(
`💬 added messages (${participantMessages.length})`,
await renderMessagesToMarkdown(participantMessages, {
textLang: "text",
user: true,
assistant: true,
cacheImage,
}),
{ expanded: true }
)
messages.push(...participantMessages)
needsNewTurn = true
} else {
participantTrace.item("no message")
}
if (errors?.length) {
dbg(`participant processing encountered errors`)
err = errors[0]
for (const error of errors) {
participantTrace.error(undefined, error)
}
needsNewTurn = false
break
}
} catch (e) {
err = e
logError(e)
participantTrace.error(`participant error`, e)
needsNewTurn = false
break
} finally {
participantTrace.endDetails()
}
}
if (needsNewTurn) {
dbg(`participant processing complete, needs new turn`)
return undefined
}
}
const logprobs = resp.logprobs?.map(serializeLogProb)
return structurifyChatSession(
timer,
messages,
schemas,
fileOutputs,
outputProcessors,
fileMerges,
logprobs,
options,
{
resp,
err,
}
)
}
/**
* Merges two sets of generation options, prioritizing values specified in the second parameter
* while falling back to defaults from the first parameter and runtime configurations.
*
* @param options - A base set of generation options containing default values.
* @param runOptions - A set of custom generation options that override the base values.
* @returns A merged set of generation options with priority given to `runOptions` values.
*
* The merging process includes:
* - `model`: Prioritized from `runOptions`, then `options`, and finally the runtime host's default large model.
* - `temperature`: Taken from `runOptions` if present, otherwise from the runtime host's default large model settings.
* - `fallbackTools`: Taken from `runOptions` if present, otherwise from the runtime host's default large model settings.
* - `reasoningEffort`: Taken from `runOptions` if present, otherwise from the runtime host's default large model settings.
* - `embeddingsModel`: Resolved from `runOptions` if defined or falls back to `options`.
*/
export function mergeGenerationOptions(
options: GenerationOptions,
runOptions: ModelOptions & EmbeddingsModelOptions
): GenerationOptions {
const res = {
...options,
...(runOptions || {}),
model:
runOptions?.model ??
options?.model ??
runtimeHost.modelAliases.large.model,
temperature:
runOptions?.temperature ??
runtimeHost.modelAliases.large.temperature,
fallbackTools:
runOptions?.fallbackTools ??
runtimeHost.modelAliases.large.fallbackTools,
reasoningEffort:
runOptions?.reasoningEffort ??
runtimeHost.modelAliases.large.reasoningEffort,
embeddingsModel:
runOptions?.embeddingsModel ?? options?.embeddingsModel,
} satisfies GenerationOptions
return res
}
async function choicesToLogitBias(
trace: MarkdownTrace,
model: string,
choices: ElementOrArray<
string | { token: string | number; weight?: number }
>
): Promise<Record<number, number>> {
choices = arrayify(choices)
if (!choices?.length) {
return undefined
}
dbg(`computing logit bias for choices`)
const { encode } =
(await resolveTokenEncoder(model, {
disableFallback: true,
})) || {}
if (
!encode &&
choices.some(
(c) => typeof c === "string" || typeof c.token === "string"
)
) {
logWarn(
`unable to compute logit bias, no token encoder found for ${model}`
)
logVerbose(YAMLStringify({ choices }))
trace.warn(
`unable to compute logit bias, no token encoder found for ${model}`
)
return undefined
}
const logit_bias: Record<number, number> = Object.fromEntries(
choices.map((c) => {
const { token, weight } = typeof c === "string" ? { token: c } : c
const encoded = typeof token === "number" ? [token] : encode(token)
if (encoded.length !== 1) {
logWarn(
`choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)`
)
trace.warn(
`choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)`
)
}
return [encoded[0], isNaN(weight) ? CHOICE_LOGIT_BIAS : weight] as [
number,
number,
]
})
)
trace.itemValue(
"choices",
choices
.map((c) => (typeof c === "string" ? c : JSON.stringify(c)))
.join(", ")
)
trace.itemValue("logit bias", JSON.stringify(logit_bias))
return logit_bias
}
/**
* Executes a chat session by interacting with a language model, processing messages,
* handling tool integrations, and managing responses.
*
* @param connectionToken - Configuration for connecting to the language model, excluding the token.
* @param cancellationToken - Token to support cancellation of the chat session.
* @param messages - List of chat messages exchanged during the session.
* @param toolDefinitions - Definitions of tools that can be invoked during the session.
* @param schemas - JSON schemas for validating response content.
* @param fileOutputs - Files to be generated or modified during the session.
* @param outputProcessors - Handlers for post-processing generated outputs.
* @param fileMerges - Handlers for merging file outputs.
* @param prediction - Prediction metadata to guide the response generation.
* @param completer - Function that sends requests to the language model and returns the response.
* @param chatParticipants - List of participants involved in the chat session.
* @param disposables - Objects that require cleanup after the session ends.
* @param genOptions - Options to customize the session execution, such as model configuration, behavior, and caching.
*
* @returns - The final structured result of the chat session.
*/
export async function executeChatSession(
connectionToken: LanguageModelConfiguration,
cancellationToken: CancellationToken,
messages: ChatCompletionMessageParam[],
toolDefinitions: ToolCallback[],
schemas: Record<string, JSONSchema>,
fileOutputs: FileOutput[],
outputProcessors: PromptOutputProcessorHandler[],
fileMerges: FileMergeHandler[],
prediction: PromptPrediction,
completer: ChatCompletionHandler,
chatParticipants: ChatParticipant[],
disposables: AsyncDisposable[],
genOptions: GenerationOptions
): Promise<RunPromptResult> {
const {
trace,
model,
temperature,
reasoningEffort,
topP,
toolChoice,
maxTokens,
seed,
responseType,
responseSchema,
stats,
fallbackTools,
choices,
topLogprobs,
cache,
inner,
metadata,
partialCb,
} = genOptions
assert(!!model, "model is required")
const { token, source, ...cfgNoToken } = connectionToken
const top_logprobs = genOptions.topLogprobs > 0 ? topLogprobs : undefined
const logprobs = genOptions.logprobs || top_logprobs > 0 ? true : undefined
traceLanguageModelConnection(trace, genOptions, connectionToken)
dbg(
`chat ${model}`,
deleteUndefinedValues({
temperature,
choices,
fallbackTools,
logprobs,
top_logprobs,
})
)
const tools: ChatCompletionTool[] = toolDefinitions?.length
? toolDefinitions.map(
(f) =>
<ChatCompletionTool>{
type: "function",
function: {
name: f.spec.name,
description: ellipse(
f.spec.description,
MAX_TOOL_DESCRIPTION_LENGTH
),
parameters: f.spec.parameters as any,
},
}
)
: undefined
const cacheStore = !!cache
? getChatCompletionCache(typeof cache === "string" ? cache : "chat")
: undefined
const chatTrace = trace.startTraceDetails(`💬 chat`, { expanded: true })
const store = !!metadata ? true : undefined
const timer = measure("chat")
const cacheImage = async (url: string) =>
await fileCacheImage(url, {
trace,
cancellationToken,
dir: chatTrace.options?.dir,
})
try {
if (toolDefinitions?.length) {
chatTrace.detailsFenced(`🛠️ tools`, tools, "yaml")
const toolNames = toolDefinitions.map(({ spec }) => spec.name)
const duplicates = uniq(toolNames).filter(
(name, index) => toolNames.lastIndexOf(name) !== index
)
if (duplicates.length) {
chatTrace.error(`duplicate tools: ${duplicates.join(", ")}`)
return {
error: serializeError(
`duplicate tools: ${duplicates.join(", ")}`
),
finishReason: "fail",
messages,
text: "",
}
}
}
while (true) {
stats.turns++
collapseChatMessages(messages)
dbg(`turn ${stats.turns}`)
if (messages) {
chatTrace.details(
`💬 messages (${messages.length})`,
await renderMessagesToMarkdown(messages, {
textLang: "markdown",
user: true,
assistant: true,
cacheImage,
tools,
}),
{ expanded: true }
)
}
// make request
let req: CreateChatCompletionRequest
let resp: ChatCompletionResponse
try {
checkCancelled(cancellationToken)
const reqTrace = chatTrace.startTraceDetails(`📤 llm request`)
try {
const logit_bias = await choicesToLogitBias(
reqTrace,
model,
choices
)
req = {
model,
temperature,
store,
metadata: store ? metadata : undefined,
reasoning_effort: reasoningEffort,
top_p: topP,
tool_choice:
!fallbackTools && tools?.length
? typeof toolChoice === "object"
? {
type: "function",
function: { name: toolChoice.name },
}
: toolChoice
: undefined,
max_tokens: maxTokens,
logit_bias,
seed,
stream: true,
logprobs,
top_logprobs,
tools: fallbackTools ? undefined : tools,
// https://platform.openai.com/docs/guides/predicted-outputs
prediction: prediction?.content
? prediction
: undefined,
response_format:
responseType === "json_object"
? { type: responseType }
: responseType === "json_schema"
? {
type: "json_schema",
json_schema: {
name: "result",
schema: toStrictJSONSchema(
responseSchema,
{ noDefaults: true }
),
strict: true,
},
}
: undefined,
messages,
} satisfies CreateChatCompletionRequest
updateChatFeatures(reqTrace, model, req)
if (!isQuiet)
stderr.write(
await renderMessagesToTerminal(req, {
user: true,
tools,
})
)
const infer = async () => {
logVerbose(`\n`)
const m = measure(
"chat.completer",
`${req.model} -> ${req.messages.length} messages`
)
dbg(
`infer ${req.model} with ${req.messages.length} messages`
)
if (req.response_format)
dbg(
`response format: %O`,
JSON.stringify(req.response_format, null, 2)
)
const cres = await completer(
req,
connectionToken,
genOptions,
reqTrace
)
const duration = m()
cres.duration = duration
return cres
}
if (cacheStore) {
dbg(`cache store enabled, checking cache`)
const cachedKey = deleteUndefinedValues({
modelid: model,
...req,
responseType,
responseSchema,
...cfgNoToken,
}) satisfies ChatCompletionRequestCacheKey
const validator = (value: ChatCompletionResponse) => {
const ok = value?.finishReason === "stop"
return ok
}
const cacheRes = await cacheStore.getOrUpdate(
cachedKey,
infer,
validator
)
logVerbose("\n")
resp = cacheRes.value
resp.cached = cacheRes.cached
reqTrace.itemValue("cache", cacheStore.name)
reqTrace.itemValue("cache_key", cacheRes.key)
dbg(
`cache ${resp.cached ? "hit" : "miss"} (${cacheStore.name}/${cacheRes.key.slice(0, 7)})`
)
if (resp.cached) {
if (cacheRes.value.text) {
partialCb(
deleteUndefinedValues({
responseSoFar: cacheRes.value.text,
tokensSoFar: 0,
responseChunk: cacheRes.value.text,
responseTokens: cacheRes.value.logprobs,
reasoningSoFar:
cacheRes.value.reasoning,
inner,
})
)
}
}
} else {
resp = await infer()
}
} finally {
logVerbose("\n")
reqTrace.endDetails()
}
const output = await processChatMessage(
model,
timer,
req,
resp,
messages,
toolDefinitions,
chatParticipants,
schemas,
fileOutputs,
outputProcessors,
fileMerges,
cacheImage,
genOptions
)
if (output) {
return output
}
} catch (err) {
return structurifyChatSession(
timer,
messages,
schemas,
fileOutputs,
outputProcessors,
fileMerges,
[],
genOptions,
{ resp, err }
)
}
}
} finally {
await dispose(disposables, { trace: chatTrace })
stats.trace(chatTrace)
chatTrace.endDetails()
}
}
function updateChatFeatures(
trace: MarkdownTrace,
modelid: string,
req: CreateChatCompletionRequest
) {
const { provider, model } = parseModelIdentifier(modelid)
const features = providerFeatures(provider)
if (!isNaN(req.seed) && features?.seed === false) {
dbg(`seed: disabled, not supported by ${provider}`)
trace.itemValue(`seed`, `disabled`)
delete req.seed // some providers do not support seed
}
if (req.logit_bias && features?.logitBias === false) {
dbg(`logit_bias: disabled, not supported by ${provider}`)
trace.itemValue(`logit_bias`, `disabled`)
delete req.logit_bias // some providers do not support logit_bias
}
if (!isNaN(req.top_p) && features?.topP === false) {
dbg(`top_p: disabled, not supported by ${provider}`)
trace.itemValue(`top_p`, `disabled`)
delete req.top_p
}
if (req.tool_choice && features?.toolChoice === false) {
dbg(`tool_choice: disabled, not supported by ${provider}`)
trace.itemValue(`tool_choice`, `disabled`)
delete req.tool_choice
}
if (req.logprobs && features?.logprobs === false) {
dbg(`logprobs: disabled, not supported by ${provider}`)
trace.itemValue(`logprobs`, `disabled`)
delete req.logprobs
delete req.top_logprobs
}
if (req.prediction && features?.prediction === false) {
dbg(`prediction: disabled, not supported by ${provider}`)
delete req.prediction
}
if (
req.top_logprobs &&
(features?.logprobs === false || features?.topLogprobs === false)
) {
dbg(`top_logprobs: disabled, not supported by ${provider}`)
trace.itemValue(`top_logprobs`, `disabled`)
delete req.top_logprobs
}
if (/^o1/i.test(model) && !req.max_completion_tokens) {
dbg(`max_tokens: renamed to max_completion_tokens`)
req.max_completion_tokens = req.max_tokens
delete req.max_tokens
}
if (req.store && !features?.metadata) {
dbg(`metadata: disabled, not supported by ${provider}`)
delete req.metadata
delete req.store
}
deleteUndefinedValues(req)
}
/**
* Logs detailed information about a prompt result, including reasoning and output, in a structured format.
*
* @param trace - A trace instance used to record detailed logs and events during the prompt execution.
* @param resp - The response object containing optional text and reasoning fields from the prompt result.
*
* If 'reasoning' is present in the response, it is logged in a dedicated "reasoning" section with markdown formatting.
* If 'text' is present, the function determines its format (e.g., JSON, XML, Markdown, or plain text) and logs it in a corresponding section.
* Outputs in Markdown format are further prettified for improved readability in the logs and appended as escaped HTML content.
*/
export function tracePromptResult(
trace: MarkdownTrace,
resp: { text?: string; reasoning?: string }
) {
const { text, reasoning } = resp || {}
if (reasoning) {
trace.detailsFenced(`🤔 reasoning`, reasoning, "markdown")
}
// try to sniff the output type
if (text) {
const language = JSON5TryParse(text)
? "json"
: XMLTryParse(text)
? "xml"
: /^(-|\*|#+|```)\s/im.test(text)
? "markdown"
: "text"
trace.detailsFenced(`🔠 output`, text, language, { expanded: true })
if (language === "markdown") {
trace.appendContent(
"\n\n" + HTMLEscape(prettifyMarkdown(text)) + "\n\n"
)
}
}
}
/**
* Appends a user message to a chat history.
*
* @param messages - The current chat message array.
* @param content - The content of the user message. Can be a string or an image.
* @param options - Optional parameters for modifying behavior.
* @param options.cacheControl - Cache control value for the message.
*
* Notes:
* - If the last message in the array is not a user message or has different cache control,
* a new user message is added.
* - String content is appended to the existing user's message text. If the content is an image,
* it is added as a chat completion image.
* - If the last message content is a string, it is converted to an array when adding an image.
*/
export function appendUserMessage(
messages: ChatCompletionMessageParam[],
content: string | PromptImage,
options?: ContextExpansionOptions
) {
if (!content) {
return
}
const { cacheControl } = options || {}
let last = messages.at(-1) as ChatCompletionUserMessageParam
if (last?.role !== "user" || options?.cacheControl !== last?.cacheControl) {
last = {
role: "user",
content: "",
} satisfies ChatCompletionUserMessageParam
if (cacheControl) {
last.cacheControl = cacheControl
}
messages.push(last)
}
if (typeof content === "string") {
if (last.content) {
if (typeof last.content === "string") {
last.content += "\n" + content
} else {
last.content.push({ type: "text", text: content })
}
} else {
last.content = content
}
} else {
// add image
if (typeof last.content === "string") {
last.content = last.content
? [{ type: "text", text: last.content }]
: []
}
last.content.push(toChatCompletionImage(content))
}
}
/**
* Appends a message from the assistant to the list of chat messages.
*
* Adds the content to the last assistant message if it matches the role
* and cache control context; otherwise, creates a new assistant message entry.
*
* If the last assistant message already has content, appends the new content
* to it. Supports both string and structured content formats.
*
* @param messages - The list of chat messages to update.
* @param content - The content of the assistant message. Ignored if empty.
* @param options - Optional context settings for the message, such as cache control.
*/
export function appendAssistantMessage(
messages: ChatCompletionMessageParam[],
content: string,
options?: ContextExpansionOptions
) {
if (!content) {
return
}
const { cacheControl } = options || {}
let last = messages.at(-1) as ChatCompletionAssistantMessageParam
if (
last?.role !== "assistant" ||
options?.cacheControl !== last?.cacheControl
) {
last = {
role: "assistant",
content: "",
} satisfies ChatCompletionAssistantMessageParam
if (cacheControl) {
last.cacheControl = cacheControl
}
messages.push(last)
}
if (last.content) {
if (typeof last.content === "string") {
last.content += "\n" + content
} else {
last.content.push({ type: "text", text: content })
}
} else {
last.content = content
}
}
/**
* Appends a system-level message to the beginning of the given messages array.
*
* @param messages - The list of chat messages to which the system message will be added.
* The system message is prepended to the array.
* @param content - The content of the message to be appended. If content is empty, the function exits.
* @param options - Optional parameters for additional message context. Includes:
* - cacheControl: A control directive for caching behavior.
*
* If the first message in the array is not a system message or does not match the provided cacheControl, a new system
* message object is created and added at the start of the array. Otherwise, the content is appended to the existing
* system message.
* If the existing system message content is a string, SYSTEM_FENCE is used as a separator before appending the new
* content. For non-string content, a text object is added to the content array.
* If the system message content is empty, the new content is directly assigned.
*/
export function appendSystemMessage(
messages: ChatCompletionMessageParam[],
content: string,
options?: ContextExpansionOptions
) {
if (!content) {
return
}
const { cacheControl } = options || {}
let last = messages[0] as ChatCompletionSystemMessageParam
if (
last?.role !== "system" ||
options?.cacheControl !== last?.cacheControl
) {
last = {
role: "system",
content: "",
} as ChatCompletionSystemMessageParam
if (cacheControl) {
last.cacheControl = cacheControl
}
messages.unshift(last)
}
if (last.content) {
if (typeof last.content === "string") {
last.content += SYSTEM_FENCE + content
} else {
last.content.push({ type: "text", text: content })
}
} else {
last.content = content
}
}
/**
* Adds tool definitions to the system messages of a chat conversation.
*
* The function inserts a system message containing the serialized tool definitions,
* formatted as YAML and wrapped in `<tools>` tags, into the provided list of chat messages.
*
* @param messages - The array of chat messages to which the tool definitions will be added.
* @param tools - An array of tool callback objects whose specifications will be serialized
* and included in the system message.
*/
export function addToolDefinitionsMessage(
messages: ChatCompletionMessageParam[],
tools: ToolCallback[]
) {
dbg(`adding tool definitions to messages`)
appendSystemMessage(
messages,
`
<tools>
${YAMLStringify(tools.map((t) => t.spec))}
</tools>
`
)
}