document.go•11.2 kB
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0
package ai
import (
"encoding/json"
"fmt"
"strings"
)
// A Document is a piece of data that can be embedded, indexed, or retrieved.
// It includes metadata. It can contain multiple parts.
type Document struct {
// The data that is part of this document.
Content []*Part `json:"content,omitempty"`
// The metadata for this document.
Metadata map[string]any `json:"metadata,omitempty"`
}
// A Part is one part of a [Document]. This may be plain text or it
// may be a URL (possibly a "data:" URL with embedded data).
type Part struct {
Kind PartKind `json:"kind,omitempty"`
ContentType string `json:"contentType,omitempty"` // valid for kind==blob
Text string `json:"text,omitempty"` // valid for kind∈{text,blob}
ToolRequest *ToolRequest `json:"toolRequest,omitempty"` // valid for kind==partToolRequest
ToolResponse *ToolResponse `json:"toolResponse,omitempty"` // valid for kind==partToolResponse
Resource *ResourcePart `json:"resource,omitempty"` // valid for kind==partResource
Custom map[string]any `json:"custom,omitempty"` // valid for plugin-specific custom parts
Metadata map[string]any `json:"metadata,omitempty"` // valid for all kinds
}
type PartKind int8
const (
PartText PartKind = iota
PartMedia
PartData
PartToolRequest
PartToolResponse
PartCustom
PartReasoning
PartResource
)
// NewTextPart returns a Part containing text.
func NewTextPart(text string) *Part {
return &Part{Kind: PartText, ContentType: "plain/text", Text: text}
}
// NewJSONPart returns a Part containing JSON.
func NewJSONPart(text string) *Part {
return &Part{Kind: PartText, ContentType: "application/json", Text: text}
}
// NewMediaPart returns a Part containing structured data described
// by the given mimeType.
func NewMediaPart(mimeType, contents string) *Part {
return &Part{Kind: PartMedia, ContentType: mimeType, Text: contents}
}
// NewDataPart returns a Part containing raw string data.
func NewDataPart(contents string) *Part {
return &Part{Kind: PartData, Text: contents}
}
// NewToolRequestPart returns a Part containing a request from
// the model to the client to run a Tool.
// (Only genkit plugins should need to use this function.)
func NewToolRequestPart(r *ToolRequest) *Part {
return &Part{Kind: PartToolRequest, ToolRequest: r}
}
// NewToolResponsePart returns a Part containing the results
// of applying a Tool that the model requested.
func NewToolResponsePart(r *ToolResponse) *Part {
return &Part{Kind: PartToolResponse, ToolResponse: r}
}
// NewResponseForToolRequest returns a Part containing the results
// of executing the tool request part.
func NewResponseForToolRequest(p *Part, output any) *Part {
if !p.IsToolRequest() {
return nil
}
return &Part{Kind: PartToolResponse, ToolResponse: &ToolResponse{
Name: p.ToolRequest.Name,
Ref: p.ToolRequest.Ref,
Output: output,
}}
}
// NewCustomPart returns a Part containing custom plugin-specific data.
func NewCustomPart(customData map[string]any) *Part {
return &Part{Kind: PartCustom, Custom: customData}
}
// NewReasoningPart returns a Part containing reasoning text
func NewReasoningPart(text string, signature []byte) *Part {
return &Part{
Kind: PartReasoning,
ContentType: "plain/text",
Text: text,
Metadata: map[string]any{
"signature": signature,
},
}
}
// NewResourcePart returns a Part containing a resource reference.
func NewResourcePart(uri string) *Part {
return &Part{Kind: PartResource, Resource: &ResourcePart{Uri: uri}}
}
// IsText reports whether the [Part] contains plain text.
func (p *Part) IsText() bool {
return p != nil && p.Kind == PartText
}
// IsMedia reports whether the [Part] contains structured media data.
func (p *Part) IsMedia() bool {
return p != nil && p.Kind == PartMedia
}
// IsData reports whether the [Part] contains unstructured data.
func (p *Part) IsData() bool {
return p != nil && p.Kind == PartData
}
// IsToolRequest reports whether the [Part] contains a request to run a tool.
func (p *Part) IsToolRequest() bool {
return p != nil && p.Kind == PartToolRequest
}
// IsToolResponse reports whether the [Part] contains the result of running a tool.
func (p *Part) IsToolResponse() bool {
return p != nil && p.Kind == PartToolResponse
}
// IsInterrupt reports whether the [Part] contains a tool request that was interrupted.
func (p *Part) IsInterrupt() bool {
return p != nil && p.IsToolRequest() && p.Metadata != nil && p.Metadata["interrupt"] != nil
}
// IsCustom reports whether the [Part] contains custom plugin-specific data.
func (p *Part) IsCustom() bool {
return p != nil && p.Kind == PartCustom
}
// IsReasoning reports whether the [Part] contains a reasoning text
func (p *Part) IsReasoning() bool {
return p != nil && p.Kind == PartReasoning
}
// IsImage reports whether the [Part] contains an image.
func (p *Part) IsImage() bool {
if p == nil || !p.IsMedia() {
return false
}
return IsImageContentType(p.ContentType) || strings.HasPrefix(p.Text, "data:image/")
}
// IsVideo reports whether the [Part] contains a video.
func (p *Part) IsVideo() bool {
if p == nil || !p.IsMedia() {
return false
}
return IsVideoContentType(p.ContentType) || strings.HasPrefix(p.Text, "data:video/")
}
// IsAudio reports whether the [Part] contains an audio file.
func (p *Part) IsAudio() bool {
if p == nil || !p.IsMedia() {
return false
}
return IsAudioContentType(p.ContentType) || strings.HasPrefix(p.Text, "data:audio/")
}
// IsResource reports whether the [Part] contains a resource reference.
func (p *Part) IsResource() bool {
return p != nil && p.Kind == PartResource
}
// MarshalJSON is called by the JSON marshaler to write out a Part.
func (p *Part) MarshalJSON() ([]byte, error) {
if p == nil {
return nil, fmt.Errorf("part is nil")
}
// This is not handled by the schema generator because
// Part is defined in TypeScript as a union.
switch p.Kind {
case PartText:
v := textPart{
Text: p.Text,
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartMedia:
v := mediaPart{
Media: &Media{
ContentType: p.ContentType,
Url: p.Text,
},
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartData:
v := dataPart{
Data: p.Text,
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartToolRequest:
v := toolRequestPart{
ToolRequest: p.ToolRequest,
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartToolResponse:
v := toolResponsePart{
ToolResponse: p.ToolResponse,
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartResource:
v := resourcePart{
Resource: p.Resource,
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartCustom:
v := customPart{
Custom: p.Custom,
Metadata: p.Metadata,
}
return json.Marshal(v)
case PartReasoning:
v := reasoningPart{
Reasoning: p.Text,
Metadata: p.Metadata,
}
return json.Marshal(v)
default:
return nil, fmt.Errorf("invalid part kind %v", p.Kind)
}
}
type partSchema struct {
Text string `json:"text,omitempty" yaml:"text,omitempty"`
Media *Media `json:"media,omitempty" yaml:"media,omitempty"`
Data string `json:"data,omitempty" yaml:"data,omitempty"`
ToolRequest *ToolRequest `json:"toolRequest,omitempty" yaml:"toolRequest,omitempty"`
ToolResponse *ToolResponse `json:"toolResponse,omitempty" yaml:"toolResponse,omitempty"`
Resource *ResourcePart `json:"resource,omitempty" yaml:"resource,omitempty"`
Custom map[string]any `json:"custom,omitempty" yaml:"custom,omitempty"`
Metadata map[string]any `json:"metadata,omitempty" yaml:"metadata,omitempty"`
Reasoning string `json:"reasoning,omitempty" yaml:"reasoning,omitempty"`
}
// unmarshalPartFromSchema updates Part p based on the schema s.
func (p *Part) unmarshalPartFromSchema(s partSchema) {
switch {
case s.Media != nil:
p.Kind = PartMedia
p.Text = s.Media.Url
p.ContentType = s.Media.ContentType
case s.ToolRequest != nil:
p.Kind = PartToolRequest
p.ToolRequest = s.ToolRequest
case s.ToolResponse != nil:
p.Kind = PartToolResponse
p.ToolResponse = s.ToolResponse
case s.Resource != nil:
p.Kind = PartResource
p.Resource = s.Resource
case s.Custom != nil:
p.Kind = PartCustom
p.Custom = s.Custom
default:
p.Kind = PartText
p.Text = s.Text
p.ContentType = ""
if s.Data != "" {
// Note: if part is completely empty, we use text by default.
p.Kind = PartData
p.Text = s.Data
}
}
p.Metadata = s.Metadata
}
// UnmarshalJSON is called by the JSON unmarshaler to read a Part.
func (p *Part) UnmarshalJSON(b []byte) error {
var s partSchema
if err := json.Unmarshal(b, &s); err != nil {
return err
}
p.unmarshalPartFromSchema(s)
return nil
}
// UnmarshalYAML implements goccy/go-yaml library's InterfaceUnmarshaler interface.
func (p *Part) UnmarshalYAML(unmarshal func(any) error) error {
var s partSchema
if err := unmarshal(&s); err != nil {
return err
}
p.unmarshalPartFromSchema(s)
return nil
}
// JSONSchemaAlias tells the JSON schema reflection code to use a different
// type for the schema for this type. This is needed because the JSON
// marshaling of Part uses a schema that matches the TypeScript code,
// rather than the natural JSON marshaling. This matters because the
// current JSON validation code works by marshaling the JSON.
func (Part) JSONSchemaAlias() any {
return partSchema{}
}
// DocumentFromText returns a [Document] containing a single plain text part.
// This takes ownership of the metadata map.
func DocumentFromText(text string, metadata map[string]any) *Document {
return &Document{
Content: []*Part{
{
Kind: PartText,
Text: text,
},
},
Metadata: metadata,
}
}
// IsImageContentType checks if the content type represents an image.
func IsImageContentType(contentType string) bool {
return strings.HasPrefix(contentType, "image/") || strings.HasPrefix(contentType, "data:image/")
}
// IsVideoContentType checks if the content type represents a video.
func IsVideoContentType(contentType string) bool {
return strings.HasPrefix(contentType, "video/") || strings.HasPrefix(contentType, "data:video/")
}
// IsAudioContentType checks if the content type represents an audio file.
func IsAudioContentType(contentType string) bool {
return strings.HasPrefix(contentType, "audio/") || strings.HasPrefix(contentType, "data:audio/")
}