// schemas.ts
import { z } from 'zod'
import {
  SourceSyncFileStorageType,
  SourceSyncVectorStorageProvider,
  SourceSyncEmbeddingModelProvider,
  SourceSyncOpenAIEmbeddingModel,
  SourceSyncCohereEmbeddingModel,
  SourceSyncJinaEmbeddingModel,
  SourceSyncWebScraperProvider,
  SourceSyncIngestionSource,
  SourceSyncChunkConfig,
  SourceSyncDocumentType,
  SourceSyncIngestionStatus,
  SourceSyncConnector,
  SourceSyncSearchType,
} from './sourcesync.types.js'
// Common schemas
// Each of these accepts a string or `undefined`, so an object field built from
// one of them may be left out entirely.
export const apiKeySchema = z.optional(z.string())
export const tenantIdSchema = z.optional(z.string())
export const namespaceIdSchema = z.optional(z.string())
// File storage config schema
// Key pair nested under `credentials`; both parts are required strings.
const fileStorageCredentialsSchema = z.object({
  accessKeyId: z.string(),
  secretAccessKey: z.string(),
})
// Every field is required. `type` must be a SourceSyncFileStorageType value;
// the remaining top-level fields are plain strings.
export const fileStorageConfigSchema = z.object({
  type: z.nativeEnum(SourceSyncFileStorageType),
  bucket: z.string(),
  region: z.string(),
  endpoint: z.string(),
  credentials: fileStorageCredentialsSchema,
})
// Vector storage config schema
// Every field is required. `provider` must be a
// SourceSyncVectorStorageProvider value.
export const vectorStorageConfigSchema = z.object({
  provider: z.nativeEnum(SourceSyncVectorStorageProvider),
  apiKey: z.string(),
  indexHost: z.string(),
})
// Embedding model config schema
// One variant per provider: `provider` is pinned to a single literal and
// `model` is limited to that provider's own model enum.
const openAiEmbeddingConfigSchema = z.object({
  provider: z.literal(SourceSyncEmbeddingModelProvider.OPENAI),
  model: z.nativeEnum(SourceSyncOpenAIEmbeddingModel),
  apiKey: z.string(),
})
const cohereEmbeddingConfigSchema = z.object({
  provider: z.literal(SourceSyncEmbeddingModelProvider.COHERE),
  model: z.nativeEnum(SourceSyncCohereEmbeddingModel),
  apiKey: z.string(),
})
const jinaEmbeddingConfigSchema = z.object({
  provider: z.literal(SourceSyncEmbeddingModelProvider.JINA),
  model: z.nativeEnum(SourceSyncJinaEmbeddingModel),
  apiKey: z.string(),
})
// A value is valid when it matches any one of the three variants above.
export const embeddingModelConfigSchema = z.union([
  openAiEmbeddingConfigSchema,
  cohereEmbeddingConfigSchema,
  jinaEmbeddingConfigSchema,
])
// Web scraper config schema
// Both fields are required. `provider` must be a SourceSyncWebScraperProvider
// value.
export const webScraperConfigSchema = z.object({
  provider: z.nativeEnum(SourceSyncWebScraperProvider),
  apiKey: z.string(),
})
// Connector config schemas
// Every connector below takes the same two required string fields. The factory
// builds a fresh shape on each call, so no schema instances are shared between
// the exported schemas.
const connectorClientShape = () => ({
  clientId: z.string(),
  clientSecret: z.string(),
})
export const notionConfigSchema = z.object(connectorClientShape())
// Google Drive is the only connector that also requires an `apiKey`.
export const googleDriveConfigSchema = z.object({
  ...connectorClientShape(),
  apiKey: z.string(),
})
export const dropboxConfigSchema = z.object(connectorClientShape())
export const onedriveConfigSchema = z.object(connectorClientShape())
export const boxConfigSchema = z.object(connectorClientShape())
export const sharepointConfigSchema = z.object(connectorClientShape())
// Chunk config schema
// Text attached to the schema through `.describe()`.
const chunkConfigDescription =
  'Optional Chunk config. When not passed, default chunk config will be used.'
// When a chunk config object is supplied, both numbers are required. The
// schemas in this file that embed it apply `.optional()` at the point of use.
export const chunkConfigSchema = z
  .object({ chunkSize: z.number(), chunkOverlap: z.number() })
  .describe(chunkConfigDescription)
// Tool schemas
// Empty object schema: declares no fields.
export const validateApiKeySchema = z.object({})
// `name` plus the file storage, vector storage and embedding model configs are
// required; `webScraperConfig` and `tenantId` may be omitted.
export const createNamespaceSchema = z.object({
  name: z.string(),
  fileStorageConfig: fileStorageConfigSchema,
  vectorStorageConfig: vectorStorageConfigSchema,
  embeddingModelConfig: embeddingModelConfigSchema,
  webScraperConfig: webScraperConfigSchema.optional(),
  tenantId: tenantIdSchema,
})
export const listNamespacesSchema = z.object({ tenantId: tenantIdSchema })
// Field pair used by both the get and delete schemas. Both fields are optional
// because the shared id schemas are themselves optional.
const namespaceLookupShape = () => ({
  namespaceId: namespaceIdSchema,
  tenantId: tenantIdSchema,
})
export const getNamespaceSchema = z.object(namespaceLookupShape())
// Every config section is optional, so a caller can send only the sections it
// wants to include.
export const updateNamespaceSchema = z.object({
  namespaceId: namespaceIdSchema,
  fileStorageConfig: fileStorageConfigSchema.optional(),
  vectorStorageConfig: vectorStorageConfigSchema.optional(),
  embeddingModelConfig: embeddingModelConfigSchema.optional(),
  webScraperConfig: webScraperConfigSchema.optional(),
  notionConfig: notionConfigSchema.optional(),
  googleDriveConfig: googleDriveConfigSchema.optional(),
  dropboxConfig: dropboxConfigSchema.optional(),
  onedriveConfig: onedriveConfigSchema.optional(),
  boxConfig: boxConfigSchema.optional(),
  sharepointConfig: sharepointConfigSchema.optional(),
  tenantId: tenantIdSchema,
})
export const deleteNamespaceSchema = z.object(namespaceLookupShape())
// Parameters for ingesting a text payload.
export const ingestTextSchema = z.object({
  // namespaceIdSchema is already optional, so it is used as-is, the same way
  // getNamespaceSchema uses it.
  namespaceId: namespaceIdSchema,
  ingestConfig: z.object({
    // Only the TEXT ingestion source is accepted here.
    source: z.literal(SourceSyncIngestionSource.TEXT),
    config: z.object({
      name: z.string().optional(),
      text: z.string(),
      // Each metadata value is a string or a list of strings.
      metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
    }),
    chunkConfig: chunkConfigSchema.optional(),
  }),
  tenantId: tenantIdSchema,
})
// Type inference from schemas
// Each parameter type is derived from its schema with z.infer, so the type is
// defined by the schema rather than declared separately.
export type ValidateApiKeyParams = z.infer<typeof validateApiKeySchema>
export type CreateNamespaceParams = z.infer<typeof createNamespaceSchema>
export type ListNamespacesParams = z.infer<typeof listNamespacesSchema>
export type GetNamespaceParams = z.infer<typeof getNamespaceSchema>
export type UpdateNamespaceParams = z.infer<typeof updateNamespaceSchema>
export type DeleteNamespaceParams = z.infer<typeof deleteNamespaceSchema>
export type IngestTextParams = z.infer<typeof ingestTextSchema>
// Both fields are optional lists of selector strings.
const optionalSelectorList = () => z.array(z.string()).optional()
export const ScrapeOptionsSchema = z.object({
  includeSelectors: optionalSelectorList(),
  excludeSelectors: optionalSelectorList(),
})
// The PascalCase exports below are aliases: each one is the very same schema
// object as its camelCase counterpart, not a copy.
// Authentication schemas
export const ValidateApiKeySchema = validateApiKeySchema
// Namespace schemas
export const FileStorageConfigSchema = fileStorageConfigSchema
export const VectorStorageConfigSchema = vectorStorageConfigSchema
export const EmbeddingModelConfigSchema = embeddingModelConfigSchema
export const WebScraperConfigSchema = webScraperConfigSchema
export const CreateNamespaceSchema = createNamespaceSchema
export const ListNamespacesSchema = listNamespacesSchema
export const GetNamespaceSchema = getNamespaceSchema
export const UpdateNamespaceSchema = updateNamespaceSchema
export const DeleteNamespaceSchema = deleteNamespaceSchema
// Ingestion schemas
export const IngestTextSchema = ingestTextSchema
// Parameters for ingesting a file.
export const IngestFileSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  // Must be a Blob instance.
  file: z.instanceof(Blob),
  // Each metadata value is a string or a list of strings.
  metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
  chunkConfig: chunkConfigSchema.optional(),
  tenantId: tenantIdSchema,
})
// Parameters for ingesting a list of URLs.
export const IngestUrlsSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  ingestConfig: z.object({
    // Only the URLS_LIST ingestion source is accepted here.
    source: z.literal(SourceSyncIngestionSource.URLS_LIST),
    config: z.object({
      // Plain strings: URL format is not validated by this schema.
      urls: z.array(z.string()),
      scrapeOptions: ScrapeOptionsSchema.optional(),
      // Each metadata value is a string or a list of strings.
      metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
    }),
    chunkConfig: chunkConfigSchema.optional(),
  }),
  tenantId: tenantIdSchema,
})
// Parameters for ingesting from a sitemap.
export const IngestSitemapSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  ingestConfig: z.object({
    // Only the SITEMAP ingestion source is accepted here.
    source: z.literal(SourceSyncIngestionSource.SITEMAP),
    config: z.object({
      // Plain string: URL format is not validated by this schema.
      url: z.string(),
      maxLinks: z.number().optional(),
      includePaths: z.array(z.string()).optional(),
      excludePaths: z.array(z.string()).optional(),
      // Each metadata value is a string or a list of strings.
      metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
    }),
    chunkConfig: chunkConfigSchema.optional(),
  }),
  tenantId: tenantIdSchema,
})
// Parameters for ingesting a website. Same config fields as the sitemap schema
// plus an optional `maxDepth`.
export const IngestWebsiteSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  ingestConfig: z.object({
    // Only the WEBSITE ingestion source is accepted here.
    source: z.literal(SourceSyncIngestionSource.WEBSITE),
    config: z.object({
      // Plain string: URL format is not validated by this schema.
      url: z.string(),
      maxDepth: z.number().optional(),
      maxLinks: z.number().optional(),
      includePaths: z.array(z.string()).optional(),
      excludePaths: z.array(z.string()).optional(),
      // Each metadata value is a string or a list of strings.
      metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
    }),
    chunkConfig: chunkConfigSchema.optional(),
  }),
  tenantId: tenantIdSchema,
})
// Parameters for ingesting through a Google Drive connection.
export const IngestGoogleDriveSchema = z.object({
  // namespaceIdSchema and apiKeySchema are already optional; no extra
  // `.optional()` is needed on either.
  namespaceId: namespaceIdSchema,
  // Empty strings are rejected with the message below.
  connectionId: z.string().min(1, 'Connection ID is required'),
  folderId: z.string().optional(),
  // Each metadata value is a string or a list of strings.
  metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
  apiKey: apiKeySchema,
  tenantId: tenantIdSchema,
})
// Parameters for ingesting through a Notion connection. Same shape as the
// Google Drive schema, with `pageId` in place of `folderId`.
export const IngestNotionSchema = z.object({
  namespaceId: namespaceIdSchema,
  // Empty strings are rejected with the message below.
  connectionId: z.string().min(1, 'Connection ID is required'),
  pageId: z.string().optional(),
  // Each metadata value is a string or a list of strings.
  metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
  apiKey: apiKeySchema,
  tenantId: tenantIdSchema,
})
// Parameters for ingesting through a connector.
export const IngestConnectorSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  ingestConfig: z.object({
    // Any string is accepted: unlike the other ingest schemas, `source` is not
    // pinned to a SourceSyncIngestionSource literal.
    source: z.string(),
    config: z.object({
      connectionId: z.string(),
      // Each metadata value is a string or a list of strings.
      metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
    }),
    chunkConfig: chunkConfigSchema.optional(),
  }),
  tenantId: tenantIdSchema,
})
// Parameters for looking up an ingest job run; `ingestJobRunId` is required.
export const IngestJobRunStatusSchema = z.object({
  namespaceId: namespaceIdSchema,
  ingestJobRunId: z.string(),
  tenantId: tenantIdSchema,
})
// Document schemas
// Builders for the two field kinds that repeat below. Each call returns a new
// schema instance.
const optionalDocumentStringList = () => z.array(z.string()).optional()
const optionalIncludeFlag = () => z.boolean().optional()
// Every filter is optional. The type, source and status filters are limited to
// values of the corresponding SourceSync enums.
export const DocumentFilterConfigSchema = z.object({
  documentIds: optionalDocumentStringList(),
  documentExternalIds: optionalDocumentStringList(),
  documentConnectionIds: optionalDocumentStringList(),
  documentTypes: z.array(z.nativeEnum(SourceSyncDocumentType)).optional(),
  documentIngestionSources: z
    .array(z.nativeEnum(SourceSyncIngestionSource))
    .optional(),
  documentIngestionStatuses: z
    .array(z.nativeEnum(SourceSyncIngestionStatus))
    .optional(),
  // Each metadata value is a string or a list of strings.
  metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
})
// Optional boolean switches.
export const DocumentIncludeConfigSchema = z.object({
  documents: optionalIncludeFlag(),
  rawFileUrl: optionalIncludeFlag(),
  parsedTextFileUrl: optionalIncludeFlag(),
  statsBySource: optionalIncludeFlag(),
  statsByStatus: optionalIncludeFlag(),
})
// Both paging fields are optional.
export const PaginationSchema = z.object({
  pageSize: z.number().optional(),
  cursor: z.string().optional(),
})
// Literal values accepted by the enum-based filters of FilterConfigSchema.
const filterDocumentTypeValues = [
  'TEXT',
  'URL',
  'FILE',
  'NOTION_DOCUMENT',
  'GOOGLE_DRIVE_DOCUMENT',
  'DROPBOX_DOCUMENT',
  'ONEDRIVE_DOCUMENT',
  'BOX_DOCUMENT',
  'SHAREPOINT_DOCUMENT',
] as const
const filterIngestionSourceValues = [
  'TEXT',
  'URLS_LIST',
  'SITEMAP',
  'WEBSITE',
  'LOCAL_FILE',
  'NOTION',
  'GOOGLE_DRIVE',
  'DROPBOX',
  'ONEDRIVE',
  'BOX',
  'SHAREPOINT',
] as const
const filterIngestionStatusValues = [
  'BACKLOG',
  'QUEUED',
  'QUEUED_FOR_RESYNC',
  'PROCESSING',
  'SUCCESS',
  'FAILED',
  'CANCELLED',
] as const
const optionalFilterIdList = () => z.array(z.string()).optional()
// Every filter is optional.
// NOTE(review): this has the same field names as DocumentFilterConfigSchema but
// uses hard-coded string literals instead of the SourceSync enums, and its
// metadata values are strings only (no string lists). Confirm the divergence is
// intended and that the literals match the enum values.
export const FilterConfigSchema = z.object({
  documentIds: optionalFilterIdList(),
  documentExternalIds: optionalFilterIdList(),
  documentConnectionIds: optionalFilterIdList(),
  documentTypes: z.array(z.enum(filterDocumentTypeValues)).optional(),
  documentIngestionSources: z
    .array(z.enum(filterIngestionSourceValues))
    .optional(),
  documentIngestionStatuses: z
    .array(z.enum(filterIngestionStatusValues))
    .optional(),
  metadata: z.record(z.string()).optional(),
})
// Parameters for fetching documents.
export const FetchDocumentsSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  documentIds: z.array(z.string()).optional(),
  pagination: PaginationSchema.optional(),
  tenantId: tenantIdSchema,
  // The filterConfig object itself is required, although every field inside
  // FilterConfigSchema is optional.
  filterConfig: FilterConfigSchema,
  // Optional boolean switches. This inline object has a `stats` flag that
  // DocumentIncludeConfigSchema does not, so the two are kept separate.
  includeConfig: z
    .object({
      documents: z.boolean().optional(),
      stats: z.boolean().optional(),
      statsBySource: z.boolean().optional(),
      statsByStatus: z.boolean().optional(),
      rawFileUrl: z.boolean().optional(),
      parsedTextFileUrl: z.boolean().optional(),
    })
    .optional(),
})
// Parameters for updating documents. `documents`, `filterConfig` and `data`
// are all required objects.
export const UpdateDocumentsSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  // Per-document entries: an id plus optional string-valued metadata.
  documents: z.array(
    z.object({
      documentId: z.string(),
      metadata: z.record(z.string()).optional(),
    }),
  ),
  tenantId: tenantIdSchema,
  filterConfig: FilterConfigSchema,
  data: z.object({
    metadata: z.record(z.string()).optional(),
    // Operator-style metadata changes. `$set` values may be a string or a list
    // of strings; `$append` and `$remove` values must be lists of strings.
    $metadata: z
      .object({
        $set: z.record(z.union([z.string(), z.array(z.string())])).optional(),
        $append: z.record(z.array(z.string())).optional(),
        $remove: z.record(z.array(z.string())).optional(),
      })
      .optional(),
  }),
})
// Parameters for deleting documents. `filterConfig` is a required object;
// `documentIds` may be omitted.
export const DeleteDocumentsSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  documentIds: z.array(z.string()).optional(),
  tenantId: tenantIdSchema,
  filterConfig: FilterConfigSchema,
})
// Parameters for resyncing documents. Same shape as the delete schema.
export const ResyncDocumentsSchema = z.object({
  namespaceId: namespaceIdSchema,
  documentIds: z.array(z.string()).optional(),
  tenantId: tenantIdSchema,
  filterConfig: FilterConfigSchema,
})
// Search schemas
// Accepts any SourceSyncSearchType value.
export const SearchTypeEnum = z.nativeEnum(SourceSyncSearchType)
// Optional metadata filter; each value is a string or a list of strings.
export const SearchFilterSchema = z.object({
  metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
})
// Parameters for a semantic search. Only `query` is required.
export const SemanticSearchSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  query: z.string(),
  topK: z.number().optional(),
  scoreThreshold: z.number().optional(),
  // Reuses SearchFilterSchema rather than repeating the same object inline.
  filter: SearchFilterSchema.optional(),
  tenantId: tenantIdSchema,
  searchType: SearchTypeEnum.optional(),
})
// Weights for a hybrid search; both numbers are required.
export const HybridConfigSchema = z.object({
  semanticWeight: z.number(),
  keywordWeight: z.number(),
})
// Parameters for a hybrid search. `query` and `hybridConfig` are required.
export const HybridSearchSchema = z.object({
  // namespaceIdSchema is already optional; no extra `.optional()` is needed.
  namespaceId: namespaceIdSchema,
  query: z.string(),
  topK: z.number().optional(),
  scoreThreshold: z.number().optional(),
  // Reuses SearchFilterSchema and HybridConfigSchema rather than repeating the
  // same objects inline.
  filter: SearchFilterSchema.optional(),
  hybridConfig: HybridConfigSchema,
  tenantId: tenantIdSchema,
  searchType: SearchTypeEnum.optional(),
})
// Connection schemas
// Accepts any SourceSyncConnector value.
export const ConnectorEnum = z.nativeEnum(SourceSyncConnector)
// In every schema below, namespaceIdSchema is used as-is because it is already
// optional; no extra `.optional()` is needed.
// Parameters for creating a connection. `name` and `connector` are required.
export const CreateConnectionSchema = z.object({
  namespaceId: namespaceIdSchema,
  name: z.string(),
  connector: ConnectorEnum,
  clientRedirectUrl: z.string().optional(),
  tenantId: tenantIdSchema,
})
// Parameters for listing connections; `connector` is an optional field.
export const ListConnectionsSchema = z.object({
  namespaceId: namespaceIdSchema,
  connector: ConnectorEnum.optional(),
  tenantId: tenantIdSchema,
})
// Parameters for fetching one connection; `connectionId` is required.
export const GetConnectionSchema = z.object({
  namespaceId: namespaceIdSchema,
  connectionId: z.string(),
  tenantId: tenantIdSchema,
})
// Parameters for updating a connection; `connectionId` is required, while
// `name` and `clientRedirectUrl` are optional.
export const UpdateConnectionSchema = z.object({
  namespaceId: namespaceIdSchema,
  connectionId: z.string(),
  name: z.string().optional(),
  clientRedirectUrl: z.string().optional(),
  tenantId: tenantIdSchema,
})
// Parameters for revoking a connection; `connectionId` is required.
export const RevokeConnectionSchema = z.object({
  namespaceId: namespaceIdSchema,
  connectionId: z.string(),
  tenantId: tenantIdSchema,
})
// Utility schemas
// Parameters for fetching the content of a URL. `url` is required and must
// pass Zod's URL format check; `apiKey` and `tenantId` are optional.
export const FetchUrlContentSchema = z.object({
  url: z.string().url(),
  // Authentication might be needed for some SourceSync URLs
  apiKey: apiKeySchema,
  tenantId: tenantIdSchema,
})
// Parameter type derived from the schema above.
export type FetchUrlContentParams = z.infer<typeof FetchUrlContentSchema>