SourceSync.ai MCP Server
by scmdr
- src
import { z } from 'zod'
import {
SourceSyncFileStorageType,
SourceSyncVectorStorageProvider,
SourceSyncEmbeddingModelProvider,
SourceSyncOpenAIEmbeddingModel,
SourceSyncCohereEmbeddingModel,
SourceSyncJinaEmbeddingModel,
SourceSyncWebScraperProvider,
SourceSyncIngestionSource,
SourceSyncChunkConfig,
SourceSyncDocumentType,
SourceSyncIngestionStatus,
SourceSyncConnector,
SourceSyncSearchType,
} from './sourcesync.types.js'
// Common schemas
export const apiKeySchema = z.string().optional()
export const tenantIdSchema = z.string().optional()
export const namespaceIdSchema = z.string().optional()
// File storage config schema
export const fileStorageConfigSchema = z.object({
type: z.nativeEnum(SourceSyncFileStorageType),
bucket: z.string(),
region: z.string(),
endpoint: z.string(),
credentials: z.object({
accessKeyId: z.string(),
secretAccessKey: z.string(),
}),
})
// Vector storage config schema
export const vectorStorageConfigSchema = z.object({
provider: z.nativeEnum(SourceSyncVectorStorageProvider),
apiKey: z.string(),
indexHost: z.string(),
})
// Embedding model config schema
export const embeddingModelConfigSchema = z.union([
z.object({
provider: z.literal(SourceSyncEmbeddingModelProvider.OPENAI),
model: z.nativeEnum(SourceSyncOpenAIEmbeddingModel),
apiKey: z.string(),
}),
z.object({
provider: z.literal(SourceSyncEmbeddingModelProvider.COHERE),
model: z.nativeEnum(SourceSyncCohereEmbeddingModel),
apiKey: z.string(),
}),
z.object({
provider: z.literal(SourceSyncEmbeddingModelProvider.JINA),
model: z.nativeEnum(SourceSyncJinaEmbeddingModel),
apiKey: z.string(),
}),
])
// Web scraper config schema
export const webScraperConfigSchema = z.object({
provider: z.nativeEnum(SourceSyncWebScraperProvider),
apiKey: z.string(),
})
// Connector config schemas
export const notionConfigSchema = z.object({
clientId: z.string(),
clientSecret: z.string(),
})
export const googleDriveConfigSchema = z.object({
clientId: z.string(),
clientSecret: z.string(),
apiKey: z.string(),
})
export const dropboxConfigSchema = z.object({
clientId: z.string(),
clientSecret: z.string(),
})
export const onedriveConfigSchema = z.object({
clientId: z.string(),
clientSecret: z.string(),
})
export const boxConfigSchema = z.object({
clientId: z.string(),
clientSecret: z.string(),
})
export const sharepointConfigSchema = z.object({
clientId: z.string(),
clientSecret: z.string(),
})
// Chunk config schema
export const chunkConfigSchema = z
.object({
chunkSize: z.number(),
chunkOverlap: z.number(),
})
.describe(
'Optional Chunk config. When not passed, default chunk config will be used.',
)
// Tool schemas
export const validateApiKeySchema = z.object({})
export const createNamespaceSchema = z.object({
name: z.string(),
fileStorageConfig: fileStorageConfigSchema,
vectorStorageConfig: vectorStorageConfigSchema,
embeddingModelConfig: embeddingModelConfigSchema,
webScraperConfig: webScraperConfigSchema.optional(),
tenantId: tenantIdSchema,
})
export const listNamespacesSchema = z.object({
tenantId: tenantIdSchema,
})
export const getNamespaceSchema = z.object({
namespaceId: namespaceIdSchema,
tenantId: tenantIdSchema,
})
export const updateNamespaceSchema = z.object({
namespaceId: namespaceIdSchema,
fileStorageConfig: fileStorageConfigSchema.optional(),
vectorStorageConfig: vectorStorageConfigSchema.optional(),
embeddingModelConfig: embeddingModelConfigSchema.optional(),
webScraperConfig: webScraperConfigSchema.optional(),
notionConfig: notionConfigSchema.optional(),
googleDriveConfig: googleDriveConfigSchema.optional(),
dropboxConfig: dropboxConfigSchema.optional(),
onedriveConfig: onedriveConfigSchema.optional(),
boxConfig: boxConfigSchema.optional(),
sharepointConfig: sharepointConfigSchema.optional(),
tenantId: tenantIdSchema,
})
export const deleteNamespaceSchema = z.object({
namespaceId: namespaceIdSchema,
tenantId: tenantIdSchema,
})
export const ingestTextSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
ingestConfig: z.object({
source: z.literal(SourceSyncIngestionSource.TEXT),
config: z.object({
name: z.string().optional(),
text: z.string(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
}),
chunkConfig: chunkConfigSchema.optional(),
}),
tenantId: tenantIdSchema,
})
// Type inference from schemas
export type ValidateApiKeyParams = z.infer<typeof validateApiKeySchema>
export type CreateNamespaceParams = z.infer<typeof createNamespaceSchema>
export type ListNamespacesParams = z.infer<typeof listNamespacesSchema>
export type GetNamespaceParams = z.infer<typeof getNamespaceSchema>
export type UpdateNamespaceParams = z.infer<typeof updateNamespaceSchema>
export type DeleteNamespaceParams = z.infer<typeof deleteNamespaceSchema>
export type IngestTextParams = z.infer<typeof ingestTextSchema>
export const ScrapeOptionsSchema = z.object({
includeSelectors: z.array(z.string()).optional(),
excludeSelectors: z.array(z.string()).optional(),
})
// Authentication schemas
export const ValidateApiKeySchema = validateApiKeySchema
// Namespace schemas
export const FileStorageConfigSchema = fileStorageConfigSchema
export const VectorStorageConfigSchema = vectorStorageConfigSchema
export const EmbeddingModelConfigSchema = embeddingModelConfigSchema
export const WebScraperConfigSchema = webScraperConfigSchema
export const CreateNamespaceSchema = createNamespaceSchema
export const ListNamespacesSchema = listNamespacesSchema
export const GetNamespaceSchema = getNamespaceSchema
export const UpdateNamespaceSchema = updateNamespaceSchema
export const DeleteNamespaceSchema = deleteNamespaceSchema
// Ingestion schemas
export const IngestTextSchema = ingestTextSchema
export const IngestFileSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
file: z.instanceof(Blob),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
chunkConfig: chunkConfigSchema.optional(),
tenantId: tenantIdSchema,
})
export const IngestUrlsSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
ingestConfig: z.object({
source: z.literal(SourceSyncIngestionSource.URLS_LIST),
config: z.object({
urls: z.array(z.string()),
scrapeOptions: ScrapeOptionsSchema.optional(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
}),
chunkConfig: chunkConfigSchema.optional(),
}),
tenantId: tenantIdSchema,
})
export const IngestSitemapSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
ingestConfig: z.object({
source: z.literal(SourceSyncIngestionSource.SITEMAP),
config: z.object({
url: z.string(),
maxLinks: z.number().optional(),
includePaths: z.array(z.string()).optional(),
excludePaths: z.array(z.string()).optional(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
}),
chunkConfig: chunkConfigSchema.optional(),
}),
tenantId: tenantIdSchema,
})
export const IngestWebsiteSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
ingestConfig: z.object({
source: z.literal(SourceSyncIngestionSource.WEBSITE),
config: z.object({
url: z.string(),
maxDepth: z.number().optional(),
maxLinks: z.number().optional(),
includePaths: z.array(z.string()).optional(),
excludePaths: z.array(z.string()).optional(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
}),
chunkConfig: chunkConfigSchema.optional(),
}),
tenantId: tenantIdSchema,
})
export const IngestGoogleDriveSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
connectionId: z.string().min(1, 'Connection ID is required'),
folderId: z.string().optional(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
apiKey: apiKeySchema.optional(),
tenantId: tenantIdSchema,
})
export const IngestNotionSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
connectionId: z.string().min(1, 'Connection ID is required'),
pageId: z.string().optional(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
apiKey: apiKeySchema.optional(),
tenantId: tenantIdSchema,
})
export const IngestConnectorSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
ingestConfig: z.object({
source: z.string(),
config: z.object({
connectionId: z.string(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
}),
chunkConfig: chunkConfigSchema.optional(),
}),
tenantId: tenantIdSchema,
})
export const IngestJobRunStatusSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
ingestJobRunId: z.string(),
tenantId: tenantIdSchema,
})
// Document schemas
export const DocumentFilterConfigSchema = z.object({
documentIds: z.array(z.string()).optional(),
documentExternalIds: z.array(z.string()).optional(),
documentConnectionIds: z.array(z.string()).optional(),
documentTypes: z.array(z.nativeEnum(SourceSyncDocumentType)).optional(),
documentIngestionSources: z
.array(z.nativeEnum(SourceSyncIngestionSource))
.optional(),
documentIngestionStatuses: z
.array(z.nativeEnum(SourceSyncIngestionStatus))
.optional(),
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
})
export const DocumentIncludeConfigSchema = z.object({
documents: z.boolean().optional(),
rawFileUrl: z.boolean().optional(),
parsedTextFileUrl: z.boolean().optional(),
statsBySource: z.boolean().optional(),
statsByStatus: z.boolean().optional(),
})
export const PaginationSchema = z.object({
pageSize: z.number().optional(),
cursor: z.string().optional(),
})
export const FilterConfigSchema = z.object({
documentIds: z.array(z.string()).optional(),
documentExternalIds: z.array(z.string()).optional(),
documentConnectionIds: z.array(z.string()).optional(),
documentTypes: z
.array(
z.enum([
'TEXT',
'URL',
'FILE',
'NOTION_DOCUMENT',
'GOOGLE_DRIVE_DOCUMENT',
'DROPBOX_DOCUMENT',
'ONEDRIVE_DOCUMENT',
'BOX_DOCUMENT',
'SHAREPOINT_DOCUMENT',
]),
)
.optional(),
documentIngestionSources: z
.array(
z.enum([
'TEXT',
'URLS_LIST',
'SITEMAP',
'WEBSITE',
'LOCAL_FILE',
'NOTION',
'GOOGLE_DRIVE',
'DROPBOX',
'ONEDRIVE',
'BOX',
'SHAREPOINT',
]),
)
.optional(),
documentIngestionStatuses: z
.array(
z.enum([
'BACKLOG',
'QUEUED',
'QUEUED_FOR_RESYNC',
'PROCESSING',
'SUCCESS',
'FAILED',
'CANCELLED',
]),
)
.optional(),
metadata: z.record(z.string()).optional(),
})
export const FetchDocumentsSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
documentIds: z.array(z.string()).optional(),
pagination: PaginationSchema.optional(),
tenantId: tenantIdSchema,
filterConfig: FilterConfigSchema,
includeConfig: z
.object({
documents: z.boolean().optional(),
stats: z.boolean().optional(),
statsBySource: z.boolean().optional(),
statsByStatus: z.boolean().optional(),
rawFileUrl: z.boolean().optional(),
parsedTextFileUrl: z.boolean().optional(),
})
.optional(),
})
export const UpdateDocumentsSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
documents: z.array(
z.object({
documentId: z.string(),
metadata: z.record(z.string()).optional(),
}),
),
tenantId: tenantIdSchema,
filterConfig: FilterConfigSchema,
data: z.object({
metadata: z.record(z.string()).optional(),
$metadata: z
.object({
$set: z.record(z.union([z.string(), z.array(z.string())])).optional(),
$append: z.record(z.array(z.string())).optional(),
$remove: z.record(z.array(z.string())).optional(),
})
.optional(),
}),
})
export const DeleteDocumentsSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
documentIds: z.array(z.string()).optional(),
tenantId: tenantIdSchema,
filterConfig: FilterConfigSchema,
})
export const ResyncDocumentsSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
documentIds: z.array(z.string()).optional(),
tenantId: tenantIdSchema,
filterConfig: FilterConfigSchema,
})
// Search schemas
export const SearchTypeEnum = z.nativeEnum(SourceSyncSearchType)
export const SearchFilterSchema = z.object({
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
})
export const SemanticSearchSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
query: z.string(),
topK: z.number().optional(),
scoreThreshold: z.number().optional(),
filter: z
.object({
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
})
.optional(),
tenantId: tenantIdSchema,
searchType: SearchTypeEnum.optional(),
})
export const HybridConfigSchema = z.object({
semanticWeight: z.number(),
keywordWeight: z.number(),
})
export const HybridSearchSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
query: z.string(),
topK: z.number().optional(),
scoreThreshold: z.number().optional(),
filter: z
.object({
metadata: z.record(z.union([z.string(), z.array(z.string())])).optional(),
})
.optional(),
hybridConfig: z.object({
semanticWeight: z.number(),
keywordWeight: z.number(),
}),
tenantId: tenantIdSchema,
searchType: SearchTypeEnum.optional(),
})
// Connection schemas
export const ConnectorEnum = z.nativeEnum(SourceSyncConnector)
export const CreateConnectionSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
name: z.string(),
connector: ConnectorEnum,
clientRedirectUrl: z.string().optional(),
tenantId: tenantIdSchema,
})
export const ListConnectionsSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
connector: ConnectorEnum.optional(),
tenantId: tenantIdSchema,
})
export const GetConnectionSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
connectionId: z.string(),
tenantId: tenantIdSchema,
})
export const UpdateConnectionSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
connectionId: z.string(),
name: z.string().optional(),
clientRedirectUrl: z.string().optional(),
tenantId: tenantIdSchema,
})
export const RevokeConnectionSchema = z.object({
namespaceId: namespaceIdSchema.optional(),
connectionId: z.string(),
tenantId: tenantIdSchema,
})
// Utility schemas
export const FetchUrlContentSchema = z.object({
url: z.string().url(),
// Authentication might be needed for some SourceSync URLs
apiKey: apiKeySchema,
tenantId: tenantIdSchema,
})
export type FetchUrlContentParams = z.infer<typeof FetchUrlContentSchema>