/**
* DatabaseValidator - Validates data against Notion database schemas
*
* This validator checks data before insertion to catch errors early and
* provide clear feedback to users.
*
* Can work in two modes:
* 1. With Notion client - fetches schema directly from Notion API
* 2. Schema-only mode - validates against provided schema (recommended for MCP)
*/
import type { Client } from '@notionhq/client'
export interface ValidationError {
field: string
error: string
expected?: string
received?: string
}
export interface ValidationResult {
valid: boolean
errors: ValidationError[]
warnings: string[]
schema?: any // Optional: return schema for reference
}
export interface DatabaseSchema {
database_id: string
properties: Record<string, any>
title?: any
url?: string
}
export class DatabaseValidator {
private notion?: Client
private schemaCache: Map<string, { schema: any; timestamp: number }> = new Map()
private CACHE_TTL = 5 * 60 * 1000 // 5 minutes
constructor(notionOrSchema?: Client) {
// If a Notion client is provided, use it for fetching schemas
if (notionOrSchema) {
this.notion = notionOrSchema
}
}
/**
* Validate data against a provided schema (recommended for MCP)
*/
validateWithSchema(
schema: DatabaseSchema,
rowData: Record<string, any>
): ValidationResult {
const errors: ValidationError[] = []
const warnings: string[] = []
try {
const schemaProperties = schema.properties
// 1. Check for missing required fields
const requiredFields = this.getRequiredFields(schemaProperties)
for (const field of requiredFields) {
if (!(field in rowData) || rowData[field] === null || rowData[field] === undefined) {
errors.push({
field,
error: 'Required field is missing',
expected: 'non-null value',
received: 'undefined'
})
}
}
// 2. Validate each provided field
for (const [key, value] of Object.entries(rowData)) {
const schemaProp = schemaProperties[key]
// Check if field exists in schema
if (!schemaProp) {
warnings.push(`Property "${key}" not found in database schema - will be ignored`)
continue
}
// Validate based on property type
const fieldError = this.validateField(key, value, schemaProp)
if (fieldError) {
errors.push(fieldError)
}
}
// 3. Check data size limits
const dataSize = JSON.stringify(rowData).length
if (dataSize > 1_000_000) {
errors.push({
field: '_data',
error: 'Data payload too large',
expected: '< 1MB',
received: `${(dataSize / 1_000_000).toFixed(2)}MB`
})
}
// 4. Additional validation rules
this.validateBusinessRules(rowData, schemaProperties, warnings)
return {
valid: errors.length === 0,
errors,
warnings,
schema: Object.keys(schemaProperties).reduce((acc, key) => {
acc[key] = {
type: schemaProperties[key].type,
required: requiredFields.includes(key)
}
return acc
}, {} as any)
}
} catch (error) {
return {
valid: false,
errors: [{
field: '_system',
error: `Validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`
}],
warnings: []
}
}
}
/**
* Validate data against a Notion database schema (legacy method, requires Notion client)
*/
async validateInsertData(
databaseId: string,
rowData: Record<string, any>
): Promise<ValidationResult> {
if (!this.notion) {
return {
valid: false,
errors: [{
field: '_system',
error: 'Notion client not provided. Use validateWithSchema() instead or provide a Notion client.'
}],
warnings: []
}
}
try {
// 1. Get database schema (with caching)
const database = await this.getDatabaseSchema(databaseId)
const schemaProperties = database.properties
// 2. Use the schema-based validation
return this.validateWithSchema({
database_id: databaseId,
properties: schemaProperties
}, rowData)
} catch (error) {
return {
valid: false,
errors: [{
field: '_system',
error: `Validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`
}],
warnings: []
}
}
}
/**
* Get database schema with caching (only works with Notion client)
*/
private async getDatabaseSchema(databaseId: string): Promise<any> {
if (!this.notion) {
throw new Error('Notion client not available')
}
const cached = this.schemaCache.get(databaseId)
const now = Date.now()
if (cached && (now - cached.timestamp) < this.CACHE_TTL) {
return cached.schema
}
const database = await this.notion.databases.retrieve({
database_id: databaseId
})
this.schemaCache.set(databaseId, {
schema: database,
timestamp: now
})
return database
}
/**
* Get list of required fields from schema
*/
private getRequiredFields(schemaProperties: any): string[] {
const required: string[] = []
for (const [key, prop] of Object.entries(schemaProperties) as any) {
// Title fields are always required in Notion
if (prop.type === 'title') {
required.push(key)
}
}
return required
}
/**
* Validate individual field based on its type
*/
private validateField(
key: string,
value: any,
schemaProp: any
): ValidationError | null {
const propType = schemaProp.type
switch (propType) {
case 'title':
case 'rich_text':
if (typeof value !== 'string') {
return {
field: key,
error: 'Invalid type',
expected: 'string',
received: typeof value
}
}
if (value.length > 2000) {
return {
field: key,
error: 'Text too long',
expected: '≤ 2000 characters',
received: `${value.length} characters`
}
}
break
case 'number':
if (typeof value !== 'number' || isNaN(value)) {
return {
field: key,
error: 'Invalid type',
expected: 'number',
received: typeof value
}
}
break
case 'checkbox':
if (typeof value !== 'boolean') {
return {
field: key,
error: 'Invalid type',
expected: 'boolean',
received: typeof value
}
}
break
case 'select':
if (typeof value !== 'string') {
return {
field: key,
error: 'Invalid type',
expected: 'string',
received: typeof value
}
}
// Validate against allowed options
const selectOptions = schemaProp.select?.options || []
const validOptions = selectOptions.map((opt: any) => opt.name)
if (validOptions.length > 0 && !validOptions.includes(value)) {
return {
field: key,
error: 'Invalid select option',
expected: `One of: ${validOptions.join(', ')}`,
received: value
}
}
break
case 'multi_select':
if (!Array.isArray(value)) {
return {
field: key,
error: 'Invalid type',
expected: 'array of strings',
received: typeof value
}
}
// Validate each option
const multiSelectOptions = schemaProp.multi_select?.options || []
const validMultiOptions = multiSelectOptions.map((opt: any) => opt.name)
for (const item of value) {
if (typeof item !== 'string') {
return {
field: key,
error: 'Invalid multi-select item type',
expected: 'array of strings',
received: `array containing ${typeof item}`
}
}
if (validMultiOptions.length > 0 && !validMultiOptions.includes(item)) {
return {
field: key,
error: 'Invalid multi-select option',
expected: `Array of: ${validMultiOptions.join(', ')}`,
received: JSON.stringify(value)
}
}
}
break
case 'email':
if (typeof value !== 'string' || !this.isValidEmail(value)) {
return {
field: key,
error: 'Invalid email format',
expected: 'valid email address',
received: String(value)
}
}
break
case 'url':
if (typeof value !== 'string' || !this.isValidUrl(value)) {
return {
field: key,
error: 'Invalid URL format',
expected: 'valid URL (e.g., https://example.com)',
received: String(value)
}
}
break
case 'date':
if (typeof value !== 'string' || !this.isValidDate(value)) {
return {
field: key,
error: 'Invalid date format',
expected: 'ISO 8601 date (YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss)',
received: String(value)
}
}
break
case 'phone_number':
if (typeof value !== 'string') {
return {
field: key,
error: 'Invalid type',
expected: 'string',
received: typeof value
}
}
break
// Read-only fields that shouldn't be set
case 'created_time':
case 'created_by':
case 'last_edited_time':
case 'last_edited_by':
case 'formula':
case 'rollup':
return {
field: key,
error: 'Field is read-only and cannot be set',
expected: 'omit this field'
}
case 'relation':
if (!Array.isArray(value)) {
return {
field: key,
error: 'Invalid type',
expected: 'array of page IDs',
received: typeof value
}
}
break
case 'people':
if (!Array.isArray(value)) {
return {
field: key,
error: 'Invalid type',
expected: 'array of user IDs',
received: typeof value
}
}
break
case 'files':
if (!Array.isArray(value)) {
return {
field: key,
error: 'Invalid type',
expected: 'array of file URLs or objects',
received: typeof value
}
}
break
default:
console.warn(`Unknown property type: ${propType} for field: ${key}`)
}
return null
}
/**
* Additional business rule validations
*/
private validateBusinessRules(
rowData: Record<string, any>,
schemaProperties: any,
warnings: string[]
): void {
// Check if at least one title field is present
const titleFields = Object.entries(schemaProperties)
.filter(([_, prop]: [string, any]) => prop.type === 'title')
.map(([key]) => key)
const hasTitleValue = titleFields.some(
field => field in rowData && rowData[field] && String(rowData[field]).trim().length > 0
)
if (!hasTitleValue) {
warnings.push('No title field with value found - entry may be difficult to identify in Notion')
}
// Check for common naming patterns
const commonTitleFields = ['Title', 'Name', 'title', 'name']
const hasCommonTitle = commonTitleFields.some(field =>
field in rowData && rowData[field] && String(rowData[field]).trim().length > 0
)
if (!hasCommonTitle && titleFields.length > 0) {
warnings.push(`Consider using one of these title fields: ${titleFields.join(', ')}`)
}
}
// Validation helpers
private isValidEmail(email: string): boolean {
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
return emailRegex.test(email)
}
private isValidUrl(url: string): boolean {
try {
new URL(url)
return true
} catch {
return false
}
}
private isValidDate(date: string): boolean {
// ISO 8601 format: YYYY-MM-DD or full datetime
const dateRegex = /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{3})?Z?)?$/
if (!dateRegex.test(date)) return false
const parsed = new Date(date)
return !isNaN(parsed.getTime())
}
/**
* Clear schema cache
*/
clearCache(): void {
this.schemaCache.clear()
}
}