/**
* Calculate the size of a Convex value in bytes.
*
* This matches the Rust implementation in crates/value/src/ which is used
* to compute bandwidth and document size limits.
*
* Size formula by type:
* - Null: 1 byte (type marker)
* - Boolean: 1 byte (type marker, value stored in marker)
* - Int64 (bigint): 9 bytes (1 type marker + 8 bytes for 64-bit value)
* - Float64 (number): 9 bytes (1 type marker + 8 bytes for 64-bit value)
* - String: 2 + string.length bytes (1 type marker + UTF-8 bytes + 1 null terminator)
* - Bytes (ArrayBuffer): 2 + bytes.length bytes (1 type marker + bytes + 1 terminator)
* - Array: 2 + sum(element sizes) bytes (1 type marker + elements + 1 terminator)
* - Object: 2 + sum(field_name.length + 1 + value.size) bytes
* (1 type marker + (field_name + null terminator + value)* + 1 terminator)
*
* For documents with system fields (_id and _creationTime), the size includes:
* - _id field: 4 bytes (field name + null) + string size (2 + 31+ chars)
* - _creationTime field: 14 bytes (field name + null) + 9 bytes (Float64)
*
* @module
*/
import type { Value } from "./value.js";
import { isSimpleObject } from "../common/index.js";
/**
* Calculate the size in bytes of a Convex value.
*
* This matches how Convex calculates document size for bandwidth tracking
* and size limit enforcement.
*
* @param value - A Convex value to measure
* @returns The size in bytes
*
* @public
*/
export function getConvexSize(value: Value): number {
if (value === null) {
return 1;
}
if (typeof value === "boolean") {
return 1;
}
if (typeof value === "bigint") {
// Int64: 1 byte type marker + 8 bytes value
return 9;
}
if (typeof value === "number") {
// Float64: 1 byte type marker + 8 bytes value
return 9;
}
if (typeof value === "string") {
// String: 1 byte type marker + UTF-8 bytes + 1 byte null terminator
// Use TextEncoder to get the actual UTF-8 byte length
return 2 + getUtf8ByteLength(value);
}
if (value instanceof ArrayBuffer) {
// Bytes: 1 byte type marker + byte array length + 1 byte terminator
return 2 + value.byteLength;
}
if (Array.isArray(value)) {
// Array: 1 byte type marker + sum of element sizes + 1 byte terminator
let size = 2; // marker + terminator
for (const element of value) {
size += getConvexSize(element);
}
return size;
}
if (isSimpleObject(value)) {
// Object: 1 byte type marker + sum(field_name + null + value) + 1 byte terminator
let size = 2; // marker + terminator
for (const [key, val] of Object.entries(value)) {
if (val !== undefined) {
// field name length + null terminator + value size
size += getUtf8ByteLength(key) + 1 + getConvexSize(val);
}
}
return size;
}
throw new Error(`Unsupported value type: ${typeof value}`);
}
/**
* Threshold above which we use TextEncoder instead of manual counting.
* For short strings, the JS loop is ~15x faster due to avoiding allocation.
* For long strings (500+ chars), TextEncoder's native implementation wins.
*/
const UTF8_LENGTH_THRESHOLD = 500;
/**
* Get the UTF-8 byte length of a string.
*
* For short strings, counts bytes directly from UTF-16 code units to avoid allocation.
* For long strings, uses native TextEncoder which is faster despite allocation.
*
* @internal
*/
function getUtf8ByteLength(str: string): number {
// For long strings, native TextEncoder is faster despite allocation
if (str.length > UTF8_LENGTH_THRESHOLD) {
return new TextEncoder().encode(str).length;
}
// For short strings, avoid allocation overhead with manual counting
let bytes = 0;
for (let i = 0; i < str.length; i++) {
const code = str.charCodeAt(i);
if (code < 0x80) {
// ASCII: 1 byte
bytes += 1;
} else if (code < 0x800) {
// 2-byte UTF-8
bytes += 2;
} else if (code >= 0xd800 && code <= 0xdbff) {
// High surrogate - part of a surrogate pair encoding a code point >= U+10000
// These are encoded as 4 bytes in UTF-8
bytes += 4;
i++; // Skip the low surrogate
} else {
// 3-byte UTF-8 (includes low surrogates if encountered alone, which is invalid but handled)
bytes += 3;
}
}
return bytes;
}
// Note: The exact _id size varies based on the table number:
// - Tables 1-127: 31 char ID
// - Tables 128-16383: 32 char ID
// We use 32 chars as a typical value (tables 128-16383).
export const SYSTEM_FIELD_ID_ESTIMATE = 38;
// _creationTime: field name (14) + Float64 (9)
export const SYSTEM_FIELD_CREATION_TIME_SIZE = 23;
/**
* Calculate the size of a document including system fields.
*
* If your value already has _id and _creationTime fields, this will count them
* in the normal size calculation. Otherwise, it adds the constant overhead
* for system fields.
*
* @param value - A Convex object (document body)
* @param options - Options for size calculation
* @returns The size in bytes
*
* @public
*/
export function getDocumentSize(
value: Record<string, Value>,
options?: {
/**
* @internal
* Length of the _id field if it is missing. Defaults to standard length.
*/
customIdLength?: number;
},
): number {
const baseSize = getConvexSize(value);
// Check if system fields are already present
const hasId = "_id" in value && value["_id"] !== undefined;
const hasCreationTime =
"_creationTime" in value && value["_creationTime"] !== undefined;
if (hasId && hasCreationTime) {
return baseSize;
}
// Add size for missing system fields
let additionalSize = 0;
if (!hasId) {
if (options?.customIdLength) {
// 4 bytes for _id field name + 2 bytes for string overhead.
additionalSize += options.customIdLength + 6;
} else {
additionalSize += SYSTEM_FIELD_ID_ESTIMATE;
}
}
if (!hasCreationTime) {
additionalSize += SYSTEM_FIELD_CREATION_TIME_SIZE;
}
// The base size includes 2 bytes for object markers, but we need to account
// for adding new fields to an existing object structure
return baseSize + additionalSize;
}