/**
* Extensible schema builder for custom fields
*/
import type { Schema } from '@orama/orama';
import { baseSchema, createEmptyDocument, type BaseDocument } from './base-schema.js';
import type {
ExtractedType,
ExtractedMethod,
ExtractedFunction,
PropertyInfo,
} from '../extractor/types.js';
import type { CachedScript } from '../script/types.js';
import { splitCamelCase } from '../extractor/ast-parser.js';
/**
* Extension schema type - additional fields to add to base schema.
*
* Maps each extra field name to one of the field-type tags accepted here:
* 'string', 'enum', 'number' or 'boolean'.
*/
export type ExtensionSchema = Record<string, 'string' | 'enum' | 'number' | 'boolean'>;
/**
* Options for building an extended schema.
*
* @typeParam T - Shape of the extension fields; must satisfy ExtensionSchema.
*/
export interface SchemaBuilderOptions<T extends ExtensionSchema> {
/** Additional fields to add to the base schema */
extensions?: T;
/**
* Tokenizer options.
*
* NOTE(review): `buildSchema` in this file does not read this field;
* presumably it is consumed where the search index itself is created —
* confirm against callers.
*/
tokenizerOptions?: {
stemming?: boolean;
stemmerSkipProperties?: string[];
};
}
/**
 * Build a schema from the base schema plus optional extension fields.
 *
 * A fresh object is returned on every call, including when no extensions are
 * supplied, so callers can never mutate the shared module-level `baseSchema`
 * through the result. Extension fields are spread after the base fields, so
 * an extension that reuses a base field name replaces that field's entry.
 *
 * @param options - Builder options; only `extensions` is read here.
 * @returns A new object containing the base fields and any extension fields.
 */
export function buildSchema<T extends ExtensionSchema = Record<string, never>>(
  options?: SchemaBuilderOptions<T>
): typeof baseSchema & T {
  // Spreading `undefined` is a no-op, so this single expression covers the
  // "no options" and "no extensions" cases without aliasing `baseSchema`.
  return {
    ...baseSchema,
    ...options?.extensions,
  } as typeof baseSchema & T;
}
/**
 * Convert an extracted type into a search-index document.
 *
 * The document starts from `createEmptyDocument()` and overrides the
 * type-specific fields. Its id has the form `type:<library>:<name>`, and the
 * type's kind is stored both as `category` and as `typeKind`.
 *
 * @param type - The extracted type to index.
 * @returns A document whose `documentType` is `'type'`.
 */
export function buildTypeDocument(type: ExtractedType): BaseDocument {
  const { name, kind, description, library, properties, nestedTypes } = type;

  // Free-text search field: name, the result of splitCamelCase(name), kind,
  // description, every property name and every nested type name.
  const propertyNames = properties.map((property) => property.name);
  const searchTokens = [
    name,
    splitCamelCase(name),
    kind,
    description,
    ...propertyNames,
    ...nestedTypes,
  ].join(' ');

  const typeDefinition = formatTypeDefinition(type);

  const document: BaseDocument = {
    ...createEmptyDocument(),
    id: `type:${library}:${name}`,
    documentType: 'type',
    name,
    description,
    searchTokens,
    library,
    category: kind,
    // Properties are stored as a JSON string rather than a nested object.
    properties: JSON.stringify(properties),
    typeDefinition,
    nestedTypes: nestedTypes.join(', '),
    typeKind: kind,
  };
  return document;
}
/**
 * Convert an extracted method into a search-index document.
 *
 * The document starts from `createEmptyDocument()` and overrides the
 * method-specific fields. Its id has the form
 * `method:<library>:<className>:<name>`; the category is inferred from the
 * method name and the signature is rendered from the method's metadata.
 *
 * @param method - The extracted method to index.
 * @returns A document whose `documentType` is `'method'`.
 */
export function buildMethodDocument(method: ExtractedMethod): BaseDocument {
  const { name, className, description, library, parameters, returnType } = method;

  // Free-text search field: name, the result of splitCamelCase(name), owning
  // class, description, every parameter name and the return type.
  const parameterNames = parameters.map((parameter) => parameter.name);
  const searchTokens = [
    name,
    splitCamelCase(name),
    className,
    description,
    ...parameterNames,
    returnType,
  ].join(' ');

  const signature = buildMethodSignature(method);
  const category = inferMethodCategory(name);

  const document: BaseDocument = {
    ...createEmptyDocument(),
    id: `method:${library}:${className}:${name}`,
    documentType: 'method',
    name,
    description,
    searchTokens,
    library,
    category,
    // Parameters are stored as a JSON string rather than a nested object.
    parameters: JSON.stringify(parameters),
    returnType,
    // A missing (or empty) return type definition is stored as ''.
    returnTypeDefinition: method.returnTypeDefinition || '',
    signature,
    className,
  };
  return document;
}
/**
 * Convert an extracted free function into a search-index document.
 *
 * The document starts from `createEmptyDocument()` and overrides the
 * function-specific fields. Its id has the form `function:<library>:<name>`;
 * the category is inferred from the function name and its library, and the
 * signature is copied from the extracted function as-is.
 *
 * @param func - The extracted function to index.
 * @returns A document whose `documentType` is `'function'`.
 */
export function buildFunctionDocument(func: ExtractedFunction): BaseDocument {
  const { name, description, library, parameters, returnType } = func;

  // Free-text search field: name, the result of splitCamelCase(name),
  // description, every parameter name and the return type.
  const parameterNames = parameters.map((parameter) => parameter.name);
  const searchTokens = [
    name,
    splitCamelCase(name),
    description,
    ...parameterNames,
    returnType,
  ].join(' ');

  const category = inferFunctionCategory(name, library);

  const document: BaseDocument = {
    ...createEmptyDocument(),
    id: `function:${library}:${name}`,
    documentType: 'function',
    name,
    description,
    searchTokens,
    library,
    category,
    // Parameters are stored as a JSON string rather than a nested object.
    parameters: JSON.stringify(parameters),
    returnType,
    signature: func.signature,
  };
  return document;
}
/**
 * Convert a cached script into a search-index document.
 *
 * Filenames matching `script-<4d>-<2d>-<2d>T<2d>-<2d>-<2d>-<hex>.ts` are
 * treated as auto-generated: their filename contributes no search tokens and
 * the display name falls back to the first API class (lower-cased) or
 * `'sandbox-script'`. Any other filename is used, minus its `.ts` suffix, as
 * the display name.
 *
 * @param script - The cached script to index.
 * @returns A document whose `documentType` and `category` are `'script'`.
 */
export function buildScriptDocument(script: CachedScript): BaseDocument {
  const autoGeneratedPattern = /^script-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-[a-f0-9]+\.ts$/;
  const isAutoGenerated = autoGeneratedPattern.test(script.filename);

  const baseName = script.filename.replace(/\.ts$/, '');
  const hasApiClass = script.apiClasses.length > 0;

  // Only manually named scripts contribute filename tokens; dashes and
  // underscores become spaces so each word is searchable on its own.
  const filenameTokens = isAutoGenerated ? [] : [baseName.replace(/[-_]/g, ' ')];
  const searchTokens = [
    'script',
    ...filenameTokens,
    ...script.resourceTypes,
    script.description,
    ...script.apiClasses,
    ...script.keywords,
  ].join(' ');

  let displayName: string;
  if (!isAutoGenerated) {
    displayName = baseName;
  } else if (hasApiClass) {
    displayName = script.apiClasses[0]!.toLowerCase();
  } else {
    displayName = 'sandbox-script';
  }

  // The first API class stands in for the library; scripts without any API
  // class are filed under 'CachedScript'.
  const library = hasApiClass ? script.apiClasses[0]! : 'CachedScript';

  const document: BaseDocument = {
    ...createEmptyDocument(),
    id: `script:${script.filename}`,
    documentType: 'script',
    name: displayName,
    description: script.description,
    searchTokens,
    library,
    category: 'script',
    filePath: script.filePath,
    keywords: script.keywords.join(' '),
  };
  return document;
}
/**
 * Build a search-index document from any supported item.
 *
 * The item is classified by which property it carries, checked in this
 * order: `kind` means a type, `className` means a method, `signature` means a
 * function; anything else is treated as a cached script.
 *
 * @param item - An extracted type, method or function, or a cached script.
 * @returns The document produced by the matching builder.
 */
export function buildDocument(
  item: ExtractedType | ExtractedMethod | ExtractedFunction | CachedScript
): BaseDocument {
  return 'kind' in item
    ? buildTypeDocument(item as ExtractedType)
    : 'className' in item
      ? buildMethodDocument(item as ExtractedMethod)
      : 'signature' in item
        ? buildFunctionDocument(item as ExtractedFunction)
        : buildScriptDocument(item as CachedScript);
}
/**
 * Render a type as a short, human-readable definition block.
 *
 * The output is a `<kind> <name> {` header, one line per property (optional
 * properties get a `?` after the name), and a closing `}`. At most
 * `maxProperties` properties are listed; if more exist, a trailing line
 * reports how many were left out.
 *
 * @param type - The extracted type to render.
 * @param maxProperties - Maximum number of properties to list (default 20).
 * @returns The multi-line definition text, without a trailing newline.
 */
function formatTypeDefinition(type: ExtractedType, maxProperties: number = 20): string {
  const lines = [`${type.kind} ${type.name} {`];

  for (const property of type.properties.slice(0, maxProperties)) {
    lines.push(` ${property.name}${property.optional ? '?' : ''}: ${property.type}`);
  }

  // Summarise whatever did not fit within the limit.
  const hiddenCount = type.properties.length - maxProperties;
  if (hiddenCount > 0) {
    lines.push(` ... ${hiddenCount} more properties`);
  }

  lines.push('}');
  return lines.join('\n');
}
/**
 * Render a one-line signature for a method.
 *
 * The format is `[static ][async ]<name>(<params>): <returnType>`, where each
 * parameter is written as `<name>[?]: <type>` and parameters are separated by
 * `, `.
 *
 * @param method - The extracted method to describe.
 * @returns The signature string.
 */
function buildMethodSignature(method: ExtractedMethod): string {
  const renderedParams: string[] = [];
  for (const parameter of method.parameters) {
    const optionalMarker = parameter.optional ? '?' : '';
    renderedParams.push(`${parameter.name}${optionalMarker}: ${parameter.type}`);
  }

  // `static` is emitted before `async` when both apply.
  const modifiers = (method.isStatic ? 'static ' : '') + (method.isAsync ? 'async ' : '');

  return `${modifiers}${method.name}(${renderedParams.join(', ')}): ${method.returnType}`;
}
/**
 * Infer a coarse category from the leading verb of a method name.
 *
 * Matching is case-insensitive and prefix-based; the first rule that matches
 * wins, and names matching no rule are categorised as `'other'`.
 *
 * @param methodName - The method name to classify.
 * @returns One of `'list'`, `'read'`, `'create'`, `'delete'`, `'patch'`,
 *   `'update'`, `'replace'`, `'watch'`, `'query'` or `'other'`.
 */
function inferMethodCategory(methodName: string): string {
  // Ordered rules: [category, name prefixes that map to it].
  const prefixRules: ReadonlyArray<readonly [string, readonly string[]]> = [
    ['list', ['list']],
    ['read', ['get', 'read']],
    ['create', ['create', 'add']],
    ['delete', ['delete', 'remove']],
    ['patch', ['patch']],
    ['update', ['update']],
    ['replace', ['replace', 'set']],
    ['watch', ['watch']],
    ['query', ['query']],
  ];

  const normalized = methodName.toLowerCase();
  for (const [category, prefixes] of prefixRules) {
    if (prefixes.some((prefix) => normalized.startsWith(prefix))) {
      return category;
    }
  }
  return 'other';
}
/**
 * Infer a coarse category for a free function from its name and library.
 *
 * All comparisons are case-insensitive substring checks. Library-specific
 * rules run first (statistics, then math); a function that matches none of
 * them falls through to the signal-processing check and finally to
 * `'utility'`.
 *
 * @param funcName - The function name to classify.
 * @param library - The name of the library the function belongs to.
 * @returns One of `'descriptive'`, `'regression'`, `'distribution'`,
 *   `'matrix'`, `'math'`, `'signal'` or `'utility'`.
 */
function inferFunctionCategory(funcName: string, library: string): string {
  const name = funcName.toLowerCase();
  const lib = library.toLowerCase();

  // True when the function name contains at least one of the fragments.
  const nameHasAny = (fragments: readonly string[]): boolean =>
    fragments.some((fragment) => name.includes(fragment));

  // Statistics functions
  const isStatsLibrary = lib.includes('statistics') || lib.includes('stats');
  if (isStatsLibrary) {
    if (nameHasAny(['mean', 'median', 'mode', 'variance', 'std'])) {
      return 'descriptive';
    }
    if (name.includes('regress')) {
      return 'regression';
    }
    if (nameHasAny(['normal', 'poisson', 'binomial'])) {
      return 'distribution';
    }
  }

  // Math functions
  if (lib.includes('math')) {
    if (nameHasAny(['matrix', 'transpose', 'inverse', 'det'])) {
      return 'matrix';
    }
    if (nameHasAny(['sin', 'cos', 'tan', 'log', 'exp'])) {
      return 'math';
    }
  }

  // Signal processing: either the library or the function mentions FFT.
  const mentionsFft = lib.includes('fft') || name.includes('fft');
  return mentionsFft ? 'signal' : 'utility';
}