import { Chunker, MetadataChunk, ChunkingContext, Reference } from './types';
import { v4 as uuidv4 } from 'uuid';
/**
 * Common base class for metadata chunkers.
 *
 * Holds the chunking context and offers helpers for building chunk objects,
 * generating chunk ids, and doing regex-based extraction of object/field
 * names from text. Subclasses implement {@link BaseChunker.chunkMetadata}.
 */
export abstract class BaseChunker implements Chunker {
  /**
   * Standard object names recognised by BOTH symbol and reference extraction.
   * Keeping a single list guarantees the two extractors never disagree about
   * which standard objects exist.
   */
  private static readonly STANDARD_OBJECTS: readonly string[] = [
    'Account',
    'Contact',
    'Lead',
    'Opportunity',
    'Case',
    'User',
    'Profile',
  ];

  /** Whole-word matcher built from {@link BaseChunker.STANDARD_OBJECTS}. */
  private static readonly STANDARD_OBJECT_PATTERN = new RegExp(
    `\\b(${BaseChunker.STANDARD_OBJECTS.join('|')})\\b`,
    'g'
  );

  protected context: ChunkingContext;

  /**
   * @param context Chunking context; this class reads `orgId`, `metadataType`
   *   and `originalMetadata` from it.
   */
  constructor(context: ChunkingContext) {
    this.context = context;
  }

  /**
   * Splits the given metadata into chunks. Implemented by concrete chunkers.
   *
   * @param metadata The metadata to chunk.
   * @returns The chunks produced for `metadata`.
   */
  abstract chunkMetadata(metadata: any): MetadataChunk[];

  /**
   * Builds a chunk populated from the current context.
   *
   * The stored `content` is the trimmed input, and `metadata.size` /
   * `metadata.lineCount` are computed from that same trimmed text so they
   * describe what the chunk actually holds. `size` is the string length
   * (UTF-16 code units), not a byte count. Empty content reports one line,
   * because splitting an empty string on `'\n'` yields a single element.
   *
   * @param name Chunk name; also used in the generated id.
   * @param content Chunk text; surrounding whitespace is removed.
   * @param path Optional path of the chunk; also used in the generated id.
   * @param symbols Symbols to attach to the chunk.
   * @param references References to attach to the chunk.
   * @returns The assembled chunk.
   */
  protected createBaseChunk(
    name: string,
    content: string,
    path: string = '',
    symbols: string[] = [],
    references: Reference[] = []
  ): MetadataChunk {
    const trimmedContent = content.trim();
    // NOTE(review): assumes context.originalMetadata is always set; a missing
    // value throws on the date lookups below, as it did before — confirm.
    const original = this.context.originalMetadata;

    return {
      id: this.generateChunkId(name, path),
      orgId: this.context.orgId,
      type: this.context.metadataType,
      name,
      content: trimmedContent,
      symbols,
      references,
      path,
      raw: original,
      metadata: {
        size: trimmedContent.length,
        lineCount: trimmedContent.split('\n').length,
        createdDate: original.createdDate,
        lastModifiedDate: original.lastModifiedDate
      }
    };
  }

  /**
   * Generates an id of the form
   * `<orgId>_<metadataType>_<name>[_<sanitised path>]_<8 uuid characters>`.
   *
   * Every non-alphanumeric character of `path` is replaced with `_`; `name`
   * is used as-is. The trailing segment comes from a freshly generated v4
   * UUID, so two calls with the same arguments return different ids.
   *
   * @param name Chunk name.
   * @param path Chunk path; an empty string adds no path segment.
   * @returns The generated id.
   */
  protected generateChunkId(name: string, path: string): string {
    const prefix = `${this.context.orgId}_${this.context.metadataType}_${name}`;
    const pathSuffix = path ? `_${path.replace(/[^a-zA-Z0-9]/g, '_')}` : '';
    return `${prefix}${pathSuffix}_${uuidv4().substring(0, 8)}`;
  }

  /**
   * Collects distinct symbol names found in `content`.
   *
   * Symbols are gathered in three passes and returned in first-seen order:
   * custom names, then standard objects, then `Object.Field` pairs.
   *
   * @param content Text to scan.
   * @returns The unique symbols found; empty when nothing matches.
   */
  protected extractBasicSymbols(content: string): string[] {
    const symbols = new Set<string>();

    // Custom names: capitalised identifiers ending in `__c` (e.g. Invoice__c).
    const customMatches = content.match(/\b[A-Z][a-zA-Z0-9_]*__c\b/g) || [];
    customMatches.forEach(match => symbols.add(match));

    // Standard objects, matched as whole words only.
    const standardMatches = content.match(BaseChunker.STANDARD_OBJECT_PATTERN) || [];
    standardMatches.forEach(match => symbols.add(match));

    // Field references (Object.Field or Object__c.Field__c).
    const fieldMatches =
      content.match(/\b[A-Z][a-zA-Z0-9_]*(__c)?\.[A-Za-z][a-zA-Z0-9_]*(__c)?\b/g) || [];
    fieldMatches.forEach(match => symbols.add(match));

    return Array.from(symbols);
  }

  /**
   * Collects object and field references found in `content`.
   *
   * Object references come first, followed by field references; within each
   * group entries keep first-occurrence order and repeats are dropped, since
   * an entry carries no position information that would distinguish them.
   *
   * Any capitalised identifier ending in `__c` is reported as an object, so a
   * custom field name such as `Amount__c` is reported as an object too.
   *
   * @param content Text to scan.
   * @returns The unique references found; empty when nothing matches.
   */
  protected extractBasicReferences(content: string): Reference[] {
    const references: Reference[] = [];
    const seen = new Set<string>();

    // Records `reference` unless an equal one (same key) was already added.
    const addOnce = (key: string, reference: Reference): void => {
      if (!seen.has(key)) {
        seen.add(key);
        references.push(reference);
      }
    };

    // Candidate objects: every whole word that starts with an uppercase letter.
    const words = content.match(/\b[A-Z][a-zA-Z0-9_]*\b/g) || [];
    for (const word of words) {
      if (word.endsWith('__c') || BaseChunker.STANDARD_OBJECTS.includes(word)) {
        addOnce(`object:${word}`, { type: 'object', name: word, fullName: word });
      }
    }

    // Field references (Object.Field or Object__c.Field__c).
    const fieldMatches =
      content.match(/\b([A-Z][a-zA-Z0-9_]*(__c)?)\.([A-Za-z][a-zA-Z0-9_]*(__c)?)\b/g) || [];
    for (const match of fieldMatches) {
      // The pattern allows exactly one dot, so this split is unambiguous.
      const dot = match.indexOf('.');
      addOnce(`field:${match}`, {
        type: 'field',
        name: match.slice(dot + 1),
        fullName: match,
        context: match.slice(0, dot)
      });
    }

    return references;
  }
}