/**
* Core Random Tree Model builder
* Implements the recursive partitioning algorithm for narrative encoding
* @module core/rtm-builder
*/
import { v4 as uuidv4 } from 'uuid';
import {
Clause,
RTMNode,
RandomTree,
RTMParameters,
Narrative,
TemporalScale
} from '../types/rtm.js';
import {
calculateCompressionRatio
} from './rtm-math.js';
/**
* Builds a Random Tree from a narrative using the RTM algorithm
*/
export class RTMTreeBuilder {
private nodeCounter: number = 0;
private nodes: Map<string, RTMNode> = new Map();
constructor(private parameters: RTMParameters) {}
/**
* Build a complete Random Tree from a narrative
* @param narrative - The narrative to encode
* @returns A complete Random Tree structure
*/
buildTree(narrative: Narrative): RandomTree {
this.nodeCounter = 0;
this.nodes.clear();
// Create root node representing entire narrative
const rootNode = this.createNode(
narrative.clauses.map(c => c.id),
1, // Level 1 is root
'document'
);
// Recursively partition the narrative
this.recursivePartition(rootNode, narrative.clauses, 1);
return {
id: uuidv4(),
rootNodeId: rootNode.id,
nodes: this.nodes,
parameters: this.parameters,
created: new Date(),
modified: new Date(),
sourceNarrativeId: narrative.id
};
}
/**
* Create a new RTM node
* @param clauseIds - IDs of clauses this node represents
* @param level - Hierarchical level (1 = root)
* @param scale - Temporal scale
* @returns The created node
*/
private createNode(
clauseIds: string[],
level: number,
scale: TemporalScale
): RTMNode {
const nodeId = `node_${this.nodeCounter++}`;
const node: RTMNode = {
id: nodeId,
level,
clauses: clauseIds,
children: [],
compressionRatio: calculateCompressionRatio(
clauseIds.length,
scale,
this.parameters
),
temporalScale: scale
};
this.nodes.set(nodeId, node);
return node;
}
/**
* Recursively partition a node into child nodes
* @param parentNode - Node to partition
* @param clauses - Clauses represented by this node
* @param currentDepth - Current depth in the tree
*/
private recursivePartition(
parentNode: RTMNode,
clauses: Clause[],
currentDepth: number
): void {
// Stop if we've reached max depth or minimum segment size
if (currentDepth >= this.parameters.maxRecallDepth ||
clauses.length <= this.parameters.minSegmentSize) {
return;
}
// Determine number of partitions (1 to K)
const k = this.selectPartitionCount(
clauses.length,
this.parameters.maxBranchingFactor
);
if (k === 1) {
// No further partitioning
return;
}
// Generate random partition boundaries
const partitions = this.generatePartitions(clauses, k);
// Create child nodes
const childScale = this.getChildScale(parentNode.temporalScale);
partitions.forEach(partition => {
const childNode = this.createNode(
partition.map(c => c.id),
parentNode.level + 1,
childScale
);
// Link parent and child
parentNode.children.push(childNode.id);
childNode.parent = parentNode.id;
// Recursively partition the child
this.recursivePartition(childNode, partition, currentDepth + 1);
});
}
/**
* Select the number of partitions for a segment
* Uses probabilistic selection based on segment size
* @param segmentSize - Number of clauses in segment
* @param maxK - Maximum branching factor
* @returns Number of partitions
*/
private selectPartitionCount(segmentSize: number, maxK: number): number {
if (segmentSize <= 1) return 1;
// Bias towards more partitions for larger segments
const avgK = Math.min(Math.ceil(Math.sqrt(segmentSize)), maxK);
// Add some randomness
const variance = Math.floor(avgK * 0.3);
const k = avgK + Math.floor(Math.random() * variance * 2 - variance);
return Math.max(1, Math.min(k, maxK, segmentSize));
}
/**
* Generate random partitions of clauses
* @param clauses - Clauses to partition
* @param k - Number of partitions
* @returns Array of clause partitions
*/
private generatePartitions(clauses: Clause[], k: number): Clause[][] {
if (k === 1) return [clauses];
const n = clauses.length;
const partitions: Clause[][] = [];
// Generate k-1 random boundaries
const boundaries = new Set<number>();
boundaries.add(0);
boundaries.add(n);
while (boundaries.size < k + 1) {
boundaries.add(Math.floor(Math.random() * (n - 1)) + 1);
}
// Sort boundaries and create partitions
const sortedBoundaries = Array.from(boundaries).sort((a, b) => a - b);
for (let i = 0; i < sortedBoundaries.length - 1; i++) {
const start = sortedBoundaries[i];
const end = sortedBoundaries[i + 1];
partitions.push(clauses.slice(start, end));
}
return partitions.filter(p => p.length > 0);
}
/**
* Determine the temporal scale for child nodes
* @param parentScale - Parent's temporal scale
* @returns Child temporal scale
*/
private getChildScale(parentScale: TemporalScale): TemporalScale {
const scaleHierarchy: TemporalScale[] = [
'document',
'chapter',
'section',
'paragraph',
'sentence',
'clause'
];
const parentIndex = scaleHierarchy.indexOf(parentScale);
const childIndex = Math.min(parentIndex + 1, scaleHierarchy.length - 1);
return scaleHierarchy[childIndex]!;
}
/**
* Generate a summary for a node (placeholder for LLM integration)
* @param node - Node to summarize
* @param clauses - Full clause text
* @returns Summary text
*/
generateNodeSummary(node: RTMNode, clauses: Map<string, Clause>): string {
// For now, concatenate first few clauses
// This is where LLM integration would generate actual summaries
const nodeClauses = node.clauses
.slice(0, 3)
.map(id => clauses.get(id)?.text || '')
.filter(text => text.length > 0);
if (nodeClauses.length === 0) return '[Empty segment]';
return nodeClauses.join(' ') + (node.clauses.length > 3 ? '...' : '');
}
}
/**
* Parse text into clauses
* @param text - Raw text to parse
* @returns Array of clauses
*/
export function parseTextToClauses(text: string): Clause[] {
// Simple sentence-based parsing for now
// Could be enhanced with NLP libraries
const sentences = text.match(/[^.!?]+[.!?]+/g) || [text];
return sentences.map((sentence, index) => ({
id: `clause_${index}`,
text: sentence.trim(),
position: index
}));
}
/**
* Create a narrative from text
* @param text - Raw text
* @param title - Narrative title
* @param type - Narrative type
* @returns Narrative structure
*/
export function createNarrative(
text: string,
title: string,
type: Narrative['metadata']['type'] = 'other'
): Narrative {
const clauses = parseTextToClauses(text);
return {
id: uuidv4(),
title,
text,
clauses,
metadata: {
type,
length: clauses.length,
created: new Date(),
modified: new Date()
}
};
}