// EnhancedIndexManager.ts (76.5 kB)
/**
* Enhanced Index Manager - Persistent YAML index with extensible schema
*
* Features:
* - Extensible schema supporting arbitrary element types and metadata
* - Persistent YAML storage for human readability
* - Incremental updates without full regeneration
* - Backward compatible with schema evolution
* - Server-side semantic intelligence
*
* This manager creates and maintains a capability index that enables:
* - Verb-based action triggers
* - Cross-element relationships
* - Semantic scoring with Jaccard/entropy
* - Context-aware element discovery
*
* FIXES IMPLEMENTED (Issue #1099):
* - Uses centralized element ID parsing utilities
* - Consistent ID format handling throughout
*/
import * as fs from 'fs/promises';
import * as os from 'os';
import * as path from 'path';
import { dump as yamlDump, load as yamlLoad } from 'js-yaml';
import { logger } from '../utils/logger.js';
import { PortfolioIndexManager, IndexEntry } from './PortfolioIndexManager.js';
import { SecurityMonitor } from '../security/securityMonitor.js';
import { UnicodeValidator } from '../security/validators/unicodeValidator.js';
import { NLPScoringManager } from './NLPScoringManager.js';
import { VerbTriggerManager } from './VerbTriggerManager.js';
import { ConfigManager } from '../config/ConfigManager.js';
import { IndexConfigManager, IndexConfiguration } from './config/IndexConfig.js';
import { FileLock } from '../utils/FileLock.js';
import { parseElementId, parseElementIdStrict, formatElementId } from '../utils/elementId.js';
import { RelationshipManager, ElementPath } from './RelationshipManager.js';
import {
BaseRelationship,
createRelationship,
RelationshipTypes
} from './types/RelationshipTypes.js';
/**
* Enhanced index schema - fully extensible.
*
* Root document of the capability index. The manager serializes this object to
* YAML and, when loading, only accepts a file in which `metadata`, `elements`
* and `action_triggers` are all present.
*/
export interface EnhancedIndex {
// Metadata about the index itself (schema version, timestamps, element count)
metadata: IndexMetadata;
// Verb-based action triggers (extensible): lowercase verb -> names of the elements it maps to
action_triggers: Record<string, string[]>;
// Element definitions (extensible types and fields): element type -> element name -> definition
elements: Record<string, Record<string, ElementDefinition>>;
// Context tracking for smart injection
context?: ContextTracking;
// Scoring configuration
scoring?: ScoringConfig;
// Extension point for future features; carried over from the previous index
// on rebuild when custom data is being preserved
extensions?: Record<string, any>;
}
/**
* Bookkeeping stored under `metadata` at the top of the index file.
*/
export interface IndexMetadata {
version: string; // Schema version for compatibility (the builder in this file writes '2.0.0')
created: string; // ISO timestamp; reused from the previous index when one is being preserved
last_updated: string; // ISO timestamp of the most recent build
total_elements: number; // Number of element definitions counted during the build
// Extensible metadata
[key: string]: any;
}
/**
* Index record for a single portfolio element.
*
* `core` is rebuilt from the portfolio entry's metadata on every build, while
* `custom`, `relationships` and `actions` are carried over from the previous
* index entry when one is available.
*/
export interface ElementDefinition {
// Core fields (always present)
core: {
name: string;
type: string;
version?: string;
description?: string;
created?: string;
updated?: string;
};
// Search optimization; `triggers` and verb-like `keywords` feed action_triggers
search?: {
keywords?: string[];
tags?: string[];
triggers?: string[];
};
// Verb-based actions (extensible), keyed by action name
actions?: Record<string, ActionDefinition>;
// USE_WHEN patterns for automatic activation
use_when?: UseWhenPattern[];
// Cross-element relationships
relationships?: Record<string, Relationship[]>;
// Context snippets for injection
context_snippets?: Record<string, string>;
// Semantic scoring data
semantic?: SemanticData;
// Extension point - any custom fields
custom?: Record<string, any>;
}
/**
* A single verb-driven action exposed by an element.
*/
export interface ActionDefinition {
verb: string; // Trigger verb; when empty the action's key is used as the verb instead
behavior?: string; // What the verb does; generated defaults use 'activate', 'retrieve' or 'execute'
confidence?: number; // Generated defaults use values between 0.6 and 0.8
// Extensible action properties
[key: string]: any;
}
/**
* USE_WHEN rule attached to an element for automatic activation.
* NOTE(review): nothing in the visible part of this file evaluates these
* patterns; confirm the matching semantics against the consumer.
*/
export interface UseWhenPattern {
pattern: string; // Regex pattern
confidence: number; // 0-1 confidence score
description?: string;
// Extensible pattern properties
[key: string]: any;
}
/**
* Alias of BaseRelationship, exported as `Relationship` for backward compatibility.
*
* This maintains API compatibility with existing code while the internal
* implementation now uses type-safe variants (ParsedRelationship, InvalidRelationship).
* Existing code using the 'Relationship' type will continue to work without changes.
*/
export type Relationship = BaseRelationship;
/**
* Semantic scoring data stored on an element definition.
*/
export interface SemanticData {
entropy?: number; // Shannon entropy
unique_terms?: number;
total_terms?: number;
jaccard_scores?: Record<string, number>; // Pairwise similarities (presumably keyed by the other element; verify against the writer)
// Extensible semantic properties
[key: string]: any;
}
/**
* Optional `context` section of the index, used for smart injection.
*/
export interface ContextTracking {
// Recently observed keywords together with their weights
recent_keywords: KeywordTracking[];
// Relationships currently considered active
active_relationships?: RelationshipTracking[];
// Extensible context properties
[key: string]: any;
}
/**
* One tracked keyword inside ContextTracking.recent_keywords.
*/
export interface KeywordTracking {
term: string;
frequency: number;
last_seen: string; // presumably an ISO timestamp like the other date fields - TODO confirm
weight: number;
}
/**
* One tracked relationship inside ContextTracking.active_relationships.
*/
export interface RelationshipTracking {
from: string; // source element (identifier format not shown here - verify against the writer)
to: string; // target element
strength: number;
reason?: string;
}
/**
* Optional `scoring` section of the index holding tunable scoring parameters.
*/
export interface ScoringConfig {
jaccard_weights?: Record<string, number>;
entropy_thresholds?: Record<string, number>;
// Decay applied to context weights over time
context_decay?: {
half_life_minutes: number;
minimum_weight: number;
};
// Extensible scoring properties
[key: string]: any;
}
/**
* Options for index operations
*/
export interface IndexOptions {
forceRebuild?: boolean; // Rebuild even when the index would otherwise be considered fresh
updateOnly?: string[]; // Only update specific elements (matched by element name)
preserveCustom?: boolean; // Preserve custom fields during update; also what makes the previous index available to the builder
}
export class EnhancedIndexManager {
// Lazily created singleton returned by getInstance()
private static instance: EnhancedIndexManager | null = null;
// In-memory copy of the index; null until it has been loaded or built
private index: EnhancedIndex | null = null;
// Location of the persisted YAML index (capability-index.yaml)
private indexPath: string;
// Time the in-memory index was last loaded or built
private lastLoaded: Date | null = null;
// Maximum age of the index file in milliseconds, derived from config.index.ttlMinutes
private TTL_MS: number;
private isBuilding = false; // Track if index is being built
private nlpScoring: NLPScoringManager;
private verbTriggers: VerbTriggerManager;
private relationshipManager: RelationshipManager;
private config: IndexConfigManager;
// Lock on the index file, held for the duration of a build
private fileLock: FileLock;
// NOTE(review): presumably managed by startMemoryCleanup(), which is outside this excerpt
private memoryCleanupInterval: NodeJS.Timeout | null = null;
private lastMemoryCleanup: Date = new Date();
// Batch metrics update support for high-volume scenarios
private metricsBatch: Map<string, number> = new Map();
private metricsFlushTimer: NodeJS.Timeout | null = null;
private readonly METRICS_BATCH_SIZE = 10;
private readonly METRICS_FLUSH_INTERVAL = 5000; // 5 seconds
/**
 * Wires up configuration, the NLP/verb/relationship helpers and the index
 * file lock, then starts the periodic memory cleanup.
 *
 * Private: obtain the shared instance through getInstance().
 */
private constructor() {
  // HOME is normally unset on Windows. Fall back to the OS-reported home
  // directory so the index never resolves relative to the current working
  // directory; an explicitly set HOME still takes precedence.
  const homeDir = process.env.HOME || os.homedir();
  const portfolioPath = path.join(homeDir, '.dollhouse', 'portfolio');
  this.indexPath = path.join(portfolioPath, 'capability-index.yaml');

  // Initialize configuration
  this.config = IndexConfigManager.getInstance();
  const config = this.config.getConfig();
  this.TTL_MS = config.index.ttlMinutes * 60 * 1000;

  // Load enhanced index config from global ConfigManager
  this.loadEnhancedIndexConfig();

  // Initialize components with config
  this.nlpScoring = new NLPScoringManager({
    cacheExpiry: config.nlp.cacheExpiryMinutes * 60 * 1000,
    minTokenLength: config.nlp.minTokenLength,
    entropyBands: config.nlp.entropyBands,
    jaccardThresholds: config.nlp.jaccardThresholds
  });
  this.verbTriggers = VerbTriggerManager.getInstance({
    confidenceThreshold: config.verbs.confidenceThreshold,
    maxElementsPerVerb: config.verbs.maxElementsPerVerb,
    includeSynonyms: config.verbs.includeSynonyms
  });

  // Initialize relationship manager
  this.relationshipManager = RelationshipManager.getInstance({
    minConfidence: config.performance.similarityThreshold,
    enableAutoDiscovery: true
  });

  // Initialize file lock
  this.fileLock = new FileLock(this.indexPath);

  logger.debug('EnhancedIndexManager initialized', {
    indexPath: this.indexPath,
    config: {
      ttlMinutes: config.index.ttlMinutes,
      maxElements: config.performance.maxElementsForFullMatrix
    }
  });

  // Start automatic memory cleanup to prevent leaks
  this.startMemoryCleanup();
}
/**
 * Return the shared manager, constructing it on first use.
 */
public static getInstance(): EnhancedIndexManager {
  const current = this.instance;
  if (current) {
    return current;
  }
  const created = new EnhancedIndexManager();
  this.instance = created;
  return created;
}
/**
 * Get the current index, loading or building as needed.
 *
 * Resolution order: forced rebuild, rebuild because needsRebuild() says so,
 * load from the YAML file when nothing is in memory, otherwise the in-memory
 * copy is returned as is.
 *
 * @param options forceRebuild skips all freshness checks; the remaining
 *                options are forwarded to the builder.
 * @returns the in-memory index after the chosen operation finished.
 * @throws Error when no index is available afterwards (for example when the
 *         build lock could not be acquired and nothing was loaded before),
 *         and rethrows any error raised while loading or building.
 */
public async getIndex(options: IndexOptions = {}): Promise<EnhancedIndex> {
  try {
    // Add performance tracking
    const startTime = Date.now();
    // Recorded inside the branch that actually runs, so a TTL-triggered
    // rebuild is not reported as 'load' or 'cached'.
    let operation: 'rebuild' | 'load' | 'cached';

    if (options.forceRebuild) {
      operation = 'rebuild';
      logger.info('Force rebuild requested for Enhanced Index');
      await this.buildIndex(options);
    } else if (await this.needsRebuild()) {
      operation = 'rebuild';
      logger.info('Enhanced Index needs rebuild');
      await this.buildIndex(options);
    } else if (!this.index) {
      // Try to load from file first
      operation = 'load';
      logger.info('Loading Enhanced Index from cache file');
      await this.loadIndex();
    } else {
      operation = 'cached';
      logger.debug('Using cached Enhanced Index from memory');
    }

    // buildIndex() returns without building when it cannot get the lock, so
    // the index can still be missing here. Fail loudly instead of handing
    // callers a null typed as EnhancedIndex.
    const index = this.index;
    if (!index) {
      throw new Error(`Enhanced Index is unavailable after ${operation} operation`);
    }

    const elapsed = Date.now() - startTime;
    logger.info('Enhanced Index operation completed', {
      operation,
      elapsedMs: elapsed,
      elements: index.metadata?.total_elements ?? 0
    });
    if (elapsed > 1000) {
      logger.warn('Enhanced Index operation took longer than expected', {
        elapsedMs: elapsed,
        operation
      });
    }
    return index;
  } catch (error) {
    logger.error('Failed to get Enhanced Index', error);
    throw error;
  }
}
/**
 * Read the YAML index file into memory.
 *
 * Falls back to a full build when the file is missing, cannot be parsed, is
 * not an object, or lacks any of metadata / elements / action_triggers.
 * Any other read failure is logged and rethrown.
 */
private async loadIndex(): Promise<void> {
  try {
    const rawYaml = await fs.readFile(this.indexPath, 'utf-8');

    let parsed;
    try {
      parsed = yamlLoad(rawYaml);
    } catch (yamlError) {
      // A corrupt file is not fatal: regenerate it from the portfolio
      logger.warn('Failed to parse YAML, rebuilding index', yamlError);
      await this.buildIndex();
      return;
    }

    // An empty document or a bare scalar cannot be an index
    const isObject = Boolean(parsed) && typeof parsed === 'object';
    if (!isObject) {
      logger.warn('Loaded YAML is null or not an object, rebuilding index');
      await this.buildIndex();
      return;
    }

    // All three top-level sections must be present before the file is trusted
    const candidate = parsed as any;
    const hasMetadata = !!candidate.metadata;
    const hasElements = !!candidate.elements;
    const hasActionTriggers = !!candidate.action_triggers;
    if (!(hasMetadata && hasElements && hasActionTriggers)) {
      logger.warn('Invalid index structure (missing metadata, elements, or action_triggers), rebuilding', {
        hasMetadata,
        hasElements,
        hasActionTriggers
      });
      await this.buildIndex();
      return;
    }

    this.index = parsed as EnhancedIndex;
    this.lastLoaded = new Date();

    // Optional chaining keeps the log call safe for partially filled metadata
    const loadedMetadata = this.index?.metadata;
    logger.info('Enhanced index loaded', {
      elements: loadedMetadata?.total_elements ?? 0,
      version: loadedMetadata?.version ?? 'unknown'
    });
  } catch (error) {
    if ((error as any).code !== 'ENOENT') {
      logger.error('Failed to load index', error);
      throw error;
    }
    // No file yet: building creates it and populates the in-memory index
    logger.info('No existing index found, will build new one');
    await this.buildIndex();
  }
}
/**
 * Build or rebuild the index from the portfolio and persist it.
 *
 * The whole build runs under the index file lock. When the lock cannot be
 * acquired the method logs a warning and returns without touching the
 * in-memory index.
 *
 * @param options preserveCustom makes the current in-memory index available
 *                so custom fields, relationships, actions and extensions are
 *                carried over. updateOnly restricts rebuilding to the named
 *                elements; other elements are copied from the previous index
 *                when an entry exists there, and rebuilt otherwise.
 * @throws rethrows any error from element discovery, relationship
 *         calculation or writing the file; the lock is released either way.
 */
private async buildIndex(options: IndexOptions = {}): Promise<void> {
  // Use file locking to prevent concurrent builds
  const config = this.config.getConfig();
  const lockAcquired = await this.fileLock.acquire({
    timeout: config.index.lockTimeoutMs,
    stale: 60000 // 1 minute
  });
  if (!lockAcquired) {
    logger.warn('Could not acquire lock for index build');
    return;
  }

  this.isBuilding = true;
  const startTime = Date.now();
  try {
    // Get existing index for preservation of custom fields
    const existingIndex = options.preserveCustom && this.index ? this.index : null;

    // Initialize new index structure
    const newIndex: EnhancedIndex = {
      metadata: {
        version: '2.0.0',
        created: existingIndex?.metadata.created || new Date().toISOString(),
        last_updated: new Date().toISOString(),
        total_elements: 0
      },
      action_triggers: {},
      elements: {}
    };

    // Get portfolio index for element discovery
    const portfolioIndex = PortfolioIndexManager.getInstance();
    const portfolioData = await portfolioIndex.getIndex();

    // Process each element type
    for (const [elementType, entries] of portfolioData.byType.entries()) {
      if (!newIndex.elements[elementType]) {
        newIndex.elements[elementType] = {};
      }
      for (const entry of entries) {
        // Entries without a name cannot be keyed in the index
        const entryName = entry.metadata?.name;
        if (!entryName) {
          logger.warn('Skipping entry with undefined metadata.name');
          continue;
        }

        // Elements outside the update list are reused from the previous
        // index when an entry exists there; otherwise they are rebuilt.
        const outsideUpdateList =
          !!options.updateOnly && !options.updateOnly.includes(entryName);
        const preserved = outsideUpdateList
          ? existingIndex?.elements[elementType]?.[entryName]
          : undefined;
        const elementDef =
          preserved || await this.buildElementDefinition(entry, existingIndex);

        newIndex.elements[elementType][entryName] = elementDef;
        // Preserved entries are part of the new index too: count them and
        // register their triggers, otherwise a partial update would
        // under-report total_elements and drop their verbs from
        // action_triggers.
        newIndex.metadata.total_elements++;
        this.extractActionTriggers(elementDef, entryName, newIndex.action_triggers);
      }
    }

    // Calculate semantic relationships using NLP
    await this.calculateSemanticRelationships(newIndex);

    // Discover additional relationship types
    await this.relationshipManager.discoverRelationships(newIndex);

    // Preserve extensions from existing index
    if (existingIndex?.extensions) {
      newIndex.extensions = existingIndex.extensions;
    }

    // Save to file, then publish the new index in memory
    await this.writeToFile(newIndex);
    this.index = newIndex;
    this.lastLoaded = new Date();

    const duration = Date.now() - startTime;
    logger.info('Enhanced index built', {
      duration: `${duration}ms`,
      elements: newIndex.metadata.total_elements,
      triggers: Object.keys(newIndex.action_triggers).length
    });

    // Log security event
    SecurityMonitor.logSecurityEvent({
      type: 'PORTFOLIO_CACHE_INVALIDATION',
      severity: 'LOW',
      source: 'enhanced_index',
      details: `Index rebuilt with ${newIndex.metadata.total_elements} elements in ${duration}ms`
    });
  } finally {
    this.isBuilding = false;
    await this.fileLock.release();
  }
}
/**
 * Turn a portfolio entry into an index element definition.
 *
 * Core and search fields come from the entry's metadata. Custom fields,
 * relationships and actions are copied from the matching definition in
 * existingIndex when there is one; without stored actions, defaults are
 * generated from the element type and name.
 *
 * Properties are assigned in a fixed order (core, search, custom,
 * relationships, actions) because the YAML writer keeps insertion order.
 */
private async buildElementDefinition(
  entry: IndexEntry,
  existingIndex: EnhancedIndex | null
): Promise<ElementDefinition> {
  const meta = entry.metadata;
  const entryName = meta?.name || 'unknown';
  const previous = existingIndex?.elements[entry.elementType]?.[entryName];

  const definition: ElementDefinition = {
    core: {
      name: entryName,
      type: entry.elementType,
      version: meta?.version,
      description: meta?.description,
      created: meta?.created,
      updated: meta?.updated || new Date().toISOString()
    }
  };

  // The search section is only emitted when at least one search field is set
  const hasSearchData = meta?.keywords || meta?.tags || meta?.triggers;
  if (hasSearchData) {
    definition.search = {
      keywords: meta?.keywords,
      tags: meta?.tags,
      triggers: meta?.triggers
    };
    // Debug logging for trigger extraction
    if (meta?.triggers && meta.triggers.length > 0) {
      logger.debug('Found triggers for element', {
        name: entryName,
        type: entry.elementType,
        triggers: meta.triggers
      });
    }
  }

  // Carry user-maintained data over from the previous definition
  if (previous?.custom) {
    definition.custom = previous.custom;
  }
  if (previous?.relationships) {
    definition.relationships = previous.relationships;
  }

  // Stored actions win; otherwise fall back to generated defaults
  definition.actions = previous?.actions
    ? previous.actions
    : this.generateDefaultActions(entry);

  return definition;
}
/**
 * Generate default actions based on element type and name.
 *
 * Name matching is case-insensitive, so "Debug Detective" and
 * "debug-detective" produce the same actions.
 *
 * @param entry portfolio entry whose elementType and metadata.name are inspected.
 * @returns map of action key to definition, or undefined when the entry has
 *          no name or no rule applies.
 */
private generateDefaultActions(entry: IndexEntry): Record<string, ActionDefinition> | undefined {
  const actions: Record<string, ActionDefinition> = {};
  const entryName = entry.metadata?.name || '';
  if (!entryName) {
    return undefined;
  }
  // Compare on a lowercase copy: names are user-authored and commonly
  // title-cased, which a case-sensitive includes('debug') would miss.
  // String() also keeps a non-string name (e.g. numeric YAML) from throwing.
  const lowerName = String(entryName).toLowerCase();

  // Generate based on element type
  switch (entry.elementType) {
    case 'personas':
      if (lowerName.includes('debug')) {
        actions.debug = { verb: 'debug', behavior: 'activate', confidence: 0.8 };
        actions.fix = { verb: 'fix', behavior: 'activate', confidence: 0.7 };
      }
      if (lowerName.includes('creative')) {
        actions.write = { verb: 'write', behavior: 'activate', confidence: 0.8 };
        actions.create = { verb: 'create', behavior: 'activate', confidence: 0.8 };
      }
      break;
    case 'memories':
      if (lowerName.includes('session')) {
        actions.recall = { verb: 'recall', behavior: 'retrieve', confidence: 0.7 };
        actions.remember = { verb: 'remember', behavior: 'retrieve', confidence: 0.7 };
      }
      break;
    case 'skills':
      // Every skill can be invoked generically
      actions.use = { verb: 'use', behavior: 'execute', confidence: 0.6 };
      actions.apply = { verb: 'apply', behavior: 'execute', confidence: 0.6 };
      break;
  }
  return Object.keys(actions).length > 0 ? actions : undefined;
}
// Configuration for verb extraction (will be overridden by ConfigManager).
// This is shared static state: loadEnhancedIndexConfig() replaces `limits` and
// `telemetry` with merged copies when the global config provides overrides.
private static VERB_EXTRACTION_CONFIG = {
// Security limits for DoS protection
limits: {
maxTriggersPerElement: 50, // Maximum triggers to extract per element
maxTriggerLength: 50, // Maximum length for a single trigger
maxKeywordsToCheck: 100, // Maximum keywords to process for verb detection
},
// Common verb prefixes broken down by category for maintainability.
// All categories are flattened into one alternation for VERB_PREFIX_PATTERN.
// NOTE(review): 'validate' is listed under both `information` and `testing`;
// harmless for the regex, but one of them is redundant.
verbPrefixes: {
actions: ['create', 'build', 'make', 'generate', 'produce', 'write', 'compose'],
analysis: ['analyze', 'review', 'examine', 'investigate', 'inspect', 'evaluate', 'assess'],
debugging: ['debug', 'fix', 'troubleshoot', 'solve', 'resolve', 'repair', 'patch'],
operations: ['run', 'execute', 'start', 'stop', 'deploy', 'configure', 'install'],
modification: ['update', 'modify', 'change', 'edit', 'alter', 'transform', 'refactor'],
removal: ['delete', 'remove', 'clear', 'clean', 'purge', 'destroy', 'eliminate'],
information: ['explain', 'describe', 'document', 'search', 'find', 'check', 'validate'],
optimization: ['optimize', 'improve', 'enhance', 'streamline', 'accelerate'],
testing: ['test', 'verify', 'validate', 'confirm', 'assert', 'ensure'],
},
// Common verb suffixes that indicate action words
verbSuffixes: ['ify', 'ize', 'ate', 'en', 'fy'],
// Noun suffixes that indicate non-verbs (to filter out).
// looksLikeVerb() checks these before the verb patterns, so a word ending in
// one of them is rejected even when it starts with a verb prefix.
nounSuffixes: ['tion', 'sion', 'ment', 'ness', 'ance', 'ence', 'ity', 'ism', 'ship', 'hood', 'dom', 'ery', 'ing'],
// Telemetry settings
telemetry: {
enabled: false, // Can also be switched on with DOLLHOUSE_TELEMETRY_ENABLED=true
sampleRate: 0.1, // Sample 10% of operations when enabled
metricsInterval: 60000, // Report metrics every 60 seconds
}
};
// Regexes compiled once from VERB_EXTRACTION_CONFIG. They are reassigned by
// loadEnhancedIndexConfig() when the global config adds custom entries.

// Matches words that START with any configured verb
private static VERB_PREFIX_PATTERN = new RegExp(
  '^(' +
    ([] as string[])
      .concat(...Object.values(EnhancedIndexManager.VERB_EXTRACTION_CONFIG.verbPrefixes))
      .join('|') +
    ')'
);
// Matches words that END with a verb-forming suffix
private static VERB_SUFFIX_PATTERN = new RegExp(
  '(' + EnhancedIndexManager.VERB_EXTRACTION_CONFIG.verbSuffixes.join('|') + ')$'
);
// Matches words that END with a noun-forming suffix
private static NOUN_SUFFIX_PATTERN = new RegExp(
  '(' + EnhancedIndexManager.VERB_EXTRACTION_CONFIG.nounSuffixes.join('|') + ')$'
);
/**
 * Collect the action triggers of one element into the shared trigger map.
 *
 * Sources are consulted in a fixed order: search.triggers, then the actions
 * field, then verb-like search.keywords. A per-element Set suppresses
 * duplicates and a shared counter enforces the per-element trigger limit
 * across all three sources.
 *
 * @param elementDef  definition to read triggers from; nothing happens when it is missing.
 * @param elementName name recorded against every extracted verb.
 * @param triggers    verb -> element names map, updated in place.
 */
private extractActionTriggers(
  elementDef: ElementDefinition,
  elementName: string,
  triggers: Record<string, string[]>
): void {
  if (!elementDef) {
    return;
  }

  // null when telemetry is disabled or this call was not sampled
  const startedAt = this.startTelemetry('extractActionTriggers');

  const seen = new Set<string>();
  // Passed by reference so each helper continues the same running count
  const counter = { count: 0 };

  this.extractTriggersFromSearchField(elementDef, elementName, triggers, seen, counter);
  this.extractTriggersFromActions(elementDef, elementName, triggers, seen, counter);
  this.extractTriggersFromKeywords(elementDef, elementName, triggers, seen, counter);

  this.recordTelemetry('extractActionTriggers', startedAt, {
    elementName,
    elementType: elementDef.core?.type,
    triggersExtracted: counter.count,
    uniqueTriggers: seen.size,
  });
}
/**
 * Register every valid entry of search.triggers as a trigger for the element.
 * Stops, with a warning, once the per-element trigger limit has been reached.
 */
private extractTriggersFromSearchField(
  elementDef: ElementDefinition,
  elementName: string,
  triggers: Record<string, string[]>,
  elementTriggers: Set<string>,
  triggerCountRef: { count: number }
): void {
  const declared = elementDef.search?.triggers;
  if (!declared) {
    return;
  }

  const limit = EnhancedIndexManager.VERB_EXTRACTION_CONFIG.limits.maxTriggersPerElement;
  for (const raw of this.normalizeToArray(declared)) {
    if (triggerCountRef.count >= limit) {
      logger.warn('Trigger limit exceeded for element', { elementName, limit });
      break;
    }

    const candidate = this.normalizeTrigger(raw);
    // Skip invalid values and verbs already recorded for this element
    if (!candidate || elementTriggers.has(candidate)) {
      continue;
    }

    elementTriggers.add(candidate);
    this.addTriggerMapping(candidate, elementName, triggers);
    triggerCountRef.count++;
  }
}
/**
 * Register the verb of every entry in the actions field as a trigger.
 *
 * The verb comes from action.verb, falling back to the action's key. Stops,
 * with a warning, once the per-element trigger limit has been reached.
 *
 * @param elementDef      definition whose actions are scanned.
 * @param elementName     name recorded against each verb.
 * @param triggers        verb -> element names map, updated in place.
 * @param elementTriggers verbs already recorded for this element.
 * @param triggerCountRef running count shared with the other extractors.
 */
private extractTriggersFromActions(
  elementDef: ElementDefinition,
  elementName: string,
  triggers: Record<string, string[]>,
  elementTriggers: Set<string>,
  triggerCountRef: { count: number }
): void {
  if (!elementDef.actions) return;

  const limit = EnhancedIndexManager.VERB_EXTRACTION_CONFIG.limits.maxTriggersPerElement;
  for (const [actionKey, action] of Object.entries(elementDef.actions)) {
    if (triggerCountRef.count >= limit) {
      logger.warn('Trigger limit exceeded for element', {
        elementName,
        limit
      });
      break;
    }

    // Actions can come from a previously persisted (and hand-editable) index,
    // so an entry may be null; fall back to the key instead of throwing.
    const verb = action?.verb || actionKey;
    const normalizedVerb = this.normalizeTrigger(verb);
    if (!normalizedVerb) continue;

    if (!elementTriggers.has(normalizedVerb)) {
      elementTriggers.add(normalizedVerb);
      this.addTriggerMapping(normalizedVerb, elementName, triggers);
      triggerCountRef.count++;
    }
  }
}
/**
 * Promote verb-like search keywords to triggers.
 * A keyword qualifies when it normalizes to a valid trigger and
 * looksLikeVerb() accepts it. Stops, with a warning, once the per-element
 * trigger limit has been reached.
 */
private extractTriggersFromKeywords(
  elementDef: ElementDefinition,
  elementName: string,
  triggers: Record<string, string[]>,
  elementTriggers: Set<string>,
  triggerCountRef: { count: number }
): void {
  const declared = elementDef.search?.keywords;
  if (!declared) {
    return;
  }

  const limit = EnhancedIndexManager.VERB_EXTRACTION_CONFIG.limits.maxTriggersPerElement;
  for (const raw of this.normalizeToArray(declared)) {
    if (triggerCountRef.count >= limit) {
      logger.warn('Trigger limit exceeded for element', { elementName, limit });
      break;
    }

    const candidate = this.normalizeTrigger(raw);
    if (!candidate) {
      continue;
    }
    if (!this.looksLikeVerb(candidate)) {
      continue;
    }
    if (elementTriggers.has(candidate)) {
      continue;
    }

    elementTriggers.add(candidate);
    this.addTriggerMapping(candidate, elementName, triggers);
    triggerCountRef.count++;
  }
}
/**
 * Coerce a loosely typed value into a list of strings.
 * Arrays keep only their string members, a non-empty string becomes a
 * one-element list, and everything else yields an empty list.
 */
private normalizeToArray(value: any): string[] {
  if (Array.isArray(value)) {
    return value.filter(item => typeof item === 'string');
  }
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  return isNonEmptyString ? [value] : [];
}
/**
 * Canonicalize a trigger candidate.
 * Returns the trimmed, lowercased text when it is a non-empty string no
 * longer than the configured maximum that starts with a letter and contains
 * only letters and hyphens; returns null otherwise.
 */
private normalizeTrigger(trigger: any): string | null {
  if (typeof trigger !== 'string') {
    return null;
  }

  const candidate = trigger.trim().toLowerCase();
  if (candidate.length === 0) {
    return null;
  }

  const maxLength = EnhancedIndexManager.VERB_EXTRACTION_CONFIG.limits.maxTriggerLength;
  if (candidate.length > maxLength) {
    return null;
  }

  return /^[a-z][a-z-]*$/.test(candidate) ? candidate : null;
}
/**
 * Add a trigger to element mapping.
 * Preserves original element name casing for proper resolution.
 *
 * Note: Triggers are persisted in the index file under action_triggers.
 * Usage metrics are tracked via trackTriggerUsage() and can be retrieved with getTriggerMetrics().
 *
 * @param verb        trigger verb; stored lowercase for consistent lookup.
 * @param elementName element to associate with the verb, stored exactly as
 *                    given (debug-detective, Debug-Detective, DebugDetective, ...).
 * @param triggers    verb -> element names map, updated in place.
 */
private addTriggerMapping(
  verb: string,
  elementName: string,
  triggers: Record<string, string[]>
): void {
  // Store verb in lowercase for consistent lookup
  const normalizedVerb = verb.toLowerCase();

  // Only consult OWN properties. A verb such as "constructor" is a valid
  // trigger, but a plain lookup on an object literal returns the inherited
  // Object function, which is truthy and has no includes()/push().
  const hasOwnEntry = Object.prototype.hasOwnProperty.call(triggers, normalizedVerb);
  let mappedElements: string[] | undefined = hasOwnEntry ? triggers[normalizedVerb] : undefined;

  if (!Array.isArray(mappedElements)) {
    mappedElements = [];
    // defineProperty always creates an own, enumerable entry, even for keys
    // that plain assignment would route to a prototype setter.
    Object.defineProperty(triggers, normalizedVerb, {
      value: mappedElements,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }

  // Preserve original element name casing; just avoid listing it twice
  if (!mappedElements.includes(elementName)) {
    mappedElements.push(elementName);
  }
}
/**
 * Check if a word looks like a verb using the pre-compiled patterns.
 *
 * Decision order:
 *  1. A word that is exactly one of the configured verbs is always a verb.
 *     Without this, listed verbs such as "document" (-ment) or "enhance"
 *     (-ance) would be rejected by the noun-suffix filter below.
 *  2. A word ending in a noun suffix is not a verb (avoids false positives
 *     like "documentation").
 *  3. Otherwise the word is a verb when it starts with a configured verb or
 *     ends with a verb suffix.
 *
 * @param word candidate word; compared case-insensitively.
 */
private looksLikeVerb(word: string): boolean {
  const lowerWord = word.toLowerCase();

  const prefixMatch = EnhancedIndexManager.VERB_PREFIX_PATTERN.exec(lowerWord);
  // The prefix pattern is anchored at the start, so a match spanning the
  // whole word means the word itself is a configured verb.
  if (prefixMatch !== null && prefixMatch[0] === lowerWord) {
    return true;
  }

  // Check for noun suffixes that should NOT be considered verbs
  if (EnhancedIndexManager.NOUN_SUFFIX_PATTERN.test(lowerWord)) {
    return false;
  }

  // Check for verb patterns
  return prefixMatch !== null ||
    EnhancedIndexManager.VERB_SUFFIX_PATTERN.test(lowerWord);
}
/**
 * Apply the `elements.enhanced_index` section of the global configuration.
 *
 * Merges limit and telemetry overrides into the static extraction config and
 * recompiles the verb/noun patterns when custom entries are supplied, then
 * validates the patterns. Does nothing when the section is absent. Any
 * failure is logged as a warning and never propagates to the caller.
 */
private loadEnhancedIndexConfig(): void {
  try {
    const globalConfig = ConfigManager.getInstance().getConfig();
    const enhancedConfig = globalConfig.elements?.enhanced_index;
    if (!enhancedConfig) {
      return;
    }

    const extraction = EnhancedIndexManager.VERB_EXTRACTION_CONFIG;

    // Overrides win over the built-in defaults, key by key
    if (enhancedConfig.limits) {
      extraction.limits = { ...extraction.limits, ...enhancedConfig.limits };
    }
    if (enhancedConfig.telemetry) {
      extraction.telemetry = { ...extraction.telemetry, ...enhancedConfig.telemetry };
    }

    const patterns = enhancedConfig.verbPatterns;
    if (patterns) {
      // Custom prefixes extend the built-in verb list
      if (patterns.customPrefixes && patterns.customPrefixes.length > 0) {
        const prefixes = [
          ...Object.values(extraction.verbPrefixes).flat(),
          ...patterns.customPrefixes
        ];
        EnhancedIndexManager.VERB_PREFIX_PATTERN = this.compileAndValidateRegex(
          `^(${prefixes.join('|')})`,
          'verb prefix'
        );
      }

      // Custom suffixes extend the built-in verb suffixes
      if (patterns.customSuffixes && patterns.customSuffixes.length > 0) {
        const suffixes = [...extraction.verbSuffixes, ...patterns.customSuffixes];
        EnhancedIndexManager.VERB_SUFFIX_PATTERN = this.compileAndValidateRegex(
          `(${suffixes.join('|')})$`,
          'verb suffix'
        );
      }

      // Excluded nouns extend the built-in noun suffixes
      if (patterns.excludedNouns && patterns.excludedNouns.length > 0) {
        const nouns = [...extraction.nounSuffixes, ...patterns.excludedNouns];
        EnhancedIndexManager.NOUN_SUFFIX_PATTERN = this.compileAndValidateRegex(
          `(${nouns.join('|')})$`,
          'noun suffix'
        );
      }
    }

    // Validate all regex patterns at startup
    this.validateRegexPatterns();

    logger.info('Loaded enhanced index configuration', {
      limits: extraction.limits,
      telemetryEnabled: extraction.telemetry.enabled
    });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn('Failed to load enhanced index configuration, using defaults', {
      error: reason
    });
  }
}
/**
 * Compile a pattern string and smoke-test the resulting regex.
 *
 * @param pattern regex source to compile.
 * @param name    human-readable label used in the error message.
 * @returns the compiled regex.
 * @throws Error `Invalid <name> pattern: <pattern>` when compilation or the
 *         smoke test fails; the underlying cause is logged.
 */
private compileAndValidateRegex(pattern: string, name: string): RegExp {
  try {
    const compiled = new RegExp(pattern);

    // Run the regex against a few representative words; only errors matter
    const samples = ['test', 'debug', 'create', 'ify', 'tion'];
    samples.forEach(sample => {
      try {
        void compiled.test(sample);
        // Global regexes keep state between calls; start each sample fresh
        if (compiled.global) {
          compiled.lastIndex = 0;
        }
      } catch (testError) {
        throw new Error(`Regex pattern fails on test string '${sample}': ${testError}`);
      }
    });

    return compiled;
  } catch (error) {
    const errorMsg = `Invalid ${name} pattern: ${pattern}`;
    const cause = error instanceof Error ? error.message : String(error);
    logger.error(errorMsg, { error: cause, pattern });
    throw new Error(errorMsg);
  }
}
/**
 * Validate all regex patterns at startup.
 *
 * Runs each static pattern against words it should and should not match and
 * logs a warning for every mismatch. Mismatches are advisory only; the final
 * log line reports success only when there were none.
 *
 * @throws Error when one of the static patterns is not initialized.
 */
private validateRegexPatterns(): void {
  const validationTests = [
    {
      pattern: EnhancedIndexManager.VERB_PREFIX_PATTERN,
      name: 'VERB_PREFIX_PATTERN',
      shouldMatch: ['debug', 'create', 'analyze'],
      // The pattern is anchored at the start only, so inflected forms such
      // as 'created' match by design. Negative samples must therefore not
      // begin with a verb.
      shouldNotMatch: ['xdebug', 'recreate', '123debug']
    },
    {
      pattern: EnhancedIndexManager.VERB_SUFFIX_PATTERN,
      name: 'VERB_SUFFIX_PATTERN',
      shouldMatch: ['simplify', 'organize', 'automate'],
      shouldNotMatch: ['simple', 'organ', 'auto']
    },
    {
      pattern: EnhancedIndexManager.NOUN_SUFFIX_PATTERN,
      name: 'NOUN_SUFFIX_PATTERN',
      shouldMatch: ['documentation', 'management', 'happiness'],
      // 'document' ends in the noun suffix 'ment' and can never pass here,
      // so a suffix-free sample is used instead.
      shouldNotMatch: ['docs', 'manage', 'happy']
    }
  ];

  let mismatches = 0;
  for (const test of validationTests) {
    // Validate pattern exists
    if (!test.pattern) {
      throw new Error(`${test.name} pattern is not initialized`);
    }

    // Test expected matches
    for (const str of test.shouldMatch) {
      if (!test.pattern.test(str)) {
        mismatches++;
        logger.warn(`Pattern validation warning: ${test.name} should match '${str}' but doesn't`);
      }
    }

    // Test expected non-matches
    for (const str of test.shouldNotMatch) {
      if (test.pattern.test(str)) {
        mismatches++;
        logger.warn(`Pattern validation warning: ${test.name} should not match '${str}' but does`);
      }
    }
  }

  if (mismatches === 0) {
    logger.debug('Regex pattern validation completed successfully');
  } else {
    logger.warn('Regex pattern validation completed with warnings', { mismatches });
  }
}
/**
* Telemetry tracking infrastructure
*/
// Aggregated stats per operation name (count, total/avg/min/max duration and
// the metrics of the last sample); cleared after every report
private telemetryMetrics: Map<string, any> = new Map();
// Pending report timer; null when no report is scheduled
private telemetryTimer: NodeJS.Timeout | null = null;
/**
 * Begin timing an operation for telemetry.
 * Returns the start timestamp in milliseconds, or null when telemetry is
 * disabled or this call falls outside the configured sample rate.
 */
private startTelemetry(operationName: string): number | null {
  if (!this.isTelemetryEnabled()) {
    return null;
  }

  // Only a random fraction of operations is measured
  const sampleRate = EnhancedIndexManager.VERB_EXTRACTION_CONFIG.telemetry.sampleRate;
  const isSampled = !(Math.random() > sampleRate);
  return isSampled ? Date.now() : null;
}
/**
 * Fold one finished operation into the aggregated telemetry stats.
 * A null start time (unsampled call) or disabled telemetry makes this a
 * no-op. Also logs the sample at debug level and ensures a periodic report
 * is scheduled.
 */
private recordTelemetry(
  operationName: string,
  startTime: number | null,
  metrics: Record<string, any>
): void {
  if (!startTime || !this.isTelemetryEnabled()) {
    return;
  }

  const duration = Date.now() - startTime;

  // Create the aggregate for this operation on first use
  let stats = this.telemetryMetrics.get(operationName);
  if (!this.telemetryMetrics.has(operationName)) {
    stats = {
      count: 0,
      totalDuration: 0,
      avgDuration: 0,
      maxDuration: 0,
      minDuration: Infinity,
      lastMetrics: {},
    };
    this.telemetryMetrics.set(operationName, stats);
  }

  stats.count += 1;
  stats.totalDuration += duration;
  stats.avgDuration = stats.totalDuration / stats.count;
  stats.maxDuration = Math.max(stats.maxDuration, duration);
  stats.minDuration = Math.min(stats.minDuration, duration);
  stats.lastMetrics = { ...metrics, duration };

  // Per-sample detail is only visible at debug level
  logger.debug(`Telemetry: ${operationName}`, { duration, ...metrics });

  this.scheduleTelemetryReport();
}
/**
 * Telemetry is on when DOLLHOUSE_TELEMETRY_ENABLED is exactly 'true' or when
 * the extraction config enables it.
 */
private isTelemetryEnabled(): boolean {
  if (process.env.DOLLHOUSE_TELEMETRY_ENABLED === 'true') {
    return true;
  }
  return EnhancedIndexManager.VERB_EXTRACTION_CONFIG.telemetry.enabled;
}
/**
 * Schedule a single telemetry report after the configured metrics interval.
 *
 * Does nothing while a report is already pending. The timer is unref'd so a
 * pending report never keeps the process alive on its own, and the pending
 * handle is cleared before reporting so a failing report cannot block all
 * future scheduling.
 */
private scheduleTelemetryReport(): void {
  if (this.telemetryTimer) return;

  const timer = setTimeout(() => {
    // Clear first: the next recorded sample must be able to reschedule even
    // if reporting throws.
    this.telemetryTimer = null;
    try {
      this.reportTelemetry();
    } catch (error) {
      // Telemetry is best-effort; an exception escaping a timer callback
      // would otherwise surface as an uncaught exception.
      logger.warn('Failed to report telemetry', {
        error: error instanceof Error ? error.message : String(error)
      });
    }
  }, EnhancedIndexManager.VERB_EXTRACTION_CONFIG.telemetry.metricsInterval);

  // Diagnostics must not delay process exit (optional call guards against
  // timer implementations that lack unref()).
  timer.unref?.();
  this.telemetryTimer = timer;
}
/**
 * Log the aggregated telemetry collected since the last report and reset it.
 * Nothing is logged when no metrics were recorded.
 */
private reportTelemetry(): void {
  const collected = this.telemetryMetrics;
  if (collected.size === 0) {
    return;
  }

  // Snapshot the map into a plain object so it can be logged as one payload
  const timestamp = new Date().toISOString();
  const metrics = Object.fromEntries(collected);
  logger.info('Telemetry Report', { timestamp, metrics });

  // Future: Send to telemetry endpoint if configured
  // if (process.env.DOLLHOUSE_TELEMETRY_ENDPOINT) {
  //   this.sendTelemetryToEndpoint(report);
  // }

  // Start the next reporting window from scratch
  collected.clear();
}
/**
* Write index data to YAML file on disk
* Private implementation detail
*/
private async writeToFile(index: EnhancedIndex): Promise<void> {
  try {
    // The target directory may not exist yet (e.g. first run).
    const parentDir = path.dirname(this.indexPath);
    await fs.mkdir(parentDir, { recursive: true });

    // Serialize to YAML; key order is kept as built (sortKeys: false).
    const dumpOptions = {
      indent: 2,
      lineWidth: 120,
      noRefs: true,
      sortKeys: false
    };
    const yamlContent = yamlDump(index, dumpOptions);

    // Refuse to write content for which the Unicode validator reports issues.
    // Note that the original (not the normalized) text is what gets written.
    const { detectedIssues } = UnicodeValidator.normalize(yamlContent);
    if (detectedIssues && detectedIssues.length > 0) {
      throw new Error(`Unicode issues in index: ${detectedIssues.join(', ')}`);
    }

    await fs.writeFile(this.indexPath, yamlContent, 'utf-8');
    logger.debug('Index saved to file', { path: this.indexPath });
  } catch (error) {
    // Log for diagnostics, then let the caller decide how to react.
    logger.error('Failed to save index', error);
    throw error;
  }
}
/**
* Check if index needs rebuilding
*/
private async needsRebuild(): Promise<boolean> {
  const toMinutes = (ms: number): number => Math.round(ms / 60000);
  try {
    // Any stat failure is treated the same as "file does not exist".
    const fileStats = await fs.stat(this.indexPath).catch(() => null);
    if (!fileStats) {
      logger.info('Enhanced index file does not exist, rebuild needed');
      return true;
    }

    // Staleness is judged from the file's mtime, before looking at in-memory state.
    const ttlMs = this.TTL_MS;
    const fileAge = Date.now() - fileStats.mtime.getTime();
    const isStale = fileAge > ttlMs;
    if (isStale) {
      logger.info('Enhanced index file is stale', {
        ageMinutes: toMinutes(fileAge),
        ttlMinutes: toMinutes(ttlMs)
      });
      return true;
    }

    // The file is fresh: either it is already loaded, or it can be loaded
    // from disk. Neither case needs a rebuild; only the log message differs.
    const message = this.index
      ? 'Enhanced index is current, no rebuild needed'
      : 'Enhanced index not in memory but file is fresh, will load from file';
    logger.debug(message);
    return false;
  } catch (error) {
    // When in doubt, rebuild.
    logger.error('Error checking if rebuild needed', error);
    return true;
  }
}
/**
* Update specific elements in the index
*/
public async updateElements(elementNames: string[], options: IndexOptions = {}): Promise<void> {
  // Caller options are spread first so updateOnly/preserveCustom below always win.
  const incrementalOptions: IndexOptions = {
    ...options,
    updateOnly: elementNames,
    preserveCustom: true
  };
  await this.getIndex(incrementalOptions);
}
/**
* Add or update a relationship between elements
*/
public async addRelationship(
  fromElement: string,
  toElement: string,
  relationship: Relationship
): Promise<void> {
  // Adds `relationship` to `fromElement`, replacing any existing entry of the
  // same relationship type whose `element` equals `toElement`, then persists
  // the index. `fromElement` is matched by name across all element types; the
  // first type that owns it wins.
  const index = await this.getIndex();
  const relType = relationship.type || 'related_to';

  // Own-property lookup only: a plain `elements[fromElement]` would also match
  // inherited keys such as "constructor" and attach data to Object itself.
  let source: ElementDefinition | undefined;
  for (const elements of Object.values(index.elements)) {
    if (Object.prototype.hasOwnProperty.call(elements, fromElement) && elements[fromElement]) {
      source = elements[fromElement];
      break;
    }
  }

  if (!source) {
    // Previously a silent no-op; surface it so callers can diagnose bad ids.
    logger.warn('Cannot add relationship: source element not found in index', {
      fromElement,
      toElement,
      type: relType
    });
    return;
  }

  if (!source.relationships) {
    source.relationships = {};
  }
  if (!source.relationships[relType]) {
    source.relationships[relType] = [];
  }
  const bucket = source.relationships[relType];

  // Add or update: one entry per target element within a relationship type.
  const existing = bucket.findIndex(r => r.element === toElement);
  if (existing >= 0) {
    bucket[existing] = relationship;
  } else {
    bucket.push(relationship);
  }

  index.metadata.last_updated = new Date().toISOString();
  await this.writeToFile(index);
}
/**
* Add custom extension data
*/
public async addExtension(key: string, data: any): Promise<void> {
  const index = await this.getIndex();
  // Create the extensions container on first use, then store (or overwrite) the entry.
  const extensions = index.extensions || (index.extensions = {});
  extensions[key] = data;
  // Stamp the change and write the whole index back to disk.
  index.metadata.last_updated = new Date().toISOString();
  await this.writeToFile(index);
}
/**
* Persist the current in-memory index to disk
* Public method for tests and external callers to save current state
*/
public async persist(): Promise<void> {
  const loaded = this.index;
  if (loaded) {
    // Write the in-memory index as-is; nothing is reloaded or rebuilt here.
    await this.writeToFile(loaded);
    return;
  }
  throw new Error('No index loaded to persist');
}
/**
* Get elements by action verb
* Tracks usage metrics for trigger optimization
*/
public async getElementsByAction(verb: string): Promise<string[]> {
  // Returns the element list registered for `verb`, or an empty array when
  // the verb has no triggers. Every call is recorded as trigger usage.
  const index = await this.getIndex();

  // Track trigger usage metrics (failures are handled inside trackTriggerUsage).
  await this.trackTriggerUsage(verb);

  // Own-property lookup: `verb` is caller-supplied, and a plain index access
  // would return inherited members (e.g. "constructor", "toString") that are
  // functions, not string arrays.
  const triggers = index.action_triggers;
  if (!triggers || !Object.prototype.hasOwnProperty.call(triggers, verb)) {
    return [];
  }
  const elements = triggers[verb];
  // A hand-edited YAML file could hold a non-array here; never leak it to callers.
  return Array.isArray(elements) ? elements : [];
}
/**
* Track trigger usage for optimization metrics
* Supports batching for high-volume scenarios to reduce disk writes
*
* @param trigger - The trigger verb to track
* @param immediate - Force immediate write (bypass batching)
*/
private async trackTriggerUsage(trigger: string, immediate: boolean = false): Promise<void> {
  try {
    // Count this use in the in-memory batch.
    const previousCount = this.metricsBatch.get(trigger) || 0;
    this.metricsBatch.set(trigger, previousCount + 1);

    // Flush right away when asked to, or once enough distinct triggers are batched.
    const batchIsFull = this.metricsBatch.size >= this.METRICS_BATCH_SIZE;
    if (immediate || batchIsFull) {
      await this.flushMetricsBatch();
      return;
    }

    // Otherwise make sure exactly one deferred flush is pending.
    if (this.metricsFlushTimer) {
      return;
    }
    const flushLater = (): void => {
      this.flushMetricsBatch().catch(error => {
        logger.warn('Failed to flush metrics batch', { error });
      });
    };
    this.metricsFlushTimer = setTimeout(flushLater, this.METRICS_FLUSH_INTERVAL);
  } catch (error) {
    // Metrics are best-effort: never fail the calling operation because of them.
    logger.warn('Failed to track trigger usage', { trigger, error });
  }
}
/**
* Flush batched metrics to disk
* Combines multiple metric updates into a single disk write for efficiency
*/
private async flushMetricsBatch(): Promise<void> {
  // Applies all batched trigger counts to index.metadata.trigger_metrics and
  // writes the index to disk once. Errors are logged, never thrown; the batch
  // is restored on failure so the counts are retried on a later flush.

  // Retire the timer handle first, even when there is nothing to flush.
  // Otherwise a fired timer that finds an empty batch would leave a stale
  // handle behind and trackTriggerUsage would never schedule another flush.
  if (this.metricsFlushTimer) {
    clearTimeout(this.metricsFlushTimer);
    this.metricsFlushTimer = null;
  }
  if (this.metricsBatch.size === 0) {
    return;
  }

  // Take ownership of the current batch synchronously, before the first
  // await. Triggers tracked while this flush is awaiting go into the (now
  // empty) live batch instead of being wiped by a final clear(), and an
  // overlapping flush cannot apply the same counts twice.
  const pending = new Map(this.metricsBatch);
  this.metricsBatch.clear();

  const hasOwn = (record: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(record, key);

  try {
    const index = await this.getIndex();

    // Initialize trigger metrics if not present
    if (!index.metadata.trigger_metrics) {
      index.metadata.trigger_metrics = {
        usage_count: {},
        last_used: {},
        first_used: {},
        daily_usage: {}
      };
    }
    const metrics = index.metadata.trigger_metrics;

    // One clock reading for both the timestamp and the (UTC) day key.
    const nowDate = new Date();
    const now = nowDate.toISOString();
    const today = now.split('T')[0];

    if (!metrics.daily_usage[today]) {
      metrics.daily_usage[today] = {};
    }
    const todayUsage = metrics.daily_usage[today];

    for (const [trigger, count] of pending) {
      // Own-property reads: trigger names come from callers and may collide
      // with inherited keys such as "constructor".
      const previousTotal = hasOwn(metrics.usage_count, trigger) ? metrics.usage_count[trigger] || 0 : 0;
      metrics.usage_count[trigger] = previousTotal + count;

      metrics.last_used[trigger] = now;
      if (!hasOwn(metrics.first_used, trigger) || !metrics.first_used[trigger]) {
        metrics.first_used[trigger] = now;
      }

      const previousToday = hasOwn(todayUsage, trigger) ? todayUsage[trigger] || 0 : 0;
      todayUsage[trigger] = previousToday + count;

      logger.debug('Flushing batched metrics', {
        trigger,
        batch_count: count,
        total_uses: metrics.usage_count[trigger]
      });
    }

    // Keep only the last 30 days of daily usage. Day keys are UTC dates, so
    // the cutoff is computed in UTC too; ISO dates compare correctly as strings.
    const cutoffDate = new Date(nowDate);
    cutoffDate.setUTCDate(cutoffDate.getUTCDate() - 30);
    const cutoff = cutoffDate.toISOString().split('T')[0];
    for (const date of Object.keys(metrics.daily_usage)) {
      if (date < cutoff) {
        delete metrics.daily_usage[date];
      }
    }

    index.metadata.last_updated = now;
    await this.writeToFile(index);

    let totalUpdates = 0;
    for (const count of pending.values()) {
      totalUpdates += count;
    }
    logger.info('Metrics batch flushed', {
      triggers_updated: pending.size,
      total_updates: totalUpdates
    });
  } catch (error) {
    logger.error('Failed to flush metrics batch', { error });
    // Put the counts back (merged with anything tracked meanwhile) so they are
    // retried on the next flush.
    // NOTE(review): if the failure happened in writeToFile, the in-memory index
    // may already contain these counts, so a retry could count them twice —
    // confirm whether getIndex() returns the shared cached object.
    for (const [trigger, count] of pending) {
      this.metricsBatch.set(trigger, (this.metricsBatch.get(trigger) || 0) + count);
    }
  }
}
/**
* Get comprehensive trigger usage metrics for optimization analysis
*
* @returns Promise resolving to an array of trigger metric objects, sorted by
* usage_count (descending). Each object contains:
* - trigger: the trigger word/verb
* - usage_count: total number of times this trigger has been used
* - last_used: ISO timestamp of most recent usage
* - first_used: ISO timestamp of first recorded usage
* - daily_average: average usage per day, counting only the days recorded in
*   daily_usage on which the trigger was used
* - trend: 'increasing' | 'stable' | 'decreasing', comparing usage in the 3 most
*   recent days against usage 4-6 days ago (within +/-20% counts as stable)
*
* @example
* const metrics = await indexManager.getTriggerMetrics();
* // Returns: [
* // { trigger: 'debug', usage_count: 45, trend: 'increasing', ... },
* // { trigger: 'analyze', usage_count: 32, trend: 'stable', ... }
* // ]
*
* @public
* @since 1.9.9
*/
public async getTriggerMetrics(): Promise<{
  trigger: string;
  usage_count: number;
  last_used: string;
  first_used: string;
  daily_average: number;
  trend: 'increasing' | 'stable' | 'decreasing';
}[]> {
  // Builds one summary row per trigger found in trigger_metrics.usage_count,
  // sorted by usage_count (descending). Returns [] when no metrics exist yet.
  type Trend = 'increasing' | 'stable' | 'decreasing';
  type TriggerMetric = {
    trigger: string;
    usage_count: number;
    last_used: string;
    first_used: string;
    daily_average: number;
    trend: Trend;
  };

  const index = await this.getIndex();
  const metrics = index.metadata.trigger_metrics;
  if (!metrics) {
    return [];
  }
  const dailyUsage = metrics.daily_usage || {};

  // Day keys for the last 7 days, index 0 = today. Computed once (they do not
  // depend on the trigger) and in UTC, matching the UTC date part that
  // toISOString() produces; local-date arithmetic can repeat or skip a key
  // around DST changes.
  const now = new Date();
  const recentDayKeys = Array.from({ length: 7 }, (_, daysAgo) => {
    const day = new Date(now);
    day.setUTCDate(day.getUTCDate() - daysAgo);
    return day.toISOString().split('T')[0];
  });
  const allDayKeys = Object.keys(dailyUsage);

  // Usage of `trigger` on `dayKey`; 0 when absent. Own-property lookup so that
  // trigger names like "constructor" do not resolve to inherited members.
  const usageOn = (dayKey: string, trigger: string): number => {
    const day = dailyUsage[dayKey];
    if (!day || !Object.prototype.hasOwnProperty.call(day, trigger)) {
      return 0;
    }
    const value = day[trigger];
    return typeof value === 'number' && Number.isFinite(value) ? value : 0;
  };
  const sum = (values: number[]): number => values.reduce((a, b) => a + b, 0);

  const results: TriggerMetric[] = [];
  for (const trigger of Object.keys(metrics.usage_count)) {
    // Trend: compare the 3 most recent days with days 4-6 ago; a change of
    // more than 20% in either direction is reported.
    const recentUsage = recentDayKeys.map(dayKey => usageOn(dayKey, trigger));
    const olderWindow = sum(recentUsage.slice(4, 7));
    const newerWindow = sum(recentUsage.slice(0, 3));
    let trend: Trend = 'stable';
    if (newerWindow > olderWindow * 1.2) {
      trend = 'increasing';
    } else if (newerWindow < olderWindow * 0.8) {
      trend = 'decreasing';
    }

    // Daily average over the days on which the trigger was actually used.
    let totalDailyUsage = 0;
    let daysWithUsage = 0;
    for (const dayKey of allDayKeys) {
      const used = usageOn(dayKey, trigger);
      if (used) {
        totalDailyUsage += used;
        daysWithUsage++;
      }
    }

    results.push({
      trigger,
      usage_count: metrics.usage_count[trigger],
      last_used: metrics.last_used[trigger],
      first_used: metrics.first_used[trigger],
      daily_average: daysWithUsage > 0 ? totalDailyUsage / daysWithUsage : 0,
      trend
    });
  }

  // Sort by usage count (descending)
  return results.sort((a, b) => b.usage_count - a.usage_count);
}
/**
* Export trigger metrics for external analytics systems
* Provides data in a format suitable for analytics platforms
*
* @param format - Export format ('json' | 'csv' | 'prometheus')
* @returns Formatted metrics data
*
* @example
* // Export for Prometheus monitoring
* const prometheusMetrics = await indexManager.exportMetrics('prometheus');
*
* // Export as CSV for data analysis
* const csvData = await indexManager.exportMetrics('csv');
*/
public async exportMetrics(format: 'json' | 'csv' | 'prometheus' = 'json'): Promise<string> {
  // Serializes the rows from getTriggerMetrics() as JSON (default), CSV, or
  // Prometheus text exposition format. Unknown formats fall back to JSON.
  const metrics = await this.getTriggerMetrics();

  // Trigger names originate from caller-supplied verbs, so they must be
  // escaped before being embedded in a structured text format.
  // CSV (RFC 4180): a double quote inside a quoted field is written as "".
  const csvQuoted = (value: unknown): string => `"${String(value).replace(/"/g, '""')}"`;
  // Prometheus label values: backslash, double quote and line feed are escaped.
  const promLabel = (value: unknown): string =>
    String(value)
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(/\n/g, '\\n');
  // Every line, including the last, is newline-terminated.
  const joinLines = (lines: string[]): string => lines.map(line => `${line}\n`).join('');

  switch (format) {
    case 'csv': {
      const header = 'trigger,usage_count,last_used,first_used,daily_average,trend';
      const rows = metrics.map(metric =>
        [
          csvQuoted(metric.trigger),
          metric.usage_count,
          csvQuoted(metric.last_used),
          csvQuoted(metric.first_used),
          metric.daily_average.toFixed(2),
          csvQuoted(metric.trend)
        ].join(',')
      );
      return joinLines([header, ...rows]);
    }
    case 'prometheus': {
      // Sample timestamps are milliseconds since the epoch.
      const timestamp = Date.now();
      const lines: string[] = [
        '# HELP enhanced_index_trigger_usage Total usage count for each trigger',
        '# TYPE enhanced_index_trigger_usage counter'
      ];
      for (const metric of metrics) {
        lines.push(
          `enhanced_index_trigger_usage{trigger="${promLabel(metric.trigger)}",trend="${promLabel(metric.trend)}"} ${metric.usage_count} ${timestamp}`
        );
      }
      // Blank line between the two metric families.
      lines.push(
        '',
        '# HELP enhanced_index_trigger_daily_avg Average daily usage for each trigger',
        '# TYPE enhanced_index_trigger_daily_avg gauge'
      );
      for (const metric of metrics) {
        lines.push(
          `enhanced_index_trigger_daily_avg{trigger="${promLabel(metric.trigger)}"} ${metric.daily_average.toFixed(2)} ${timestamp}`
        );
      }
      return joinLines(lines);
    }
    case 'json':
    default: {
      return JSON.stringify({
        timestamp: new Date().toISOString(),
        metrics,
        summary: {
          total_triggers: metrics.length,
          total_usage: metrics.reduce((sum, m) => sum + m.usage_count, 0),
          trending_up: metrics.filter(m => m.trend === 'increasing').length,
          trending_down: metrics.filter(m => m.trend === 'decreasing').length
        }
      }, null, 2);
    }
  }
}
/**
* Search for elements using enhanced criteria
*/
public async searchEnhanced(criteria: {
  verbs?: string[];
  keywords?: string[];
  type?: string;
  hasRelationships?: boolean;
}): Promise<ElementDefinition[]> {
  // Returns every element that satisfies ALL supplied criteria:
  //   type             - element type must equal it
  //   verbs            - at least one of the element's action verbs is listed
  //   keywords         - at least one of the element's search keywords is listed
  //   hasRelationships - element has at least one relationship type recorded
  // Omitted criteria do not constrain the result.
  const index = await this.getIndex();
  const results: ElementDefinition[] = [];

  for (const [type, elements] of Object.entries(index.elements)) {
    // Filter by type if specified
    if (criteria.type && type !== criteria.type) continue;

    for (const element of Object.values(elements)) {
      // A requested verb filter must be satisfied by the element. An element
      // without actions has no verbs and therefore cannot match; previously
      // such elements skipped the filter and were returned as matches.
      if (criteria.verbs) {
        const elementVerbs = element.actions
          ? Object.values(element.actions).map(a => a.verb)
          : [];
        if (!criteria.verbs.some(v => elementVerbs.includes(v))) continue;
      }

      // Same rule for keywords: no keywords on the element means no match.
      if (criteria.keywords) {
        const elementKeywords = element.search?.keywords || [];
        if (!criteria.keywords.some(k => elementKeywords.includes(k))) continue;
      }

      // Relationship requirement: at least one relationship type key present.
      if (criteria.hasRelationships) {
        const hasAny = !!element.relationships &&
          Object.keys(element.relationships).length > 0;
        if (!hasAny) continue;
      }

      results.push(element);
    }
  }
  return results;
}
/**
* Calculate semantic relationships between elements using NLP
* Optimized for large numbers of elements
*/
private async calculateSemanticRelationships(index: EnhancedIndex): Promise<void> {
  // Two phases:
  //  1. For every element, build its analysis text and store entropy and
  //     distinct-term count under element.semantic.
  //  2. Compute pairwise similarity, using the full matrix for small indexes
  //     and the sampled strategy for large ones.
  // Mutates `index` in place. Returns early (after a warning) if phase 1
  // exceeds the configured circuit-breaker time.
  const startTime = Date.now();
  const config = this.config.getConfig();
  const MAX_EXECUTION_TIME = config.performance.circuitBreakerTimeoutMs;

  // Hard safety limits: whatever the configuration asks for is capped here.
  const MAX_SAFE_ELEMENTS = 50; // Hard safety limit for full matrix
  const MAX_SAFE_COMPARISONS = 500; // Hard safety limit for total comparisons
  const safeConfig = {
    ...config,
    performance: {
      ...config.performance,
      maxElementsForFullMatrix: Math.min(config.performance.maxElementsForFullMatrix, MAX_SAFE_ELEMENTS),
      maxSimilarityComparisons: Math.min(config.performance.maxSimilarityComparisons, MAX_SAFE_COMPARISONS)
    }
  };

  const elementCount = Object.values(index.elements)
    .reduce((sum, elements) => sum + Object.keys(elements).length, 0);

  // Decide the strategy once, from the capped limits, so the work performed
  // and the strategy reported in the final log cannot disagree.
  const useFullMatrix = elementCount <= safeConfig.performance.maxElementsForFullMatrix;

  logger.info('Starting semantic relationship calculation', {
    elementCount,
    maxForFullMatrix: safeConfig.performance.maxElementsForFullMatrix
  });

  // First pass: per-element text and entropy
  const elementTexts = new Map<string, string>();
  for (const [elementType, elements] of Object.entries(index.elements)) {
    for (const [name, element] of Object.entries(elements)) {
      if (Date.now() - startTime > MAX_EXECUTION_TIME) {
        logger.warn('Semantic relationship calculation timeout', {
          elapsed: Date.now() - startTime,
          processed: elementTexts.size
        });
        return;
      }

      // Combine relevant text fields for analysis
      const fullText = [
        element.core.name,
        element.core.description || '',
        ...(element.search?.keywords || []),
        ...(element.search?.tags || []),
        ...(element.search?.triggers || [])
      ].join(' ');
      elementTexts.set(`${elementType}:${name}`, fullText);

      if (!element.semantic) {
        element.semantic = {};
      }
      element.semantic.entropy = this.nlpScoring.calculateEntropy(fullText);
      // Count DISTINCT terms (case-insensitive, longer than one character);
      // the previous code counted every token, duplicates included.
      const terms = fullText.toLowerCase().split(/\s+/).filter(t => t.length > 1);
      element.semantic.unique_terms = new Set(terms).size;
    }
  }

  // Second pass: pairwise similarity
  const keys = Array.from(elementTexts.keys());
  if (useFullMatrix) {
    // Small dataset: Calculate all relationships
    await this.calculateFullMatrix(index, elementTexts, keys, safeConfig);
  } else {
    // Large dataset: Use smart sampling and batching
    await this.calculateSampledRelationships(index, elementTexts, keys, safeConfig);
  }

  const duration = Date.now() - startTime;
  logger.info('Semantic relationships calculated', {
    elements: elementTexts.size,
    duration: `${duration}ms`,
    strategy: useFullMatrix ? 'full' : 'sampled',
    timedOut: duration > MAX_EXECUTION_TIME
  });
}
/**
* Calculate full similarity matrix for small datasets
*/
private async calculateFullMatrix(
  index: EnhancedIndex,
  elementTexts: Map<string, string>,
  keys: string[],
  config: IndexConfiguration
): Promise<void> {
  // Scores every unordered pair of `keys` exactly once. Pairs scoring above
  // config.performance.similarityThreshold get a bidirectional "similar"
  // relationship, and the Jaccard score is recorded on both elements that
  // have semantic data. Work is chunked by similarityBatchSize; between
  // chunks the circuit breaker is checked and the event loop may be yielded.
  // Mutates `index` in place.
  const threshold = config.performance.similarityThreshold;
  const MAX_EXECUTION_TIME = config.performance.circuitBreakerTimeoutMs;
  // The batch size only controls chunking. Previously it also capped how many
  // partners each element was compared with, silently truncating the matrix.
  // A zero/invalid size falls back to 1.
  const batchSize = Math.max(1, Math.floor(config.performance.similarityBatchSize) || 1);
  const YIELD_EVERY = 100; // comparisons between event-loop yields
  const startTime = Date.now();
  let comparisons = 0;
  let lastYieldAt = 0;

  for (let i = 0; i < keys.length; i++) {
    const key1 = keys[i];
    const parsed1 = parseElementIdStrict(key1);
    const text1 = elementTexts.get(key1)!;

    for (let chunkStart = i + 1; chunkStart < keys.length; chunkStart += batchSize) {
      // Circuit breaker: stop cleanly, keeping what has been stored so far.
      if (Date.now() - startTime > MAX_EXECUTION_TIME) {
        logger.warn('Full matrix calculation timeout', {
          elapsed: Date.now() - startTime,
          processed: `${i}/${keys.length}`,
          comparisons
        });
        return;
      }

      const chunkEnd = Math.min(chunkStart + batchSize, keys.length);
      for (let j = chunkStart; j < chunkEnd; j++) {
        const key2 = keys[j];
        const parsed2 = parseElementIdStrict(key2);
        const text2 = elementTexts.get(key2)!;
        const scoring = this.nlpScoring.scoreRelevance(text1, text2);
        comparisons++;

        // Store high-confidence relationships only
        if (!(scoring.combinedScore > threshold)) continue;

        const element1 = index.elements[parsed1.type]?.[parsed1.name];
        const element2 = index.elements[parsed2.type]?.[parsed2.name];
        if (!element1 || !element2) continue;

        const entropyDiff = Math.abs(
          (element1.semantic?.entropy || 0) -
          (element2.semantic?.entropy || 0)
        );

        // The relationship is symmetric: record it on both elements.
        const directions = [
          { source: element1, targetType: parsed2.type, targetName: parsed2.name },
          { source: element2, targetType: parsed1.type, targetName: parsed1.name }
        ];
        for (const { source, targetType, targetName } of directions) {
          if (!source.relationships) {
            source.relationships = {};
          }
          if (!source.relationships.similar) {
            source.relationships.similar = [];
          }
          source.relationships.similar.push(createRelationship(
            targetType,
            targetName,
            RelationshipTypes.SEMANTIC_SIMILARITY,
            scoring.combinedScore,
            {
              jaccard: scoring.jaccard,
              entropy_diff: entropyDiff
            }
          ));

          // Store Jaccard scores in semantic data (only when it exists)
          if (source.semantic) {
            if (!source.semantic.jaccard_scores) {
              source.semantic.jaccard_scores = {};
            }
            source.semantic.jaccard_scores[formatElementId(targetType, targetName)] = scoring.jaccard;
          }
        }
      }

      // Yield once at least YIELD_EVERY comparisons have run since the last
      // yield. The old `comparisons % 100 === 0` test rarely fired because the
      // counter advanced by whole batches.
      if (comparisons - lastYieldAt >= YIELD_EVERY) {
        lastYieldAt = comparisons;
        await new Promise(resolve => setImmediate(resolve));
      }
    }
  }
}
/**
* Calculate sampled relationships for large datasets
* Uses proportional sampling and keyword clustering
*/
private async calculateSampledRelationships(
  index: EnhancedIndex,
  elementTexts: Map<string, string>,
  keys: string[],
  config: IndexConfiguration
): Promise<void> {
  // Budgeted similarity search for large indexes, in two passes:
  //  1. Within keyword clusters (up to 60% of maxSimilarityComparisons),
  //     sampling about sqrt(n) partners per element.
  //  2. Cross-type sampling with the remaining budget, on a random subset of
  //     keys, with per-type sample sizes proportional to type frequency.
  // Pairs scoring above similarityThreshold are stored via storeRelationship.
  // Mutates `index` in place; stops early when the circuit-breaker time
  // (config.performance.circuitBreakerTimeoutMs) is exceeded.
  const threshold = config.performance.similarityThreshold;
  const maxComparisons = config.performance.maxSimilarityComparisons;
  const MAX_EXECUTION_TIME = config.performance.circuitBreakerTimeoutMs;
  const YIELD_EVERY = 100; // comparisons between event-loop yields
  const startTime = Date.now();

  logger.info('Using sampled relationship calculation', {
    elements: keys.length,
    maxComparisons
  });

  if (keys.length === 0) {
    logger.debug('No elements to calculate relationships for');
    return;
  }

  let comparisons = 0;
  let lastYieldAt = 0;

  const timedOut = (): boolean => Date.now() - startTime > MAX_EXECUTION_TIME;

  // Yield once enough comparisons have accumulated. The previous
  // `comparisons % 100 === 0` test fired only on exact multiples, including
  // 0, i.e. when no work had been done at all.
  const yieldIfDue = async (): Promise<void> => {
    if (comparisons - lastYieldAt >= YIELD_EVERY) {
      lastYieldAt = comparisons;
      await new Promise(resolve => setImmediate(resolve));
    }
  };

  // Score one pair and store the relationship when it clears the threshold.
  const comparePair = (key1: string, key2: string): void => {
    // Never compare an element with itself (possible if a cluster lists a key twice).
    if (key1 === key2) return;
    const parsed1 = parseElementIdStrict(key1);
    const parsed2 = parseElementIdStrict(key2);
    const scoring = this.nlpScoring.scoreRelevance(elementTexts.get(key1)!, elementTexts.get(key2)!);
    comparisons++;
    if (scoring.combinedScore > threshold) {
      this.storeRelationship(index, parsed1.type, parsed1.name, parsed2.type, parsed2.name, scoring);
    }
  };

  // ---- First pass: keyword clusters (high probability of relationships) ----
  const keywordClusters = await this.buildKeywordClusters(index, keys);
  const clusterBudgetRatio = 0.6; // 60% of budget for clusters (could be made configurable)
  const clusterBudget = Math.floor(maxComparisons * clusterBudgetRatio);

  for (const clusterKeys of keywordClusters.values()) {
    if (comparisons >= clusterBudget) break;

    for (let i = 0; i < clusterKeys.length - 1; i++) {
      if (comparisons >= clusterBudget) break;

      // Sample about sqrt(remaining) partners from the rest of the cluster.
      const remaining = clusterKeys.length - i - 1;
      const sampleSize = Math.min(
        Math.ceil(Math.sqrt(remaining)),
        config.sampling.clusterSampleLimit
      );
      const candidateIndices = Array.from({ length: remaining }, (_, offset) => i + offset + 1);

      for (const j of this.randomSample(candidateIndices, sampleSize)) {
        if (comparisons >= clusterBudget) break;
        comparePair(clusterKeys[i], clusterKeys[j]);
      }
    }

    if (timedOut()) {
      logger.warn('Sampled relationship calculation timeout', {
        elapsed: Date.now() - startTime,
        phase: 'cluster',
        comparisons
      });
      return;
    }
    await yieldIfDue();
  }

  // Actual number of cluster comparisons (the budget is only an upper bound).
  const clusterComparisonsDone = comparisons;

  // ---- Second pass: proportional cross-type sampling ----
  const crossTypeBudget = maxComparisons - comparisons;
  if (crossTypeBudget <= 0) {
    logger.debug('Skipping cross-type sampling, comparison budget exhausted', {
      comparisons,
      maxComparisons
    });
    return;
  }

  // Group keys by element type (keys that fail to parse are ignored).
  const elementsByType = new Map<string, string[]>();
  for (const key of keys) {
    const parsed = parseElementId(key);
    if (!parsed) continue;
    const bucket = elementsByType.get(parsed.type);
    if (bucket) {
      bucket.push(key);
    } else {
      elementsByType.set(parsed.type, [key]);
    }
  }

  // Per-type sample size: sqrt of the comparisons allocated to the type, where
  // the allocation is proportional to the type's share of all keys (minimum 1).
  const totalElements = keys.length;
  const typeCounts = new Map<string, number>();
  const typeSampleSizes = new Map<string, number>();
  for (const [type, typeKeys] of elementsByType) {
    const proportion = typeKeys.length / totalElements;
    const allocatedComparisons = Math.max(1, Math.floor(crossTypeBudget * proportion));
    typeCounts.set(type, typeKeys.length);
    typeSampleSizes.set(type, Math.ceil(Math.sqrt(allocatedComparisons)));
  }

  logger.debug('Proportional sampling distribution', {
    typeCounts: Object.fromEntries(typeCounts),
    sampleSizes: Object.fromEntries(typeSampleSizes)
  });

  // Only a limited subset of keys acts as the left-hand side, so the pass
  // cannot degenerate into (all keys x all types x sample size) comparisons.
  const maxKeysToProcess = Math.min(
    Math.ceil(Math.sqrt(keys.length)),
    Math.ceil(crossTypeBudget / typeSampleSizes.size)
  );
  const sampledKeys1 = this.randomSample(keys, maxKeysToProcess);

  logger.debug('Cross-type sampling with limited key set', {
    totalKeys: keys.length,
    sampledKeys: sampledKeys1.length,
    maxKeysToProcess
  });

  for (const key1 of sampledKeys1) {
    if (comparisons >= maxComparisons) break;

    for (const [type, typeKeys] of elementsByType) {
      if (comparisons >= maxComparisons) break;

      const sampleSize = typeSampleSizes.get(type) || 1;
      const partners = this.randomSample(typeKeys, Math.min(sampleSize, typeKeys.length))
        .filter(k => k !== key1);

      for (const key2 of partners) {
        if (comparisons >= maxComparisons) break;
        comparePair(key1, key2);
      }
    }

    if (timedOut()) {
      logger.warn('Sampled relationship calculation timeout', {
        elapsed: Date.now() - startTime,
        phase: 'cross-type',
        comparisons
      });
      return;
    }
    await yieldIfDue();
  }

  // Report what was actually done in each pass. Previously the cross-type
  // figure was derived from the cluster budget and could be wrong or negative.
  logger.info('Sampled relationships calculated', {
    comparisons,
    maxComparisons,
    clusterComparisons: clusterComparisonsDone,
    crossTypeComparisons: comparisons - clusterComparisonsDone
  });
}
/**
* Build keyword clusters for first-pass relationship discovery
*/
private async buildKeywordClusters(
  index: EnhancedIndex,
  keys: string[]
): Promise<Map<string, string[]>> {
  // Groups element keys by each lower-cased search keyword/tag they carry and
  // returns only the "significant" groups: shared by at least 2 elements but
  // by no more than 50% of all elements. Each element appears at most once
  // per cluster.
  const clusters = new Map<string, string[]>();

  for (const key of keys) {
    const parsed = parseElementIdStrict(key);
    const element = index.elements[parsed.type]?.[parsed.name];
    // Skip keys that no longer resolve to an element instead of crashing.
    if (!element) continue;

    // A term listed as both keyword and tag (or repeated, or differing only
    // in case) must add the element once. Duplicates would inflate the
    // cluster size and lead to an element being compared with itself.
    const seenForElement = new Set<string>();
    const terms = [
      ...(element.search?.keywords || []),
      ...(element.search?.tags || [])
    ];
    for (const term of terms) {
      // Index data is loaded from YAML; ignore entries that are not strings.
      if (typeof term !== 'string') continue;
      const normalized = term.toLowerCase();
      if (seenForElement.has(normalized)) continue;
      seenForElement.add(normalized);

      const members = clusters.get(normalized);
      if (members) {
        members.push(key);
      } else {
        clusters.set(normalized, [key]);
      }
    }
  }

  // Keep only significant clusters (at least 2 elements, at most 50% of all elements)
  const maxFrequency = Math.floor(keys.length * 0.5);
  const significantClusters = new Map<string, string[]>();
  let largestCluster = 0; // stays 0 when there are none (Math.max() of nothing is -Infinity)
  for (const [keyword, elementKeys] of clusters) {
    if (elementKeys.length >= 2 && elementKeys.length <= maxFrequency) {
      significantClusters.set(keyword, elementKeys);
      largestCluster = Math.max(largestCluster, elementKeys.length);
    }
  }

  logger.debug('Keyword clusters built', {
    totalClusters: clusters.size,
    significantClusters: significantClusters.size,
    largestCluster
  });
  return significantClusters;
}
/**
* Store a bidirectional relationship between elements
*/
private storeRelationship(
  index: EnhancedIndex,
  type1: string,
  name1: string,
  type2: string,
  name2: string,
  scoring: any
): void {
  // Records a SEMANTIC_SIMILARITY relationship in both directions between
  // (type1, name1) and (type2, name2) under relationships.similar. An entry
  // is added on a side only if that side does not already list the other
  // element. `scoring` is read for `combinedScore` and `jaccard`.
  const element1 = index.elements[type1]?.[name1];
  const element2 = index.elements[type2]?.[name2];

  // Same guard as calculateFullMatrix: a missing element is skipped, not a crash.
  if (!element1 || !element2) {
    logger.debug('Skipping relationship: element not found in index', {
      from: formatElementId(type1, name1),
      to: formatElementId(type2, name2)
    });
    return;
  }
  // An element is never "similar" to itself.
  if (element1 === element2) {
    return;
  }

  // Symmetric, so compute once for both directions.
  const entropyDiff = Math.abs(
    (element1.semantic?.entropy || 0) -
    (element2.semantic?.entropy || 0)
  );

  // Add source -> target unless source already lists target.
  const link = (source: ElementDefinition, targetType: string, targetName: string): void => {
    if (!source.relationships) {
      source.relationships = {};
    }
    if (!source.relationships.similar) {
      source.relationships.similar = [];
    }
    const similar = source.relationships.similar;
    const targetId = formatElementId(targetType, targetName);
    if (similar.some(r => r.element === targetId)) {
      return;
    }
    similar.push(createRelationship(
      targetType,
      targetName,
      RelationshipTypes.SEMANTIC_SIMILARITY,
      scoring.combinedScore,
      {
        jaccard: scoring.jaccard,
        entropy_diff: entropyDiff
      }
    ));
  };

  link(element1, type2, name2);
  link(element2, type1, name1);
}
/**
* Random sample from array
*/
private randomSample<T>(array: T[], size: number): T[] {
  // Returns up to `size` distinct positions of `array`, chosen uniformly at
  // random, as a new array. The input is not modified. A size larger than the
  // array returns all elements in random order; a non-positive or NaN size
  // returns [].
  //
  // Uses a partial Fisher-Yates shuffle. The previous
  // `sort(() => 0.5 - Math.random())` passed an inconsistent comparator to
  // sort(), which yields a biased, engine-dependent ordering.
  const pool = [...array];
  const wanted = Math.max(0, Math.min(pool.length, Math.floor(size) || 0));
  for (let i = 0; i < wanted; i++) {
    // Pick uniformly from the not-yet-fixed tail [i, length) and swap it into place.
    const j = i + Math.floor(Math.random() * (pool.length - i));
    const picked = pool[j];
    pool[j] = pool[i];
    pool[i] = picked;
  }
  return pool.slice(0, wanted);
}
/**
* Find shortest path between two elements
*/
public async findElementPath(
  fromElement: string,
  toElement: string,
  options?: {
    relationshipTypes?: string[];
    minStrength?: number;
    maxDepth?: number;
  }
): Promise<ElementPath | null> {
  // Path finding is delegated to the relationship manager, run against the
  // current index. `options` is forwarded untouched (cast to satisfy the
  // manager's own option type).
  const currentIndex = await this.getIndex();
  const foundPath = this.relationshipManager.findPath(
    fromElement,
    toElement,
    currentIndex,
    options as any
  );
  return foundPath;
}
/**
* Get all elements connected to a given element
*/
public async getConnectedElements(
  element: string,
  options?: {
    maxDepth?: number;
    relationshipTypes?: string[];
    minStrength?: number;
  }
): Promise<Map<string, ElementPath>> {
  // Delegates the traversal to the relationship manager, using the current
  // index; `options` is forwarded untouched.
  const currentIndex = await this.getIndex();
  const connected = this.relationshipManager.getConnectedElements(
    element,
    currentIndex,
    options as any
  );
  return connected;
}
/**
* Get relationship statistics
*/
public async getRelationshipStats(): Promise<Record<string, number>> {
  // Statistics are computed by the relationship manager over the current index.
  const currentIndex = await this.getIndex();
  const stats = this.relationshipManager.getRelationshipStats(currentIndex);
  return stats;
}
/**
* Get all relationships for an element
*/
public async getElementRelationships(elementId: string): Promise<Record<string, Relationship[]>> {
  const index = await this.getIndex();
  // An unparseable id, an unknown element, and an element without
  // relationships all yield the same empty result.
  const parsed = parseElementId(elementId);
  const element = parsed
    ? index.elements[parsed.type]?.[parsed.name]
    : undefined;
  return element?.relationships || {};
}
/**
* Clean up memory by clearing caches and old data
* FIX: Added to prevent memory leaks as identified in PR review
*/
public clearMemoryCache(): void {
  const now = new Date();
  const nowMs = now.getTime();
  const sinceLastCleanupMs = nowMs - this.lastMemoryCleanup.getTime();

  // Throttle: do nothing if the configured cleanup interval has not elapsed
  // since the previous cleanup.
  const config = this.config.getConfig();
  const minCleanupIntervalMs = config.memory.cleanupIntervalMinutes * 60 * 1000;
  if (sinceLastCleanupMs < minCleanupIntervalMs) {
    return;
  }

  logger.debug('Performing memory cleanup for Enhanced Index');

  // Ask each collaborator to drop its cache, if it exposes a clearCache method
  // (NLP scoring, verb triggers, relationship manager — in that order).
  const cacheOwners = [this.nlpScoring, this.verbTriggers, this.relationshipManager];
  for (const owner of cacheOwners) {
    if (owner) {
      (owner as any).clearCache?.();
    }
  }

  // Drop the in-memory index once it is older than TTL x configured multiplier.
  const loadedAt = this.lastLoaded;
  if (this.index && loadedAt) {
    const indexAge = nowMs - loadedAt.getTime();
    const multiplier = config.memory.staleIndexMultiplier;
    const staleThreshold = this.TTL_MS * multiplier;
    if (indexAge > staleThreshold) {
      logger.debug('Clearing stale index from memory', {
        indexAge,
        staleThreshold,
        multiplier
      });
      this.index = null;
      this.lastLoaded = null;
    }
  }

  this.lastMemoryCleanup = now;
}
/**
* Start automatic memory cleanup
*/
// FIX: Use configuration for default cleanup interval
public startMemoryCleanup(intervalMs?: number): void {
  const config = this.config.getConfig();
  // A missing (or zero) interval falls back to the configured cleanup interval.
  const actualInterval = intervalMs
    ? intervalMs
    : config.memory.cleanupIntervalMinutes * 60 * 1000;

  // Restarting replaces any interval that is already running.
  const running = this.memoryCleanupInterval;
  if (running) {
    clearInterval(running);
  }

  const runCleanup = (): void => {
    this.clearMemoryCache();
  };
  this.memoryCleanupInterval = setInterval(runCleanup, actualInterval);
  logger.debug('Started automatic memory cleanup', { intervalMs: actualInterval });
}
/**
* Stop automatic memory cleanup
*/
public stopMemoryCleanup(): void {
  const running = this.memoryCleanupInterval;
  // Nothing to stop (and nothing logged) when no interval is active.
  if (!running) {
    return;
  }
  clearInterval(running);
  this.memoryCleanupInterval = null;
  logger.debug('Stopped automatic memory cleanup');
}
/**
* Clean up all resources (for testing and shutdown)
*/
public async cleanup(): Promise<void> {
  // Releases everything this instance holds: the cleanup interval, pending
  // one-shot timers, caches (subject to clearMemoryCache's own throttle), the
  // file lock, and finally the singleton slot if this instance occupies it.
  this.stopMemoryCleanup();

  // Cancel pending one-shot timers as well. Left running, their callbacks
  // would fire after cleanup (writing the index or logging from a discarded
  // instance) and keep the process alive until they do.
  // Counts still sitting in the metrics batch are not flushed here.
  if (this.metricsFlushTimer) {
    clearTimeout(this.metricsFlushTimer);
    this.metricsFlushTimer = null;
  }
  if (this.telemetryTimer) {
    clearTimeout(this.telemetryTimer);
    this.telemetryTimer = null;
  }

  this.clearMemoryCache();

  // Release file lock if held. Still best-effort, but no longer silent.
  if (this.fileLock) {
    await this.fileLock.release().catch((error: unknown) => {
      logger.debug('Failed to release index file lock during cleanup', { error });
    });
  }

  // Clear the singleton instance
  if (EnhancedIndexManager.instance === this) {
    EnhancedIndexManager.instance = null;
  }
}
/**
* Reset singleton instance (mainly for testing)
*/
public static resetInstance(): void {
  const current = this.instance;
  if (!current) {
    return;
  }
  // Cleanup is started but not awaited; any rejection is deliberately ignored.
  current.cleanup().catch(() => {});
  // The singleton slot is cleared immediately, without waiting for cleanup to finish.
  this.instance = null;
}
}