// PortfolioIndexManager.ts • 29.6 kB
/**
* Portfolio Index Manager - Maps element names to file paths
*
* Solves critical issues:
* 1. submit_collection_content can't find elements by metadata name (e.g., "Safe Roundtrip Tester" -> "safe-roundtrip-tester.md")
* 2. search_collection doesn't search local portfolio content
*
* Features:
* - In-memory index mapping metadata.name → file path
* - Keywords/tags → file paths mapping
* - Element type → file paths mapping
* - Fast O(1) lookups with Maps
* - Lazy loading with 5-minute TTL cache
* - Unicode normalization for security
* - Error handling and logging
*/
import * as fs from 'fs/promises';
import * as path from 'path';
import * as yaml from 'js-yaml';
import { logger } from '../utils/logger.js';
import { ElementType } from './types.js';
import { PortfolioManager } from './PortfolioManager.js';
import { SecureYamlParser } from '../security/secureYamlParser.js';
import { UnicodeValidator } from '../security/validators/unicodeValidator.js';
import { SecurityMonitor } from '../security/securityMonitor.js';
import { ErrorHandler, ErrorCategory } from '../utils/ErrorHandler.js';
import { IndexConfigManager } from './config/IndexConfig.js';
/**
 * One indexed portfolio element: the file it was read from plus the metadata
 * that the lookup maps and search are built on.
 */
export interface IndexEntry {
/** Path of the file this entry was built from. */
filePath: string;
/** Element type whose directory the file was found in. */
elementType: ElementType;
/** Metadata taken from the file's YAML; the entry builders fall back to a filename-derived name when none is given. */
metadata: {
/** Display name; lowercased to form the key of the by-name map. */
name: string;
description?: string;
version?: string;
author?: string;
/** Lowercased to form keys of the by-tag map. */
tags?: string[];
/** Lowercased to form keys of the by-keyword map. */
keywords?: string[];
/** Lowercased to form keys of the by-trigger map. */
triggers?: string[];
category?: string;
/** Taken from `created`, falling back to `created_date`. */
created?: string;
/** Taken from `updated`, falling back to `updated_date` (and `modified` for memory files). */
updated?: string;
};
/** Modification time (`mtime`) reported by the file system when the entry was built. */
lastModified: Date;
filename: string; // Base filename without extension
}
// Extended interface for sharded memory entries
/**
 * Index entry for a memory stored as several YAML shards inside a named
 * subdirectory of a date folder.
 */
export interface ShardedMemoryIndexEntry extends IndexEntry {
shardInfo: {
/** Number of `.yaml` files found in the shard directory. */
shardCount: number;
/** Shard directory as `<date folder>/<subdirectory>`, i.e. relative to the memory element directory. */
shardDir: string;
/** Name of the file the metadata was read from: `metadata.yaml`, `<subdirectory>.yaml`, or the first shard found. */
metadataFile: string;
};
}
/**
 * The in-memory lookup tables. All string keys are stored lowercased.
 */
export interface PortfolioIndex {
/** Lowercased metadata name -> entry. A later entry with the same name replaces the earlier one. */
byName: Map<string, IndexEntry>;
/** Lowercased base filename -> entry. A later entry with the same filename replaces the earlier one. */
byFilename: Map<string, IndexEntry>;
/** Element type -> every entry of that type. */
byType: Map<ElementType, IndexEntry[]>;
/** Lowercased keyword -> entries declaring it. */
byKeyword: Map<string, IndexEntry[]>;
/** Lowercased tag -> entries declaring it. */
byTag: Map<string, IndexEntry[]>;
/** Lowercased trigger -> entries declaring it. */
byTrigger: Map<string, IndexEntry[]>;
}
/**
 * Options accepted by `findByName` and `search`. Every flag is treated as
 * enabled unless it is explicitly set to `false`.
 */
export interface SearchOptions {
/** Restrict results to a single element type. */
elementType?: ElementType;
/** `findByName` only: fall back to similarity matching when no exact match exists. */
fuzzyMatch?: boolean;
/** `search` only: maximum number of results; a falsy value means 20. */
maxResults?: number;
/** `search` only: match against keywords. */
includeKeywords?: boolean;
/** `search` only: match against tags. */
includeTags?: boolean;
/** `search` only: match against triggers. */
includeTriggers?: boolean;
/** `search` only: match against descriptions. */
includeDescriptions?: boolean;
}
/**
 * A single hit returned by `search`.
 */
export interface SearchResult {
/** The matching index entry. */
entry: IndexEntry;
/** Which field produced the hit; an entry is reported once, under the first field that matched. */
matchType: 'name' | 'filename' | 'keyword' | 'tag' | 'trigger' | 'description';
/** Fixed weight per match type: name 3, filename 2.5, keyword 2, tag 2, trigger 1.8, description 1.5. */
score: number; // For future ranking
}
export class PortfolioIndexManager {
/** The singleton, created on the first getInstance() call. */
private static instance: PortfolioIndexManager | null = null;
/** True only while getInstance() is running the constructor. */
private static instanceLock = false;
/** The current index; null until the first build and again after rebuildIndex() clears it. */
private index: PortfolioIndex | null = null;
/** When the current index finished building; null whenever `index` is null. */
private lastBuilt: Date | null = null;
/** Maximum index age in milliseconds, derived from the configured `index.ttlMinutes` when the instance is constructed. */
private readonly TTL_MS = IndexConfigManager.getInstance().getConfig().index.ttlMinutes * 60 * 1000;
/** True while a build started by buildIndex() is running. */
private isBuilding = false;
/** Promise of the running build, so concurrent callers can await it instead of starting another. */
private buildPromise: Promise<void> | null = null;
// Retry configuration for file operations
/** Default number of attempts made by retryFileOperation(). */
private readonly MAX_RETRIES = 3;
/** Delay before the first retry, in milliseconds; doubled on each further attempt. */
private readonly RETRY_DELAY_MS = 100;
/** Private so instances can only be obtained through getInstance(); only logs a debug message. */
private constructor() {
logger.debug('PortfolioIndexManager created');
}
/**
 * Run a file system operation, retrying it when it fails with an error that
 * looks transient.
 *
 * An error counts as transient when its message mentions EBUSY, EAGAIN,
 * ENOENT or ETIMEDOUT. Between attempts the wait doubles, starting at
 * RETRY_DELAY_MS.
 *
 * @param operation - The operation to run.
 * @param context - Short description used in log messages.
 * @param retries - Maximum number of attempts (defaults to MAX_RETRIES).
 * @returns The operation's result, or null once the attempts are used up or a
 *   non-transient error occurs. Errors are logged, never rethrown.
 */
private async retryFileOperation<T>(
  operation: () => Promise<T>,
  context: string,
  retries: number = this.MAX_RETRIES
): Promise<T | null> {
  const transientCodes = ['EBUSY', 'EAGAIN', 'ENOENT', 'ETIMEDOUT'];
  let attempt = 1;
  while (attempt <= retries) {
    try {
      return await operation();
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      const attemptsLeft = attempt !== retries;
      const looksTransient = transientCodes.some(code => errorMessage.includes(code));
      if (!(attemptsLeft && looksTransient)) {
        logger.warn(`File operation failed after ${attempt} attempts: ${context}`, {
          error: errorMessage,
          attempt,
          context
        });
        return null;
      }
      // Back off: 1x, 2x, 4x ... the base delay
      const delay = this.RETRY_DELAY_MS * 2 ** (attempt - 1);
      logger.debug(`Retrying file operation: ${context}`, {
        attempt,
        nextDelay: delay,
        error: errorMessage
      });
      await new Promise(resolve => setTimeout(resolve, delay));
    }
    attempt += 1;
  }
  // Only reached when `retries` allows no attempt at all
  return null;
}
/**
 * Return the shared PortfolioIndexManager, creating it on first use.
 *
 * @throws Error if called again while the constructor is still running.
 */
public static getInstance(): PortfolioIndexManager {
  const existing = this.instance;
  if (existing) {
    return existing;
  }
  if (this.instanceLock) {
    throw new Error('PortfolioIndexManager instance is being created by another thread');
  }
  try {
    this.instanceLock = true;
    const created = new PortfolioIndexManager();
    this.instance = created;
    return created;
  } finally {
    // Always release the flag, even if construction throws
    this.instanceLock = false;
  }
}
/**
 * Get the current index, building it first when it is missing or older than
 * the TTL.
 *
 * @returns The built index; never null.
 * @throws Whatever buildIndex() rejects with, or an Error when no index exists
 *   even after building.
 */
public async getIndex(): Promise<PortfolioIndex> {
  if (this.needsRebuild()) {
    await this.buildIndex();
  }
  // rebuildIndex() clears this.index synchronously, so a forced rebuild that
  // starts while this call was suspended can leave it null here. Join that
  // build (or start one) instead of handing null to the caller.
  if (!this.index) {
    await this.buildIndex();
  }
  const current = this.index;
  if (!current) {
    throw new Error('Portfolio index is unavailable after build');
  }
  return current;
}
/**
 * Find a single element by name.
 *
 * Lookup order: exact metadata name, exact base filename, then (unless
 * `options.fuzzyMatch` is false) the best fuzzy match. All comparisons are
 * case-insensitive. When `options.elementType` is set, every stage only
 * returns entries of that type.
 *
 * @param name - Name or base filename to look for.
 * @param options - `elementType` restricts the match; `fuzzyMatch: false` disables the fuzzy stage.
 * @returns The matching entry, or null when the name is empty, fails Unicode
 *   validation, or nothing matches.
 */
public async findByName(name: string, options: SearchOptions = {}): Promise<IndexEntry | null> {
  const index = await this.getIndex();
  // Normalize input for security
  const normalizedName = UnicodeValidator.normalize(name);
  if (!normalizedName.isValid) {
    logger.warn('Invalid Unicode in search name', {
      issues: normalizedName.detectedIssues
    });
    return null;
  }
  const safeName = normalizedName.normalizedContent;
  const lookupKey = safeName.toLowerCase();

  // An empty string is a substring of every name, so it would "fuzzy match"
  // an arbitrary entry. Treat blank input as no match.
  if (lookupKey.trim().length === 0) {
    logger.debug('No match found for name', { name: safeName });
    return null;
  }

  const wantedType = options.elementType;
  const hasWantedType = (entry: IndexEntry | undefined): entry is IndexEntry =>
    entry !== undefined && (!wantedType || entry.elementType === wantedType);

  // Try exact match first (case insensitive)
  const exactMatch = index.byName.get(lookupKey);
  if (hasWantedType(exactMatch)) {
    logger.debug('Found exact name match', { name: safeName, filePath: exactMatch.filePath });
    return exactMatch;
  }

  // Try filename match
  const filenameMatch = index.byFilename.get(lookupKey);
  if (hasWantedType(filenameMatch)) {
    logger.debug('Found filename match', { name: safeName, filePath: filenameMatch.filePath });
    return filenameMatch;
  }

  // byName/byFilename keep one entry per key, so an element of the requested
  // type can be shadowed by a same-named element of another type. Check the
  // per-type list before giving up on an exact match.
  if (wantedType) {
    const sameType = index.byType.get(wantedType) || [];
    const typedNameMatch = sameType.find(entry => entry.metadata.name.toLowerCase() === lookupKey);
    if (typedNameMatch) {
      logger.debug('Found exact name match', { name: safeName, filePath: typedNameMatch.filePath });
      return typedNameMatch;
    }
    const typedFilenameMatch = sameType.find(entry => entry.filename.toLowerCase() === lookupKey);
    if (typedFilenameMatch) {
      logger.debug('Found filename match', { name: safeName, filePath: typedFilenameMatch.filePath });
      return typedFilenameMatch;
    }
  }

  // Try fuzzy matching if enabled
  if (options.fuzzyMatch !== false) {
    const fuzzyMatch = this.findFuzzyMatch(safeName, index, options);
    if (fuzzyMatch) {
      logger.debug('Found fuzzy match', {
        name: safeName,
        matchName: fuzzyMatch.metadata.name,
        filePath: fuzzyMatch.filePath
      });
      return fuzzyMatch;
    }
  }

  logger.debug('No match found for name', { name: safeName });
  return null;
}
/**
 * Search the portfolio across names, filenames, keywords, tags, triggers and
 * descriptions.
 *
 * The query is lowercased and split on whitespace; a field matches when it
 * contains ANY of the tokens. Fields are scanned from highest to lowest
 * weight and each file is reported once, under the first field that matched.
 *
 * @param query - Free-text query.
 * @param options - `elementType` filters results; `maxResults` caps them
 *   (falsy means 20); the `include*` flags switch off individual fields when
 *   set to false.
 * @returns Hits sorted by descending score; empty when the query is blank or
 *   fails Unicode validation.
 */
public async search(query: string, options: SearchOptions = {}): Promise<SearchResult[]> {
  const index = await this.getIndex();
  // Normalize query for security
  const normalizedQuery = UnicodeValidator.normalize(query);
  if (!normalizedQuery.isValid) {
    logger.warn('Invalid Unicode in search query', {
      issues: normalizedQuery.detectedIssues
    });
    return [];
  }
  const safeQuery = normalizedQuery.normalizedContent.toLowerCase().trim();
  const queryTokens = safeQuery.split(/\s+/).filter(token => token.length > 0);
  if (queryTokens.length === 0) {
    return [];
  }

  const results: SearchResult[] = [];
  const seenPaths = new Set<string>();
  const maxResults = options.maxResults || 20;
  const isFull = (): boolean => results.length >= maxResults;

  // Add a hit unless the file was already reported, the cap is reached, or
  // the entry has the wrong element type.
  const addResult = (entry: IndexEntry, matchType: SearchResult['matchType'], score: number): void => {
    if (isFull() || seenPaths.has(entry.filePath)) {
      return;
    }
    if (options.elementType && entry.elementType !== options.elementType) {
      return;
    }
    seenPaths.add(entry.filePath);
    results.push({ entry, matchType, score });
  };

  // Scan a key -> entry map; stops as soon as the cap is reached.
  const scanEntries = (
    source: Map<string, IndexEntry>,
    matchType: SearchResult['matchType'],
    score: number
  ): void => {
    for (const [key, entry] of source) {
      if (isFull()) {
        return;
      }
      if (this.matchesQuery(key, queryTokens)) {
        addResult(entry, matchType, score);
      }
    }
  };

  // Scan a key -> entries map; stops as soon as the cap is reached.
  const scanEntryLists = (
    source: Map<string, IndexEntry[]>,
    matchType: SearchResult['matchType'],
    score: number
  ): void => {
    for (const [key, entries] of source) {
      if (isFull()) {
        return;
      }
      if (this.matchesQuery(key, queryTokens)) {
        for (const entry of entries) {
          addResult(entry, matchType, score);
        }
      }
    }
  };

  // 1. Search by name (highest priority)
  scanEntries(index.byName, 'name', 3);
  // 2. Search by filename
  scanEntries(index.byFilename, 'filename', 2.5);
  // 3. Search by keywords
  if (options.includeKeywords !== false) {
    scanEntryLists(index.byKeyword, 'keyword', 2);
  }
  // 4. Search by tags
  if (options.includeTags !== false) {
    scanEntryLists(index.byTag, 'tag', 2);
  }
  // 5. Search by triggers
  if (options.includeTriggers !== false) {
    scanEntryLists(index.byTrigger, 'trigger', 1.8);
  }
  // 6. Search by description
  if (options.includeDescriptions !== false) {
    for (const entry of index.byName.values()) {
      if (isFull()) {
        break;
      }
      // Metadata comes from user-written YAML, so the description may not be
      // a string (e.g. a bare number); skip such values instead of throwing.
      const description: unknown = entry.metadata.description;
      if (typeof description === 'string' && description.length > 0 &&
          this.matchesQuery(description.toLowerCase(), queryTokens)) {
        addResult(entry, 'description', 1.5);
      }
    }
  }

  // Sort by score (descending)
  results.sort((a, b) => b.score - a.score);
  logger.debug('Portfolio search completed', {
    query: safeQuery,
    resultCount: results.length,
    totalIndexed: index.byName.size
  });
  return results;
}
/**
 * List every indexed element of one type.
 *
 * @param elementType - The type to list.
 * @returns The entries recorded for that type, or an empty array when the
 *   type has no list in the index.
 */
public async getElementsByType(elementType: ElementType): Promise<IndexEntry[]> {
  const { byType } = await this.getIndex();
  const entriesOfType = byType.get(elementType);
  return entriesOfType ? entriesOfType : [];
}
/**
 * Summarize the index: number of distinct names, entry count per element
 * type, when it was last built, and whether it is already due for a rebuild.
 *
 * Calling this builds the index first when that is needed.
 */
public async getStats(): Promise<{
  totalElements: number;
  elementsByType: Record<ElementType, number>;
  lastBuilt: Date | null;
  isStale: boolean;
}> {
  const index = await this.getIndex();
  const totalElements = index.byName.size;
  const lastBuilt = this.lastBuilt;
  const isStale = this.needsRebuild();

  const elementsByType = {} as Record<ElementType, number>;
  for (const elementType of Object.values(ElementType)) {
    const entriesOfType = index.byType.get(elementType);
    elementsByType[elementType] = entriesOfType ? entriesOfType.length : 0;
  }

  return { totalElements, elementsByType, lastBuilt, isStale };
}
/**
 * Force a rebuild of the index, regardless of its age.
 *
 * If a build is already running it may have scanned the portfolio before the
 * change that prompted this call, and buildIndex() would simply join it. So
 * the running build is awaited first and a fresh one is started afterwards.
 *
 * @throws Whatever the fresh build rejects with. A failure of the earlier,
 *   superseded build is ignored here; its own caller still receives it.
 */
public async rebuildIndex(): Promise<void> {
  const inFlight = this.buildPromise;
  if (inFlight) {
    try {
      await inFlight;
    } catch {
      // Superseded by the forced build below
    }
  }
  this.index = null;
  this.lastBuilt = null;
  await this.buildIndex();
}
/**
 * Tell whether the index must be (re)built: true when there is no index yet
 * or when the current one is older than TTL_MS.
 */
private needsRebuild(): boolean {
  const builtAt = this.lastBuilt;
  if (!(this.index && builtAt)) {
    return true;
  }
  const ageMs = Date.now() - builtAt.getTime();
  return ageMs > this.TTL_MS;
}
/**
 * Build the index, making sure only one build runs at a time.
 *
 * A caller arriving while a build is running waits for that build instead of
 * starting another, and sees the same failure if it rejects.
 */
private async buildIndex(): Promise<void> {
  if (this.isBuilding) {
    // Join the build that is already running
    const running = this.buildPromise;
    if (running) {
      await running;
    }
    return;
  }

  this.isBuilding = true;
  const build = this.performBuild();
  this.buildPromise = build;
  try {
    await build;
  } finally {
    // Reset on success and on failure so the next call can start a new build
    this.isBuilding = false;
    this.buildPromise = null;
  }
}
/**
 * Perform the actual index building.
 *
 * Scans the directory of every element type into a fresh index and swaps it
 * in only once the scan has finished. Memory elements are `.yaml` files in
 * the memory directory, in `YYYY-MM-DD` date folders, or sharded into
 * subdirectories of a date folder; every other type is a flat directory of
 * `.md` files.
 *
 * Failures are contained at the smallest scope: a file that cannot be
 * indexed, or a folder that cannot be read, is logged and skipped.
 *
 * @throws A wrapped SYSTEM_ERROR when the build as a whole fails.
 */
private async performBuild(): Promise<void> {
  const startTime = Date.now();
  logger.info('Building portfolio index...');
  try {
    const portfolioManager = PortfolioManager.getInstance();
    // Initialize empty index
    const newIndex: PortfolioIndex = {
      byName: new Map(),
      byFilename: new Map(),
      byType: new Map(),
      byKeyword: new Map(),
      byTag: new Map(),
      byTrigger: new Map()
    };
    // Initialize type maps
    for (const elementType of Object.values(ElementType)) {
      newIndex.byType.set(elementType, []);
    }
    // totalFiles: candidate files seen; processedFiles: files actually indexed
    const counts = { totalFiles: 0, processedFiles: 0 };

    // Scan each element type
    for (const elementType of Object.values(ElementType)) {
      try {
        const elementDir = portfolioManager.getElementDir(elementType);
        // Check if directory exists
        try {
          await fs.access(elementDir);
        } catch {
          logger.debug(`Element directory doesn't exist: ${elementDir}`);
          continue;
        }
        // FIX #1188: Special handling for memories - scan .yaml files in date folders
        if (elementType === ElementType.MEMORY) {
          await this.indexMemoryDirectory(newIndex, elementDir, elementType, counts);
        } else {
          await this.indexMarkdownDirectory(newIndex, elementDir, elementType, counts);
        }
      } catch (error) {
        logger.error(`Failed to scan element type: ${elementType}`, {
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }

    // Update instance state
    this.index = newIndex;
    this.lastBuilt = new Date();
    const duration = Date.now() - startTime;
    logger.info('Portfolio index built successfully', {
      totalFiles: counts.totalFiles,
      processedFiles: counts.processedFiles,
      duration: `${duration}ms`,
      uniqueNames: newIndex.byName.size,
      uniqueKeywords: newIndex.byKeyword.size,
      uniqueTags: newIndex.byTag.size
    });
    // Log security event for audit trail
    SecurityMonitor.logSecurityEvent({
      type: 'PORTFOLIO_INITIALIZATION',
      severity: 'LOW',
      source: 'PortfolioIndexManager.performBuild',
      details: `Portfolio index rebuilt with ${counts.processedFiles} elements in ${duration}ms`
    });
  } catch (error) {
    ErrorHandler.logError('PortfolioIndexManager.performBuild', error);
    throw ErrorHandler.wrapError(error, 'Failed to build portfolio index', ErrorCategory.SYSTEM_ERROR);
  }
}

/**
 * Index a flat element directory: every `.md` file directly inside it.
 */
private async indexMarkdownDirectory(
  index: PortfolioIndex,
  elementDir: string,
  elementType: ElementType,
  counts: { totalFiles: number; processedFiles: number }
): Promise<void> {
  const files = await fs.readdir(elementDir);
  const mdFiles = files.filter(file => file.endsWith('.md'));
  counts.totalFiles += mdFiles.length;
  for (const file of mdFiles) {
    try {
      const filePath = path.join(elementDir, file);
      const entry = await this.createIndexEntry(filePath, elementType);
      if (entry) {
        this.addToIndex(index, entry);
        counts.processedFiles++;
      }
    } catch (error) {
      logger.warn(`Failed to index file: ${file}`, {
        elementType,
        error: error instanceof Error ? error.message : String(error)
      });
    }
  }
}

/**
 * Index the memory directory: root `.yaml` files (legacy/backup), then each
 * `YYYY-MM-DD` folder's `.yaml` files and sharded subdirectories.
 */
private async indexMemoryDirectory(
  index: PortfolioIndex,
  elementDir: string,
  elementType: ElementType,
  counts: { totalFiles: number; processedFiles: number }
): Promise<void> {
  const datePattern = /^\d{4}-\d{2}-\d{2}$/;
  const entries = await fs.readdir(elementDir, { withFileTypes: true });

  // First process any root .yaml files (legacy/backup)
  for (const rootEntry of entries) {
    if (rootEntry.isDirectory() || !rootEntry.name.endsWith('.yaml')) {
      continue;
    }
    const file = rootEntry.name;
    const filePath = path.join(elementDir, file);
    await this.indexMemoryFile(index, filePath, elementType, counts,
      'Failed to index root memory file',
      { file, path: filePath, location: 'root' });
  }

  // Then process date folders
  for (const dirEntry of entries) {
    if (!dirEntry.isDirectory() || !datePattern.test(dirEntry.name)) {
      continue;
    }
    const dateFolder = dirEntry.name;
    const folderPath = path.join(elementDir, dateFolder);

    // One unreadable folder must not abort the rest of the memory scan
    const folderEntries = await fs.readdir(folderPath, { withFileTypes: true }).catch((error: unknown) => {
      logger.warn('Failed to read memory date folder', {
        dateFolder,
        path: folderPath,
        error: error instanceof Error ? error.message : String(error)
      });
      return null;
    });
    if (!folderEntries) {
      continue;
    }

    // Process direct YAML files in date folder
    for (const fileEntry of folderEntries) {
      if (fileEntry.isDirectory() || !fileEntry.name.endsWith('.yaml')) {
        continue;
      }
      const file = fileEntry.name;
      const filePath = path.join(folderPath, file);
      await this.indexMemoryFile(index, filePath, elementType, counts,
        'Failed to index date folder memory file',
        { file, path: filePath, dateFolder, location: 'date-folder' });
    }

    // FIX #1188: Process subdirectories for sharded memories
    // Large memories are stored as shards in named subdirectories
    for (const subDirEntry of folderEntries) {
      if (subDirEntry.isDirectory()) {
        await this.indexShardedMemory(index, folderPath, dateFolder, subDirEntry.name, elementType, counts);
      }
    }
  }
}

/**
 * Index one memory YAML file, counting it as seen whether or not it can be
 * indexed. A failure is logged with the supplied message and context.
 */
private async indexMemoryFile(
  index: PortfolioIndex,
  filePath: string,
  elementType: ElementType,
  counts: { totalFiles: number; processedFiles: number },
  failureMessage: string,
  failureContext: Record<string, unknown>
): Promise<void> {
  counts.totalFiles++;
  try {
    const entry = await this.createMemoryIndexEntry(filePath, elementType);
    if (entry) {
      this.addToIndex(index, entry);
      counts.processedFiles++;
    }
  } catch (error) {
    logger.warn(failureMessage, {
      ...failureContext,
      error: error instanceof Error ? error.message : String(error),
      errorType: error instanceof Error ? error.constructor.name : typeof error
    });
  }
}

/**
 * Index one sharded memory (a subdirectory of a date folder) as a single
 * entry, tagged with the `sharded` keyword and carrying shard details.
 */
private async indexShardedMemory(
  index: PortfolioIndex,
  folderPath: string,
  dateFolder: string,
  subDir: string,
  elementType: ElementType,
  counts: { totalFiles: number; processedFiles: number }
): Promise<void> {
  const subDirPath = path.join(folderPath, subDir);

  // One unreadable shard directory must not abort the rest of the scan
  const shardFiles = await fs.readdir(subDirPath).catch((error: unknown) => {
    logger.warn('Failed to read sharded memory directory', {
      subDir,
      dateFolder,
      path: subDirPath,
      error: error instanceof Error ? error.message : String(error)
    });
    return null;
  });
  if (!shardFiles) {
    return;
  }
  const shardYamlFiles = shardFiles.filter(file => file.endsWith('.yaml'));

  // For sharded memories, look for metadata.yaml or the main file
  // If not found, use the first shard as representative
  const metadataFile = shardYamlFiles.find(f => f === 'metadata.yaml') ||
    shardYamlFiles.find(f => f === `${subDir}.yaml`) ||
    shardYamlFiles[0];
  if (!metadataFile) {
    return;
  }

  counts.totalFiles++;
  try {
    const filePath = path.join(subDirPath, metadataFile);
    const entry = await this.createMemoryIndexEntry(filePath, elementType);
    if (entry) {
      // Mark as sharded memory in metadata
      entry.metadata.keywords = entry.metadata.keywords || [];
      if (!entry.metadata.keywords.includes('sharded')) {
        entry.metadata.keywords.push('sharded');
      }
      // Create properly typed sharded entry
      const shardedEntry: ShardedMemoryIndexEntry = {
        ...entry,
        shardInfo: {
          shardCount: shardYamlFiles.length,
          shardDir: path.join(dateFolder, subDir),
          metadataFile: metadataFile
        }
      };
      this.addToIndex(index, shardedEntry);
      counts.processedFiles++;
    }
  } catch (error) {
    logger.warn(`Failed to index sharded memory`, {
      subDir,
      dateFolder,
      path: path.join(subDirPath, metadataFile),
      metadataFile,
      shardCount: shardYamlFiles.length,
      location: 'sharded-subdirectory',
      error: error instanceof Error ? error.message : String(error),
      errorType: error instanceof Error ? error.constructor.name : typeof error,
      shardFiles: shardYamlFiles.slice(0, 5) // Log first 5 shard files for context
    });
  }
}
/**
 * Create an index entry from a Markdown file with YAML frontmatter.
 *
 * The fields the index and search call string methods on (name, description,
 * tags, keywords, triggers) are validated here: frontmatter is user-written
 * YAML, so `name: 42` or `tags: [2024]` arrive as numbers. Numbers are
 * stringified, other non-string values are dropped, and a missing name falls
 * back to the base filename.
 *
 * @param filePath - Path of the `.md` file.
 * @param elementType - Type to record on the entry.
 * @returns The entry, or null when the file cannot be read or parsed (the
 *   failure is logged at debug level).
 */
private async createIndexEntry(filePath: string, elementType: ElementType): Promise<IndexEntry | null> {
  try {
    // Get file stats
    const stats = await fs.stat(filePath);
    // Read file content
    const content = await fs.readFile(filePath, 'utf-8');
    // Parse frontmatter securely
    // SECURITY NOTE: Portfolio files are locally trusted content that users
    // have deliberately created or installed. Security validation should focus
    // on BEHAVIORAL analysis during import/installation, not superficial word
    // matching in descriptions. A malicious actor would never label their
    // exploit as "dangerous" - they'd call it "helpful utility".
    // Future: Add behavioral analysis on import, not during indexing.
    const parsed = SecureYamlParser.parse(content, {
      validateContent: false, // Don't scan for words in trusted local files
      validateFields: false // Portfolio files are pre-trusted by user choice
    });
    // Extract base filename
    const filename = path.basename(filePath, '.md');

    // Strings pass through, numbers are stringified, anything else is rejected
    const toText = (value: unknown): string | undefined => {
      if (typeof value === 'string') {
        return value;
      }
      return typeof value === 'number' ? String(value) : undefined;
    };
    // Keep only the usable, non-empty items of a YAML list
    const toTextList = (value: unknown): string[] => {
      if (!Array.isArray(value)) {
        return [];
      }
      const items: string[] = [];
      for (const item of value) {
        const text = toText(item);
        if (text) {
          items.push(text);
        }
      }
      return items;
    };

    // Build metadata with defaults
    const metadata = {
      name: toText(parsed.data.name) || filename,
      description: toText(parsed.data.description),
      version: parsed.data.version,
      author: parsed.data.author,
      tags: toTextList(parsed.data.tags),
      keywords: toTextList(parsed.data.keywords),
      triggers: toTextList(parsed.data.triggers),
      category: parsed.data.category,
      created: parsed.data.created || parsed.data.created_date,
      updated: parsed.data.updated || parsed.data.updated_date
    };
    const entry: IndexEntry = {
      filePath,
      elementType,
      metadata,
      lastModified: stats.mtime,
      filename
    };
    return entry;
  } catch (error) {
    logger.debug(`Failed to create index entry for: ${filePath}`, {
      error: error instanceof Error ? error.message : String(error)
    });
    return null;
  }
}
/**
 * Create an index entry from a memory YAML file
 * FIX #1188: Special handling for memory files with different structure
 * FIX #1196: Use yaml.load for pure YAML files, not SecureYamlParser (which expects Markdown frontmatter)
 *
 * Files larger than 1MB on disk are skipped before they are read. Metadata
 * may sit at the top level, under a nested `metadata` mapping, or both; the
 * nested values win. Name, description and the list fields are kept only
 * when they are strings, because the FAILSAFE schema can still yield nested
 * mappings or sequences where a scalar is expected.
 *
 * @param filePath - Path of the `.yaml` file.
 * @param elementType - Type to record on the entry.
 * @returns The entry, or null when the file is too large, is not a YAML
 *   mapping, or cannot be read or parsed.
 */
private async createMemoryIndexEntry(filePath: string, elementType: ElementType): Promise<IndexEntry | null> {
  try {
    // Get file stats
    const stats = await fs.stat(filePath);
    // Security validation: check the size BEFORE reading, so an oversized
    // file is never loaded into memory. stats.size is in bytes.
    if (stats.size > 1048576) { // 1MB limit
      logger.warn(`Large memory file detected, skipping: ${filePath}`);
      return null;
    }
    // Read file content
    const content = await fs.readFile(filePath, 'utf-8');
    // FIX #1196: Parse pure YAML using yaml.load()
    // Memory files are pure YAML without frontmatter markers, so we can't use SecureYamlParser
    // (which is designed for Markdown files with YAML frontmatter between --- markers)
    // Using FAILSAFE_SCHEMA for security (same as MemoryManager uses)
    const rawParsed = yaml.load(content, {
      schema: yaml.FAILSAFE_SCHEMA
    });
    // Type safety: Ensure parsed result is a valid object
    if (!rawParsed || typeof rawParsed !== 'object' || Array.isArray(rawParsed)) {
      logger.warn(`Invalid YAML structure in memory file: ${filePath}`);
      return null;
    }
    const parsed = rawParsed as Record<string, any>;
    // Extract base filename
    const filename = path.basename(filePath, '.yaml');
    // Memory files can have metadata at top level OR nested under 'metadata' key
    // FIX #1196: Merge both levels, preferring nested metadata block over top-level
    // This handles mixed structures where some fields are top-level and others are nested
    const nested: unknown = parsed.metadata;
    const hasNestedMetadata = typeof nested === 'object' && nested !== null && !Array.isArray(nested);
    const metadataSource: Record<string, any> = hasNestedMetadata
      ? { ...parsed, ...(nested as Record<string, any>) } // Merge top-level with nested, nested wins
      : parsed; // No usable nested metadata, use top-level only

    // Only strings are usable as index keys
    const toText = (value: unknown): string | undefined =>
      typeof value === 'string' ? value : undefined;
    // Keep only the non-empty string items of a YAML sequence
    const toTextList = (value: unknown): string[] =>
      Array.isArray(value)
        ? value.filter((item): item is string => typeof item === 'string' && item.length > 0)
        : [];

    // Build metadata with memory-specific defaults
    const metadata = {
      name: toText(metadataSource.name) || filename.replaceAll('-', ' '),
      description: toText(metadataSource.description) || 'Memory element',
      version: metadataSource.version || '1.0.0',
      author: metadataSource.author,
      tags: toTextList(metadataSource.tags),
      keywords: toTextList(metadataSource.keywords),
      triggers: toTextList(metadataSource.triggers),
      category: metadataSource.category,
      created: metadataSource.created || metadataSource.created_date,
      updated: metadataSource.updated || metadataSource.updated_date || metadataSource.modified
    };
    const entry: IndexEntry = {
      filePath,
      elementType,
      metadata,
      lastModified: stats.mtime,
      filename
    };
    return entry;
  } catch (error) {
    logger.debug(`Failed to create memory index entry for: ${filePath}`, {
      error: error instanceof Error ? error.message : String(error)
    });
    return null;
  }
}
/**
 * Register an entry in every lookup map of the given index.
 *
 * Keys are lowercased for case-insensitive lookup. In the name and filename
 * maps a later entry with the same key replaces the earlier one. Keyword,
 * tag and trigger lists skip items that are not non-empty strings, and add
 * the entry at most once per distinct term.
 *
 * @param index - Index to add to (mutated in place).
 * @param entry - Entry to register.
 * @throws TypeError when `entry.metadata.name` or `entry.filename` is not a
 *   string; nothing has been added to the index in that case.
 */
private addToIndex(index: PortfolioIndex, entry: IndexEntry): void {
  // Normalize keys for case-insensitive lookup (both before any mutation)
  const normalizedName = entry.metadata.name.toLowerCase();
  const normalizedFilename = entry.filename.toLowerCase();

  // Add to name map
  index.byName.set(normalizedName, entry);
  // Add to filename map
  index.byFilename.set(normalizedFilename, entry);
  // Add to type map
  const typeEntries = index.byType.get(entry.elementType) || [];
  typeEntries.push(entry);
  index.byType.set(entry.elementType, typeEntries);

  // Register the entry under each distinct, usable term of one metadata list
  const addTerms = (target: Map<string, IndexEntry[]>, terms: unknown): void => {
    if (!Array.isArray(terms)) {
      return;
    }
    const seen = new Set<string>();
    for (const term of terms) {
      // Metadata comes from user-written YAML; a non-string item must not
      // abort indexing halfway through.
      if (typeof term !== 'string') {
        continue;
      }
      const key = term.toLowerCase();
      if (key.length === 0 || seen.has(key)) {
        continue;
      }
      seen.add(key);
      const existing = target.get(key);
      if (existing) {
        existing.push(entry);
      } else {
        target.set(key, [entry]);
      }
    }
  };

  // Add keywords
  addTerms(index.byKeyword, entry.metadata.keywords);
  // Add tags
  addTerms(index.byTag, entry.metadata.tags);
  // Add triggers
  addTerms(index.byTrigger, entry.metadata.triggers);
}
/**
 * Pick the indexed entry whose name or filename is most similar to the
 * search name.
 *
 * Names are examined before filenames; a candidate wins only with a score
 * above 0.3 that is strictly higher than the best so far, so the earliest
 * candidate wins ties. Entries of a type other than `options.elementType`
 * (when given) are ignored.
 *
 * @returns The best entry, or null when nothing scores above the threshold.
 */
private findFuzzyMatch(searchName: string, index: PortfolioIndex, options: SearchOptions): IndexEntry | null {
  const needle = searchName.toLowerCase();
  const minimumScore = 0.3; // Minimum similarity threshold
  let winner: IndexEntry | null = null;
  let topScore = 0;

  const consider = (candidates: Map<string, IndexEntry>): void => {
    candidates.forEach((entry, key) => {
      if (options.elementType && entry.elementType !== options.elementType) {
        return;
      }
      const similarity = this.calculateSimilarity(needle, key);
      if (similarity > minimumScore && similarity > topScore) {
        topScore = similarity;
        winner = entry;
      }
    });
  };

  consider(index.byName);
  consider(index.byFilename);
  return winner;
}
/**
 * Calculate a rough similarity score between two strings.
 *
 * - 1.0 when the strings are identical
 * - 0 when either string is blank (an empty string is a substring of every
 *   string, so it must not count as "contained")
 * - 0.8 when one string contains the other
 * - otherwise the share of distinct whitespace-separated words the two have
 *   in common, relative to the larger distinct word count
 *
 * @returns A score in the range 0..1.
 */
private calculateSimilarity(a: string, b: string): number {
  if (a === b) return 1.0;
  if (a.trim().length === 0 || b.trim().length === 0) return 0;
  if (a.includes(b) || b.includes(a)) return 0.8;

  // Compare distinct words so repeated or empty tokens cannot inflate the ratio
  const wordsA = new Set(a.split(/\s+/).filter(word => word.length > 0));
  const wordsB = new Set(b.split(/\s+/).filter(word => word.length > 0));
  let commonCount = 0;
  for (const word of wordsA) {
    if (wordsB.has(word)) {
      commonCount++;
    }
  }
  if (commonCount > 0) {
    return commonCount / Math.max(wordsA.size, wordsB.size);
  }
  return 0;
}
/**
 * Tell whether the text contains at least one of the query tokens as a
 * substring. Always false for an empty token list.
 */
private matchesQuery(text: string, queryTokens: string[]): boolean {
  for (const token of queryTokens) {
    if (text.includes(token)) {
      return true;
    }
  }
  return false;
}
}