/**
* Oracle v2 Indexer
*
* Parses markdown files from ψ/memory and creates:
* 1. SQLite index (source of truth for metadata)
* 2. Chroma vectors (semantic search)
*
* Following claude-mem's granular vector pattern:
* - Split large documents into smaller chunks
* - Each principle/pattern becomes multiple vectors
* - Enable concept-based filtering
*/
import fs from 'fs';
import path from 'path';
import Database from 'better-sqlite3';
import { ChromaClient } from 'chromadb';
import type { OracleDocument, OracleMetadata, IndexerConfig } from './types.js';
export class OracleIndexer {
private db: Database.Database;
private chroma: ChromaClient;
private config: IndexerConfig;
private collection: any;
constructor(config: IndexerConfig) {
this.config = config;
this.db = new Database(config.dbPath);
this.chroma = new ChromaClient();
this.initDatabase();
}
/**
* Initialize SQLite schema
*/
private initDatabase(): void {
this.db.exec(`
CREATE TABLE IF NOT EXISTS oracle_documents (
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
source_file TEXT NOT NULL,
concepts TEXT NOT NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
indexed_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_type ON oracle_documents(type);
CREATE INDEX IF NOT EXISTS idx_source ON oracle_documents(source_file);
-- FTS5 for keyword search
CREATE VIRTUAL TABLE IF NOT EXISTS oracle_fts USING fts5(
id UNINDEXED,
content,
concepts
);
-- Consult log for tracking oracle_consult queries
CREATE TABLE IF NOT EXISTS consult_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
decision TEXT NOT NULL,
context TEXT,
principles_found INTEGER NOT NULL,
patterns_found INTEGER NOT NULL,
guidance TEXT NOT NULL,
created_at INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_consult_created ON consult_log(created_at);
-- Indexing status for tray app
CREATE TABLE IF NOT EXISTS indexing_status (
id INTEGER PRIMARY KEY CHECK (id = 1),
is_indexing INTEGER NOT NULL DEFAULT 0,
progress_current INTEGER DEFAULT 0,
progress_total INTEGER DEFAULT 0,
started_at INTEGER,
completed_at INTEGER,
error TEXT
);
-- Ensure single row exists
INSERT OR IGNORE INTO indexing_status (id, is_indexing) VALUES (1, 0);
`);
}
/**
* Update indexing status for tray app
*/
private setIndexingStatus(isIndexing: boolean, current: number = 0, total: number = 0, error?: string): void {
this.db.prepare(`
UPDATE indexing_status SET
is_indexing = ?,
progress_current = ?,
progress_total = ?,
started_at = CASE WHEN ? = 1 AND started_at IS NULL THEN ? ELSE started_at END,
completed_at = CASE WHEN ? = 0 THEN ? ELSE NULL END,
error = ?
WHERE id = 1
`).run(
isIndexing ? 1 : 0,
current,
total,
isIndexing ? 1 : 0,
Date.now(),
isIndexing ? 1 : 0,
Date.now(),
error || null
);
}
/**
* Main indexing workflow
*/
async index(): Promise<void> {
console.log('Starting Oracle indexing...');
// Set indexing status for tray app
this.setIndexingStatus(true, 0, 100);
// Clear existing data to prevent duplicates
console.log('Clearing existing index data...');
this.db.exec('DELETE FROM oracle_fts');
this.db.exec('DELETE FROM oracle_documents');
// Initialize Chroma collection (optional - skip if not available)
try {
await this.chroma.deleteCollection({ name: 'oracle_knowledge' });
this.collection = await this.chroma.getOrCreateCollection({
name: 'oracle_knowledge',
metadata: { description: 'Oracle philosophy and patterns' }
});
console.log('ChromaDB connected');
} catch (e) {
console.log('ChromaDB not available, using SQLite-only mode');
this.collection = null;
}
const documents: OracleDocument[] = [];
// Index each source type
documents.push(...await this.indexResonance());
documents.push(...await this.indexLearnings());
documents.push(...await this.indexRetrospectives());
// Store in SQLite + Chroma
await this.storeDocuments(documents);
// Mark indexing complete
this.setIndexingStatus(false, documents.length, documents.length);
console.log(`Indexed ${documents.length} documents`);
console.log('Indexing complete!');
}
/**
* Index ψ/memory/resonance/ files (identity, principles)
*/
private async indexResonance(): Promise<OracleDocument[]> {
const resonancePath = path.join(this.config.repoRoot, this.config.sourcePaths.resonance);
const files = fs.readdirSync(resonancePath).filter(f => f.endsWith('.md'));
const documents: OracleDocument[] = [];
for (const file of files) {
const filePath = path.join(resonancePath, file);
const content = fs.readFileSync(filePath, 'utf-8');
const docs = this.parseResonanceFile(file, content);
documents.push(...docs);
}
console.log(`Indexed ${documents.length} resonance documents from ${files.length} files`);
return documents;
}
/**
* Parse resonance markdown into granular documents
* Following claude-mem's pattern of splitting by sections
*/
private parseResonanceFile(filename: string, content: string): OracleDocument[] {
const documents: OracleDocument[] = [];
const sourceFile = `ψ/memory/resonance/${filename}`;
const now = Date.now();
// Split by ### headers (principles, sections)
const sections = content.split(/^###\s+/m).filter(s => s.trim());
sections.forEach((section, index) => {
const lines = section.split('\n');
const title = lines[0].trim();
const body = lines.slice(1).join('\n').trim();
if (!body) return;
// Main document for this principle/section
const id = `resonance_${filename.replace('.md', '')}_${index}`;
documents.push({
id,
type: 'principle',
source_file: sourceFile,
content: `${title}: ${body}`,
concepts: this.extractConcepts(title, body),
created_at: now,
updated_at: now
});
// Split bullet points into sub-documents (granular pattern)
const bullets = body.match(/^[-*]\s+(.+)$/gm);
if (bullets) {
bullets.forEach((bullet, bulletIndex) => {
const bulletText = bullet.replace(/^[-*]\s+/, '').trim();
documents.push({
id: `${id}_sub_${bulletIndex}`,
type: 'principle',
source_file: sourceFile,
content: bulletText,
concepts: this.extractConcepts(bulletText),
created_at: now,
updated_at: now
});
});
}
});
return documents;
}
/**
* Index ψ/memory/learnings/ files (patterns discovered)
*/
private async indexLearnings(): Promise<OracleDocument[]> {
const learningsPath = path.join(this.config.repoRoot, this.config.sourcePaths.learnings);
if (!fs.existsSync(learningsPath)) return [];
const files = fs.readdirSync(learningsPath).filter(f => f.endsWith('.md'));
const documents: OracleDocument[] = [];
for (const file of files) {
const filePath = path.join(learningsPath, file);
const content = fs.readFileSync(filePath, 'utf-8');
const docs = this.parseLearningFile(file, content);
documents.push(...docs);
}
console.log(`Indexed ${documents.length} learning documents from ${files.length} files`);
return documents;
}
/**
* Parse learning markdown into documents
*/
private parseLearningFile(filename: string, content: string): OracleDocument[] {
const documents: OracleDocument[] = [];
const sourceFile = `ψ/memory/learnings/${filename}`;
const now = Date.now();
// Extract title from frontmatter or filename
const titleMatch = content.match(/^title:\s*(.+)$/m);
const title = titleMatch ? titleMatch[1] : filename.replace('.md', '');
// Split by ## headers (patterns)
const sections = content.split(/^##\s+/m).filter(s => s.trim());
sections.forEach((section, index) => {
const lines = section.split('\n');
const sectionTitle = lines[0].trim();
const body = lines.slice(1).join('\n').trim();
if (!body) return;
const id = `learning_${filename.replace('.md', '')}_${index}`;
documents.push({
id,
type: 'learning',
source_file: sourceFile,
content: `${title} - ${sectionTitle}: ${body}`,
concepts: this.extractConcepts(sectionTitle, body),
created_at: now,
updated_at: now
});
});
// If no sections, treat whole file as one document
if (documents.length === 0) {
documents.push({
id: `learning_${filename.replace('.md', '')}`,
type: 'learning',
source_file: sourceFile,
content: content,
concepts: this.extractConcepts(title, content),
created_at: now,
updated_at: now
});
}
return documents;
}
/**
* Index ψ/memory/retrospectives/ files (session history)
*/
private async indexRetrospectives(): Promise<OracleDocument[]> {
const retroPath = path.join(this.config.repoRoot, this.config.sourcePaths.retrospectives);
if (!fs.existsSync(retroPath)) return [];
const documents: OracleDocument[] = [];
const files = this.getAllMarkdownFiles(retroPath);
for (const filePath of files) {
const content = fs.readFileSync(filePath, 'utf-8');
const relativePath = path.relative(this.config.repoRoot, filePath);
const docs = this.parseRetroFile(relativePath, content);
documents.push(...docs);
}
console.log(`Indexed ${documents.length} retrospective documents from ${files.length} files`);
return documents;
}
/**
* Recursively get all markdown files
*/
private getAllMarkdownFiles(dir: string): string[] {
const files: string[] = [];
const items = fs.readdirSync(dir);
for (const item of items) {
const fullPath = path.join(dir, item);
const stat = fs.statSync(fullPath);
if (stat.isDirectory()) {
files.push(...this.getAllMarkdownFiles(fullPath));
} else if (item.endsWith('.md')) {
files.push(fullPath);
}
}
return files;
}
/**
* Parse retrospective markdown
*/
private parseRetroFile(relativePath: string, content: string): OracleDocument[] {
const documents: OracleDocument[] = [];
const now = Date.now();
// Extract key sections (AI Diary, What I Learned, etc.)
const sections = content.split(/^##\s+/m).filter(s => s.trim());
sections.forEach((section, index) => {
const lines = section.split('\n');
const sectionTitle = lines[0].trim();
const body = lines.slice(1).join('\n').trim();
if (!body || body.length < 50) return; // Skip short sections
const filename = path.basename(relativePath, '.md');
const id = `retro_${filename}_${index}`;
documents.push({
id,
type: 'retro',
source_file: relativePath,
content: `${sectionTitle}: ${body}`,
concepts: this.extractConcepts(sectionTitle, body),
created_at: now,
updated_at: now
});
});
return documents;
}
/**
* Extract concept tags from text
* Simple keyword extraction - could be enhanced with NLP
*/
private extractConcepts(...texts: string[]): string[] {
const combined = texts.join(' ').toLowerCase();
const concepts = new Set<string>();
// Common Oracle concepts
const keywords = [
'trust', 'pattern', 'mirror', 'append', 'history', 'context',
'delete', 'behavior', 'intention', 'decision', 'human', 'external',
'brain', 'command', 'oracle', 'timestamp', 'immutable', 'preserve'
];
for (const keyword of keywords) {
if (combined.includes(keyword)) {
concepts.add(keyword);
}
}
return Array.from(concepts);
}
/**
* Store documents in SQLite + Chroma
*/
private async storeDocuments(documents: OracleDocument[]): Promise<void> {
const now = Date.now();
// Prepare statements
const insertMeta = this.db.prepare(`
INSERT OR REPLACE INTO oracle_documents
(id, type, source_file, concepts, created_at, updated_at, indexed_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
`);
const insertFts = this.db.prepare(`
INSERT OR REPLACE INTO oracle_fts (id, content, concepts)
VALUES (?, ?, ?)
`);
// Prepare for Chroma
const ids: string[] = [];
const contents: string[] = [];
const metadatas: any[] = [];
for (const doc of documents) {
// SQLite metadata
insertMeta.run(
doc.id,
doc.type,
doc.source_file,
JSON.stringify(doc.concepts),
doc.created_at,
doc.updated_at,
now
);
// SQLite FTS
insertFts.run(
doc.id,
doc.content,
doc.concepts.join(' ')
);
// Chroma vector (metadata must be primitives, not arrays)
ids.push(doc.id);
contents.push(doc.content);
metadatas.push({
type: doc.type,
source_file: doc.source_file,
concepts: doc.concepts.join(',') // Convert array to string for ChromaDB
});
}
// Batch insert to Chroma in chunks of 100 (skip if no collection)
if (!this.collection) {
console.log('Skipping Chroma indexing (SQLite-only mode)');
return;
}
const BATCH_SIZE = 100;
let chromaSuccess = true;
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
const batchIds = ids.slice(i, i + BATCH_SIZE);
const batchContents = contents.slice(i, i + BATCH_SIZE);
const batchMetadatas = metadatas.slice(i, i + BATCH_SIZE);
try {
await this.collection.upsert({
ids: batchIds,
documents: batchContents,
metadatas: batchMetadatas
});
console.log(`Chroma batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(ids.length / BATCH_SIZE)} stored`);
} catch (error) {
console.error(`Chroma batch failed:`, error);
chromaSuccess = false;
}
}
console.log(`Stored in SQLite${chromaSuccess ? ' + Chroma' : ' (Chroma failed)'}`);
}
/**
* Close database connections
*/
close(): void {
this.db.close();
}
}
/**
* CLI for running indexer
*/
const isMain = import.meta.url.endsWith('indexer.ts') || import.meta.url.endsWith('indexer.js');
if (isMain) {
const repoRoot = process.env.ORACLE_REPO_ROOT || '/Users/nat/Code/github.com/laris-co/Nat-s-Agents';
const config: IndexerConfig = {
repoRoot,
dbPath: path.join(repoRoot, 'ψ/lab/oracle-v2/oracle.db'),
chromaPath: path.join(repoRoot, 'ψ/lab/oracle-v2/chroma'),
sourcePaths: {
resonance: 'ψ/memory/resonance',
learnings: 'ψ/memory/learnings',
retrospectives: 'ψ/memory/retrospectives'
}
};
const indexer = new OracleIndexer(config);
indexer.index()
.then(() => {
console.log('Indexing complete!');
indexer.close();
})
.catch(err => {
console.error('Indexing failed:', err);
indexer.close();
process.exit(1);
});
}