// FactStore.js (15.7 kB)
import { promises as fs } from 'fs';
import { join, dirname } from 'path';
import { randomUUID } from 'crypto';
import { homedir } from 'os';
import { RelationshipManager } from './RelationshipManager.js';
import { SemanticIndex } from '../indexing/SemanticIndex.js';
import { FileManager } from './FileManager.js';
/**
 * File-backed store of "fact" records with in-memory lookup structures.
 *
 * Every fact is persisted as `<storageDir>/facts/<id>.json` through the
 * injected FileManager and mirrored in memory in:
 *   - `factsIndex`  : Map of fact id -> fact object
 *   - `typeIndex`   : Map of fact.type -> Set of fact ids
 *   - `domainIndex` : Map of fact.domain -> Set of fact ids
 *   - `scoreIndex`  : Map of score bucket (floor(qualityScore / 10) * 10) -> Set of fact ids
 *   - `semanticIndex`: SemanticIndex over `fact.content`
 *
 * Fact fields read by this class: `id`, `content`, `type`, `domain`, `tags`,
 * `qualityScore`, `relationships` (entries with `targetId` and `type`),
 * `accessCount`, `lastAccessed`, `createdAt`, `updatedAt`.
 */
export class FactStore {
  /**
   * @param {*} configManager - Stored on the instance; not read by this class.
   */
  constructor(configManager) {
    this.configManager = configManager;
    this.storageDir = join(homedir(), '.mcp_sequential_thinking', 'graphiti_store');
    this.factsIndex = new Map();
    this.typeIndex = new Map();
    this.domainIndex = new Map();
    this.scoreIndex = new Map();
    this.relationshipManager = null;
    this.semanticIndex = new SemanticIndex();
    this.fileManager = new FileManager();
    this.initialized = false;
  }

  /**
   * Creates the storage directories, loads persisted facts, creates the
   * RelationshipManager and builds the semantic index.
   *
   * @returns {Promise<void>}
   * @throws Re-throws any error raised while initializing (after logging it).
   */
  async initialize() {
    try {
      await this.ensureStorageDirectory();
      await this.loadExistingFacts();
      this.relationshipManager = new RelationshipManager(this);
      // Build semantic index from existing facts
      this.rebuildSemanticIndex();
      this.initialized = true;
      console.log(`FactStore initialized with ${this.factsIndex.size} facts`);
    } catch (error) {
      console.error('Failed to initialize FactStore:', error);
      throw error;
    }
  }

  /**
   * Ensures the storage root and its `facts`, `indexes` and `backups`
   * sub-directories exist.
   *
   * @returns {Promise<void>}
   * @throws Re-throws the underlying `fs.mkdir` error (after logging it).
   */
  async ensureStorageDirectory() {
    try {
      await fs.mkdir(this.storageDir, { recursive: true });
      const subdirs = ['facts', 'indexes', 'backups'];
      for (const subdir of subdirs) {
        await fs.mkdir(join(this.storageDir, subdir), { recursive: true });
      }
    } catch (error) {
      console.error('Failed to create storage directory:', error);
      throw error;
    }
  }

  /**
   * Runs the FileManager repair pass over the facts directory, then loads
   * every remaining `*.json` fact file into the in-memory indexes.
   *
   * Best effort: a file that cannot be read (or has no `id`) is skipped with
   * a warning, and a failure of the whole pass is logged but not thrown.
   *
   * @returns {Promise<void>}
   */
  async loadExistingFacts() {
    try {
      const factsDir = join(this.storageDir, 'facts');

      // First, repair any corrupted files
      console.log('Checking for corrupted fact files...');
      const repairResults = await this.fileManager.repairCorruptedFiles(factsDir);
      if (repairResults.corrupted > 0) {
        console.log(`File repair results: ${repairResults.repaired} repaired, ${repairResults.quarantined} quarantined out of ${repairResults.corrupted} corrupted files`);
      }

      // List the directory only AFTER the repair pass so the listing reflects
      // whatever the repair changed on disk (presumably renames/restores --
      // FileManager is not visible here).
      const files = await fs.readdir(factsDir).catch(() => []);

      // Now load the facts
      for (const file of files) {
        const isFactFile =
          file.endsWith('.json') && !file.includes('.corrupted') && !file.endsWith('.bak');
        if (!isFactFile) {
          continue;
        }
        try {
          const fact = await this.fileManager.safeReadJSON(join(factsDir, file));
          if (!fact || fact.id == null) {
            // Without an id the fact would be indexed under `undefined`.
            throw new Error('fact file has no id');
          }
          this.indexFact(fact);
        } catch (error) {
          console.warn(`Failed to load fact from ${file}:`, error);
        }
      }
      await this.rebuildIndexes();
    } catch (error) {
      console.error('Failed to load existing facts:', error);
    }
  }

  /**
   * Persists a fact, indexes it and adds it to the semantic index.
   *
   * @param {object} fact - Fact to store. A missing/falsy `id` is replaced by
   *   a random UUID; a missing `createdAt` is set to now; `updatedAt` is
   *   always set to now.
   * @returns {Promise<object>} The stored fact (a new object, not `fact`).
   * @throws {Error} If the store is not initialized, or if saving fails.
   */
  async storeFact(fact) {
    if (!this.initialized) {
      throw new Error('FactStore not initialized');
    }
    try {
      const now = new Date().toISOString();
      const id = fact.id || randomUUID();
      // Spread first: an explicit `id: undefined` on the input must not
      // overwrite the generated id.
      const factWithId = {
        ...fact,
        id,
        createdAt: fact.createdAt || now,
        updatedAt: now,
      };
      const alreadyStored = this.factsIndex.has(id);
      await this.saveFact(factWithId);
      this.indexFact(factWithId);
      // Add to semantic index
      this.indexSemanticDocument(factWithId, alreadyStored);
      return factWithId;
    } catch (error) {
      console.error('Failed to store fact:', error);
      throw error;
    }
  }

  /**
   * Merges `updates` into an existing fact, persists it and refreshes all
   * indexes (including the semantic index).
   *
   * @param {string} factId - Id of the fact to update.
   * @param {object} updates - Fields to overwrite; `id` and `updatedAt` in
   *   `updates` are ignored.
   * @returns {Promise<object>} The updated fact.
   * @throws {Error} If no fact with `factId` exists, or if saving fails.
   */
  async updateFact(factId, updates) {
    const existing = this.factsIndex.get(factId);
    if (!existing) {
      throw new Error(`Fact with ID ${factId} not found`);
    }
    const updated = {
      ...existing,
      ...updates,
      id: factId,
      updatedAt: new Date().toISOString(),
    };
    await this.saveFact(updated);
    this.indexFact(updated);
    // Keep semantic search in sync with the new content/metadata.
    this.indexSemanticDocument(updated, true);
    return updated;
  }

  /**
   * Deletes a fact's file and removes the fact from every index.
   *
   * @param {string} factId - Id of the fact to delete.
   * @returns {Promise<boolean>} Always `true` on success.
   * @throws {Error} If the fact is unknown, or if the file cannot be removed
   *   for a reason other than it already being absent.
   */
  async deleteFact(factId) {
    const fact = this.factsIndex.get(factId);
    if (!fact) {
      throw new Error(`Fact with ID ${factId} not found`);
    }
    try {
      const factPath = join(this.storageDir, 'facts', `${factId}.json`);
      try {
        await fs.unlink(factPath);
      } catch (error) {
        // A file that is already gone must not leave the fact stuck in memory.
        if (error.code !== 'ENOENT') {
          throw error;
        }
      }
      this.removeFromIndexes(fact);
      // Remove from semantic index
      this.semanticIndex.removeDocument(factId);
      return true;
    } catch (error) {
      console.error(`Failed to delete fact ${factId}:`, error);
      throw error;
    }
  }

  /**
   * Filters, scores, sorts and paginates the stored facts.
   *
   * @param {object} [query]
   * @param {string} [query.query] - Free-text search. Uses semanticSearch
   *   when the semantic index holds more than 5 documents, otherwise
   *   scoreRelevance.
   * @param {string} [query.type] - Exact type match.
   * @param {string} [query.domain] - Exact domain match.
   * @param {string[]} [query.tags=[]] - Keep facts having at least one of these tags.
   * @param {number} [query.minScore=0] - Inclusive lower qualityScore bound.
   * @param {number} [query.maxScore=100] - Inclusive upper qualityScore bound.
   *   Facts without a numeric qualityScore never satisfy the bounds.
   * @param {number} [query.limit=50]
   * @param {number} [query.offset=0]
   * @param {string} [query.sortBy='relevance'] - See sortFacts.
   * @param {boolean} [query.includeRelated=false] - Attach `relatedFacts`
   *   (from getRelatedFacts) to each returned fact.
   * @returns {Promise<{facts: object[], total: number, query: object}>}
   *   `facts` are shallow copies, so search scores and `relatedFacts` never
   *   leak into the stored records; `total` counts matches before pagination.
   */
  async queryFacts(query = {}) {
    const {
      query: searchQuery,
      type,
      domain,
      tags = [],
      minScore = 0,
      maxScore = 100,
      limit = 50,
      offset = 0,
      sortBy = 'relevance',
      includeRelated = false,
    } = query;

    let candidates = Array.from(this.factsIndex.values());
    if (type) {
      candidates = candidates.filter((fact) => fact.type === type);
    }
    if (domain) {
      candidates = candidates.filter((fact) => fact.domain === domain);
    }
    if (tags.length > 0) {
      // `tags` is optional on facts; treat a missing list as "no tags".
      candidates = candidates.filter((fact) => {
        const factTags = fact.tags || [];
        return tags.some((tag) => factTags.includes(tag));
      });
    }
    candidates = candidates.filter(
      (fact) => fact.qualityScore >= minScore && fact.qualityScore <= maxScore
    );

    if (searchQuery) {
      // Use semantic search if we have enough documents, otherwise fall back to keyword search
      if (this.semanticIndex.getStats().totalDocuments > 5) {
        candidates = this.semanticSearch(candidates, searchQuery);
      } else {
        candidates = this.scoreRelevance(candidates, searchQuery);
      }
    }
    candidates = this.sortFacts(candidates, sortBy);

    // Copy the page so per-query decorations stay out of factsIndex.
    const results = candidates
      .slice(offset, offset + limit)
      .map((fact) => ({ ...fact }));

    if (includeRelated) {
      for (const fact of results) {
        fact.relatedFacts = await this.getRelatedFacts(fact.id);
      }
    }
    await this.updateAccessStats(results);

    return {
      facts: results,
      total: candidates.length,
      query: query,
    };
  }

  /**
   * Ranks facts by a blend of semantic similarity, keyword hits and quality:
   * `semantic * 70 + keyword * 30 + qualityScore * 0.1`.
   *
   * @param {object[]} facts - Candidate facts.
   * @param {string} searchQuery - Free-text query.
   * @returns {object[]} Copies of the facts whose relevanceScore is > 0,
   *   each extended with `relevanceScore`, `semanticScore` and
   *   `keywordScore`, sorted by descending relevanceScore.
   */
  semanticSearch(facts, searchQuery) {
    // The index ranks ALL documents, not just `facts`; asking only for
    // `facts.length` hits could cut pre-filtered candidates out of the
    // result list. NOTE(review): assumes `limit` truncates the ranked list.
    const { totalDocuments = 0 } = this.semanticIndex.getStats();
    const semanticResults = this.semanticIndex.search(searchQuery, {
      limit: Math.max(facts.length, totalDocuments),
      threshold: 0.05,
      includeScores: true,
    });

    // Create a map of factId to semantic score
    const semanticScores = new Map();
    for (const result of semanticResults) {
      semanticScores.set(result.factId, result.similarity);
    }

    const terms = this.tokenizeQuery(searchQuery);
    // Score facts using both semantic and traditional relevance
    const scoredFacts = facts
      .map((fact) => {
        const semanticScore = semanticScores.get(fact.id) || 0;
        const keywordScore = this.scoreTerms(fact, terms);
        const relevanceScore =
          (semanticScore * 70) + (keywordScore * 30) + ((fact.qualityScore || 0) * 0.1);
        return { ...fact, relevanceScore, semanticScore, keywordScore };
      })
      .filter((fact) => fact.relevanceScore > 0);

    return scoredFacts.sort((a, b) => b.relevanceScore - a.relevanceScore);
  }

  /**
   * Splits a query into lower-cased, non-empty terms.
   *
   * @param {string} searchQuery
   * @returns {string[]} Terms; empty for a blank query. Empty terms are
   *   dropped because `''` is a substring of every string.
   */
  tokenizeQuery(searchQuery) {
    return searchQuery.toLowerCase().split(/\s+/).filter(Boolean);
  }

  /**
   * Keyword score of one fact for already-tokenized terms: per term, +10 if
   * the content contains it, +5 if any tag contains it, +3 if the domain
   * contains it (all case-insensitive substring matches).
   *
   * @param {object} fact
   * @param {string[]} terms - Lower-cased terms (see tokenizeQuery).
   * @returns {number}
   */
  scoreTerms(fact, terms) {
    const content = typeof fact.content === 'string' ? fact.content.toLowerCase() : '';
    let score = 0;
    for (const term of terms) {
      if (content.includes(term)) {
        score += 10;
      }
      if (fact.tags && fact.tags.some((tag) => tag.toLowerCase().includes(term))) {
        score += 5;
      }
      if (fact.domain && fact.domain.toLowerCase().includes(term)) {
        score += 3;
      }
    }
    return score;
  }

  /**
   * Keyword score of a fact for a free-text query (see scoreTerms).
   *
   * @param {object} fact
   * @param {string} searchQuery
   * @returns {number}
   */
  calculateKeywordScore(fact, searchQuery) {
    return this.scoreTerms(fact, this.tokenizeQuery(searchQuery));
  }

  /**
   * Keyword-only relevance scoring: keyword score, plus
   * `qualityScore * 0.1`, plus `accessCount * 0.5`, plus a recency bonus of
   * `max(0, 10 - days since lastAccessed)`.
   *
   * @param {object[]} facts - Candidate facts.
   * @param {string} searchQuery - Free-text query.
   * @returns {object[]} Copies of the facts whose relevanceScore is > 0,
   *   each extended with `relevanceScore` (input order preserved).
   */
  scoreRelevance(facts, searchQuery) {
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const terms = this.tokenizeQuery(searchQuery);
    return facts
      .map((fact) => {
        let relevanceScore = this.scoreTerms(fact, terms);
        // Missing counters count as 0; otherwise NaN would make the
        // `> 0` filter below drop facts that were never accessed.
        relevanceScore += (fact.qualityScore || 0) * 0.1;
        relevanceScore += (fact.accessCount || 0) * 0.5;
        if (fact.lastAccessed) {
          const daysSinceAccess =
            (Date.now() - new Date(fact.lastAccessed).getTime()) / MS_PER_DAY;
          if (Number.isFinite(daysSinceAccess)) {
            relevanceScore += Math.max(0, 10 - daysSinceAccess);
          }
        }
        return { ...fact, relevanceScore };
      })
      .filter((fact) => fact.relevanceScore > 0);
  }

  /**
   * Sorts `facts` in place, descending, by the requested key.
   *
   * @param {object[]} facts - Array to sort (mutated and returned).
   * @param {string} sortBy - 'relevance' (relevanceScore), 'quality'
   *   (qualityScore), 'recent' (updatedAt) or 'access' (accessCount). Any
   *   other value leaves the order unchanged. Missing numeric fields count
   *   as 0.
   * @returns {object[]} The same array.
   */
  sortFacts(facts, sortBy) {
    switch (sortBy) {
      case 'relevance':
        return facts.sort((a, b) => (b.relevanceScore || 0) - (a.relevanceScore || 0));
      case 'quality':
        return facts.sort((a, b) => (b.qualityScore || 0) - (a.qualityScore || 0));
      case 'recent':
        return facts.sort((a, b) => new Date(b.updatedAt) - new Date(a.updatedAt));
      case 'access':
        return facts.sort((a, b) => (b.accessCount || 0) - (a.accessCount || 0));
      default:
        return facts;
    }
  }

  /**
   * Walks a fact's `relationships` depth-first and collects the target facts
   * that exist in the store.
   *
   * A fact reachable through several paths (or through a cycle) is reported
   * once per path; the depth limit guarantees termination.
   *
   * @param {string} factId - Id of the starting fact.
   * @param {number} [maxDepth=2] - Maximum number of hops to follow.
   * @param {number} [currentDepth=0] - Recursion depth; callers normally omit it.
   * @returns {Promise<Array<{fact: object, relationship: *, depth: number}>>}
   */
  async getRelatedFacts(factId, maxDepth = 2, currentDepth = 0) {
    if (currentDepth >= maxDepth) {
      return [];
    }
    const fact = this.factsIndex.get(factId);
    if (!fact || !fact.relationships) {
      return [];
    }

    const nextDepth = currentDepth + 1;
    const related = [];
    for (const relationship of fact.relationships) {
      const relatedFact = this.factsIndex.get(relationship.targetId);
      if (!relatedFact) {
        continue;
      }
      related.push({
        fact: relatedFact,
        relationship: relationship.type,
        depth: nextDepth,
      });
      // Only recurse when the next level is still within maxDepth.
      if (nextDepth < maxDepth) {
        const deeper = await this.getRelatedFacts(relationship.targetId, maxDepth, nextDepth);
        related.push(...deeper);
      }
    }
    return related;
  }

  /**
   * Bumps `accessCount` and sets `lastAccessed` for the given facts, both on
   * the passed objects and on the stored records, and schedules the stored
   * records to be persisted.
   *
   * Only the STORED record is written to disk, so transient fields on the
   * passed objects (search scores, `relatedFacts`) are never persisted.
   * Facts whose id is not in the store are updated in place but neither
   * indexed nor saved.
   *
   * @param {object[]} facts - Facts (or copies of facts) that were just returned to a caller.
   * @returns {Promise<void>} Resolves before the deferred saves complete.
   */
  async updateAccessStats(facts) {
    const now = new Date().toISOString();
    for (const fact of facts) {
      const stored = this.factsIndex.get(fact.id);
      const accessCount = ((stored || fact).accessCount || 0) + 1;
      fact.accessCount = accessCount;
      fact.lastAccessed = now;
      if (!stored) {
        continue;
      }
      stored.accessCount = accessCount;
      stored.lastAccessed = now;

      const factId = fact.id;
      setImmediate(() => {
        // Re-read at save time: the fact may have been replaced by
        // updateFact or removed by deleteFact since it was scheduled.
        const current = this.factsIndex.get(factId);
        if (!current) {
          return;
        }
        this.saveFact(current).catch((error) => {
          // Fire-and-forget write: log instead of leaving an unhandled rejection.
          console.error(`Failed to persist access stats for fact ${factId}:`, error);
        });
      });
    }
  }

  /**
   * Writes one fact to `<storageDir>/facts/<id>.json` via the FileManager.
   *
   * @param {object} fact - Fact with an `id`.
   * @returns {Promise<void>}
   */
  async saveFact(fact) {
    const factPath = join(this.storageDir, 'facts', `${fact.id}.json`);
    await this.fileManager.safeWriteJSON(factPath, fact, {
      createBackup: true,
      validateJSON: true,
      retries: 3,
      atomicWrite: true
    });
  }

  /**
   * Lists the (index, key) pairs under which a fact is filed in the
   * type, domain and score-bucket indexes.
   *
   * @param {object} fact
   * @returns {Array<[Map, *]>}
   */
  secondaryIndexKeys(fact) {
    return [
      [this.typeIndex, fact.type],
      [this.domainIndex, fact.domain],
      [this.scoreIndex, Math.floor(fact.qualityScore / 10) * 10],
    ];
  }

  /**
   * Files a fact's id under its type, domain and score bucket.
   *
   * @param {object} fact
   */
  addToSecondaryIndexes(fact) {
    for (const [index, key] of this.secondaryIndexKeys(fact)) {
      if (!index.has(key)) {
        index.set(key, new Set());
      }
      index.get(key).add(fact.id);
    }
  }

  /**
   * Removes a fact's id from its type, domain and score bucket, deleting
   * buckets that become empty. Does not touch `factsIndex`.
   *
   * @param {object} fact
   */
  removeFromSecondaryIndexes(fact) {
    for (const [index, key] of this.secondaryIndexKeys(fact)) {
      const ids = index.get(key);
      if (!ids) {
        continue;
      }
      ids.delete(fact.id);
      if (ids.size === 0) {
        index.delete(key);
      }
    }
  }

  /**
   * Registers (or re-registers) a fact in `factsIndex` and in the type,
   * domain and score indexes.
   *
   * When a fact with the same id is already registered, its old index
   * entries are removed first so a changed type/domain/score does not leave
   * the id filed under the previous keys.
   *
   * @param {object} fact - Fact with an `id`.
   */
  indexFact(fact) {
    const previous = this.factsIndex.get(fact.id);
    if (previous) {
      this.removeFromSecondaryIndexes(previous);
    }
    this.factsIndex.set(fact.id, fact);
    this.addToSecondaryIndexes(fact);
  }

  /**
   * Removes a fact from `factsIndex` and from the type, domain and score
   * indexes. The semantic index is not touched.
   *
   * @param {object} fact
   */
  removeFromIndexes(fact) {
    this.factsIndex.delete(fact.id);
    this.removeFromSecondaryIndexes(fact);
  }

  /**
   * Recomputes the type, domain and score indexes from `factsIndex` and
   * persists them.
   *
   * @returns {Promise<void>}
   */
  async rebuildIndexes() {
    this.typeIndex.clear();
    this.domainIndex.clear();
    this.scoreIndex.clear();
    for (const fact of this.factsIndex.values()) {
      this.addToSecondaryIndexes(fact);
    }
    await this.saveIndexes();
  }

  /**
   * Writes the type, domain and score indexes to
   * `<storageDir>/indexes/main.json` as arrays of `[key, ids[]]` pairs.
   *
   * @returns {Promise<void>}
   */
  async saveIndexes() {
    const toPairs = (index) =>
      Array.from(index.entries()).map(([key, ids]) => [key, Array.from(ids)]);
    const indexData = {
      types: toPairs(this.typeIndex),
      domains: toPairs(this.domainIndex),
      scores: toPairs(this.scoreIndex),
      lastUpdated: new Date().toISOString(),
    };
    const indexPath = join(this.storageDir, 'indexes', 'main.json');
    await this.fileManager.safeWriteJSON(indexPath, indexData, {
      createBackup: true,
      validateJSON: true,
      retries: 2,
      atomicWrite: true
    });
  }

  /**
   * Summarizes the store.
   *
   * @returns {Promise<{totalFacts: number, factsByType: object,
   *   factsByDomain: object, averageQualityScore: number, lastUpdated: string}>}
   */
  async getStats() {
    const countByKey = (index) =>
      Object.fromEntries(Array.from(index.entries()).map(([key, ids]) => [key, ids.size]));
    return {
      totalFacts: this.factsIndex.size,
      factsByType: countByKey(this.typeIndex),
      factsByDomain: countByKey(this.domainIndex),
      averageQualityScore: this.calculateAverageQuality(),
      lastUpdated: new Date().toISOString(),
    };
  }

  /**
   * Rounded mean `qualityScore` over the facts that have a finite numeric
   * score.
   *
   * @returns {number} The rounded mean, or 0 when no fact has a score.
   */
  calculateAverageQuality() {
    const scores = [];
    for (const fact of this.factsIndex.values()) {
      // Skip unscored facts instead of letting `undefined` turn the sum into NaN.
      if (typeof fact.qualityScore === 'number' && Number.isFinite(fact.qualityScore)) {
        scores.push(fact.qualityScore);
      }
    }
    if (scores.length === 0) {
      return 0;
    }
    const totalScore = scores.reduce((sum, score) => sum + score, 0);
    return Math.round(totalScore / scores.length);
  }

  /**
   * Adds one fact to the semantic index.
   *
   * @param {object} fact - Fact with `id` and `content`.
   * @param {boolean} [replaceExisting=false] - Remove the document with the
   *   same id first. NOTE(review): whether `addDocument` alone replaces an
   *   existing id is not visible here, so the removal is done explicitly.
   */
  indexSemanticDocument(fact, replaceExisting = false) {
    if (replaceExisting) {
      this.semanticIndex.removeDocument(fact.id);
    }
    this.semanticIndex.addDocument(fact.id, fact.content, {
      type: fact.type,
      domain: fact.domain,
      tags: fact.tags || [],
      qualityScore: fact.qualityScore,
    });
  }

  /**
   * Clears the semantic index and re-adds every fact in `factsIndex`.
   */
  rebuildSemanticIndex() {
    this.semanticIndex.clear();
    for (const fact of this.factsIndex.values()) {
      this.indexSemanticDocument(fact);
    }
    console.log(`Semantic index rebuilt with ${this.semanticIndex.getStats().totalDocuments} documents`);
  }

  /**
   * Get semantic search statistics.
   *
   * @returns {object} Whatever `semanticIndex.getStats()` returns.
   */
  getSemanticStats() {
    return this.semanticIndex.getStats();
  }

  /**
   * Find similar facts using semantic similarity.
   *
   * @param {string} factId - Id of the reference fact.
   * @param {object} [options={}] - Passed through to
   *   `semanticIndex.findSimilarDocuments`.
   * @returns {Promise<object[]>} Copies of the matching stored facts, each
   *   with a `similarity` field. Hits whose id is not in the store are
   *   skipped.
   */
  async findSimilarFacts(factId, options = {}) {
    const similarResults = this.semanticIndex.findSimilarDocuments(factId, options);
    const similarFacts = [];
    for (const result of similarResults) {
      const fact = this.factsIndex.get(result.factId);
      if (fact) {
        similarFacts.push({ ...fact, similarity: result.similarity });
      }
    }
    return similarFacts;
  }

  /**
   * Get keywords for a fact (delegates to
   * `semanticIndex.getDocumentKeywords`).
   *
   * @param {string} factId
   * @param {number} [topN=10] - Maximum number of keywords requested.
   * @returns {*} Whatever the semantic index returns.
   */
  getFactKeywords(factId, topN = 10) {
    return this.semanticIndex.getDocumentKeywords(factId, topN);
  }

  // Convenience methods for SlashCommands

  /**
   * @param {number} [limit=10]
   * @returns {Promise<object[]>} Up to `limit` facts, most recently updated
   *   (or created, when `updatedAt` is missing) first.
   */
  async getRecentFacts(limit = 10) {
    const timestampOf = (fact) => new Date(fact.updatedAt || fact.createdAt);
    return Array.from(this.factsIndex.values())
      .sort((a, b) => timestampOf(b) - timestampOf(a))
      .slice(0, limit);
  }

  /**
   * @param {number} [limit=5]
   * @returns {Promise<object[]>} Up to `limit` facts with the highest
   *   qualityScore (missing scores count as 0).
   */
  async getTopScoringFacts(limit = 5) {
    return Array.from(this.factsIndex.values())
      .sort((a, b) => (b.qualityScore || 0) - (a.qualityScore || 0))
      .slice(0, limit);
  }

  /**
   * @returns {Promise<object[]>} All stored facts (the stored objects themselves).
   */
  async getAllFacts() {
    return Array.from(this.factsIndex.values());
  }

  /**
   * Persists the indexes and asks the FileManager to clean up backups in the
   * facts directory. Does nothing if the store was never initialized.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (!this.initialized) {
      return;
    }
    await this.saveIndexes();
    // Clean up old backup files
    const factsDir = join(this.storageDir, 'facts');
    await this.fileManager.cleanupBackups(factsDir);
    console.log('FactStore shut down successfully');
  }

  /**
   * Runs the FileManager repair pass over the facts directory.
   *
   * @returns {Promise<object>} The FileManager's repair report.
   */
  async repairStorage() {
    const factsDir = join(this.storageDir, 'facts');
    return await this.fileManager.repairCorruptedFiles(factsDir);
  }
}