/**
* Learning Source Registry
* CRUD operations for managing learning sources (URLs, PDFs, etc.)
*/
import fs from 'fs/promises';
import path from 'path';
import { v4 as uuidv4 } from 'uuid';
import { createLearnLogger } from './utils/custom-logger.js';
/**
 * File-backed registry of learning sources, stored per project.
 *
 * Each project's registry is a JSON document located at
 * `<dataDir>/learn-content/<projectId>/sources.json` with the shape
 * `{ projectId, sources: { [sourceId]: source }, metadata }`.
 * Every mutating method loads the file, changes it in memory and writes the
 * whole document back.
 */
export class LearningSourceRegistry {
  /**
   * @param {string} dataDir - Base data directory; registries are kept under
   *   its `learn-content` subdirectory.
   */
  constructor(dataDir) {
    this.dataDir = dataDir;
    this.learnContentDir = path.join(dataDir, 'learn-content');
    this.logger = createLearnLogger('LearningSourceRegistry');
  }

  /**
   * Build the path of the registry file for a project.
   *
   * @param {string} projectId - Project identifier, used as a directory name.
   * @returns {string} Path to the project's `sources.json`.
   */
  getRegistryPath(projectId) {
    return path.join(this.learnContentDir, projectId, 'sources.json');
  }

  /**
   * Make sure a directory (and any missing parents) exists.
   *
   * `mkdir` with `recursive: true` succeeds when the directory is already
   * there, so no prior existence check is needed. Any real failure (a path
   * component that is a regular file, missing permissions, ...) is propagated
   * instead of being silently ignored.
   *
   * @param {string} dirPath - Directory to create.
   * @returns {Promise<void>}
   * @throws {Error} If the directory cannot be created.
   */
  async ensureDir(dirPath) {
    await fs.mkdir(dirPath, { recursive: true });
  }

  /**
   * Load the registry of a project.
   *
   * When the registry file does not exist yet, a fresh empty registry object
   * is returned (nothing is written to disk).
   *
   * @param {string} projectId - Project identifier.
   * @returns {Promise<object>} The parsed registry, or a new empty one.
   * @throws {Error} On any read error other than `ENOENT`, or when the file
   *   does not contain valid JSON.
   */
  async loadRegistry(projectId) {
    try {
      const registryPath = this.getRegistryPath(projectId);
      const data = await fs.readFile(registryPath, 'utf8');
      return JSON.parse(data);
    } catch (error) {
      if (error.code === 'ENOENT') {
        const now = new Date().toISOString();
        return {
          projectId,
          sources: {},
          metadata: {
            created: now,
            updated: now,
            totalSources: 0,
          },
        };
      }
      throw error;
    }
  }

  /**
   * Persist the registry of a project.
   *
   * Refreshes `metadata.updated` and `metadata.totalSources` on the passed
   * registry object before writing it as pretty-printed JSON.
   *
   * @param {string} projectId - Project identifier.
   * @param {object} registry - Registry object; its `metadata` is mutated.
   * @returns {Promise<void>}
   */
  async saveRegistry(projectId, registry) {
    const registryPath = this.getRegistryPath(projectId);
    await this.ensureDir(path.dirname(registryPath));
    registry.metadata.updated = new Date().toISOString();
    registry.metadata.totalSources = Object.keys(registry.sources).length;
    await fs.writeFile(registryPath, JSON.stringify(registry, null, 2));
  }

  /**
   * Add learning sources to a project.
   *
   * Every URL becomes a new source with a generated id, a detected type and
   * the status `pending`. The registry is saved once after all sources have
   * been added.
   *
   * @param {string} projectId - Project identifier.
   * @param {Iterable<string>} urls - URLs (or paths) of the sources to add.
   * @returns {Promise<object[]>} The newly created source records.
   */
  async addSources(projectId, urls) {
    const registry = await this.loadRegistry(projectId);
    const addedSources = [];

    for (const url of urls) {
      const sourceId = uuidv4();
      const source = {
        id: sourceId,
        url,
        type: this.detectSourceType(url),
        status: 'pending',
        metadata: {
          title: null,
          description: null,
          duration: null,
          fileSize: null,
          language: null,
        },
        timestamps: {
          added: new Date().toISOString(),
          lastProcessed: null,
          completed: null,
        },
        processingAttempts: 0,
        lastError: null,
      };
      registry.sources[sourceId] = source;
      addedSources.push(source);
    }

    await this.saveRegistry(projectId, registry);
    this.logger.info('Sources added to registry', {
      projectId,
      count: addedSources.length,
      sourceIds: addedSources.map(s => s.id),
    });
    return addedSources;
  }

  /**
   * Update the status of a source and merge additional metadata into it.
   *
   * - `processing`: stamps `timestamps.lastProcessed` and increments
   *   `processingAttempts`.
   * - `completed`: stamps `timestamps.completed`.
   * - `failed`: stores `metadata.error` (or `'Unknown error'`) in `lastError`.
   *
   * @param {string} projectId - Project identifier.
   * @param {string} sourceId - Id of the source to update.
   * @param {string} status - New status value.
   * @param {object} [metadata={}] - Fields merged over the source's metadata.
   * @returns {Promise<void>}
   * @throws {Error} If the source does not exist in the project.
   */
  async updateSourceStatus(projectId, sourceId, status, metadata = {}) {
    const registry = await this.loadRegistry(projectId);

    // Own-property check: ids such as "constructor" or "__proto__" must not
    // resolve to inherited members, which would then be mutated below.
    const isKnown = Object.prototype.hasOwnProperty.call(registry.sources, sourceId);
    const source = isKnown ? registry.sources[sourceId] : undefined;
    if (!source) {
      throw new Error(`Source ${sourceId} not found in project ${projectId}`);
    }

    source.status = status;
    source.metadata = {
      ...source.metadata,
      ...metadata,
    };

    if (status === 'processing') {
      source.timestamps.lastProcessed = new Date().toISOString();
      source.processingAttempts += 1;
    } else if (status === 'completed') {
      source.timestamps.completed = new Date().toISOString();
    } else if (status === 'failed') {
      source.lastError = metadata.error || 'Unknown error';
    }

    await this.saveRegistry(projectId, registry);
  }

  /**
   * List the sources of a project, optionally filtered by status.
   *
   * @param {string} projectId - Project identifier.
   * @param {?string} [status=null] - Status to filter by; a falsy value
   *   returns all sources.
   * @returns {Promise<object[]>} Matching source records.
   */
  async getSourcesByStatus(projectId, status = null) {
    const registry = await this.loadRegistry(projectId);
    const sources = Object.values(registry.sources);
    if (status) {
      return sources.filter(source => source.status === status);
    }
    return sources;
  }

  /**
   * Delete sources from a project. Unknown ids are ignored.
   *
   * @param {string} projectId - Project identifier.
   * @param {Iterable<string>} sourceIds - Ids of the sources to delete.
   * @returns {Promise<object[]>} The source records that were removed.
   */
  async deleteSources(projectId, sourceIds) {
    const registry = await this.loadRegistry(projectId);
    const deletedSources = [];

    for (const sourceId of sourceIds) {
      // Only real entries count; inherited keys (e.g. "constructor") are not
      // sources and must not be reported as deleted.
      const isKnown = Object.prototype.hasOwnProperty.call(registry.sources, sourceId);
      if (isKnown && registry.sources[sourceId]) {
        deletedSources.push(registry.sources[sourceId]);
        delete registry.sources[sourceId];
      }
    }

    await this.saveRegistry(projectId, registry);
    this.logger.info('Sources deleted from registry', {
      projectId,
      count: deletedSources.length,
      sourceIds,
    });
    return deletedSources;
  }

  /**
   * Classify a source by its URL (case-insensitive).
   *
   * Checks are applied in order: a URL containing `youtube.com` or `youtu.be`
   * is `youtube`; a URL whose part before any `?` or `#` ends in `.pdf` is
   * `pdf`; anything else starting with `http` is `article`; the rest is
   * `unknown`.
   *
   * @param {string} url - URL or path of the source.
   * @returns {'youtube'|'pdf'|'article'|'unknown'} Detected source type.
   */
  detectSourceType(url) {
    const urlLower = url.toLowerCase();
    if (urlLower.includes('youtube.com') || urlLower.includes('youtu.be')) {
      return 'youtube';
    }

    // Drop query string and fragment so that links such as
    // "paper.pdf?download=1" or "paper.pdf#page=2" are still seen as PDFs.
    const withoutQueryOrFragment = urlLower.split(/[?#]/, 1)[0];
    if (withoutQueryOrFragment.endsWith('.pdf')) {
      return 'pdf';
    }

    if (urlLower.startsWith('http')) {
      return 'article';
    }
    return 'unknown';
  }

  /**
   * Summarize the processing state of a project.
   *
   * @param {string} projectId - Project identifier.
   * @returns {Promise<{projectId: string, totalSources: number,
   *   statusCounts: object, lastUpdated: string, isProcessing: boolean}>}
   *   Per-status counts (always containing `pending`, `processing`,
   *   `completed` and `failed`), the registry's last update time and whether
   *   any source is currently in the `processing` state.
   */
  async getProcessingStatus(projectId) {
    const registry = await this.loadRegistry(projectId);
    const sources = Object.values(registry.sources);
    const statusCounts = {
      pending: 0,
      processing: 0,
      completed: 0,
      failed: 0,
    };

    for (const source of sources) {
      // Statuses outside the four predefined ones are counted as well.
      statusCounts[source.status] = (statusCounts[source.status] || 0) + 1;
    }

    return {
      projectId,
      totalSources: sources.length,
      statusCounts,
      lastUpdated: registry.metadata.updated,
      isProcessing: statusCounts.processing > 0,
    };
  }
}