/**
* Batch processor for community node documentation generation.
*
* Orchestrates the full workflow:
* 1. Fetch READMEs from npm registry
* 2. Generate AI documentation summaries
* 3. Store results in database
*/
import { NodeRepository } from '../database/node-repository';
import { CommunityNodeFetcher } from './community-node-fetcher';
import {
DocumentationGenerator,
DocumentationInput,
DocumentationResult,
createDocumentationGenerator,
} from './documentation-generator';
import { logger } from '../utils/logger';
/**
* Options for batch processing
*/
export interface BatchProcessorOptions {
/** Skip nodes that already have READMEs (default: false) */
skipExistingReadme?: boolean;
/** Skip nodes that already have AI summaries (default: false) */
skipExistingSummary?: boolean;
/** Only fetch READMEs, skip AI generation (default: false) */
readmeOnly?: boolean;
/** Only generate AI summaries, skip README fetch (default: false) */
summaryOnly?: boolean;
/** Max nodes to process (default: unlimited) */
limit?: number;
/** Concurrency for npm README fetches (default: 5) */
readmeConcurrency?: number;
/** Concurrency for LLM API calls (default: 3) */
llmConcurrency?: number;
/** Progress callback */
progressCallback?: (message: string, current: number, total: number) => void;
}
/**
* Result of batch processing
*/
export interface BatchProcessorResult {
/** Number of READMEs fetched */
readmesFetched: number;
/** Number of READMEs that failed to fetch */
readmesFailed: number;
/** Number of AI summaries generated */
summariesGenerated: number;
/** Number of AI summaries that failed */
summariesFailed: number;
/** Nodes that were skipped (already had data) */
skipped: number;
/** Total duration in seconds */
durationSeconds: number;
/** Errors encountered */
errors: string[];
}
/**
* Batch processor for generating documentation for community nodes
*/
export class DocumentationBatchProcessor {
private repository: NodeRepository;
private fetcher: CommunityNodeFetcher;
private generator: DocumentationGenerator;
constructor(
repository: NodeRepository,
fetcher?: CommunityNodeFetcher,
generator?: DocumentationGenerator
) {
this.repository = repository;
this.fetcher = fetcher || new CommunityNodeFetcher();
this.generator = generator || createDocumentationGenerator();
}
/**
* Process all community nodes to generate documentation
*/
async processAll(options: BatchProcessorOptions = {}): Promise<BatchProcessorResult> {
const startTime = Date.now();
const result: BatchProcessorResult = {
readmesFetched: 0,
readmesFailed: 0,
summariesGenerated: 0,
summariesFailed: 0,
skipped: 0,
durationSeconds: 0,
errors: [],
};
const {
skipExistingReadme = false,
skipExistingSummary = false,
readmeOnly = false,
summaryOnly = false,
limit,
readmeConcurrency = 5,
llmConcurrency = 3,
progressCallback,
} = options;
try {
// Step 1: Fetch READMEs (unless summaryOnly)
if (!summaryOnly) {
const readmeResult = await this.fetchReadmes({
skipExisting: skipExistingReadme,
limit,
concurrency: readmeConcurrency,
progressCallback,
});
result.readmesFetched = readmeResult.fetched;
result.readmesFailed = readmeResult.failed;
result.skipped += readmeResult.skipped;
result.errors.push(...readmeResult.errors);
}
// Step 2: Generate AI summaries (unless readmeOnly)
if (!readmeOnly) {
const summaryResult = await this.generateSummaries({
skipExisting: skipExistingSummary,
limit,
concurrency: llmConcurrency,
progressCallback,
});
result.summariesGenerated = summaryResult.generated;
result.summariesFailed = summaryResult.failed;
result.skipped += summaryResult.skipped;
result.errors.push(...summaryResult.errors);
}
result.durationSeconds = (Date.now() - startTime) / 1000;
return result;
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
result.errors.push(`Batch processing failed: ${errorMessage}`);
result.durationSeconds = (Date.now() - startTime) / 1000;
return result;
}
}
/**
* Fetch READMEs for community nodes
*/
private async fetchReadmes(options: {
skipExisting?: boolean;
limit?: number;
concurrency?: number;
progressCallback?: (message: string, current: number, total: number) => void;
}): Promise<{ fetched: number; failed: number; skipped: number; errors: string[] }> {
const { skipExisting = false, limit, concurrency = 5, progressCallback } = options;
// Get nodes that need READMEs
let nodes = skipExisting
? this.repository.getCommunityNodesWithoutReadme()
: this.repository.getCommunityNodes({ orderBy: 'downloads' });
if (limit) {
nodes = nodes.slice(0, limit);
}
logger.info(`Fetching READMEs for ${nodes.length} community nodes...`);
if (nodes.length === 0) {
return { fetched: 0, failed: 0, skipped: 0, errors: [] };
}
// Get package names
const packageNames = nodes
.map((n) => n.npmPackageName)
.filter((name): name is string => !!name);
// Fetch READMEs in batches
const readmeMap = await this.fetcher.fetchReadmesBatch(
packageNames,
progressCallback,
concurrency
);
// Store READMEs in database
let fetched = 0;
let failed = 0;
const errors: string[] = [];
for (const node of nodes) {
if (!node.npmPackageName) continue;
const readme = readmeMap.get(node.npmPackageName);
if (readme) {
try {
this.repository.updateNodeReadme(node.nodeType, readme);
fetched++;
} catch (error) {
const msg = `Failed to save README for ${node.nodeType}: ${error}`;
errors.push(msg);
failed++;
}
} else {
failed++;
}
}
logger.info(`README fetch complete: ${fetched} fetched, ${failed} failed`);
return { fetched, failed, skipped: 0, errors };
}
/**
* Generate AI documentation summaries
*/
private async generateSummaries(options: {
skipExisting?: boolean;
limit?: number;
concurrency?: number;
progressCallback?: (message: string, current: number, total: number) => void;
}): Promise<{ generated: number; failed: number; skipped: number; errors: string[] }> {
const { skipExisting = false, limit, concurrency = 3, progressCallback } = options;
// Get nodes that need summaries (must have READMEs first)
let nodes = skipExisting
? this.repository.getCommunityNodesWithoutAISummary()
: this.repository.getCommunityNodes({ orderBy: 'downloads' }).filter(
(n) => n.npmReadme && n.npmReadme.length > 0
);
if (limit) {
nodes = nodes.slice(0, limit);
}
logger.info(`Generating AI summaries for ${nodes.length} nodes...`);
if (nodes.length === 0) {
return { generated: 0, failed: 0, skipped: 0, errors: [] };
}
// Test LLM connection first
const connectionTest = await this.generator.testConnection();
if (!connectionTest.success) {
const error = `LLM connection failed: ${connectionTest.message}`;
logger.error(error);
return { generated: 0, failed: nodes.length, skipped: 0, errors: [error] };
}
logger.info(`LLM connection successful: ${connectionTest.message}`);
// Prepare inputs for batch generation
const inputs: DocumentationInput[] = nodes.map((node) => ({
nodeType: node.nodeType,
displayName: node.displayName,
description: node.description,
readme: node.npmReadme || '',
npmPackageName: node.npmPackageName,
}));
// Generate summaries in parallel
const results = await this.generator.generateBatch(inputs, concurrency, progressCallback);
// Store summaries in database
let generated = 0;
let failed = 0;
const errors: string[] = [];
for (const result of results) {
if (result.error) {
errors.push(`${result.nodeType}: ${result.error}`);
failed++;
} else {
try {
this.repository.updateNodeAISummary(result.nodeType, result.summary);
generated++;
} catch (error) {
const msg = `Failed to save summary for ${result.nodeType}: ${error}`;
errors.push(msg);
failed++;
}
}
}
logger.info(`AI summary generation complete: ${generated} generated, ${failed} failed`);
return { generated, failed, skipped: 0, errors };
}
/**
* Get current documentation statistics
*/
getStats(): ReturnType<NodeRepository['getDocumentationStats']> {
return this.repository.getDocumentationStats();
}
}