// parse-resume.cjs • 30.9 kB
#!/usr/bin/env node
/**
* Smart ZIP Parser with Resume Capability
* Tracks parsing progress and can resume from where it left off
*/
// Load environment variables
require('dotenv').config();
const { EGWDatabase, EGWApiClientNew, ContentDownloader, createAuthManager } = require('@surgbc/egw-writings-shared');
const EGWApiClient = EGWApiClientNew;
const { readdirSync, existsSync, writeFileSync, readFileSync, mkdirSync } = require('fs');
const path = require('path');
const AdmZip = require('adm-zip');
class ResumeParser {
constructor() {
this.db = new EGWDatabase();
this.authManager = createAuthManager();
this.apiClient = new EGWApiClient({ authManager: this.authManager });
this.downloader = new ContentDownloader(this.apiClient, this.db);
// Progress tracking
this.progressFile = path.join(process.cwd(), 'data', 'parse-progress.json');
this.zipsDir = path.join(process.cwd(), 'data', 'zips');
this.reportFile = path.join(process.cwd(), 'data', 'parse-report.txt');
this.productionReportFile = path.join(process.cwd(), '..', 'local-server', 'data', 'parse-report.txt');
// Ensure data directory exists
const dataDir = path.join(process.cwd(), 'data');
if (!existsSync(dataDir)) {
mkdirSync(dataDir, { recursive: true });
}
}
/**
* Load previous progress or create new
*/
loadProgress() {
try {
if (existsSync(this.progressFile)) {
const progress = JSON.parse(readFileSync(this.progressFile, 'utf8'));
console.log(`๐ Loaded previous progress:`);
console.log(` Total ZIPs found: ${progress.totalZips}`);
console.log(` Completed: ${progress.completed}`);
console.log(` Failed: ${progress.failed.length}`);
console.log(` Last processed: ${progress.lastProcessed || 'None'}`);
return progress;
}
} catch (error) {
console.warn('โ ๏ธ Could not load progress file, starting fresh');
}
return {
totalZips: 0,
completed: 0,
failed: [],
lastProcessed: null,
startTime: null,
completedZips: []
};
}
/**
* Save current progress
*/
saveProgress(progress) {
try {
progress.lastUpdated = new Date().toISOString();
writeFileSync(this.progressFile, JSON.stringify(progress, null, 2));
} catch (error) {
console.error('โ Failed to save progress:', error);
}
}
/**
* Generate detailed parsing report and copy to production
*/
async generateDetailedReport(progress) {
try {
const stats = await this.db.getStats();
const timestamp = new Date().toISOString();
let report = `EGW WRITINGS - DETAILED PARSING REPORT\n`;
report += `Generated: ${timestamp}\n`;
report += `='.repeat(80)}\n\n`;
// Summary Section
report += `๐ OVERALL STATISTICS\n`;
report += `${'-'.repeat(40)}\n`;
report += `Total Books in Database: ${stats.books}\n`;
report += `Completed Parsed Books: ${stats.downloadedBooks}\n`;
report += `Completion Percentage: ${((stats.downloadedBooks / stats.books) * 100).toFixed(1)}%\n`;
report += `Total Paragraphs: ${stats.paragraphs}\n`;
report += `Languages Supported: ${stats.languages}\n\n`;
// Current Session Progress
report += `๐ CURRENT SESSION PROGRESS\n`;
report += `${'-'.repeat(40)}\n`;
report += `Session Started: ${progress.startTime || 'N/A'}\n`;
report += `Last Updated: ${progress.lastUpdated}\n`;
report += `ZIPs Processed This Session: ${progress.completed}\n`;
report += `Failed This Session: ${progress.failed.length}\n`;
report += `Last Processed: ${progress.lastProcessed || 'None'}\n\n`;
// Recent Successfully Completed Books
report += `โ
RECENTLY COMPLETED BOOKS (Last 10)\n`;
report += `${'-'.repeat(40)}\n`;
try {
const recentBooks = await this.db.db.all(`
SELECT book_id, code, title, author, downloaded_at
FROM books
WHERE downloaded_at IS NOT NULL
ORDER BY downloaded_at DESC
LIMIT 10
`);
recentBooks.forEach((book, idx) => {
report += `${idx + 1}. ${book.code} - ${book.title}\n`;
report += ` ๐ค ${book.author}\n`;
report += ` โ
${book.downloaded_at}\n\n`;
});
} catch (error) {
report += `Error retrieving recent books: ${error.message}\n\n`;
}
// Failed Books
if (progress.failed.length > 0) {
report += `โ FAILED BOOKS THIS SESSION\n`;
report += `${'-'.repeat(40)}\n`;
progress.failed.slice(-10).forEach((failure, idx) => {
report += `${idx + 1}. ${failure.zip}\n`;
report += ` โ ๏ธ ${failure.error}\n`;
report += ` ๐ ${failure.timestamp}\n\n`;
});
}
// Database File Info
const dbPath = this.db.db.filename;
const fs = require('fs');
let dbSize = 'Unknown';
try {
const stats = fs.statSync(dbPath);
dbSize = (stats.size / 1024 / 1024).toFixed(2) + ' MB';
} catch (error) {
dbSize = 'Error getting size: ' + error.message;
}
report += `๐พ DATABASE INFORMATION\n`;
report += `${'-'.repeat(40)}\n`;
report += `Database File: ${dbPath}\n`;
report += `Database Size: ${dbSize}\n`;
report += `Report File: ${this.reportFile}\n`;
report += `Production Report: ${this.productionReportFile}\n\n`;
// Performance Metrics
if (progress.startTime) {
const elapsed = Date.now() - new Date(progress.startTime).getTime();
const elapsedMinutes = elapsed / 1000 / 60;
const rate = progress.completed / elapsedMinutes;
report += `โก PERFORMANCE METRICS\n`;
report += `${'-'.repeat(40)}\n`;
report += `Elapsed Time: ${elapsedMinutes.toFixed(1)} minutes\n`;
report += `Processing Rate: ${rate.toFixed(1)} books/minute\n`;
if (rate > 0) {
const remaining = stats.books - stats.downloadedBooks;
const eta = remaining / rate;
report += `ETA for completion: ${eta.toFixed(0)} minutes\n`;
}
report += `\n`;
}
// Footer
report += `${'='.repeat(80)}\n`;
report += `End of Report - ${timestamp}\n`;
// Write report locally
writeFileSync(this.reportFile, report, 'utf8');
// Copy to production location
try {
const prodDir = path.dirname(this.productionReportFile);
if (!existsSync(prodDir)) {
mkdirSync(prodDir, { recursive: true });
}
writeFileSync(this.productionReportFile, report, 'utf8');
console.log(`๐ Detailed report saved to: ${this.productionReportFile}`);
} catch (copyError) {
console.warn(`โ ๏ธ Could not copy report to production: ${copyError.message}`);
}
console.log(`๐ Report generated: ${stats.downloadedBooks}/${stats.books} books completed (${((stats.downloadedBooks / stats.books) * 100).toFixed(1)}%)`);
} catch (error) {
console.error('โ Failed to generate detailed report:', error.message);
}
}
/**
* Recursively find all ZIP files
*/
findAllZipFiles(dir) {
const files = [];
const scan = (currentDir) => {
const items = readdirSync(currentDir, { withFileTypes: true });
for (const item of items) {
const fullPath = path.join(currentDir, item.name);
if (item.isDirectory()) {
scan(fullPath);
} else if (item.isFile() && item.name.endsWith('.zip')) {
files.push(fullPath);
}
}
};
scan(dir);
return files;
}
/**
* Check if a ZIP has already been parsed successfully
*/
isZipAlreadyParsed(zipPath, progress) {
const zipBasename = path.basename(zipPath);
return progress.completedZips.includes(zipBasename);
}
/**
* Extract book ID from ZIP filename
*/
extractBookId(zipPath) {
const filename = path.basename(zipPath, '.zip');
const match = filename.match(/_(\d+)$/);
return match ? parseInt(match[1]) : null;
}
/**
* Check if book already has content in database
*/
async hasBookContent(bookId) {
try {
const paragraphs = await this.db.getParagraphs(bookId, 1);
return paragraphs && paragraphs.length > 0;
} catch (error) {
console.warn(`โ ๏ธ Error checking book ${bookId} content:`, error.message);
return false;
}
}
/**
 * Verify data integrity - detect partial vs complete data.
 *
 * Cross-checks three signals: the paragraph count in the database, the
 * paragraph count found in the extracted library JSON files, and the
 * books.downloaded_at completion flag. Every discrepancy is collected
 * into an issues list rather than failing fast.
 *
 * @param {number} bookId     Database ID of the book.
 * @param {object} bookInfo   Book metadata from the API (not read here;
 *                            kept for signature compatibility with callers).
 * @param {string} extractDir Library directory the ZIP was extracted to.
 * @returns {Promise<object>} { valid, dbCount, libraryCount }, plus an
 *                            `issues` array (and `error` message) when invalid.
 */
async verifyBookIntegrity(bookId, bookInfo, extractDir) {
  try {
    console.log(`๐ Verifying integrity for book ${bookId}...`);
    // 1. Check database paragraph count
    const dbParagraphs = await this.db.getParagraphs(bookId, 1);
    const dbCount = dbParagraphs ? dbParagraphs.length : 0;
    console.log(` ๐ Database: ${dbCount} paragraphs`);
    // 2. Check library files
    const libraryFiles = this.countLibraryFiles(extractDir);
    console.log(` ๐ Library: ${libraryFiles.files} JSON files, ${libraryFiles.paragraphs} paragraphs`);
    // 3. Check book metadata completeness (downloaded_at flag)
    const book = await this.db.db.get(
      'SELECT * FROM books WHERE book_id = ?',
      [bookId]
    );
    const isDownloaded = book && book.downloaded_at;
    console.log(` ๐ท๏ธ Download flag: ${isDownloaded ? 'SET' : 'NOT SET'}`);
    // 4. Integrity checks - accumulate every discrepancy
    const issues = [];
    if (dbCount === 0) {
      issues.push('No paragraphs in database');
    }
    if (libraryFiles.files === 0) {
      issues.push('No JSON files in library');
    }
    if (dbCount !== libraryFiles.paragraphs) {
      issues.push(`Paragraph count mismatch: DB=${dbCount}, Library=${libraryFiles.paragraphs}`);
    }
    if (!isDownloaded) {
      issues.push('Download flag not set');
    }
    // 5. Check for common partial data patterns (one side populated, other empty)
    if (dbCount > 0 && libraryFiles.paragraphs === 0) {
      issues.push('Database has content but library is empty - partial extraction');
    }
    if (dbCount === 0 && libraryFiles.paragraphs > 0) {
      issues.push('Library has content but database is empty - partial parsing');
    }
    if (issues.length > 0) {
      console.log(` โ Integrity issues found:`);
      issues.forEach(issue => console.log(` - ${issue}`));
      return { valid: false, issues, dbCount, libraryCount: libraryFiles.paragraphs };
    }
    console.log(` โ
Book data integrity verified`);
    return { valid: true, dbCount, libraryCount: libraryFiles.paragraphs };
  } catch (error) {
    // Any lookup failure is treated as invalid so the caller re-parses.
    console.warn(`โ ๏ธ Error verifying book ${bookId} integrity:`, error.message);
    return { valid: false, issues: ['Verification error'], error: error.message };
  }
}
/**
* Count JSON files and paragraphs in library directory
*/
countLibraryFiles(extractDir) {
if (!existsSync(extractDir)) {
return { files: 0, paragraphs: 0 };
}
try {
let fileCount = 0;
let paragraphCount = 0;
const files = readdirSync(extractDir, { recursive: true });
for (const file of files) {
if (file.endsWith('.json') && !file.includes('info.json')) {
fileCount++;
try {
const filePath = path.join(extractDir, file);
const content = JSON.parse(readFileSync(filePath, 'utf8'));
if (Array.isArray(content)) {
paragraphCount += content.length;
}
} catch (parseError) {
// Skip unparseable files
console.warn(`โ ๏ธ Could not parse ${file}: ${parseError.message}`);
}
}
}
return { files: fileCount, paragraphs: paragraphCount };
} catch (error) {
console.warn(`โ ๏ธ Error counting library files:`, error.message);
return { files: 0, paragraphs: 0 };
}
}
/**
 * Parse a single ZIP file with enhanced error handling.
 *
 * End-to-end flow: resolve the book from the filename, skip the ZIP when
 * the book is already fully parsed (verified via verifyBookIntegrity),
 * otherwise extract the archive into the categorized library tree and
 * parse its JSON content into the database. Partial data left behind by
 * a failed parse is cleaned up before returning.
 *
 * @param {string} zipPath  Path to the ZIP archive.
 * @param {object} progress Mutable progress record; completed/failed
 *                          counters and lists are updated in place.
 * @returns {Promise<boolean>} true when parsed (or safely skipped),
 *                             false on any failure.
 */
async parseZip(zipPath, progress) {
  const zipBasename = path.basename(zipPath);
  const relativePath = path.relative(this.zipsDir, zipPath);
  const bookId = this.extractBookId(zipPath);
  console.log(`\n๐ฆ [${progress.completed + 1}/${progress.totalZips}] Processing: ${relativePath}`);
  // A filename without a trailing _<id> cannot be matched to a book.
  if (!bookId) {
    const error = `Cannot extract book ID from filename: ${zipBasename}`;
    console.error(`โ ${error}`);
    progress.failed.push({ zip: zipBasename, error, timestamp: new Date().toISOString() });
    return false;
  }
  try {
    // Check if book already has content AND library directory exists (integrity check)
    const hasContent = await this.hasBookContent(bookId);
    const bookInfo = await this.apiClient.getBook(bookId, { trans: 'all' });
    const { category, subcategory } = this.categorizeBook(bookInfo);
    // Target directory mirrors the downloader's layout:
    // data/library/<lang>/<category>/<subcategory>/<code>_<id>
    const extractDir = path.join(
      process.cwd(), 'data', 'library', bookInfo.lang, category, subcategory, `${bookInfo.code}_${bookId}`
    );
    const hasLibraryDir = existsSync(extractDir);
    // If both exist, verify integrity to detect partial data
    if (hasContent && hasLibraryDir) {
      const integrity = await this.verifyBookIntegrity(bookId, bookInfo, extractDir);
      if (integrity.valid) {
        // Fully parsed already: count as completed and move on.
        console.log(`โ
Skipping ${zipBasename} - integrity verified (${integrity.dbCount} paragraphs)`);
        progress.completed++;
        progress.completedZips.push(zipBasename);
        return true;
      } else {
        console.log(`๐ง ${zipBasename} - integrity issues detected, overwriting...`);
        console.log(` Issues: ${integrity.issues.join(', ')}`);
        // Fast overwrite - just replace directory, no slow cleanup
        const { rmSync } = require('fs');
        if (existsSync(extractDir)) {
          console.log(`๐๏ธ Fast overwrite: removing library directory`);
          rmSync(extractDir, { recursive: true, force: true });
        }
        // Database will be overwritten during parsing (much faster!)
      }
    }
    if (hasContent && !hasLibraryDir) {
      console.log(`๐ ${zipBasename} - has database content but missing library, re-extracting...`);
    }
    // Get book details
    console.log(`๐ Book: ${bookInfo.title} by ${bookInfo.author}`);
    if (!existsSync(extractDir)) {
      mkdirSync(extractDir, { recursive: true });
    }
    console.log(`๐ Extracting to: ${path.relative(process.cwd(), extractDir)}`);
    // Extract ZIP (second arg true = overwrite existing files)
    const zip = new AdmZip(zipPath);
    zip.extractAllTo(extractDir, true);
    console.log(`โ
Extraction complete`);
    // Parse extracted content
    console.log(`๐ Starting content parsing...`);
    const parseResult = await this.parseExtractedContent(extractDir, bookId, bookInfo.title);
    console.log(`๐ Content parsing completed.`);
    if (parseResult.success) {
      console.log(`๐ Parsed ${parseResult.paragraphs} paragraphs from ${parseResult.files} files`);
      // Mark book as downloaded ONLY after successful parsing
      await this.db.markBookAsDownloaded(bookId);
      progress.completed++;
      progress.completedZips.push(zipBasename);
      progress.lastProcessed = zipBasename;
      return true;
    } else {
      console.error(`โ Failed to parse content: ${parseResult.error}`);
      // CRITICAL: Clean up partial data on failure
      console.log(`๐งน Cleaning up partial data for ${zipBasename}...`);
      await this.cleanupFailedBook(bookId, extractDir);
      progress.failed.push({
        zip: zipBasename,
        error: parseResult.error,
        timestamp: new Date().toISOString()
      });
      return false;
    }
  } catch (error) {
    // API/extraction errors land here; record them for the retry list.
    const errorMessage = error.message || String(error);
    console.error(`โ Failed to process ${zipBasename}: ${errorMessage}`);
    progress.failed.push({
      zip: zipBasename,
      error: errorMessage,
      timestamp: new Date().toISOString()
    });
    return false;
  }
}
/**
 * Parse extracted JSON content into the database.
 *
 * Scans the extraction directory (recursively) for JSON content files,
 * skipping info.json metadata, and inserts each valid paragraph. Bad
 * files or bad paragraphs are logged and skipped rather than aborting
 * the whole book.
 *
 * @param {string} extractDir directory the ZIP was extracted to.
 * @param {number} bookId     database ID the paragraphs belong to.
 * @param {string} bookTitle  book title (not read here; kept for
 *                            signature compatibility).
 * @returns {Promise<object>} { success: true, paragraphs, files } on
 *                            success, { success: false, error } otherwise.
 */
async parseExtractedContent(extractDir, bookId, bookTitle) {
  try {
    const files = readdirSync(extractDir, { recursive: true });
    const jsonFiles = files.filter(file =>
      file.endsWith('.json') && !file.includes('info.json')
    );
    if (jsonFiles.length === 0) {
      return { success: false, error: 'No JSON content files found' };
    }
    let totalParagraphs = 0;
    let processedFiles = 0;
    console.log(`๐ Processing ${jsonFiles.length} JSON files...`);
    for (let fileIndex = 0; fileIndex < jsonFiles.length; fileIndex++) {
      const file = jsonFiles[fileIndex];
      try {
        const filePath = path.join(extractDir, file);
        const content = JSON.parse(readFileSync(filePath, 'utf8'));
        if (Array.isArray(content)) {
          console.log(`๐ Processing ${file} (${content.length} paragraphs)...`);
          for (let paraIndex = 0; paraIndex < content.length; paraIndex++) {
            const paragraphData = content[paraIndex];
            // Validate paragraph data before inserting
            if (!paragraphData || !paragraphData.content) {
              console.warn(`โ ๏ธ Skipping paragraph with missing content in ${file}[${paraIndex}]`);
              continue;
            }
            // Normalize optional fields to empty strings/arrays so the
            // insert never sees undefined.
            const dbParagraph = {
              para_id: paragraphData.para_id,
              id_prev: paragraphData.id_prev || '',
              id_next: paragraphData.id_next || '',
              refcode_1: paragraphData.refcode_1 || '',
              refcode_2: paragraphData.refcode_2 || '',
              refcode_3: paragraphData.refcode_3 || '',
              refcode_4: paragraphData.refcode_4 || '',
              refcode_short: paragraphData.refcode_short || '',
              refcode_long: paragraphData.refcode_long || '',
              element_type: paragraphData.element_type,
              element_subtype: paragraphData.element_subtype || '',
              content: paragraphData.content,
              puborder: paragraphData.puborder,
              translations: paragraphData.translations || []
            };
            // Replace paragraph (overwrites existing - much faster than delete+insert)
            try {
              await this.db.insertParagraph(dbParagraph, bookId, path.basename(file, '.json'));
              totalParagraphs++;
            } catch (insertError) {
              console.warn(`โ ๏ธ Insert error for ${file}[${paraIndex}]: ${insertError.message}`);
            }
          }
          processedFiles++;
        } else {
          console.warn(`โ ๏ธ ${file} does not contain paragraph array`);
        }
      } catch (fileError) {
        console.warn(`โ ๏ธ Error processing ${file}: ${fileError.message}`);
      }
      // Periodic progress line, printed after every 5th file.
      if ((fileIndex + 1) % 5 === 0) {
        console.log(`๐ File progress: ${fileIndex + 1}/${jsonFiles.length}, ${totalParagraphs} paragraphs so far`);
      }
    }
    return {
      success: true,
      paragraphs: totalParagraphs,
      files: processedFiles
    };
  } catch (error) {
    return {
      success: false,
      error: error.message || 'Unknown error parsing content'
    };
  }
}
/**
 * Categorize book (same logic as downloader).
 *
 * Maps a book's author/title/type/code (case-insensitively) to a
 * { category, subcategory } pair used to build the library directory
 * path. Checks run top to bottom: the FIRST matching rule wins, so
 * rule order is significant.
 *
 * @param {object} book book metadata (author, title, type, code, npages).
 * @returns {{category: string, subcategory: string}}
 */
categorizeBook(book) {
  const author = book.author?.toLowerCase() || '';
  const title = book.title?.toLowerCase() || '';
  const type = book.type?.toLowerCase() || '';
  const code = book.code?.toLowerCase() || '';
  // Ellen G. White writings ('elena' matches Spanish-language editions)
  if (author.includes('white') || author.includes('elena')) {
    if (title.includes('maranatha') || title.includes('heavenly') ||
        title.includes('sons') || title.includes('daughters') ||
        title.includes('morning watch') || title.includes('devotional')) {
      return { category: 'egw', subcategory: 'devotional' };
    }
    if (title.includes('manuscript release') || code.includes('mr')) {
      return { category: 'egw', subcategory: 'manuscripts' };
    }
    if (title.includes('letter') || code.includes('lt')) {
      return { category: 'egw', subcategory: 'letters' };
    }
    if (title.includes('testimon') || code.includes('tt') || code.includes('1t')) {
      return { category: 'egw', subcategory: 'testimonies' };
    }
    if (title.includes('great controversy') || title.includes('desire') ||
        title.includes('patriarchs') || title.includes('acts') ||
        title.includes('prophets and kings') || title.includes('education') ||
        title.includes('ministry of healing') || title.includes('steps to christ')) {
      return { category: 'egw', subcategory: 'books' };
    }
    // NOTE(review): npages may be undefined, making the comparison false
    // and falling through to 'books' — presumably intended; confirm.
    if (type === 'pamphlet' || book.npages < 100) {
      return { category: 'egw', subcategory: 'pamphlets' };
    }
    return { category: 'egw', subcategory: 'books' };
  }
  // Pioneer authors (matched as substrings of the lowercased author)
  const pioneers = [
    'uriah smith', 'a. t. jones', 'j. n. andrews', 'john andrews',
    'm. l. andreasen', 'j. n. loughborough', 'alonzo jones',
    'ellet waggoner', 'stephen haskell', 'william miller',
    'joshua himes', 'hiram edson', 'joseph bates'
  ];
  if (pioneers.some(pioneer => author.includes(pioneer))) {
    if (type === 'periodical' || title.includes('review') || title.includes('herald')) {
      return { category: 'periodical', subcategory: 'pioneer' };
    }
    return { category: 'pioneer', subcategory: 'books' };
  }
  // Periodicals
  // NOTE(review): '&&' binds tighter than '||', so the last clause reads
  // as (... || (advent && herald)) — presumably intended; confirm.
  if (type === 'periodical' ||
      title.includes('review') || title.includes('herald') ||
      title.includes('signs') || title.includes('times') ||
      title.includes('youth') || title.includes('instructor') ||
      title.includes('advent') && title.includes('herald')) {
    return { category: 'periodical', subcategory: 'historical' };
  }
  // Reference materials
  if (type === 'bible' || type === 'dictionary' || type === 'scriptindex' ||
      type === 'topicalindex' || title.includes('concordance')) {
    return { category: 'reference', subcategory: 'biblical' };
  }
  // Historical works
  if (title.includes('history') || title.includes('origin') ||
      title.includes('movement') || title.includes('denomination') ||
      author.includes('spalding') || author.includes('knight')) {
    return { category: 'historical', subcategory: 'denominational' };
  }
  // Modern devotional works
  if (type === 'devotional' || title.includes('devotional') ||
      title.includes('daily') || title.includes('meditation')) {
    return { category: 'devotional', subcategory: 'modern' };
  }
  // Default classification
  if (type === 'book') {
    return { category: 'historical', subcategory: 'general' };
  }
  return { category: 'reference', subcategory: 'general' };
}
/**
* Main resume parsing function
*/
async resumeParsing() {
console.log('๐ Starting Smart Resume Parser\n');
// Load previous progress
const progress = this.loadProgress();
// Find all ZIP files
console.log('๐ Scanning for ZIP files...');
const allZipFiles = this.findAllZipFiles(this.zipsDir);
if (allZipFiles.length === 0) {
console.error(`โ No ZIP files found in: ${this.zipsDir}`);
process.exit(1);
}
// Initialize progress if new
if (!progress.startTime) {
progress.totalZips = allZipFiles.length;
progress.startTime = new Date().toISOString();
console.log(`๐ Found ${progress.totalZips} ZIP files to process`);
}
// Filter out already completed ZIPs
const remainingZips = allZipFiles.filter(zipPath =>
!this.isZipAlreadyParsed(zipPath, progress)
);
console.log(`๐ Progress Summary:`);
console.log(` Total ZIPs: ${progress.totalZips}`);
console.log(` Already completed: ${progress.completed}`);
console.log(` Remaining: ${remainingZips.length}`);
console.log(` Failed so far: ${progress.failed.length}`);
if (remainingZips.length === 0) {
console.log('\n๐ All ZIP files have been processed!');
await this.showFinalStats();
return;
}
console.log(`\n๐ Processing ${remainingZips.length} remaining ZIP files...\n`);
// Process remaining ZIPs
for (let i = 0; i < remainingZips.length; i++) {
const zipPath = remainingZips[i];
// Save progress before each ZIP (in case of crash)
progress.currentZipIndex = progress.completed + i;
this.saveProgress(progress);
const success = await this.parseZip(zipPath, progress);
if (success) {
// Save progress after successful completion
this.saveProgress(progress);
// Generate detailed report every 10 completed books
if (progress.completed % 10 === 0) {
await this.generateDetailedReport(progress);
}
}
// Show progress every 10 files
if ((progress.completed + i + 1) % 10 === 0) {
const elapsed = Date.now() - new Date(progress.startTime).getTime();
const rate = (progress.completed + i + 1) / (elapsed / 1000 / 60); // per minute
const remaining = progress.totalZips - (progress.completed + i + 1);
const eta = remaining / rate; // minutes
console.log(`\n๐ Progress: ${progress.completed + i + 1}/${progress.totalZips} (${((progress.completed + i + 1) / progress.totalZips * 100).toFixed(1)}%)`);
console.log(` Rate: ${rate.toFixed(1)} files/min, ETA: ${eta.toFixed(0)} minutes`);
}
}
// Final save and stats
this.saveProgress(progress);
console.log('\n๐ Parsing session completed!');
await this.showFinalStats();
// Generate final detailed report
await this.generateDetailedReport(progress);
if (progress.failed.length > 0) {
console.log('\nโ ๏ธ Failed ZIPs:');
progress.failed.forEach((failure, index) => {
console.log(` ${index + 1}. ${failure.zip}: ${failure.error}`);
});
console.log('\n๐ก Run this script again to retry failed ZIPs or investigate errors manually.');
}
this.db.close();
}
/**
* Show final statistics
*/
async showFinalStats() {
try {
const stats = await this.db.getStats();
console.log('\n๐ Final Database Statistics:');
console.log(` Languages: ${stats.languages}`);
console.log(` Books: ${stats.books}`);
console.log(` Downloaded Books: ${stats.downloadedBooks}`);
console.log(` Paragraphs: ${stats.paragraphs}`);
} catch (error) {
console.error('โ Error getting final stats:', error.message);
}
}
/**
* Clean up partial data from failed parsing
*/
async cleanupFailedBook(bookId, extractDir) {
try {
// Remove partial library directory
const { rmSync } = require('fs');
if (existsSync(extractDir)) {
console.log(`๐๏ธ Removing partial library directory: ${path.relative(process.cwd(), extractDir)}`);
rmSync(extractDir, { recursive: true, force: true });
}
// Remove partial database entries
console.log(`๐๏ธ Removing partial database entries for book ${bookId}...`);
// Use a direct database query to delete partial paragraphs
const deleteQuery = 'DELETE FROM paragraphs WHERE book_id = ?';
await new Promise((resolve, reject) => {
this.db.db.run(deleteQuery, [bookId], function(err) {
if (err) reject(err);
else resolve();
});
});
// Clear the downloaded flag if set
const clearFlagQuery = 'UPDATE books SET downloaded_at = NULL WHERE book_id = ?';
await new Promise((resolve, reject) => {
this.db.db.run(clearFlagQuery, [bookId], function(err) {
if (err) reject(err);
else resolve();
});
});
console.log(`โ
Cleanup complete for book ${bookId}`);
} catch (error) {
console.warn(`โ ๏ธ Error during cleanup: ${error.message}`);
}
}
/**
* Reset progress (start fresh)
*/
resetProgress() {
try {
if (existsSync(this.progressFile)) {
require('fs').unlinkSync(this.progressFile);
console.log('๐๏ธ Progress file reset');
}
} catch (error) {
console.error('โ Error resetting progress:', error.message);
}
}
}
// Export the class for external use
module.exports = { ResumeParser };
// CLI interface
/**
 * CLI entry point: handles --reset and --help/-h flags, otherwise kicks
 * off a (resumable) parsing session.
 */
async function main() {
  const args = process.argv.slice(2);
  // Constructed up front: the constructor also ensures the data
  // directory exists, which --reset relies on implicitly.
  const parser = new ResumeParser();
  const wantsReset = args.includes('--reset');
  const wantsHelp = args.includes('--help') || args.includes('-h');
  if (wantsReset) {
    parser.resetProgress();
    console.log('๐ Progress reset. Run script again to start fresh.');
    return;
  }
  if (wantsHelp) {
    const helpText = `
Smart Resume ZIP Parser
Usage: node parse-resume.cjs [options]
Options:
--reset Reset progress tracking and start fresh
--help, -h Show this help message
Features:
โข Tracks parsing progress in data/parse-progress.json
โข Skips already processed ZIP files
โข Resumes from where it left off
โข Handles crashes and interruptions gracefully
โข Shows detailed progress and ETA
โข Maintains failed ZIPs list for retry
Examples:
node parse-resume.cjs # Resume parsing
node parse-resume.cjs --reset # Reset and start fresh
`;
    console.log(helpText);
    return;
  }
  await parser.resumeParsing();
}
// Handle uncaught errors.
// Last-resort handlers: resumeParsing() saves progress before each ZIP,
// so a crash here loses at most the ZIP currently in flight.
process.on('uncaughtException', (error) => {
  console.error('\n๐ฅ Uncaught Exception:', error.message);
  console.error('๐ Progress has been saved. Run script again to resume.');
  process.exit(1);
});
process.on('unhandledRejection', (reason, promise) => {
  console.error('\n๐ฅ Unhandled Promise Rejection:', reason);
  console.error('๐ Progress has been saved. Run script again to resume.');
  process.exit(1);
});
// Run main function; any rejection from main() exits non-zero.
main().catch(error => {
  console.error('\nโ Fatal error:', error.message);
  console.error('๐ Progress has been saved. Run script again to resume.');
  process.exit(1);
});