check-completeness.js • 6.76 kB
#!/usr/bin/env node
/**
* Download Completeness Checker
*
* Analyzes the current state of downloads to determine completeness
*/
import { EGWDatabase } from '@surgbc/egw-writings-shared';
import { readFileSync, existsSync } from 'fs';
import path from 'path';
/**
 * Tally the download state of every book in `books`.
 *
 * A book is counted as complete only when it has a download timestamp AND
 * `db.getParagraphs` returns at least one row for it; every other book is
 * recorded in `incompleteBooks`.
 *
 * @param {object} db - Database handle; only `getParagraphs(bookId, 1)` is called on it.
 * @param {Iterable<object>} books - Book rows exposing `book_id`, `title`, `author` and `downloaded_at`.
 * @returns {{
 *   booksWithContent: number,
 *   booksWithoutContent: number,
 *   booksWithParagraphs: number,
 *   booksWithoutParagraphs: number,
 *   completedBooks: number,
 *   totalParagraphs: number,
 *   incompleteBooks: Array<object>
 * }} Counters plus one descriptor per incomplete book.
 */
function summarizeBookDownloads(db, books) {
  const summary = {
    booksWithContent: 0,
    booksWithoutContent: 0,
    booksWithParagraphs: 0,
    booksWithoutParagraphs: 0,
    completedBooks: 0,
    totalParagraphs: 0,
    incompleteBooks: [],
  };

  for (const book of books) {
    // `!= null` rejects both null and undefined, so a row that lacks the
    // field entirely is not mistaken for a downloaded book.
    const hasDownloadedTimestamp = book.downloaded_at != null;
    // NOTE(review): if the second argument of getParagraphs is a row limit,
    // this count is capped at 1 and the "total"/"average" paragraph figures
    // below only reflect presence, not volume — confirm against the
    // EGWDatabase API and switch to a real count if one is available.
    const paragraphCount = db.getParagraphs(book.book_id, 1).length;
    const hasParagraphs = paragraphCount > 0;

    if (hasDownloadedTimestamp) {
      summary.booksWithContent++;
    } else {
      summary.booksWithoutContent++;
    }

    if (hasParagraphs) {
      summary.booksWithParagraphs++;
      summary.totalParagraphs += paragraphCount;
    } else {
      summary.booksWithoutParagraphs++;
    }

    // Incomplete when the download was never attempted (no timestamp) or
    // it was attempted but produced no paragraphs (failed download).
    if (hasDownloadedTimestamp && hasParagraphs) {
      summary.completedBooks++;
    } else {
      summary.incompleteBooks.push({
        id: book.book_id,
        title: book.title,
        author: book.author,
        hasDownloadedTimestamp,
        hasParagraphs,
        paragraphCount,
      });
    }
  }

  return summary;
}

/**
 * Read the skipped-books JSON file and drop entries that repeat a book ID.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {Array<object>} The first entry seen for each distinct `id`.
 * @throws {Error} If the file cannot be read, is not valid JSON, or does not hold an array.
 */
function readUniqueSkippedBooks(filePath) {
  const parsed = JSON.parse(readFileSync(filePath, 'utf-8'));
  if (!Array.isArray(parsed)) {
    throw new TypeError('expected a JSON array of skipped books');
  }

  const seenIds = new Set();
  const uniqueBooks = [];
  for (const book of parsed) {
    if (!seenIds.has(book.id)) {
      seenIds.add(book.id);
      uniqueBooks.push(book);
    }
  }
  return uniqueBooks;
}

/**
 * Print a download-completeness report for the books returned by
 * `db.getBooks('en')`: database statistics, per-book completion counts, a
 * sample of incomplete books, the contents of `data/skipped-books.json`
 * (resolved against the current working directory, if present) and an
 * overall completion percentage.
 *
 * The database handle is always closed before the function finishes. If the
 * analysis throws, the error is logged and the process exits with status 1.
 *
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
async function checkDownloadCompleteness() {
  console.log('🔍 Analyzing Download Completeness...\n');
  const db = new EGWDatabase();
  let failed = false;

  try {
    // Overall statistics as reported by the database itself.
    const stats = db.getStats();
    console.log('📊 OVERALL DATABASE STATISTICS:');
    console.log('================================');
    console.log(` Languages: ${stats.languages}`);
    console.log(` Books: ${stats.books}`);
    console.log(` Downloaded Books: ${stats.downloadedBooks}`);
    console.log(` Paragraphs: ${stats.paragraphs}`);

    // Per-book analysis.
    const allBooks = db.getBooks('en');
    console.log('\n📚 BOOK COMPLETENESS ANALYSIS:');
    console.log('==============================');

    const {
      booksWithContent,
      booksWithoutContent,
      booksWithParagraphs,
      booksWithoutParagraphs,
      completedBooks,
      totalParagraphs,
      incompleteBooks,
    } = summarizeBookDownloads(db, allBooks);

    // Math.max keeps the average finite when no book has paragraphs.
    const averageParagraphs = (totalParagraphs / Math.max(booksWithParagraphs, 1)).toFixed(1);
    console.log(` ✅ Books with download timestamp: ${booksWithContent}`);
    console.log(` ❌ Books without download timestamp: ${booksWithoutContent}`);
    console.log(` 📝 Books with paragraphs: ${booksWithParagraphs}`);
    console.log(` 📭 Books without paragraphs: ${booksWithoutParagraphs}`);
    console.log(` 📈 Average paragraphs per book: ${averageParagraphs}`);

    console.log('\n🔍 INCOMPLETE BOOKS ANALYSIS:');
    console.log('============================');
    console.log(` Total incomplete books: ${incompleteBooks.length}`);

    if (incompleteBooks.length > 0) {
      console.log('\n📋 INCOMPLETE BOOKS DETAILS:');
      console.log('============================');

      // Group by failure type.
      const neverAttempted = incompleteBooks.filter((b) => !b.hasDownloadedTimestamp);
      const failedDownloads = incompleteBooks.filter((b) => b.hasDownloadedTimestamp && !b.hasParagraphs);
      console.log(` 📥 Never attempted download: ${neverAttempted.length}`);
      console.log(` ❌ Failed downloads (no paragraphs): ${failedDownloads.length}`);

      // Show only the first ten so the report stays readable.
      console.log('\n📖 SAMPLE OF INCOMPLETE BOOKS:');
      for (const book of incompleteBooks.slice(0, 10)) {
        const status = book.hasDownloadedTimestamp ? 'FAILED_DOWNLOAD' : 'NEVER_ATTEMPTED';
        console.log(` - ${book.title} (ID: ${book.id}) - ${status}`);
      }
      if (incompleteBooks.length > 10) {
        console.log(` ... and ${incompleteBooks.length - 10} more`);
      }
    }

    // Skipped-books file is optional; a broken file is reported, not fatal.
    const skippedBooksFile = path.join(process.cwd(), 'data', 'skipped-books.json');
    if (existsSync(skippedBooksFile)) {
      try {
        const uniqueSkippedBooks = readUniqueSkippedBooks(skippedBooksFile);
        console.log('\n📋 SKIPPED BOOKS SUMMARY:');
        console.log('=========================');
        console.log(` Total skipped books (unique): ${uniqueSkippedBooks.length}`);
        if (uniqueSkippedBooks.length > 0) {
          console.log('\n📖 SKIPPED BOOKS DETAILS:');
          for (const book of uniqueSkippedBooks) {
            console.log(` - ${book.title} (ID: ${book.id})`);
            console.log(` Error: ${book.error}`);
          }
        }
      } catch (error) {
        console.log(` ⚠️ Could not read skipped books file: ${error.message}`);
      }
    }

    // Completion percentage; an empty catalogue reports 0.0 instead of NaN.
    const totalBooks = allBooks.length;
    const completionRatio = totalBooks > 0 ? completedBooks / totalBooks : 0;
    const completionPercentage = (completionRatio * 100).toFixed(1);

    console.log('\n🎯 DOWNLOAD COMPLETION SUMMARY:');
    console.log('===============================');
    console.log(` 📊 Overall Completion: ${completionPercentage}%`);
    console.log(` ✅ Complete Books: ${completedBooks}/${totalBooks}`);
    console.log(` 📋 Incomplete Books: ${incompleteBooks.length}/${totalBooks}`);
    console.log(` 📝 Total Paragraphs: ${totalParagraphs.toLocaleString()}`);

    // Thresholds are applied to the rounded figure that was just printed.
    const roundedPercentage = Number(completionPercentage);
    if (roundedPercentage >= 95) {
      console.log('\n🎉 EXCELLENT! Downloads are nearly complete!');
      console.log('💡 The remaining books likely require special permissions.');
    } else if (roundedPercentage >= 80) {
      console.log('\n👍 GOOD PROGRESS! Most books are downloaded.');
      console.log('💡 Consider running the resume command to complete remaining downloads.');
    } else {
      console.log('\n📈 PROGRESS NEEDED! Many books still need downloading.');
      console.log('💡 Run "download:resume" to continue downloading.');
    }
  } catch (error) {
    console.error('❌ Error analyzing download completeness:', error);
    failed = true;
  } finally {
    db.close();
  }

  // Exit only after the finally block: process.exit() terminates at once,
  // so calling it inside the catch would skip db.close().
  if (failed) {
    process.exit(1);
  }
}
// Run the analysis and translate its outcome into the process exit status:
// 0 once the report has been printed, 1 if the returned promise rejects.
checkDownloadCompleteness()
  .then(() => {
    console.log('\n✅ Analysis complete!');
    process.exit(0);
  })
  .catch((error) => {
    console.error('❌ Analysis failed:', error);
    process.exit(1);
  });