EGH Research

check-completeness.js•6.76 kB

#!/usr/bin/env node
/**
 * Download Completeness Checker
 *
 * Analyzes the current state of downloads to determine completeness
 */
import { EGWDatabase } from '@surgbc/egw-writings-shared';
import { readFileSync, existsSync } from 'fs';
import path from 'path';

/** Number of incomplete books listed before the sample is truncated. */
const INCOMPLETE_SAMPLE_SIZE = 10;

/**
 * Walks every book and tallies download/paragraph status.
 *
 * A book is recorded as incomplete when it has no download timestamp
 * (never attempted) or has a timestamp but no paragraphs (failed download).
 *
 * @param {object} db - Database handle exposing `getParagraphs(bookId, n)`.
 * @param {Array<object>} books - Book rows with `book_id`, `title`, `author`, `downloaded_at`.
 * @returns {object} Counters plus the `incompleteBooks` list.
 */
function analyzeBooks(db, books) {
  const summary = {
    booksWithContent: 0,
    booksWithoutContent: 0,
    booksWithParagraphs: 0,
    booksWithoutParagraphs: 0,
    totalParagraphs: 0,
    incompleteBooks: [],
  };

  for (const book of books) {
    // `!= null` on purpose: a missing (undefined) timestamp must not count as downloaded.
    const hasDownloadedTimestamp = book.downloaded_at != null;
    // NOTE(review): if the second argument of getParagraphs is a row limit, this
    // count is capped at 1 and the paragraph totals/average below only reflect
    // "has at least one paragraph" — confirm against the EGWDatabase API.
    const paragraphCount = db.getParagraphs(book.book_id, 1).length;
    const hasParagraphs = paragraphCount > 0;

    if (hasDownloadedTimestamp) {
      summary.booksWithContent++;
    } else {
      summary.booksWithoutContent++;
    }

    if (hasParagraphs) {
      summary.booksWithParagraphs++;
      summary.totalParagraphs += paragraphCount;
    } else {
      summary.booksWithoutParagraphs++;
    }

    if (!hasDownloadedTimestamp || !hasParagraphs) {
      summary.incompleteBooks.push({
        id: book.book_id,
        title: book.title,
        author: book.author,
        hasDownloadedTimestamp,
        hasParagraphs,
        paragraphCount,
      });
    }
  }

  return summary;
}

/**
 * Prints the breakdown and a truncated sample of incomplete books.
 *
 * @param {Array<object>} incompleteBooks - Entries produced by `analyzeBooks`.
 */
function reportIncompleteBooks(incompleteBooks) {
  console.log('\n🔄 INCOMPLETE BOOKS ANALYSIS:');
  console.log('============================');
  console.log(` Total incomplete books: ${incompleteBooks.length}`);

  if (incompleteBooks.length === 0) {
    return;
  }

  console.log('\n📋 INCOMPLETE BOOKS DETAILS:');
  console.log('============================');

  // Group by type
  const neverAttempted = incompleteBooks.filter((b) => !b.hasDownloadedTimestamp);
  const failedDownloads = incompleteBooks.filter((b) => b.hasDownloadedTimestamp && !b.hasParagraphs);
  console.log(` 📭 Never attempted download: ${neverAttempted.length}`);
  console.log(` ❌ Failed downloads (no paragraphs): ${failedDownloads.length}`);

  // Show sample of incomplete books
  console.log('\n🔍 SAMPLE OF INCOMPLETE BOOKS:');
  for (const book of incompleteBooks.slice(0, INCOMPLETE_SAMPLE_SIZE)) {
    const status = book.hasDownloadedTimestamp ? 'FAILED_DOWNLOAD' : 'NEVER_ATTEMPTED';
    console.log(` - ${book.title} (ID: ${book.id}) - ${status}`);
  }

  if (incompleteBooks.length > INCOMPLETE_SAMPLE_SIZE) {
    console.log(` ... and ${incompleteBooks.length - INCOMPLETE_SAMPLE_SIZE} more`);
  }
}

/**
 * Returns the entries of `books` with duplicate `id`s removed (first one wins).
 *
 * @param {Array<object>} books - Entries carrying an `id` property.
 * @returns {Array<object>} De-duplicated entries in original order.
 */
function dedupeById(books) {
  const seenIds = new Set();
  const unique = [];
  for (const book of books) {
    if (!seenIds.has(book.id)) {
      seenIds.add(book.id);
      unique.push(book);
    }
  }
  return unique;
}

/**
 * Reads `data/skipped-books.json` (relative to the current working directory)
 * and prints a summary. Does nothing when the file is absent; a file that
 * cannot be read or parsed is reported as a warning rather than aborting.
 */
function reportSkippedBooks() {
  const skippedBooksFile = path.join(process.cwd(), 'data', 'skipped-books.json');
  if (!existsSync(skippedBooksFile)) {
    return;
  }

  let uniqueSkippedBooks;
  try {
    const parsed = JSON.parse(readFileSync(skippedBooksFile, 'utf-8'));
    if (!Array.isArray(parsed)) {
      throw new Error('expected a JSON array');
    }
    uniqueSkippedBooks = dedupeById(parsed);
  } catch (error) {
    // Best-effort section: surface the cause instead of hiding it, then carry on.
    console.log(` ⚠️ Could not read skipped books file: ${error.message}`);
    return;
  }

  console.log('\n📝 SKIPPED BOOKS SUMMARY:');
  console.log('=========================');
  console.log(` Total skipped books (unique): ${uniqueSkippedBooks.length}`);

  if (uniqueSkippedBooks.length > 0) {
    console.log('\n🔍 SKIPPED BOOKS DETAILS:');
    for (const book of uniqueSkippedBooks) {
      console.log(` - ${book.title} (ID: ${book.id})`);
      console.log(` Error: ${book.error}`);
    }
  }
}

/**
 * Derives the completed-book count and percentage.
 *
 * A book is complete exactly when it is not incomplete, so the two counts
 * always add up to `totalBooks`. An empty catalogue yields 0.0 rather than NaN.
 *
 * @param {number} totalBooks - Number of books analyzed.
 * @param {number} incompleteCount - Number of books flagged incomplete.
 * @returns {{completedBooks: number, completionPercentage: string}}
 *   Percentage is formatted with one decimal place.
 */
function summarizeCompletion(totalBooks, incompleteCount) {
  const completedBooks = totalBooks - incompleteCount;
  const ratio = totalBooks > 0 ? completedBooks / totalBooks : 0;
  return { completedBooks, completionPercentage: (ratio * 100).toFixed(1) };
}

/**
 * Prints the full completeness report: database statistics, per-book
 * analysis, skipped-books summary and the overall completion verdict.
 *
 * Errors propagate to the caller; the database is closed in all cases.
 *
 * @returns {Promise<void>}
 */
async function checkDownloadCompleteness() {
  console.log('🔍 Analyzing Download Completeness...\n');

  const db = new EGWDatabase();

  try {
    // Get overall statistics
    const stats = db.getStats();
    console.log('📊 OVERALL DATABASE STATISTICS:');
    console.log('================================');
    console.log(` Languages: ${stats.languages}`);
    console.log(` Books: ${stats.books}`);
    console.log(` Downloaded Books: ${stats.downloadedBooks}`);
    console.log(` Paragraphs: ${stats.paragraphs}`);

    // Get all books for analysis
    const allBooks = db.getBooks('en');

    console.log('\n📚 BOOK COMPLETENESS ANALYSIS:');
    console.log('==============================');

    const {
      booksWithContent,
      booksWithoutContent,
      booksWithParagraphs,
      booksWithoutParagraphs,
      totalParagraphs,
      incompleteBooks,
    } = analyzeBooks(db, allBooks);

    console.log(` ✅ Books with download timestamp: ${booksWithContent}`);
    console.log(` ❌ Books without download timestamp: ${booksWithoutContent}`);
    console.log(` 📄 Books with paragraphs: ${booksWithParagraphs}`);
    console.log(` 📭 Books without paragraphs: ${booksWithoutParagraphs}`);
    // Math.max guards the division when no book has paragraphs.
    console.log(` 📊 Average paragraphs per book: ${(totalParagraphs / Math.max(booksWithParagraphs, 1)).toFixed(1)}`);

    reportIncompleteBooks(incompleteBooks);

    // Check skipped books file
    reportSkippedBooks();

    // Calculate completion percentage
    const totalBooks = allBooks.length;
    const { completedBooks, completionPercentage } = summarizeCompletion(totalBooks, incompleteBooks.length);
    // Compare the rounded value so the verdict agrees with the number shown.
    const shownPercentage = Number(completionPercentage);

    console.log('\n🎯 DOWNLOAD COMPLETION SUMMARY:');
    console.log('===============================');
    console.log(` 📊 Overall Completion: ${completionPercentage}%`);
    console.log(` ✅ Complete Books: ${completedBooks}/${totalBooks}`);
    console.log(` 🔄 Incomplete Books: ${incompleteBooks.length}/${totalBooks}`);
    console.log(` 📄 Total Paragraphs: ${totalParagraphs.toLocaleString()}`);

    if (shownPercentage >= 95) {
      console.log('\n🎉 EXCELLENT! Downloads are nearly complete!');
      console.log('💡 The remaining books likely require special permissions.');
    } else if (shownPercentage >= 80) {
      console.log('\n👍 GOOD PROGRESS! Most books are downloaded.');
      console.log('💡 Consider running the resume command to complete remaining downloads.');
    } else {
      console.log('\n📈 PROGRESS NEEDED! Many books still need downloading.');
      console.log('💡 Run "download:resume" to continue downloading.');
    }
  } finally {
    // No process.exit() inside the try/catch: exiting there would terminate the
    // process before this cleanup runs. Failures are reported by the runner below.
    db.close();
  }
}

// Run the analysis
checkDownloadCompleteness()
  .then(() => {
    console.log('\n✅ Analysis complete!');
    process.exit(0);
  })
  .catch((error) => {
    console.error('❌ Analysis failed:', error);
    process.exit(1);
  });

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/pythondev-pro/egw_writings_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.