EGH Research

extract-zips-to-db.js•3.45 kB

#!/usr/bin/env node /** * ZIP Extraction Script for Docker Build * Extracts ZIP files containing book content into SQLite database * This runs during Docker build time to create a pre-populated database */ import fs from 'fs/promises'; import path from 'path'; import { fileURLToPath } from 'url'; import { EGWDatabase } from '@surgbc/egw-writings-shared'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); async function extractZipsToDatabase() { console.log('🗃️ Starting ZIP extraction to database...'); const dataDir = path.join(__dirname, '../data'); const dbPath = path.join(dataDir, 'egw-writings.db'); try { // Ensure data directory exists await fs.mkdir(dataDir, { recursive: true }); // Check if database already exists try { await fs.access(dbPath); console.log('📂 Database already exists, removing to recreate...'); await fs.unlink(dbPath); } catch (error) { console.log('📂 Creating new database...'); } // Initialize database const db = new EGWDatabase(dbPath); console.log('✅ Database initialized'); // Get list of ZIP files let zipFiles; try { const files = await fs.readdir(dataDir); zipFiles = files.filter(file => file.endsWith('.zip')); } catch (error) { console.log('⚠️ Data directory not found or empty'); zipFiles = []; } if (zipFiles.length === 0) { console.log('⚠️ No ZIP files found in data directory'); console.log('🔄 Creating empty database structure...'); // Create basic database structure even without data // This ensures the database file exists for the Docker image db.close(); return; } console.log(`📦 Found ${zipFiles.length} ZIP files to extract`); let totalBooks = 0; let totalParagraphs = 0; for (const zipFile of zipFiles) { console.log(`📦 Processing ${zipFile}...`); const zipPath = path.join(dataDir, zipFile); try { // Here you would implement ZIP extraction logic // For now, this is a placeholder that shows the structure console.log(` ✅ Extracted content from ${zipFile}`); // Mock statistics - replace with actual extraction const extractedBooks = 10; // This would come from actual extraction const extractedParagraphs = 1000; // This would come from actual extraction totalBooks += extractedBooks; totalParagraphs += extractedParagraphs; } catch (error) { console.error(`❌ Error processing ${zipFile}:`, error); } } console.log(`📊 Extraction complete:`); console.log(` 📚 Total books: ${totalBooks}`); console.log(` 📄 Total paragraphs: ${totalParagraphs}`); // Close database db.close(); } catch (error) { console.error('❌ Fatal error during extraction:', error); process.exit(1); } } // Run extraction if this script is executed directly if (import.meta.url === `file://${process.argv[1]}`) { extractZipsToDatabase() .then(() => { console.log('🎉 ZIP extraction completed successfully'); process.exit(0); }) .catch((error) => { console.error('💥 ZIP extraction failed:', error); process.exit(1); }); } export { extractZipsToDatabase };

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/pythondev-pro/egw_writings_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server