#!/bin/bash
# Script to load CVE data using optimized Git clone loader
# Can load all CVEs or filter by specific years
# Abort the script as soon as any command exits with a non-zero status.
set -o errexit

# Opening banner: rule, title, rule, then one blank line.
printf '%s\n' \
  "=========================================" \
  "Loading CVE Data (Optimized Loader)" \
  "=========================================" \
  ""
# Turn the command line arguments into the year filter.
# YEARS stays empty when no arguments were given; otherwise it holds
# "--years" followed by all arguments joined by spaces. It is read again
# further down when the loader is started.
YEARS=""
echo "Configuration:"
if (( $# == 0 )); then
  echo " Loading ALL years (~240K CVEs)"
else
  YEARS="--years $*"
  echo " Years: $*"
fi
echo ""
# Check if a virtual environment is activated.
# When VIRTUAL_ENV is unset or empty, warn and ask for a single-key
# confirmation; anything other than y/Y stops the script with status 1.
# The ":-" default keeps the test working even if nounset is ever enabled.
if [[ -z "${VIRTUAL_ENV:-}" ]]; then
  # Diagnostics go to stderr so they do not mix with regular output.
  {
    echo "Warning: Virtual environment not detected."
    echo "Activate it with: source venv/bin/activate"
    echo ""
  } >&2

  # read returns non-zero on end-of-file (e.g. stdin closed or /dev/null).
  # Handle that here so the script stops with an explanation instead of
  # being terminated silently by errexit.
  if ! read -p "Continue anyway? (y/N) " -n 1 -r; then
    echo "" >&2
    echo "Error: no confirmation received on standard input; aborting." >&2
    exit 1
  fi
  # -n 1 returns right after one key press, so finish the prompt line.
  echo >&2

  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    exit 1
  fi
fi
# Run the optimized loader.
echo "Starting data load..."
echo "This may take 6-7 minutes for full dataset..."
echo ""

# Assemble the command as an array so every argument reaches python as
# exactly one word.
loader_cmd=(python -m src.data_ingestion.loader_optimized)
if [[ -n "${YEARS:-}" ]]; then
  # Split YEARS on whitespace, as the previous unquoted expansion did, but
  # without pathname expansion: a value containing *, ? or [ is passed
  # through literally instead of being replaced by matching file names.
  # With -d '' read consumes the whole string and then reports end-of-file
  # (non-zero) although the array was filled, hence the "|| true".
  year_args=()
  read -r -d '' -a year_args <<< "$YEARS" || true
  loader_cmd+=("${year_args[@]}")
fi
"${loader_cmd[@]}"
# Closing banner plus hints: where the database is and how to verify it.
# Each printf argument becomes one output line; "" yields a blank line.
printf '%s\n' \
  "" \
  "=========================================" \
  "Data load complete!" \
  "=========================================" \
  "" \
  "Database location: data/cve.db" \
  "" \
  "Verify with:" \
  " python tests/test_database.py" \
  ""