// fetch-feeds.ts (4.87 kB)
import fs from 'fs';
import path from 'path';
import os from 'os';
import { parseRssFeed } from './feed-parser';
import { getDb, getWriteDb } from './db';
// Location of the database files: a non-empty DB_DIR environment variable wins,
// otherwise a dot-directory inside the current user's home is used.
const DB_DIR = process.env.DB_DIR
  ? process.env.DB_DIR
  : path.join(os.homedir(), '.mcp-rss-crawler');

// Create the directory (including any missing parents) the first time the module loads.
if (fs.existsSync(DB_DIR) === false) {
  fs.mkdirSync(DB_DIR, { recursive: true });
}
/**
 * Convert a feed item's `published` value into whole Unix seconds.
 *
 * Strings and `Date` objects are parsed as dates. Numbers are used as given
 * (NOTE(review): assumed to already be in seconds — confirm against feed-parser).
 * A missing, zero, non-finite, or unparseable value yields `fallbackSeconds`,
 * so the database never receives NaN or a fractional timestamp.
 */
function toEpochSeconds(published: unknown, fallbackSeconds: number): number {
  let seconds = Number.NaN;

  if (published instanceof Date) {
    seconds = published.getTime() / 1000;
  } else if (typeof published === 'string') {
    // An unparseable string gives an Invalid Date whose getTime() is NaN.
    seconds = new Date(published).getTime() / 1000;
  } else if (typeof published === 'number' && published !== 0) {
    // A numeric 0 is treated as "no date", matching the previous falsy check.
    seconds = published;
  }

  return Number.isFinite(seconds) ? Math.floor(seconds) : fallbackSeconds;
}

/**
 * Fetch every feed listed in the `feeds` table and store its newest items.
 *
 * For each feed, the parsed items (at most `itemsPerFeed`) and their categories
 * are written with `INSERT OR REPLACE` inside a single transaction, and the
 * feed's `last_updated` column is refreshed in that same transaction.
 * A failure while processing one feed is logged and that feed is skipped;
 * the remaining feeds are still processed.
 *
 * @param itemsPerFeed Maximum number of items to store per feed (default: 20).
 *   Negative values are treated as 0.
 * @returns A summary object. `success` is `false` (with `error` set) when the
 *   `feeds` table is empty or when an error occurs outside the per-feed
 *   handling; otherwise `success` is `true` and the counters report how many
 *   feeds were stored and how many items were written.
 */
export async function fetchAllFeeds(itemsPerFeed: number = 20): Promise<{
  success: boolean;
  feedsProcessed: number;
  totalItems: number;
  error?: string;
}> {
  try {
    console.log(`Fetching feeds with ${itemsPerFeed} items per feed...`);

    // Get all feeds from the database
    const db = getDb();
    const feeds = db.prepare('SELECT * FROM feeds ORDER BY category, name').all() as {
      id: string;
      url: string;
      name: string;
      category: string | null;
      last_updated: number;
    }[];

    if (!feeds || feeds.length === 0) {
      console.log('No feeds found in the database');
      return { success: false, feedsProcessed: 0, totalItems: 0, error: 'No feeds found in the database' };
    }

    console.log(`Found ${feeds.length} feeds to process`);

    // A negative count would make slice() drop items from the end of the list
    // instead of limiting it, so clamp at zero.
    const maxItems = Math.max(0, itemsPerFeed);

    let totalItems = 0;
    let processedFeeds = 0;

    for (const feed of feeds) {
      try {
        console.log(`Processing feed: ${feed.name} (${feed.url})`);

        const parsedFeed = await parseRssFeed(feed.url);

        if (!parsedFeed || !parsedFeed.items || parsedFeed.items.length === 0) {
          console.log(`No items found in feed: ${feed.url}`);
          continue;
        }

        const limitedItems = parsedFeed.items.slice(0, maxItems);
        console.log(`Fetched ${limitedItems.length} items from feed: ${feed.url}`);

        const feedId = feed.id;
        const writeDb = getWriteDb();

        // Prepare each statement once per feed rather than once per item/category.
        const insertItem = writeDb.prepare(`
          INSERT OR REPLACE INTO items (id, feed_id, title, link, summary, content, published, author)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        `);
        const insertCategory = writeDb.prepare(`
          INSERT OR REPLACE INTO categories (item_id, category)
          VALUES (?, ?)
        `);
        const touchFeed = writeDb.prepare(`
          UPDATE feeds SET last_updated = ? WHERE id = ?
        `);

        // All writes for one feed happen in a single transaction.
        writeDb.transaction(() => {
          const nowSeconds = Math.floor(Date.now() / 1000);

          for (const item of limitedItems) {
            // Always an integer number of seconds; falls back to "now" for
            // missing or unparseable dates.
            const publishedTime = toEpochSeconds(item.published, nowSeconds);

            // Summary and content may be plain strings or objects carrying a `content` field.
            const summary = typeof item.summary === 'string'
              ? item.summary
              : item.summary?.content || '';

            const content = typeof item.content === 'string'
              ? item.content
              : item.content?.content || '';

            // Prefer a plain string link, otherwise the first alternate href.
            const link = typeof item.link === 'string'
              ? item.link
              : item.alternate?.[0]?.href || '';

            insertItem.run(
              item.id,
              feedId,
              item.title || 'Untitled',
              link,
              summary,
              content,
              publishedTime,
              item.author || ''
            );

            if (item.categories && Array.isArray(item.categories)) {
              for (const category of item.categories) {
                if (category) {
                  insertCategory.run(item.id, category);
                }
              }
            }
          }

          // NOTE(review): last_updated is written in milliseconds while item
          // `published` values are in seconds — confirm which unit readers expect.
          touchFeed.run(Date.now(), feedId);
        })();

        totalItems += limitedItems.length;
        processedFeeds++;
      } catch (error) {
        // Best effort: one broken feed must not stop the others.
        console.error(`Error processing feed ${feed.url}:`, error);
      }
    }

    console.log(`Processed ${processedFeeds} feeds with a total of ${totalItems} items`);

    return {
      success: true,
      feedsProcessed: processedFeeds,
      totalItems
    };
  } catch (error) {
    console.error('Error fetching feeds:', error);
    return {
      success: false,
      feedsProcessed: 0,
      totalItems: 0,
      error: error instanceof Error ? error.message : String(error)
    };
  }
  // The database connections are not closed here; they are obtained from the db module.
}