// index.ts
import dbManager from '../db-manager';
import { parseRssFeed, type ParsedFeed } from './feed-parser';
import { convertDbItemsToFeedItems, formatFeedResponse } from './formatter';
import { defaultRssFeeds } from '../../../../config';
import type { RssFeed, FeedResponse, FeedItem } from './types';
import FirecrawlApp from '@mendable/firecrawl-js';
import crypto from 'crypto';
/**
 * RSS Manager: coordinates RSS feed management (add/remove/list), interest
 * keywords, article scraping and site crawling via Firecrawl, and retrieval
 * of stored items through dbManager.
 *
 * NOTE(review): informational messages throughout this class are logged via
 * console.error rather than console.log — presumably to keep stdout free for
 * a protocol channel (e.g. an MCP/stdio server); confirm before changing.
 */
export class RssManager {
  // Per-feed-URL cache of the most recently fetched items.
  // NOTE(review): this map is written in crawlFeed() but never read anywhere
  // in this file — looks like dead state; verify against callers before removing.
  private cachedFeedItems: Map<string, FeedItem[]> = new Map();
  // Epoch-ms timestamp of the last successful crawlFeed() run (also unread here).
  private lastFetchTime: number = 0;
  // Ensures initializeFeeds() seeds the default feeds at most once per process.
  private initialized: boolean = false;
  // Firecrawl client. Declared `any`; response shapes are accessed loosely
  // via `as any` casts below because they vary across firecrawl-js versions.
  private firecrawl: any;
  constructor() {
    // Initialize firecrawl with API key from environment variables
    const apiKey = process.env.FIRECRAWL_API_KEY;
    if (!apiKey) {
      console.warn('FIRECRAWL_API_KEY not found in environment variables. Article fetching may not work correctly.');
      // Create a dummy FirecrawlApp that will throw errors when used
      this.firecrawl = new FirecrawlApp({ apiKey: 'dummy-key' });
    } else {
      this.firecrawl = new FirecrawlApp({ apiKey });
    }
    
    // Initialize with default feeds if needed - do this asynchronously.
    // The fixed 1s delay keeps module import synchronous and presumably gives
    // dbManager time to finish its own setup — TODO confirm; an explicit
    // readiness signal would be more robust than a timeout.
    setTimeout(() => {
      this.initializeFeeds().catch(err => {
        console.error('Error initializing feeds:', err);
      });
    }, 1000);
  }
  /**
   * Initialize feeds in the database with default feeds if needed.
   * Seeds defaultRssFeeds only when the feeds table is empty. Errors are
   * logged and swallowed so a failed init does not crash the process; in
   * that case `initialized` stays false, allowing a later retry.
   */
  private async initializeFeeds(): Promise<void> {
    if (this.initialized) return;
    try {
      // Get all feeds from the database
      const feeds = await dbManager.getAllFeeds();
      
      // If no feeds exist, add the default feeds
      if (feeds.length === 0) {
        for (const feed of defaultRssFeeds) {
          await dbManager.addFeed(feed.url, feed.name, feed.category);
        }
        console.error('Initialized database with default RSS feeds');
      }
      
      // Only reached when seeding (if any) succeeded without throwing.
      this.initialized = true;
    } catch (error) {
      console.error('Error initializing feeds:', error);
    }
  }
  /**
   * Add a new RSS feed to the database.
   * @param feed Feed descriptor (url, name, optional category)
   * @returns true if the feed was persisted (per dbManager.addFeed)
   */
  public async addFeed(feed: RssFeed): Promise<boolean> {
    return await dbManager.addFeed(feed.url, feed.name, feed.category);
  }
  /**
   * Remove an RSS feed from the database.
   * @param url URL identifying the feed to delete
   * @returns true if the feed was removed (per dbManager.removeFeed)
   */
  public async removeFeed(url: string): Promise<boolean> {
    return await dbManager.removeFeed(url);
  }
  /**
   * Get the list of RSS feeds from the database, projected onto the
   * public RssFeed shape.
   */
  public async getFeeds(): Promise<RssFeed[]> {
    const feeds = await dbManager.getAllFeeds();
    return feeds.map((feed: any) => ({
      url: feed.url,
      name: feed.name,
      // Normalize falsy categories (null/empty string) to undefined.
      category: feed.category || undefined
    }));
  }
  /**
   * Add a new interest keyword to the database.
   * @returns true if the keyword was persisted
   */
  public async addKeyword(keyword: string): Promise<boolean> {
    return await dbManager.addKeyword(keyword);
  }
  /**
   * Remove an interest keyword from the database.
   * @returns true if the keyword was removed
   */
  public async removeKeyword(keyword: string): Promise<boolean> {
    return await dbManager.removeKeyword(keyword);
  }
  /**
   * Get all interest keywords from the database as plain strings.
   */
  public async getKeywords(): Promise<string[]> {
    const keywords = await dbManager.getAllKeywords();
    return keywords.map((k: any) => k.keyword);
  }
  /**
   * Get articles matching the user's interest keywords.
   * @param limit Maximum number of items to return (default 10)
   * @returns A formatted FeedResponse; on failure, an empty "Error" response
   *          rather than a thrown exception.
   */
  public async getArticlesByKeywords(limit: number = 10): Promise<FeedResponse> {
    try {
      // Get items matching keywords from the database
      const items = await dbManager.getItemsByKeywords(limit);
      
      // Convert to feed items
      const feedItems = convertDbItemsToFeedItems(items);
      
      // Return formatted response
      return formatFeedResponse(
        feedItems,
        "Articles Matching Your Interests",
        "feed/interests",
        "Articles matching your interest keywords"
      );
    } catch (error) {
      console.error('Error getting articles by keywords:', error);
      return formatFeedResponse([], "Error", "error", "Error getting articles by keywords");
    }
  }
  /**
   * Fetch an article from a URL using firecrawl and save it to the database.
   * Returns the cached database row when the URL was fetched before.
   * @param url The URL to fetch the article from
   * @returns The fetched article or null if there was an error
   */
  public async fetchArticleFromUrl(url: string): Promise<any> {
    try {
      console.error(`Fetching article from URL: ${url}`);
      
      // Check if the article already exists in the database
      const existingArticle = await dbManager.getArticleByUrl(url);
      if (existingArticle) {
        console.error(`Article already exists in database: ${url}`);
        return existingArticle;
      }
      
      // Use firecrawl to fetch the article
      const scrapeResponse = await this.firecrawl.scrapeUrl(url, { 
        formats: ['markdown', 'html']
      });
      
      if (!scrapeResponse.success) {
        console.error(`Failed to scrape URL: ${url} - ${scrapeResponse.error}`);
        return null;
      }
      
      // Generate a unique ID for the article.
      // MD5 here is used only as a stable URL fingerprint, not for security.
      const id = `article/${crypto.createHash('md5').update(url).digest('hex')}`;
      
      // Save the article to the database.
      // NOTE(review): firecrawl-js commonly nests title/description under
      // scrapeResponse.metadata rather than at the top level — verify these
      // fields are actually populated for the SDK version in use.
      const article = {
        id,
        url,
        title: scrapeResponse.title || '',
        content: scrapeResponse.markdown || '',
        html: scrapeResponse.html || '',
        author: '', // Firecrawl may not provide author directly
        published_date: '', // Firecrawl may not provide published date directly
        image_url: '', // Firecrawl may not provide image URL directly
        summary: scrapeResponse.description || '',
        fetched_at: Date.now()
      };
      
      const saved = await dbManager.saveArticle(article);
      if (!saved) {
        console.error(`Failed to save article to database: ${url}`);
        return null;
      }
      
      console.error(`Successfully fetched and saved article: ${url}`);
      return article;
    } catch (error) {
      console.error(`Error fetching article from URL: ${url}`, error);
      return null;
    }
  }
  /**
   * Crawl a website using Firecrawl (synchronously, waiting for completion)
   * and save all crawled pages to the database.
   * @param url The URL to crawl
   * @param limit Maximum number of pages to crawl
   * @returns Object with success status and result information
   */
  public async crawlWebsite(url: string, limit: number = 100): Promise<any> {
    try {
      console.error(`Crawling website: ${url} with limit: ${limit}`);
      
      const crawlResponse = await this.firecrawl.crawlUrl(url, {
        limit: limit,
        scrapeOptions: {
          formats: ['markdown', 'html'],
        }
      });
      if (!crawlResponse.success) {
        console.error(`Failed to crawl website: ${url} - ${crawlResponse.error}`);
        return { success: false, error: crawlResponse.error };
      }
      
      console.error(`Successfully crawled website: ${url}`);
      
      // Process and save each page
      let savedCount = 0;
      // Use type assertion to access response data.
      // NOTE(review): verify the field name — recent firecrawl-js versions
      // expose crawled documents under `data`, not `pages`; if so, this loop
      // silently saves nothing (pages defaults to []).
      const responseData = crawlResponse as any;
      const pages = responseData.pages || [];
      if (Array.isArray(pages)) {
        for (const page of pages) {
          // Pages without a URL cannot be keyed/deduplicated — skip them.
          if (!page.url) continue;
          
          // Generate a unique ID for the article (MD5 of URL as fingerprint).
          const id = `article/${crypto.createHash('md5').update(page.url).digest('hex')}`;
          
          // Save the article to the database
          const article = {
            id,
            url: page.url,
            title: page.title || '',
            content: page.markdown || '',
            html: page.html || '',
            author: page.author || '',
            published_date: page.publishedDate || '',
            image_url: page.imageUrl || '',
            summary: page.description || '',
            fetched_at: Date.now()
          };
          
          const saved = await dbManager.saveArticle(article);
          if (saved) {
            savedCount++;
          }
        }
      }
      
      return { 
        success: true, 
        totalPages: pages.length,
        savedPages: savedCount
      };
    } catch (error) {
      console.error(`Error crawling website: ${url}`, error);
      return { 
        success: false, 
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }
  /**
   * Asynchronously crawl a website using Firecrawl. Unlike crawlWebsite(),
   * this only starts the job; poll with checkCrawlStatus() for progress.
   * @param url The URL to crawl
   * @param limit Maximum number of pages to crawl
   * @returns Object with crawl ID and status
   */
  public async asyncCrawlWebsite(url: string, limit: number = 100): Promise<any> {
    try {
      console.error(`Starting async crawl of website: ${url} with limit: ${limit}`);
      
      const crawlResponse = await this.firecrawl.asyncCrawlUrl(url, {
        limit: limit,
        scrapeOptions: {
          formats: ['markdown', 'html'],
        }
      });
      if (!crawlResponse.success) {
        console.error(`Failed to start async crawl: ${url} - ${crawlResponse.error}`);
        return { success: false, error: crawlResponse.error };
      }
      
      console.error(`Successfully started async crawl of website: ${url}, crawl ID: ${crawlResponse.id}`);
      // Use type assertion to access response data
      const responseData = crawlResponse as any;
      return { 
        success: true, 
        id: crawlResponse.id,
        status: responseData.status || 'pending'
      };
    } catch (error) {
      console.error(`Error starting async crawl of website: ${url}`, error);
      return { 
        success: false, 
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }
  /**
   * Check the status of an asynchronous crawl started by asyncCrawlWebsite().
   * @param crawlId The ID of the crawl job
   * @returns The current status of the crawl job
   */
  public async checkCrawlStatus(crawlId: string): Promise<any> {
    try {
      console.error(`Checking status of crawl: ${crawlId}`);
      
      const statusResponse = await this.firecrawl.checkCrawlStatus(crawlId);
      
      if (!statusResponse.success) {
        console.error(`Failed to check crawl status: ${crawlId} - ${statusResponse.error}`);
        return { success: false, error: statusResponse.error };
      }
      
      // Use type assertion to access response data.
      // NOTE(review): field names (status/pagesProcessed/totalPages) should be
      // verified against the installed firecrawl-js version; unknown fields
      // degrade to the defaults below rather than failing.
      const responseData = statusResponse as any;
      return { 
        success: true, 
        status: responseData.status || 'unknown',
        pagesProcessed: responseData.pagesProcessed || 0,
        totalPages: responseData.totalPages || 0
      };
    } catch (error) {
      console.error(`Error checking crawl status: ${crawlId}`, error);
      return { 
        success: false, 
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }
  /**
   * Cancel an asynchronous crawl job.
   * @param crawlId The ID of the crawl job to cancel
   * @returns The cancellation status
   */
  public async cancelCrawl(crawlId: string): Promise<any> {
    try {
      console.error(`Cancelling crawl: ${crawlId}`);
      
      const cancelResponse = await this.firecrawl.cancelCrawl(crawlId);
      
      if (!cancelResponse.success) {
        console.error(`Failed to cancel crawl: ${crawlId} - ${cancelResponse.error}`);
        return { success: false, error: cancelResponse.error };
      }
      
      return { 
        success: true, 
        message: `Crawl ${crawlId} cancelled successfully`
      };
    } catch (error) {
      console.error(`Error cancelling crawl: ${crawlId}`, error);
      return { 
        success: false, 
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }
  /**
   * Get articles from the database.
   * @param limit Maximum number of articles to return
   * @returns Array of articles (empty on error — errors are logged, not thrown)
   */
  public async getArticles(limit: number = 10): Promise<any[]> {
    try {
      console.error(`Getting articles with limit: ${limit}`);
      return await dbManager.getAllArticles(limit);
    } catch (error) {
      console.error(`Error getting articles: ${error}`);
      return [];
    }
  }
  /**
   * Search articles in the database.
   * @param query Search query
   * @param limit Maximum number of articles to return
   * @returns Array of matching articles (empty on error)
   */
  public async searchArticles(query: string, limit: number = 10): Promise<any[]> {
    try {
      console.error(`Searching articles with query: ${query}, limit: ${limit}`);
      return await dbManager.searchArticles(query, limit);
    } catch (error) {
      console.error(`Error searching articles: ${error}`);
      return [];
    }
  }
  /**
   * Crawl articles from all configured RSS feeds (in parallel) and store
   * them in SQLite via fetchFeed(). Per-feed failures are logged and skipped
   * so one bad feed does not abort the run.
   * @param limit Maximum number of items in the returned aggregate response
   */
  public async crawlFeed(limit: number = 10): Promise<FeedResponse> {
    try {
      // Collect items from all feeds
      const allItems: FeedItem[] = [];
      
      // Get all feeds from the database
      const feeds = await this.getFeeds();
      
      // Process feeds in parallel
      const feedPromises = feeds.map(feed => 
        this.fetchFeed(feed.url, feed.name, feed.category)
          .then(items => {
            // Add items to the collection
            if (items && items.length > 0) {
              allItems.push(...items);
              
              // Cache the items for this feed
              this.cachedFeedItems.set(feed.url, items);
            }
          })
          .catch(error => {
            console.error(`Error fetching feed ${feed.url}:`, error);
          })
      );
      
      // Wait for all feeds to be processed
      await Promise.all(feedPromises);
      
      // Sort all items by published date (newest first); items with no
      // published timestamp sort as epoch 0 (oldest).
      allItems.sort((a, b) => (b.published || 0) - (a.published || 0));
      
      // Update last fetch time
      this.lastFetchTime = Date.now();
      
      // Return formatted response with limited items
      return formatFeedResponse(
        allItems.slice(0, limit),
        "RSS Manager Feeds",
        "feed/all",
        "Aggregated feeds from RSS Manager"
      );
    } catch (error) {
      console.error('Error fetching feeds:', error);
      return formatFeedResponse([], "Error", "error", "Error fetching feeds");
    }
  }
  
  /**
   * Fetch a single RSS feed and store its items in SQLite.
   * @param feedUrl URL of the feed to parse
   * @param feedName Display name to save for the feed
   * @param category Optional feed category
   * @param itemsPerFeed Number of items to fetch per feed (default: 20)
   * @returns The stored items, or [] when the feed is empty or parsing fails
   */
  private async fetchFeed(feedUrl: string, feedName: string, category?: string, itemsPerFeed: number = 20): Promise<FeedItem[]> {
    try {
      // Parse the RSS feed
      const parsedFeed: ParsedFeed = await parseRssFeed(feedUrl);
      
      if (!parsedFeed || !parsedFeed.items || parsedFeed.items.length === 0) {
        console.warn(`No items found in feed: ${feedUrl}`);
        return [];
      }
      
      // Prefer the caller-provided name; fall back to the parsed feed's own
      // title, then a placeholder. (Earlier comment had the precedence reversed.)
      const title = feedName || parsedFeed.title || 'Unknown Feed';
      
      // Limit items to the specified number
      const limitedItems = parsedFeed.items.slice(0, itemsPerFeed);
      console.error(`Fetched ${limitedItems.length} items from feed: ${feedUrl}`);
      
      // Save the feed and its items to the database
      await dbManager.saveFeed(
        feedUrl,
        title,
        category,
        limitedItems
      );
      
      return limitedItems;
    } catch (error) {
      console.error(`Error fetching feed ${feedUrl}:`, error);
      return [];
    }
  }
  
  /**
   * Get the latest feed items from the database.
   * @param limit Maximum number of items to return
   */
  public async getLatestArticles(limit: number = 10): Promise<FeedResponse> {
    try {
      // Get items from the database
      const items = await dbManager.getItems(limit);
      
      // Convert to feed items
      const feedItems = convertDbItemsToFeedItems(items);
      
      // Return formatted response
      return formatFeedResponse(
        feedItems,
        "Latest RSS Feeds",
        "feed/latest",
        "Latest articles from RSS feeds"
      );
    } catch (error) {
      console.error('Error getting latest feeds:', error);
      return formatFeedResponse([], "Error", "error", "Error getting latest feeds");
    }
  }
  
  /**
   * Get feed items by category from the database.
   * @param category Category name to filter on
   * @param limit Maximum number of items to return
   */
  public async getFeedsByCategory(category: string, limit: number = 10): Promise<FeedResponse> {
    try {
      // Get items by category from the database
      const items = await dbManager.getItemsByCategory(category, limit);
      
      // Convert to feed items
      const feedItems = convertDbItemsToFeedItems(items);
      
      // Return formatted response
      return formatFeedResponse(
        feedItems,
        `${category} Feeds`,
        `category/${category}`,
        `Feeds from the ${category} category`
      );
    } catch (error) {
      console.error(`Error getting feeds by category ${category}:`, error);
      return formatFeedResponse([], "Error", "error", `Error getting feeds for category: ${category}`);
    }
  }
  
  /**
   * Search stored feed items in the database.
   * @param query Search query
   * @param limit Maximum number of items to return
   */
  public async searchFeeds(query: string, limit: number = 10): Promise<FeedResponse> {
    try {
      // Search items in the database
      const items = await dbManager.searchItems(query, limit);
      
      // Convert to feed items
      const feedItems = convertDbItemsToFeedItems(items);
      
      // Return formatted response
      return formatFeedResponse(
        feedItems,
        `Search Results for "${query}"`,
        `search/${query}`,
        `Search results for "${query}"`
      );
    } catch (error) {
      console.error(`Error searching feeds for ${query}:`, error);
      return formatFeedResponse([], "Error", "error", `Error searching feeds for: ${query}`);
    }
  }
}
// Create and export an instance of the RSS manager
const rssManager = new RssManager();
export default rssManager;