feed-parser.ts•5.85 kB
import axios from 'axios';
import { parseStringPromise } from 'xml2js';
/**
 * Interface for the parsed feed result
 */
export interface ParsedFeed {
  title: string;
  description?: string;
  link?: string;
  items: FeedItem[];
}
/**
 * Interface for a feed item
 */
export interface FeedItem {
  id: string;
  title: string;
  published: number;
  updated?: number;
  summary: string | { direction: string; content: string };
  content?: string | { direction: string; content: string };
  author?: string;
  categories?: string[];
  origin?: {
    streamId: string;
    title: string;
    htmlUrl: string;
  };
  alternate?: {
    href: string;
    type: string;
  }[];
  link?: string;
}
/**
 * Parse an RSS feed and convert it to the standard format
 */
export async function parseRssFeed(feedUrl: string): Promise<ParsedFeed> {
  try {
    // Fetch the RSS feed
    const response = await axios.get(feedUrl, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; RSSManager/1.0)'
      }
    });
    // Parse the XML
    const result = await parseStringPromise(response.data, {
      explicitArray: false,
      mergeAttrs: true
    });
    // Handle different RSS formats
    let channel;
    let items = [];
    let feedTitle = '';
    let feedDescription = '';
    let feedLink = '';
    // Regular RSS
    if (result.rss?.channel) {
      channel = result.rss.channel;
      items = Array.isArray(channel.item) ? channel.item : channel.item ? [channel.item] : [];
      feedTitle = channel.title || '';
      feedDescription = channel.description || '';
      feedLink = channel.link || '';
    } 
    // Atom
    else if (result.feed) {
      channel = result.feed;
      items = Array.isArray(channel.entry) ? channel.entry : channel.entry ? [channel.entry] : [];
      feedTitle = channel.title || '';
      feedDescription = channel.subtitle || '';
      feedLink = channel.link?.href || channel.link || '';
    } 
    // RDF
    else if (result.rdf?.channel) {
      channel = result.rdf.channel;
      items = Array.isArray(result.rdf.item) ? result.rdf.item : result.rdf.item ? [result.rdf.item] : [];
      feedTitle = channel.title || '';
      feedDescription = channel.description || '';
      feedLink = channel.link || '';
    }
    // Other formats
    else if (result['rdf:RDF']) {
      const rdf = result['rdf:RDF'];
      channel = rdf.channel || rdf['channel:channel'] || {};
      items = Array.isArray(rdf.item) ? rdf.item : rdf.item ? [rdf.item] : [];
      feedTitle = channel.title || '';
      feedDescription = channel.description || '';
      feedLink = channel.link || '';
    }
    
    if (!channel || items.length === 0) {
      console.error('Unsupported RSS format or no items found:', Object.keys(result));
      return { title: 'Unknown Feed', items: [] };
    }
    // Convert items to standard format
    const standardItems = items.map((item: any) => {
      // Get the title
      const title = item.title || '';
      
      // Get the published date
      let published = 0;
      if (item.pubDate) {
        published = Math.floor(new Date(item.pubDate).getTime() / 1000);
      } else if (item.published) {
        published = Math.floor(new Date(item.published).getTime() / 1000);
      } else if (item.updated) {
        published = Math.floor(new Date(item.updated).getTime() / 1000);
      } else if (item['dc:date']) {
        published = Math.floor(new Date(item['dc:date']).getTime() / 1000);
      } else {
        published = Math.floor(Date.now() / 1000);
      }
      
      // Get the updated date (default to published date)
      let updated = published;
      if (item.updated) {
        updated = Math.floor(new Date(item.updated).getTime() / 1000);
      }
      
      // Get the summary/content
      let summary = '';
      if (item.description) {
        summary = item.description;
      } else if (item.summary) {
        summary = item.summary;
      } else if (item.content) {
        summary = item.content;
      } else if (item['content:encoded']) {
        summary = item['content:encoded'];
      }
      
      // Get the author
      let author = '';
      if (item.author) {
        if (typeof item.author === 'string') {
          author = item.author;
        } else if (item.author.name) {
          author = item.author.name;
        }
      } else if (item['dc:creator']) {
        author = item['dc:creator'];
      }
      
      // Get the link
      let link = '';
      if (item.link) {
        if (typeof item.link === 'string') {
          link = item.link;
        } else if (item.link.href) {
          link = item.link.href;
        }
      }
      
      // Get the categories
      let categories: string[] = [];
      if (item.category) {
        if (typeof item.category === 'string') {
          categories = [item.category];
        } else if (Array.isArray(item.category)) {
          categories = item.category.map((cat: any) => 
            typeof cat === 'string' ? cat : cat._ || ''
          ).filter(Boolean);
        }
      }
      
      return {
        id: item.guid || item.id || `${feedUrl}/${title}`,
        title,
        published,
        updated,
        summary: {
          direction: 'ltr',
          content: summary
        },
        author,
        categories,
        origin: {
          streamId: `feed/${Buffer.from(feedUrl).toString('base64').substring(0, 20)}`,
          title: feedTitle,
          htmlUrl: feedLink
        },
        alternate: [{
          href: link,
          type: 'text/html'
        }],
        link
      };
    });
    return {
      title: feedTitle,
      description: feedDescription,
      link: feedLink,
      items: standardItems
    };
  } catch (error) {
    console.error('Error parsing RSS feed:', error);
    return { title: 'Error', items: [] };
  }
}