// RssService.ts • 3.78 kB
import Parser from 'rss-parser';
import { AppDataSource } from '../config/database';
import { Feed } from '../entities/Feed';
import { Article, ArticleStatus } from '../entities/Article';
import { EmbeddingService } from './EmbeddingService';
/**
 * Pulls articles from the RSS feeds stored in the database and persists the
 * ones that are not stored yet.
 *
 * When an OpenAI API key is configured, newly stored articles published on or
 * after the embedding cutoff also get an embedding attached. Embedding
 * generation is best-effort: a failure is logged and the article is saved
 * without one.
 */
export class RssService {
  /** Articles published before this instant are stored without an embedding. */
  private static readonly EMBEDDING_CUTOFF = new Date('2020-01-01T00:00:00Z');

  private readonly parser: Parser;
  private embeddingService: EmbeddingService | null = null;

  /**
   * Configures the RSS parser and, if `OPENAI_API_KEY` is set to a non-blank
   * value, tries to create the embedding service. If that construction throws,
   * the error is logged and the service stays `null`, which disables
   * embeddings for this instance.
   */
  constructor() {
    this.parser = new Parser({
      customFields: {
        item: [
          ['content:encoded', 'content'],
          ['description', 'description']
        ]
      }
    });

    // Initialize embedding service if API key is available and not empty
    if (process.env.OPENAI_API_KEY?.trim()) {
      try {
        this.embeddingService = new EmbeddingService();
      } catch (error) {
        console.error('Failed to initialize EmbeddingService in RssService:', error);
        // Service remains null, embeddings will be skipped
      }
    }
  }

  /**
   * Fetch latest articles from all RSS feeds.
   *
   * Feeds are processed one after another. This method never rejects: a
   * failure to load the feed list is logged instead of being thrown.
   */
  async fetchAllFeeds(): Promise<void> {
    try {
      const feedRepository = AppDataSource.getRepository(Feed);
      const feeds = await feedRepository.find();
      for (const feed of feeds) {
        await this.fetchFeed(feed);
      }
    } catch (error) {
      // Keep the "never throws" contract, but make the failure visible.
      console.error('Failed to fetch RSS feeds:', error);
    }
  }

  /**
   * Fetch latest articles from a single RSS feed and store the new ones.
   *
   * An item counts as new when no stored article has the same link. Items
   * without a link are skipped because they cannot be de-duplicated. Errors
   * are logged and never propagate: a failing item does not stop the remaining
   * items, and a failing feed does not stop the caller's loop.
   *
   * @param feed - RSS feed source
   */
  private async fetchFeed(feed: Feed): Promise<void> {
    try {
      const feedData = await this.parser.parseURL(feed.url);
      const articleRepository = AppDataSource.getRepository(Article);
      const fetchDate = new Date();

      for (const item of feedData.items) {
        const link = item.link;
        if (!link) {
          // NOTE(review): an undefined `link` in the where clause would
          // presumably not filter by link at all, making the duplicate check
          // meaningless; confirm against the TypeORM version in use.
          console.warn(`Skipping RSS item without a link in feed ${feed.url}`);
          continue;
        }

        try {
          // Check if article already exists
          const existingArticle = await articleRepository.findOne({ where: { link } });
          if (existingArticle) {
            continue;
          }

          const article = new Article();
          article.title = item.title || 'No title';
          article.content = item.content || item.description || '';
          article.link = link;
          article.pubDate = this.resolvePubDate(item, fetchDate);
          article.fetchDate = fetchDate;
          article.status = ArticleStatus.UNREAD;
          article.feed = feed;

          await this.attachEmbedding(article);
          await articleRepository.save(article);
        } catch (error) {
          // One bad item must not abort the rest of the feed.
          console.error(`Failed to store article ${link} from feed ${feed.url}:`, error);
        }
      }
    } catch (error) {
      console.error(`Failed to fetch feed ${feed.url}:`, error);
    }
  }

  /**
   * Picks the publication date for an RSS item.
   *
   * Tries `pubDate`, then `isoDate`, and returns the first one that parses to
   * a valid date. Missing or unparseable values are skipped, so the result is
   * never an Invalid Date unless `fallback` itself is one.
   *
   * @param item - Object carrying the optional raw date strings of the item
   * @param fallback - Date to use when neither field yields a valid date
   * @returns The parsed publication date, or `fallback`
   */
  private resolvePubDate(item: { pubDate?: string; isoDate?: string }, fallback: Date): Date {
    for (const raw of [item.pubDate, item.isoDate]) {
      if (!raw) {
        continue;
      }
      const parsed = new Date(raw);
      if (!Number.isNaN(parsed.getTime())) {
        return parsed;
      }
    }
    return fallback;
  }

  /**
   * Generates an embedding for the article and stores it on
   * `article.embedding` as a bracketed, comma-separated list.
   *
   * Does nothing when no embedding service is configured or the article is
   * older than the cutoff. Failures are logged and the article is left without
   * an embedding.
   *
   * @param article - Article whose title and content are embedded
   */
  private async attachEmbedding(article: Article): Promise<void> {
    const embeddingService = this.embeddingService;
    if (!embeddingService || !this.shouldGenerateEmbedding(article.pubDate)) {
      return;
    }

    try {
      const textForEmbedding = embeddingService.prepareTextForEmbedding(
        article.title,
        article.content
      );
      const embedding = await embeddingService.generateEmbedding(textForEmbedding);
      article.embedding = `[${embedding.join(',')}]`;
    } catch (error) {
      console.error(`Failed to generate embedding for article: ${article.title}`, error);
      // Continue without embedding
    }
  }

  /**
   * Check if embedding should be generated based on article date.
   * Only generate for articles from 2020 onwards (UTC).
   *
   * @param pubDate - Article publication date
   * @returns true if should generate embedding; false for older or invalid dates
   */
  private shouldGenerateEmbedding(pubDate: Date): boolean {
    const timestamp = pubDate.getTime();
    // An Invalid Date has a NaN timestamp; make that case explicit.
    if (Number.isNaN(timestamp)) {
      return false;
    }
    return timestamp >= RssService.EMBEDDING_CUTOFF.getTime();
  }
}