MCP-RSS-Crawler

route.ts•4.12 kB

import { NextRequest, NextResponse } from 'next/server';
import { getDb, getWriteDb } from '@/lib/db';
import crypto from 'crypto';
import FirecrawlApp from '@mendable/firecrawl-js';

/** Article payload returned by this route; its fields match the columns inserted into `articles`. */
interface Article {
  id: string;
  url: string;
  title: string;
  content: string;
  html: string;
  author: string;
  published_date: string;
  image_url: string;
  summary: string;
  fetched_at: number;
}

/** Pulls `url` out of a parsed JSON body, tolerating bodies that are not objects (e.g. `null`, arrays, strings). */
function extractUrl(body: unknown): unknown {
  if (typeof body !== 'object' || body === null) {
    return undefined;
  }
  return (body as { url?: unknown }).url;
}

/**
 * Scrapes `url` with Firecrawl and maps the result onto an {@link Article}.
 *
 * @throws Error when `FIRECRAWL_API_KEY` is not set or Firecrawl reports failure.
 */
async function scrapeArticle(id: string, url: string): Promise<Article> {
  const apiKey = process.env.FIRECRAWL_API_KEY;
  if (!apiKey) {
    console.error('FIRECRAWL_API_KEY not found in environment variables');
    throw new Error('FIRECRAWL_API_KEY not found in environment variables');
  }

  const firecrawl = new FirecrawlApp({ apiKey });
  const result = await firecrawl.scrapeUrl(url, { formats: ['markdown', 'html'] });

  // Log only the outcome: the full result contains the whole page body.
  console.log('Firecrawl scrape finished, success:', Boolean(result && result.success));

  if (!result || !result.success) {
    throw new Error('Failed to extract content with Firecrawl');
  }

  const metadata = result.metadata;

  // `||` (not `??`) is intentional throughout: empty strings count as "missing".
  // Title/description are looked up on the metadata object as well as on the
  // top-level result, since scrape responses carry them under `metadata`.
  return {
    id,
    url,
    title: result.title || metadata?.title || metadata?.ogTitle || 'Untitled Article',
    content: result.markdown || '',
    html: result.html || '',
    author: metadata?.ogAuthor || '',
    published_date: metadata?.dcDateCreated || metadata?.dctermsCreated || '',
    image_url: metadata?.ogImage || '',
    summary: result.description || metadata?.description || metadata?.ogDescription || '',
    fetched_at: Date.now(),
  };
}

/** Inserts `article` into the `articles` table through the write connection. */
function saveArticle(article: Article): void {
  const writeDb = getWriteDb();
  writeDb
    .prepare(
      `
      INSERT INTO articles (
        id, url, title, content, html, author, published_date, image_url, summary, fetched_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `
    )
    .run(
      article.id,
      article.url,
      article.title,
      article.content,
      article.html,
      article.author,
      article.published_date,
      article.image_url,
      article.summary,
      article.fetched_at
    );
}

/** Placeholder returned (and deliberately not persisted) when scraping fails. */
function buildFallbackArticle(id: string, url: string): Article {
  return {
    id,
    url,
    title: 'Article content not available',
    content: 'Failed to extract content from the article.',
    html: '<p>Failed to extract content from the article.</p>',
    author: '',
    published_date: '',
    image_url: '',
    summary: 'Please visit the original article to view the content.',
    fetched_at: Date.now(),
  };
}

/**
 * POST handler: returns the article for the `url` given in the JSON body.
 *
 * Flow:
 *  1. Reject malformed JSON or a missing/non-string `url` with HTTP 400.
 *  2. Return the stored row from `articles` if one exists for that URL.
 *  3. Otherwise scrape the page with Firecrawl, try to persist it, and return it.
 *     A failed save is logged but does not discard the scraped article.
 *  4. If scraping fails, respond with a placeholder article (HTTP 200, not saved).
 *
 * Any other unexpected error yields HTTP 500 with `{ error: 'Failed to fetch article' }`.
 */
export async function POST(request: NextRequest) {
  try {
    let body: unknown;
    try {
      body = await request.json();
    } catch (parseError) {
      // A body that is not valid JSON is a client error, not a server fault.
      console.error('Invalid JSON in request body:', parseError);
      return NextResponse.json(
        { error: 'Invalid JSON body' },
        { status: 400 }
      );
    }

    const url = extractUrl(body);
    console.log('Fetching article for URL:', url);

    // Non-string values would otherwise blow up later in the hash/DB calls.
    if (typeof url !== 'string' || url.length === 0) {
      console.log('URL is required');
      return NextResponse.json(
        { error: 'URL is required' },
        { status: 400 }
      );
    }

    // Check if the article already exists in the database (read-only connection).
    const db = getDb();
    console.log('Checking if article exists in database');
    const existingArticle = db
      .prepare(
        `
        SELECT * FROM articles WHERE url = ?
        `
      )
      .get(url);

    if (existingArticle) {
      console.log('Article found in database');
      return NextResponse.json(existingArticle);
    }

    // The ID is derived from the URL, so the same URL always maps to the same ID.
    const id = `article/${crypto.createHash('md5').update(url).digest('hex')}`;

    let article: Article;
    try {
      console.log('Article not found in database, fetching with Firecrawl');
      article = await scrapeArticle(id, url);
    } catch (firecrawlError) {
      console.error('Firecrawl error:', firecrawlError);
      return NextResponse.json(buildFallbackArticle(id, url));
    }

    // Persisting is best-effort: a write failure (e.g. a concurrent request
    // inserted the same URL first) must not throw away content we already have.
    try {
      console.log('Saving article to database');
      saveArticle(article);
      console.log('Article saved to database');
    } catch (dbError) {
      console.error('Failed to save article to database:', dbError);
    }

    return NextResponse.json(article);
  } catch (error) {
    console.error('Error fetching article:', error);
    return NextResponse.json(
      { error: 'Failed to fetch article' },
      { status: 500 }
    );
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mshk/mcp-rss-crawler'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.