// route.ts • 4.12 kB
import { NextRequest, NextResponse } from 'next/server';
import { getDb, getWriteDb } from '@/lib/db';
import crypto from 'crypto';
import FirecrawlApp from '@mendable/firecrawl-js';
/**
 * POST handler that returns the article for a given URL.
 *
 * Expects a JSON body of the form `{ "url": "<article url>" }`.
 *
 * Flow:
 *  1. Validate the body; a malformed body or a missing/non-string `url`
 *     yields a 400 response.
 *  2. Look the URL up in the `articles` table and return the stored row
 *     if one exists.
 *  3. Otherwise scrape the page with Firecrawl, try to persist the result,
 *     and return the scraped article.
 *  4. If scraping fails (including a missing `FIRECRAWL_API_KEY`), respond
 *     with a placeholder article (status 200) that is NOT persisted, so a
 *     later request can retry the scrape.
 *
 * Any other unexpected error (for example a failing database read) yields
 * a 500 response with a generic error message.
 *
 * @param request Incoming request whose JSON body carries the `url` field.
 * @returns A JSON response containing the article, or `{ error }` with
 *          status 400/500.
 */
export async function POST(request: NextRequest) {
  try {
    // A body that is not valid JSON is a client error, not a server error.
    let body: unknown;
    try {
      body = await request.json();
    } catch (parseError) {
      console.log('Request body is not valid JSON:', parseError);
      return NextResponse.json(
        { error: 'Invalid JSON body' },
        { status: 400 }
      );
    }

    const url =
      typeof body === 'object' && body !== null
        ? (body as { url?: unknown }).url
        : undefined;
    console.log('Fetching article for URL:', url);

    // Only a non-empty string is usable: it is bound as a SQL parameter and
    // hashed below, and hashing a non-string value would throw.
    if (typeof url !== 'string' || url === '') {
      console.log('URL is required');
      return NextResponse.json(
        { error: 'URL is required' },
        { status: 400 }
      );
    }

    // Check if the article already exists in the database.
    const db = getDb(); // Read-only connection for checking
    console.log('Checking if article exists in database');
    const existingArticle = db.prepare(`
      SELECT * FROM articles WHERE url = ?
    `).get(url);

    if (existingArticle) {
      console.log('Article found in database');
      return NextResponse.json(existingArticle);
    }

    // Deterministic ID derived from the URL. MD5 serves purely as a stable
    // fingerprint here (not for security); it must not change, or IDs of
    // already-stored articles would no longer match.
    const id = `article/${crypto.createHash('md5').update(url).digest('hex')}`;

    try {
      console.log('Article not found in database, fetching with Firecrawl');

      // Initialize Firecrawl app with API key from environment variables.
      const apiKey = process.env.FIRECRAWL_API_KEY;
      if (!apiKey) {
        console.error('FIRECRAWL_API_KEY not found in environment variables');
        throw new Error('FIRECRAWL_API_KEY not found in environment variables');
      }
      const firecrawl = new FirecrawlApp({ apiKey });

      // Fetch article content using Firecrawl.
      const firecrawlResult = await firecrawl.scrapeUrl(url, {
        formats: ['markdown', 'html']
      });
      console.log('Firecrawl result:', firecrawlResult);

      if (!firecrawlResult || !firecrawlResult.success) {
        throw new Error('Failed to extract content with Firecrawl');
      }

      // Create article object with Firecrawl data. Title and description are
      // read from the top level first and then from `metadata`, which is
      // where the scrape response normally carries page-level fields.
      const metadata = firecrawlResult.metadata;
      const article = {
        id,
        url,
        title: firecrawlResult.title || metadata?.title || 'Untitled Article',
        content: firecrawlResult.markdown || '',
        html: firecrawlResult.html || '',
        author: metadata?.ogAuthor || '',
        published_date: metadata?.dcDateCreated || metadata?.dctermsCreated || '',
        image_url: metadata?.ogImage || '',
        summary:
          firecrawlResult.description ||
          metadata?.description ||
          metadata?.ogDescription ||
          '',
        fetched_at: Date.now()
      };

      // Persisting is best-effort: a write failure (for example a constraint
      // conflict if a concurrent request stored the same article first; the
      // schema is not visible here) must not discard content that was
      // scraped successfully.
      try {
        console.log('Saving article to database');
        const writeDb = getWriteDb(); // Write connection for saving
        writeDb.prepare(`
          INSERT INTO articles (
            id, url, title, content, html, author, published_date,
            image_url, summary, fetched_at
          ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `).run(
          article.id,
          article.url,
          article.title,
          article.content,
          article.html,
          article.author,
          article.published_date,
          article.image_url,
          article.summary,
          article.fetched_at
        );
        console.log('Article saved to database');
      } catch (saveError) {
        console.error('Failed to save article to database:', saveError);
      }

      return NextResponse.json(article);
    } catch (firecrawlError) {
      console.error('Firecrawl error:', firecrawlError);

      // Fallback article returned when scraping fails. It is deliberately
      // not written to the database.
      const article = {
        id,
        url,
        title: 'Article content not available',
        content: 'Failed to extract content from the article.',
        html: '<p>Failed to extract content from the article.</p>',
        author: '',
        published_date: '',
        image_url: '',
        summary: 'Please visit the original article to view the content.',
        fetched_at: Date.now()
      };
      return NextResponse.json(article);
    }
  } catch (error) {
    console.error('Error fetching article:', error);
    return NextResponse.json(
      { error: 'Failed to fetch article' },
      { status: 500 }
    );
  }
}