// mcp-pinterest
// by terryso
// Verified
// Pinterest image scraper using puppeteer-core
import fs from 'fs';
import puppeteer from 'puppeteer-core';
// Default configuration constants
// Default for the `limit` parameter of PinterestScraper.search (maximum number of results returned).
const DEFAULT_SEARCH_LIMIT = 10;
// Default for the `headless` parameter of PinterestScraper.search (true launches Chrome headless).
const DEFAULT_HEADLESS_MODE = true;
/**
 * Scrapes Pinterest search results by driving a locally installed Chrome
 * through puppeteer-core, and can download the images it finds.
 *
 * All diagnostics go to stderr (console.error), matching the logging
 * convention already used throughout this file, so stdout is never written to.
 */
class PinterestScraper {
  constructor() {
    this.baseUrl = 'https://www.pinterest.com';
    this.searchUrl = `${this.baseUrl}/search/pins/?q=`;
    // Default Chrome paths for different platforms. The *Alt entries are
    // fallbacks tried by resolveChromePath() when the primary path is missing.
    // NOTE(review): macAlt is currently identical to mac, so it adds nothing;
    // confirm which alternative install location was intended.
    this.chromePaths = {
      mac: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
      macAlt: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
      linux: '/usr/bin/google-chrome',
      win: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
      winAlt: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    };
  }

  /**
   * Rewrite a pinimg thumbnail URL so it points at the original-size image.
   * The first path segment shaped like `/236x/` or `/60x60/` is replaced with
   * `/originals/`; URLs without such a segment are returned unchanged.
   * @param {string} imageUrl - Image URL as found in an <img> src attribute
   * @returns {string} - URL with the size segment replaced
   */
  static toOriginalImageUrl(imageUrl) {
    if (typeof imageUrl !== 'string') {
      return imageUrl;
    }
    return imageUrl.replace(/\/\d+x\d*\//, '/originals/');
  }

  /**
   * Pick the Chrome executable to launch for a platform.
   * Returns the first configured candidate that exists on disk. When none
   * exists the primary candidate is returned anyway, so the launch error
   * names the expected location.
   * @param {string} platform - Node platform id (defaults to process.platform)
   * @returns {string|undefined} - Executable path, or undefined for platforms
   *   without a configured path
   */
  resolveChromePath(platform = process.platform) {
    const { mac, macAlt, linux, win, winAlt } = this.chromePaths;
    let candidates;
    switch (platform) {
      case 'darwin':
        candidates = [mac, macAlt];
        break;
      case 'linux':
        candidates = [linux];
        break;
      case 'win32':
        candidates = [win, winAlt];
        break;
      default:
        return undefined;
    }
    return candidates.find((candidate) => fs.existsSync(candidate)) ?? candidates[0];
  }

  /**
   * Search for Pinterest images
   *
   * Never throws: every failure (invalid arguments, browser launch,
   * navigation, extraction) is logged to stderr and yields an empty array.
   * @param {string} keyword - Search keyword
   * @param {number} limit - Result limit (must be a finite number > 0)
   * @param {boolean} headless - Whether to use headless mode
   * @returns {Promise<Array>} - Deduplicated results, each shaped as
   *   { title, image_url, link, source }
   */
  async search(keyword, limit = DEFAULT_SEARCH_LIMIT, headless = DEFAULT_HEADLESS_MODE) {
    // Debug log for parameters
    console.error('PinterestScraper.search called with:');
    console.error('- keyword:', keyword);
    console.error('- limit:', limit);
    console.error('- headless:', headless);

    // Reject unusable arguments before paying for a browser launch.
    if (String(keyword ?? '').trim() === '') {
      console.error('Empty search keyword, returning empty results');
      return [];
    }
    // A non-finite limit would make the scroll distance non-finite and the
    // in-page scroll loop would never terminate.
    const maxResults = Number(limit);
    if (!Number.isFinite(maxResults) || maxResults <= 0) {
      console.error('Invalid result limit, returning empty results:', limit);
      return [];
    }

    let browser = null;
    try {
      // Build search URL
      const url = `${this.searchUrl}${encodeURIComponent(keyword)}`;
      console.error('Search URL:', url);

      // Launch browser - using system installed Chrome
      try {
        const options = {
          headless: headless ? 'new' : false,
          args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--disable-gpu'
          ]
        };
        const executablePath = this.resolveChromePath();
        if (executablePath) {
          options.executablePath = executablePath;
        }
        console.error('Launching browser with options:', JSON.stringify(options));
        browser = await puppeteer.launch(options);
      } catch (err) {
        console.error('Failed to launch browser:', err.message);
        return [];
      }
      if (!browser) {
        console.error('Browser is null, returning empty results');
        return [];
      }

      // Early returns below rely on the finally block to close the browser,
      // so it is closed exactly once on every path.
      let page;
      try {
        page = await browser.newPage();
      } catch (err) {
        console.error('Failed to create page:', err.message);
        return [];
      }

      await this.#preparePage(page);

      // Navigate to Pinterest search page
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
      } catch (err) {
        console.error('Page navigation failed:', err.message);
        return [];
      }

      // Wait for pins to render; a miss is not fatal because the extraction
      // below looks at every <img> on the page.
      try {
        await page.waitForSelector('div[data-test-id="pin"]', { timeout: 10000 });
      } catch (err) {
        console.error('Pin elements not found, but continuing:', err.message);
      }

      // Scroll further for larger limits so more pins get loaded.
      try {
        await this.autoScroll(page, Math.max(maxResults * 300, 1000));
      } catch (err) {
        console.error('Failed to scroll page:', err.message);
      }

      // Deduplicate by image URL and cap at the requested limit.
      const candidates = await this.#extractImages(page);
      const seenUrls = new Set();
      const uniqueResults = [];
      for (const item of candidates) {
        if (uniqueResults.length >= maxResults) {
          break;
        }
        if (!item.image_url || seenUrls.has(item.image_url)) {
          continue;
        }
        seenUrls.add(item.image_url);
        uniqueResults.push(item);
      }
      return uniqueResults;
    } catch (error) {
      console.error('Pinterest search error:', error.message);
      return [];
    } finally {
      // Close browser
      if (browser) {
        try {
          await browser.close();
        } catch (e) {
          console.error('Error closing browser:', e.message);
        }
      }
    }
  }

  /**
   * Apply viewport, user agent, timeouts and request interception to a page.
   * Failures of the optional steps are logged and do not abort the search.
   * @param {Page} page - Puppeteer page object
   */
  async #preparePage(page) {
    try {
      await page.setViewport({ width: 1280, height: 800 });
    } catch (err) {
      console.error('Failed to set viewport:', err.message);
    }
    try {
      await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    } catch (err) {
      console.error('Failed to set user agent:', err.message);
    }

    page.setDefaultNavigationTimeout(60000);
    page.setDefaultTimeout(30000);

    // Only the <img> src attributes are read later, so the heavy resources
    // themselves do not need to be downloaded.
    const blockedResourceTypes = new Set(['image', 'font', 'media']);
    try {
      await page.setRequestInterception(true);
      page.on('request', (req) => {
        const outcome = blockedResourceTypes.has(req.resourceType())
          ? req.abort()
          : req.continue();
        // abort()/continue() return promises; log a rejection instead of
        // leaving it unhandled.
        Promise.resolve(outcome).catch((err) => {
          console.error('Failed to handle intercepted request:', err.message);
        });
      });
    } catch (err) {
      console.error('Failed to set request interception:', err.message);
    }
  }

  /**
   * Collect every pinimg.com image on the page as a result object.
   * Only raw attributes are read inside the page; URL normalisation happens
   * on the Node side.
   * @param {Page} page - Puppeteer page object
   * @returns {Promise<Array>} - Result objects (may contain duplicates);
   *   empty when extraction fails
   */
  async #extractImages(page) {
    let rawImages;
    try {
      rawImages = await page.evaluate(() =>
        Array.from(document.querySelectorAll('img'))
          .filter((img) => img.src && img.src.includes('pinimg.com'))
          .map((img) => {
            const anchor = img.closest('a');
            return {
              title: img.alt || 'Unknown Title',
              src: img.src,
              link: anchor ? anchor.href : null
            };
          })
      );
    } catch (err) {
      console.error('Failed to extract images:', err.message);
      return [];
    }
    if (!Array.isArray(rawImages)) {
      return [];
    }
    return rawImages
      .filter((raw) => raw && typeof raw.src === 'string')
      .map(({ title, src, link }) => {
        const imageUrl = PinterestScraper.toOriginalImageUrl(src);
        return {
          title,
          image_url: imageUrl,
          // Images that are not wrapped in a link fall back to the image itself.
          link: link ?? imageUrl,
          source: 'pinterest'
        };
      });
  }

  /**
   * Auto-scroll page to load more content
   * Scrolls down 100px every 100ms until the requested distance is covered,
   * then waits so lazily loaded content can arrive.
   * @param {Page} page - Puppeteer page object
   * @param {number} maxScrollDistance - Maximum scroll distance in pixels;
   *   non-finite values fall back to 3000 so the loop always terminates
   * @param {number} settleDelayMs - Wait after scrolling, in milliseconds
   */
  async autoScroll(page, maxScrollDistance = 3000, settleDelayMs = 2000) {
    const requestedDistance = Number(maxScrollDistance);
    const targetDistance = Number.isFinite(requestedDistance) ? requestedDistance : 3000;

    await page.evaluate((limitPx) => new Promise((resolve) => {
      const stepPx = 100;
      let scrolledPx = 0;
      const timer = setInterval(() => {
        window.scrollBy(0, stepPx);
        scrolledPx += stepPx;
        // Stop after scrolling the requested distance
        if (scrolledPx >= limitPx) {
          clearInterval(timer);
          resolve();
        }
      }, 100);
    }), targetDistance);

    // Wait for new content to load
    if (settleDelayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, settleDelayMs));
    }
  }

  /**
   * Download image
   * Fetches the URL and writes the response body to outputPath. Errors are
   * logged and reported through the return value rather than thrown.
   * @param {string} imageUrl - Image URL
   * @param {string} outputPath - Output path
   * @returns {Promise<boolean>} - Whether download was successful
   */
  async downloadImage(imageUrl, outputPath) {
    try {
      console.error(`Downloading image: ${imageUrl}`);
      const response = await fetch(imageUrl);
      if (!response.ok) {
        throw new Error(`Download failed, status code: ${response.status}`);
      }
      const bytes = Buffer.from(await response.arrayBuffer());
      // Asynchronous write so the event loop is not blocked on disk I/O.
      await fs.promises.writeFile(outputPath, bytes);
      console.error(`Image saved to: ${outputPath}`);
      return true;
    } catch (error) {
      console.error(`Failed to download image: ${error.message}`);
      return false;
    }
  }
}
// Export PinterestScraper class
export { PinterestScraper as default };