Skip to main content
Glama

mcp-pinterest

pinterest-scraper.js (17 kB)
// Pinterest image scraper using puppeteer-core
import fs from 'fs';
import puppeteer from 'puppeteer-core';

// Default configuration constants
const DEFAULT_SEARCH_LIMIT = 10;
const DEFAULT_HEADLESS_MODE = true;

// True when running under a test runner (NODE_ENV=test or a Jest worker).
const isTestEnvironment =
  process.env.NODE_ENV === 'test' || process.env.JEST_WORKER_ID !== undefined;

// Optional proxy server, read once from the environment at module load.
const PROXY_SERVER = process.env.MCP_PINTEREST_PROXY_SERVER || '';

// Message carried by every cancellation error. `search` also recognises
// cancellation by comparing against this exact text, so it must not change.
const CANCELLED_MESSAGE = '操作被取消';

// Resource types that are aborted during page load; only the DOM is needed.
const BLOCKED_RESOURCE_TYPES = new Set(['image', 'font', 'media']);

// Matches a thumbnail size path segment such as /236x/ or /60x60/.
const THUMBNAIL_SIZE_SEGMENT = /\/\d+x\d*\//;

// Pause after scrolling so lazily loaded content can arrive.
const SCROLL_SETTLE_MS = 2000;

/**
 * Report whether a (possibly missing) abort signal has been aborted.
 * @param {AbortSignal} [signal] - Optional abort signal
 * @returns {boolean} True when a signal was provided and is aborted
 */
function isAborted(signal) {
  return Boolean(signal && signal.aborted);
}

/**
 * Throw the cancellation error when the signal has been aborted.
 * @param {AbortSignal} [signal] - Optional abort signal
 * @throws {Error} With message '操作被取消' when the signal is aborted
 */
function throwIfAborted(signal) {
  if (isAborted(signal)) {
    throw new Error(CANCELLED_MESSAGE);
  }
}

/**
 * Resolve after `ms` milliseconds, or as soon as the signal aborts.
 * Never rejects; callers re-check the signal afterwards.
 * @param {number} ms - Delay in milliseconds
 * @param {AbortSignal} [signal] - Optional abort signal
 * @returns {Promise<void>}
 */
function delay(ms, signal) {
  return new Promise((resolve) => {
    // Signal-like objects without event support are still accepted; they
    // simply cannot shorten the wait.
    const canListen = Boolean(signal) && typeof signal.addEventListener === 'function';
    let timer = null;
    const finish = () => {
      clearTimeout(timer);
      if (canListen) {
        signal.removeEventListener('abort', finish);
      }
      resolve();
    };
    timer = setTimeout(finish, ms);
    if (canListen) {
      signal.addEventListener('abort', finish, { once: true });
    }
  });
}

/**
 * Scrapes Pinterest search results with a locally installed Chrome driven by
 * puppeteer-core, and downloads individual images.
 */
class PinterestScraper {
  constructor() {
    this.baseUrl = 'https://www.pinterest.com';
    this.searchUrl = `${this.baseUrl}/search/pins/?q=`;

    // Default Chrome paths for different platforms.
    // NOTE(review): `macAlt` is currently identical to `mac`; presumably a
    // different install location was intended — confirm.
    this.chromePaths = {
      mac: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
      macAlt: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
      linux: '/usr/bin/google-chrome',
      win: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
      winAlt: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    };
  }

  /**
   * Get the Chrome executable path for the current operating system.
   * In the test environment a fixed placeholder path is returned without
   * touching the file system.
   * @returns {string} Chrome executable path
   * @throws {Error} When no known Chrome location exists on this platform
   */
  getChromePath() {
    if (isTestEnvironment) {
      return '/mock/chrome/path';
    }

    const { mac, macAlt, linux, win, winAlt } = this.chromePaths;
    const candidatesByPlatform = {
      darwin: [mac, macAlt],
      linux: [linux],
      win32: [win, winAlt]
    };
    const candidates = candidatesByPlatform[process.platform] || [];

    // First existing candidate wins, in declaration order.
    const found = candidates.find((candidate) => fs.existsSync(candidate));
    if (found) {
      return found;
    }

    throw new Error('无法找到Chrome浏览器,请安装Chrome或手动指定可执行文件路径');
  }

  /**
   * Search for Pinterest images.
   *
   * Failures to launch the browser, open a page or navigate yield an empty
   * array; only cancellation is reported by throwing. The browser is always
   * closed in `finally`, so no early-exit path closes it explicitly.
   *
   * @param {string} keyword - Search keyword
   * @param {number} limit - Result limit
   * @param {boolean} headless - Whether to use headless mode
   * @param {AbortSignal} signal - AbortController signal for cancelling the operation
   * @returns {Promise<Array>} - Search results array of
   *   `{ title, image_url, link, source }` objects, deduplicated by `image_url`
   * @throws {Error} With message '操作被取消' when the signal is aborted
   */
  async search(keyword, limit = DEFAULT_SEARCH_LIMIT, headless = DEFAULT_HEADLESS_MODE, signal) {
    let browser = null;
    let page = null;

    try {
      throwIfAborted(signal);

      // Build search URL
      const url = `${this.searchUrl}${encodeURIComponent(keyword)}`;

      // Launch browser - using system installed Chrome
      try {
        if (isTestEnvironment) {
          // puppeteer-core is expected to be mocked by the test runner here,
          // so no executablePath is supplied.
          browser = await puppeteer.launch();
        } else {
          const options = {
            executablePath: this.getChromePath(),
            headless: headless ? 'new' : false,
            args: [
              '--no-sandbox',
              '--disable-setuid-sandbox',
              '--disable-dev-shm-usage',
              '--disable-accelerated-2d-canvas',
              '--disable-gpu',
              '--lang=zh-CN,zh'
            ]
          };

          if (PROXY_SERVER) {
            // Written to stderr, matching the rest of this file's diagnostics:
            // stdout may be carrying protocol traffic when this module runs
            // inside a stdio-based server.
            console.error(`使用代理服务器: ${PROXY_SERVER}`);
            options.args.push(`--proxy-server=${PROXY_SERVER}`);
          }

          browser = await puppeteer.launch(options);
        }
      } catch (err) {
        // Launch failure (including a missing Chrome) is a soft failure.
        return [];
      }

      // Cancellation is checked before the null guard so that an aborted call
      // always reports cancellation, even if launch produced no browser.
      throwIfAborted(signal);
      if (!browser) {
        return [];
      }

      // Create new page
      try {
        page = await browser.newPage();
      } catch (err) {
        return [];
      }
      throwIfAborted(signal);

      // Viewport and user agent are best-effort; failures are ignored.
      try {
        await page.setViewport({ width: 1280, height: 800 });
      } catch (err) {
        // Continue with the default viewport.
      }
      try {
        await page.setUserAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
      } catch (err) {
        // Continue with the default user agent.
      }

      // Set timeouts
      page.setDefaultNavigationTimeout(60000);
      page.setDefaultTimeout(30000);

      // Request interception: drop heavy resources, and drop everything once
      // the operation has been cancelled.
      try {
        await page.setRequestInterception(true);

        // Interception outcomes can reject (for example when the page closes
        // mid-request); swallow those so they never surface as unhandled
        // rejections.
        const ignoreRejection = (outcome) => {
          Promise.resolve(outcome).catch(() => {});
        };

        page.on('request', (req) => {
          if (isAborted(signal)) {
            ignoreRejection(req.abort());
            return;
          }
          if (BLOCKED_RESOURCE_TYPES.has(req.resourceType())) {
            ignoreRejection(req.abort());
          } else {
            ignoreRejection(req.continue());
          }
        });
      } catch (err) {
        // Interception is an optimisation; continue without it.
      }
      throwIfAborted(signal);

      // Navigate to Pinterest search page
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
      } catch (err) {
        return [];
      }
      throwIfAborted(signal);

      // Wait for pins to appear; absence is tolerated and extraction proceeds.
      try {
        await page.waitForSelector('div[data-test-id="pin"]', { timeout: 10000 });
      } catch (err) {
        // Pin elements not found, but continuing.
      }
      throwIfAborted(signal);

      // Scroll page to load more content; distance scales with the limit.
      try {
        const scrollDistance = Math.max(limit * 300, 1000);
        await this.autoScroll(page, scrollDistance, signal);
      } catch (err) {
        // A scroll failure is tolerated unless it was caused by cancellation.
        throwIfAborted(signal);
      }
      throwIfAborted(signal);

      // Extract image data. The callback runs inside the page, so it cannot
      // reference anything from this module's scope.
      let results = [];
      try {
        results = await page.evaluate(() => {
          const images = Array.from(document.querySelectorAll('img'));
          return images
            .filter((img) => img.src && img.src.includes('pinimg.com'))
            .map((img) => {
              // Convert the first thumbnail size segment to the original size.
              let imageUrl = img.src.replace(/\/\d+x\d*\//, '/originals/');

              // Then convert one further known thumbnail segment, if any remains.
              const thumbnailPatterns = ['/60x60/', '/236x/', '/474x/', '/736x/'];
              for (const pattern of thumbnailPatterns) {
                if (imageUrl.includes(pattern)) {
                  imageUrl = imageUrl.replace(pattern, '/originals/');
                  break;
                }
              }

              const anchor = img.closest('a');
              return {
                title: img.alt || 'Unknown Title',
                image_url: imageUrl,
                link: anchor ? anchor.href : imageUrl,
                source: 'pinterest'
              };
            });
        });
      } catch (err) {
        results = [];
      }
      throwIfAborted(signal);

      // Ensure results is an array
      const validResults = Array.isArray(results) ? results : [];

      // Deduplicate by image_url and stop once `limit` items are collected.
      const uniqueResults = [];
      const seenUrls = new Set();
      for (const item of validResults) {
        if (uniqueResults.length >= limit) {
          break;
        }
        const isUsable = item && typeof item === 'object' && item.image_url;
        if (isUsable && !seenUrls.has(item.image_url)) {
          seenUrls.add(item.image_url);
          uniqueResults.push({
            ...item,
            // Ensure 'source' field is present
            source: item.source || 'pinterest'
          });
        }
      }

      return uniqueResults;
    } catch (error) {
      // Propagate cancellation; every other failure yields an empty result.
      if (isAborted(signal) || error.message === CANCELLED_MESSAGE) {
        throw error;
      }
      return [];
    } finally {
      // Detach listeners, then close the browser; both are best-effort.
      if (page) {
        try {
          page.removeAllListeners();
        } catch (e) {
          // Nothing further to do if listener removal fails.
        }
      }
      if (browser) {
        try {
          await browser.close();
        } catch (e) {
          // Nothing further to do if the browser refuses to close.
        }
      }
    }
  }

  /**
   * Auto-scroll page to load more content.
   *
   * Scrolls in 100px steps every 100ms until `maxScrollDistance` has been
   * covered, then waits briefly for new content. An abort raised while the
   * page is scrolling is relayed into the page so the scroll stops early.
   *
   * @param {Page} page - Puppeteer page object
   * @param {number} maxScrollDistance - Maximum scroll distance
   * @param {AbortSignal} signal - AbortController signal
   * @returns {Promise<void>}
   * @throws {Error} With message '操作被取消' when the signal is aborted
   */
  async autoScroll(page, maxScrollDistance = 3000, signal) {
    throwIfAborted(signal);

    // Invokes the in-page cancellation hook installed by the scroll script.
    // Errors are ignored: the page may already be gone.
    const cancelInPage = () => {
      Promise.resolve()
        .then(() => page.evaluate(() => {
          if (window.scrollCancelled) window.scrollCancelled();
        }))
        .catch(() => {});
    };
    const canListen = Boolean(signal) && typeof signal.addEventListener === 'function';
    if (canListen) {
      signal.addEventListener('abort', cancelInPage, { once: true });
    }

    try {
      await page.evaluate(async (maxDistance) => {
        await new Promise((resolve, reject) => {
          const step = 100;
          let scrolled = 0;

          const timer = setInterval(() => {
            window.scrollBy(0, step);
            scrolled += step;

            // Stop after scrolling the requested distance
            if (scrolled >= maxDistance) {
              clearInterval(timer);
              resolve();
            }
          }, 100);

          // Hook used to stop the scroll from outside the page on cancellation
          window.scrollCancelled = () => {
            clearInterval(timer);
            reject(new Error('操作被取消'));
          };
        });
      }, maxScrollDistance);
    } catch (err) {
      // Normalise a cancelled scroll to the plain cancellation error.
      if (isAborted(signal)) {
        throw new Error(CANCELLED_MESSAGE);
      }
      throw err;
    } finally {
      if (canListen) {
        signal.removeEventListener('abort', cancelInPage);
      }
    }

    throwIfAborted(signal);

    // Wait for new content to load (cut short if the signal aborts)
    await delay(SCROLL_SETTLE_MS, signal);

    throwIfAborted(signal);
  }

  /**
   * Download image.
   *
   * The URL is first converted to its original-size form, then fetched and
   * written synchronously to `outputPath`. Every failure, including
   * cancellation, is reported as `false`; this method does not throw.
   *
   * @param {string} imageUrl - Image URL
   * @param {string} outputPath - Output path
   * @param {AbortSignal} signal - AbortController signal
   * @returns {Promise<boolean>} - Success flag
   */
  async downloadImage(imageUrl, outputPath, signal) {
    try {
      if (!imageUrl || !outputPath) {
        return false;
      }
      if (isAborted(signal)) {
        return false;
      }

      // Convert a thumbnail URL to the original image URL
      const originalUrl = this.transformImageUrl(imageUrl);

      // Pass the signal through so an in-flight request can be cancelled
      const fetchOptions = signal ? { signal } : undefined;
      const response = await fetch(originalUrl, fetchOptions);
      if (!response.ok) {
        return false;
      }
      if (isAborted(signal)) {
        return false;
      }

      const buffer = Buffer.from(await response.arrayBuffer());
      if (isAborted(signal)) {
        return false;
      }

      // Kept synchronous: the original notes that tests rely on this call.
      fs.writeFileSync(outputPath, buffer);
      return true;
    } catch (error) {
      // Cancellation and genuine failures are both reported as `false`.
      return false;
    }
  }

  /**
   * Transform thumbnail URL to original size.
   *
   * Replaces the first size path segment (for example `/236x/` or `/60x60/`)
   * with `/originals/`. URLs without such a segment are returned unchanged.
   *
   * @param {string} url - Thumbnail URL
   * @returns {string} - Original size URL
   */
  transformImageUrl(url) {
    if (!url) return url;
    return url.replace(THUMBNAIL_SIZE_SEGMENT, '/originals/');
  }
}

// Export PinterestScraper class
export { PinterestScraper as default };

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/terryso/mcp-pinterest'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.