// Web Content MCP Server
// by amotivv
// Verified
// - experiments
// - puppeteer-binding
/**
* Experiment: Cloudflare Browser Rendering Workers Binding API with Puppeteer
*
* Note: This is a simulation of how you would use the Cloudflare Browser Rendering
* Workers Binding API with Puppeteer. In a real implementation, this code would
* run within a Cloudflare Worker with the Browser Rendering binding.
*/
// In a real Cloudflare Worker, you would import Puppeteer like this:
// import puppeteer from '@cloudflare/puppeteer';
/**
 * Simulates a Puppeteer crawl of the Cloudflare Browser Rendering docs.
 *
 * No browser is launched and no request is made: the function logs a single
 * status line and resolves with hard-coded data shaped like the result of the
 * reference sketch in the body. Every call builds a fresh result object.
 *
 * @returns {Promise<{
 *   headings: Array<{ level: string, text: string }>,
 *   codeExamples: Array<{ language: string, code: string }>,
 *   apiEndpoints: Array<{ method: string, path: string, description: string }>
 * }>} Mock headings, code samples and REST endpoints.
 */
async function navigateAndExtractContent() {
  console.log('Simulating Puppeteer navigation and content extraction...');

  // Reference sketch of the real Worker code (never executed in this file).
  // NOTE(review): Cloudflare's published examples appear to pass the Browser
  // Rendering binding to launch(), e.g. puppeteer.launch(env.MYBROWSER) —
  // confirm the exact signature and supported options before copying this.
  //
  //   const browser = await puppeteer.launch({
  //     // Browser Rendering specific options
  //     userDataDir: '/tmp/puppeteer_user_data',
  //   });
  //   try {
  //     const page = await browser.newPage();
  //
  //     // Navigate to Cloudflare docs
  //     await page.goto('https://developers.cloudflare.com/browser-rendering/', {
  //       waitUntil: 'networkidle0',
  //     });
  //
  //     // Extract headings
  //     const headings = await page.evaluate(() =>
  //       Array.from(document.querySelectorAll('h1, h2, h3'), (el) => ({
  //         level: el.tagName.toLowerCase(),
  //         text: el.textContent?.trim() || '',
  //       })),
  //     );
  //
  //     // Extract code examples
  //     const codeExamples = await page.evaluate(() =>
  //       Array.from(document.querySelectorAll('pre code'), (el) => ({
  //         language: el.className.replace('language-', ''),
  //         code: el.textContent?.trim() || '',
  //       })),
  //     );
  //
  //     // Navigate to a different section. Start waiting before the click so
  //     // a navigation that completes quickly is not missed.
  //     await Promise.all([
  //       page.waitForNavigation({ waitUntil: 'networkidle0' }),
  //       page.click('a[href*="rest-api"]'),
  //     ]);
  //
  //     // Extract API endpoints
  //     const apiEndpoints = await page.evaluate(() =>
  //       Array.from(document.querySelectorAll('.endpoint'), (el) => ({
  //         method: el.querySelector('.method')?.textContent?.trim() || '',
  //         path: el.querySelector('.path')?.textContent?.trim() || '',
  //         description: el.querySelector('.description')?.textContent?.trim() || '',
  //       })),
  //     );
  //
  //     return { headings, codeExamples, apiEndpoints };
  //   } finally {
  //     // With session reuse, release the browser with
  //     //   await browser.disconnect();
  //     // instead of
  //     //   await browser.close();
  //   }

  // Small builders for the mock records; key order matches the sketch above.
  const heading = (level, text) => ({ level, text });
  const postEndpoint = (path, description) => ({ method: 'POST', path, description });
  // Mock snippets are multi-line strings that begin and end with a newline.
  const jsSnippet = (...lines) => ({
    language: 'javascript',
    code: ['', ...lines, ''].join('\n'),
  });

  const headings = [
    heading('h1', 'Browser Rendering'),
    heading('h2', 'Overview'),
    heading('h2', 'REST API'),
    heading('h3', 'Content Endpoint'),
    heading('h3', 'Screenshot Endpoint'),
    heading('h2', 'Workers Binding API'),
  ];

  const codeExamples = [
    jsSnippet(
      '// Example of using the REST API',
      "fetch('https://browser-rendering.example.workers.dev/content', {",
      "method: 'POST',",
      'body: JSON.stringify({',
      "url: 'https://example.com',",
      "rejectResourceTypes: ['image', 'font']",
      '})',
      '})',
      '.then(response => response.json())',
      '.then(data => console.log(data.content));',
    ),
    jsSnippet(
      '// Example of using the Workers Binding API',
      "import puppeteer from '@cloudflare/puppeteer';",
      'export default {',
      'async fetch(request, env) {',
      'const browser = await puppeteer.launch();',
      'const page = await browser.newPage();',
      "await page.goto('https://example.com');",
      'const content = await page.content();',
      'await browser.disconnect();',
      'return new Response(content);',
      '}',
      '};',
    ),
  ];

  const apiEndpoints = [
    postEndpoint('/content', 'Fetches rendered HTML content from a URL'),
    postEndpoint('/screenshot', 'Captures a screenshot of a web page'),
    postEndpoint('/pdf', 'Renders a web page as a PDF document'),
    postEndpoint('/scrape', 'Extracts structured data from HTML elements'),
  ];

  // For this simulation, the mock data stands in for the crawl result.
  return { headings, codeExamples, apiEndpoints };
}
/**
 * Walks through the browser session-reuse pattern without a real browser.
 *
 * Only console output is produced: one status line followed by a three-step
 * summary of the calls shown in the reference sketch inside the body.
 *
 * @returns {Promise<void>} Resolves once every line has been logged.
 */
async function demonstrateSessionReuse() {
  // Reference sketch of the real Worker code (never executed in this file).
  // NOTE(review): Cloudflare's published examples appear to pass the Browser
  // Rendering binding to these calls (e.g. puppeteer.sessions(env.MYBROWSER),
  // puppeteer.connect(env.MYBROWSER, sessionId)) and to expose the identifier
  // as `sessionId` — confirm against the current docs before copying this.
  //
  //   // Get existing browser sessions
  //   const sessions = await puppeteer.sessions();
  //   let browser;
  //   if (sessions.length > 0) {
  //     // Connect to an existing session
  //     browser = await puppeteer.connect({ sessionId: sessions[0].id });
  //     console.log('Connected to existing session');
  //   } else {
  //     // Create a new session
  //     browser = await puppeteer.launch();
  //     console.log('Created new session');
  //   }
  //   try {
  //     // Use the browser...
  //     const page = await browser.newPage();
  //     await page.goto('https://example.com');
  //     // ...
  //   } finally {
  //     // Disconnect instead of closing to keep the session alive
  //     await browser.disconnect();
  //   }

  // Status line first, then the summary of the steps sketched above.
  const transcript = [
    'Simulating Puppeteer session reuse...',
    'In a real implementation, you would:',
    '1. Check for existing sessions with puppeteer.sessions()',
    '2. Connect to an existing session or create a new one',
    '3. Use browser.disconnect() instead of browser.close() to keep the session alive',
  ];

  for (const line of transcript) {
    console.log(line);
  }
}
/**
 * Entry point of the experiment: runs the simulated extraction, prints a
 * report of the result, then runs the session-reuse walkthrough.
 *
 * Anything thrown after the opening status line is caught and reported with
 * console.error, so the returned promise does not reject for those failures.
 *
 * @returns {Promise<void>} Resolves when the report (or the failure message)
 *   has been written to the console.
 */
async function runExperiment() {
  const say = (message) => console.log(message);

  say('Starting Browser Rendering Workers Binding API experiment...');

  try {
    // Simulated navigation and extraction.
    const report = await navigateAndExtractContent();

    say('\nExtracted headings:');
    report.headings.forEach(({ level, text }) => {
      say(`${level}: ${text}`);
    });

    say('\nExtracted API endpoints:');
    report.apiEndpoints.forEach(({ method, path, description }) => {
      say(`${method} ${path} - ${description}`);
    });

    // Only the first code example is printed.
    say('\nExtracted code examples (first example):');
    if (report.codeExamples.length > 0) {
      const leadExample = report.codeExamples[0];
      say(`Language: ${leadExample.language}`);
      say(leadExample.code);
    }

    // Simulated session reuse.
    await demonstrateSessionReuse();

    const closingNotes = [
      '\nNote: This is a simulation. In a real implementation:',
      '1. This code would run within a Cloudflare Worker',
      '2. You would use the actual @cloudflare/puppeteer package',
      '3. You would need to set up the Browser Rendering binding in your wrangler.toml',
    ];
    for (const note of closingNotes) {
      say(note);
    }
  } catch (error) {
    console.error('Experiment failed:', error);
  }
}
// Script entry point: start the experiment as soon as the file is loaded.
// The returned promise is intentionally not awaited; runExperiment wraps its
// work in a try/catch and reports failures through console.error.
runExperiment();