web_data_x_posts
Extract structured data from X posts using a post URL. This tool provides reliable access to post information through cache lookup, avoiding direct scraping limitations.
Instructions
Quickly read structured X post data. Requires a valid X post URL. This can be a cache lookup, so it can be more reliable than scraping
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes |
Implementation Reference
- server.js:687-743 (handler)Handler function that executes the web_data_x_posts tool by triggering a BrightData dataset snapshot for X posts data and polling for the results until ready.execute: tool_fn(`web_data_${id}`, async(data, ctx)=>{ let trigger_response = await axios({ url: 'https://api.brightdata.com/datasets/v3/trigger', params: {dataset_id, include_errors: true}, method: 'POST', data: [data], headers: api_headers(), }); if (!trigger_response.data?.snapshot_id) throw new Error('No snapshot ID returned from request'); let snapshot_id = trigger_response.data.snapshot_id; console.error(`[web_data_${id}] triggered collection with ` +`snapshot ID: ${snapshot_id}`); let max_attempts = 600; let attempts = 0; while (attempts < max_attempts) { try { if (ctx && ctx.reportProgress) { await ctx.reportProgress({ progress: attempts, total: max_attempts, message: `Polling for data (attempt ` +`${attempts + 1}/${max_attempts})`, }); } let snapshot_response = await axios({ url: `https://api.brightdata.com/datasets/v3` +`/snapshot/${snapshot_id}`, params: {format: 'json'}, method: 'GET', headers: api_headers(), }); if (['running', 'building'].includes(snapshot_response.data?.status)) { console.error(`[web_data_${id}] snapshot not ready, ` +`polling again (attempt ` +`${attempts + 1}/${max_attempts})`); attempts++; await new Promise(resolve=>setTimeout(resolve, 1000)); continue; } console.error(`[web_data_${id}] snapshot data received ` +`after ${attempts + 1} attempts`); let result_data = JSON.stringify(snapshot_response.data); return result_data; } catch(e){ console.error(`[web_data_${id}] polling error: ` +`${e.message}`); attempts++; await new Promise(resolve=>setTimeout(resolve, 1000)); } } throw new Error(`Timeout after ${max_attempts} seconds waiting ` +`for data`); }),
- server.js:676-682 (schema)Dynamically generates the input schema using Zod for web_data_x_posts based on dataset inputs ['url'], resulting in {url: z.string().url()}.let parameters = {}; for (let input of inputs) { let param_schema = input=='url' ? z.string().url() : z.string(); parameters[input] = defaults[input] !== undefined ? param_schema.default(defaults[input]) : param_schema; }
- server.js:606-615 (registration)Dataset configuration for 'x_posts' that is used to create and register the 'web_data_x_posts' tool, including description, dataset_id, and input schema definition.{ id: 'x_posts', dataset_id: 'gd_lwxkxvnf1cynvib9co', description: [ 'Quickly read structured X post data.', 'Requires a valid X post URL.', 'This can be a cache lookup, so it can be more reliable than scraping', ].join('\n'), inputs: ['url'], },
- server.js:674-745 (registration)The loop that processes each dataset entry, including 'x_posts', and registers the corresponding web_data_* tool with name `web_data_${id}`, schema, description, and handler.for (let {dataset_id, id, description, inputs, defaults = {}} of datasets) { let parameters = {}; for (let input of inputs) { let param_schema = input=='url' ? z.string().url() : z.string(); parameters[input] = defaults[input] !== undefined ? param_schema.default(defaults[input]) : param_schema; } addTool({ name: `web_data_${id}`, description, parameters: z.object(parameters), execute: tool_fn(`web_data_${id}`, async(data, ctx)=>{ let trigger_response = await axios({ url: 'https://api.brightdata.com/datasets/v3/trigger', params: {dataset_id, include_errors: true}, method: 'POST', data: [data], headers: api_headers(), }); if (!trigger_response.data?.snapshot_id) throw new Error('No snapshot ID returned from request'); let snapshot_id = trigger_response.data.snapshot_id; console.error(`[web_data_${id}] triggered collection with ` +`snapshot ID: ${snapshot_id}`); let max_attempts = 600; let attempts = 0; while (attempts < max_attempts) { try { if (ctx && ctx.reportProgress) { await ctx.reportProgress({ progress: attempts, total: max_attempts, message: `Polling for data (attempt ` +`${attempts + 1}/${max_attempts})`, }); } let snapshot_response = await axios({ url: `https://api.brightdata.com/datasets/v3` +`/snapshot/${snapshot_id}`, params: {format: 'json'}, method: 'GET', headers: api_headers(), }); if (['running', 'building'].includes(snapshot_response.data?.status)) { console.error(`[web_data_${id}] snapshot not ready, ` +`polling again (attempt ` +`${attempts + 1}/${max_attempts})`); attempts++; await new Promise(resolve=>setTimeout(resolve, 1000)); continue; } console.error(`[web_data_${id}] snapshot data received ` +`after ${attempts + 1} attempts`); let result_data = JSON.stringify(snapshot_response.data); return result_data; } catch(e){ console.error(`[web_data_${id}] polling error: ` +`${e.message}`); attempts++; await new Promise(resolve=>setTimeout(resolve, 1000)); } } throw new Error(`Timeout after ${max_attempts} seconds waiting ` +`for data`); }), }); }