mcp-google-sheets

Overview Schema Related Servers Score Discussions

crawl.ts•10.2 KiB

import { createAction, Property, DynamicPropsValue, InputPropertyMap } from '@activepieces/pieces-framework';
import { httpClient, HttpMethod } from '@activepieces/pieces-common';
import { firecrawlAuth } from '../../index';
import {
  forScreenshotOutputFormat,
  forSimpleOutputFormat,
  forJsonOutputFormat,
  polling,
  downloadAndSaveCrawlScreenshots,
  FIRECRAWL_API_BASE_URL,
} from '../common/common';

/** Fallback used when the user leaves the timeout empty (or sets it to 0). */
const DEFAULT_TIMEOUT_SECONDS = 300;

/**
 * Value sent as `scrapeOptions.maxAge` on every crawl request.
 * NOTE(review): presumably milliseconds (172800000 ms = 48 h) — confirm against the Firecrawl API docs.
 */
const SCRAPE_MAX_AGE = 172800000;

/**
 * Builds the `webhook` section of the crawl request body.
 *
 * @param useWebhook - Whether the user enabled webhook delivery.
 * @param webhookProperties - Values of the dynamic "Webhook Properties" group
 *   (`webhookUrl`, `webhookHeaders`, `webhookMetadata`, `webhookEvents`).
 * @returns An object with `url` plus any of `headers`, `metadata`, `events` that were
 *   provided, or `null` when webhooks are disabled or no URL was given.
 */
function webhookConfig(
  useWebhook: boolean,
  webhookProperties: DynamicPropsValue | null | undefined,
): Record<string, unknown> | null {
  if (!useWebhook || !webhookProperties) {
    return null;
  }

  const webhookUrl = webhookProperties['webhookUrl'];
  if (!webhookUrl) {
    return null;
  }

  const webhook: Record<string, unknown> = { url: webhookUrl };

  const headers = webhookProperties['webhookHeaders'];
  if (headers) {
    webhook['headers'] = headers;
  }

  const metadata = webhookProperties['webhookMetadata'];
  if (metadata) {
    webhook['metadata'] = metadata;
  }

  // Only forward an explicit, non-empty event list; otherwise the field is omitted.
  const events = webhookProperties['webhookEvents'];
  if (Array.isArray(events) && events.length > 0) {
    webhook['events'] = events;
  }

  return webhook;
}

/**
 * "Crawl" action: submits a crawl job to `${FIRECRAWL_API_BASE_URL}/crawl`, waits for it
 * through the shared `polling` helper and returns the polled result. When the selected
 * output format is `screenshot`, the result is additionally passed to
 * `downloadAndSaveCrawlScreenshots` before being returned.
 */
export const crawl = createAction({
  auth: firecrawlAuth,
  name: 'crawl',
  displayName: 'Crawl',
  description: 'Crawl multiple pages from a website based on specified rules and patterns.',
  props: {
    url: Property.ShortText({
      displayName: 'URL',
      description: 'The base URL to start crawling from.',
      required: true,
    }),
    prompt: Property.LongText({
      displayName: 'Prompt',
      description: 'Describe what information you want to extract.',
      required: false,
      defaultValue: 'Get me all of the blog pages on the website, probably located in /blog',
    }),
    limit: Property.Number({
      displayName: 'Limit',
      description: 'Maximum number of pages to crawl. Default limit is 10.',
      required: false,
      defaultValue: 10,
    }),
    formats: Property.Dropdown({
      displayName: 'Output Format',
      description: 'Choose what format you want your output in.',
      required: true,
      refreshers: [],
      options: async () => {
        return {
          options: [
            { label: 'Markdown', value: 'markdown' },
            { label: 'Summary', value: 'summary' },
            { label: 'Links', value: 'links' },
            { label: 'HTML', value: 'html' },
            { label: 'Screenshot', value: 'screenshot' },
            { label: 'JSON', value: 'json' },
          ],
        };
      },
      defaultValue: 'markdown',
    }),
    onlyMainContent: Property.Checkbox({
      displayName: 'Only Main Content',
      description: 'Only return the main content of the page, excluding headers, navs, footers, etc.',
      required: false,
      defaultValue: false,
    }),
    // Only rendered when the output format is JSON.
    extractMode: Property.DynamicProperties({
      displayName: 'Schema Mode',
      description: 'Data schema type.',
      required: false,
      refreshers: ['formats'],
      props: async (propsValue: Record<string, DynamicPropsValue>): Promise<InputPropertyMap> => {
        const format = propsValue['formats'] as unknown as string;
        if (format !== 'json') {
          return {};
        }
        return {
          mode: Property.StaticDropdown<'simple' | 'advanced'>({
            displayName: 'Data Schema Type',
            description: 'For complex schema, you can use advanced mode.',
            required: true,
            defaultValue: 'simple',
            options: {
              disabled: false,
              options: [
                { label: 'Simple', value: 'simple' },
                { label: 'Advanced', value: 'advanced' },
              ],
            },
          }),
        };
      },
    }),
    // Only rendered when the output format is JSON; shape depends on the schema mode.
    extractSchema: Property.DynamicProperties({
      displayName: 'Data Definition',
      required: false,
      refreshers: ['formats', 'extractMode'],
      props: async (propsValue: Record<string, DynamicPropsValue>): Promise<InputPropertyMap> => {
        const mode = propsValue['extractMode']?.['mode'] as unknown as 'simple' | 'advanced';
        const format = propsValue['formats'] as unknown as string;
        if (format !== 'json') {
          return {};
        }

        // Advanced mode: the user supplies a raw JSON Schema.
        if (mode === 'advanced') {
          return {
            fields: Property.Json({
              displayName: 'JSON Schema',
              description:
                'Learn more about JSON Schema here: https://json-schema.org/learn/getting-started-step-by-step',
              required: true,
              defaultValue: {
                type: 'object',
                properties: {
                  name: {
                    type: 'string',
                  },
                  age: {
                    type: 'number',
                  },
                },
                required: ['name'],
              },
            }),
          };
        }

        // Simple mode (also the fallback when no mode is set): a list of field definitions.
        return {
          fields: Property.Array({
            displayName: 'Data Definition',
            required: true,
            properties: {
              name: Property.ShortText({
                displayName: 'Name',
                description:
                  'Provide the name of the value you want to extract from the unstructured text. The name should be unique and short. ',
                required: true,
              }),
              description: Property.LongText({
                displayName: 'Description',
                description: 'Brief description of the data, this hints for the AI on what to look for',
                required: false,
              }),
              type: Property.StaticDropdown({
                displayName: 'Data Type',
                description: 'Type of parameter.',
                required: true,
                defaultValue: 'string',
                options: {
                  disabled: false,
                  options: [
                    { label: 'Text', value: 'string' },
                    { label: 'Number', value: 'number' },
                    { label: 'Boolean', value: 'boolean' },
                  ],
                },
              }),
              isRequired: Property.Checkbox({
                displayName: 'Fail if Not present?',
                required: true,
                defaultValue: false,
              }),
            },
          }),
        };
      },
    }),
    timeout: Property.Number({
      displayName: 'Timeout (seconds)',
      description: 'Timeout in seconds after which the task will be cancelled',
      required: false,
      defaultValue: 300,
    }),
    useWebhook: Property.Checkbox({
      displayName: 'Deliver Results to Webhook',
      description: 'Enable to send crawl results to a webhook URL.',
      required: false,
      defaultValue: false,
    }),
    // Only rendered when "Deliver Results to Webhook" is checked.
    webhookProperties: Property.DynamicProperties({
      displayName: 'Webhook Properties',
      description: 'Properties for webhook configuration.',
      required: false,
      refreshers: ['useWebhook'],
      props: async (propsValue: Record<string, DynamicPropsValue>): Promise<InputPropertyMap> => {
        const useWebhook = propsValue['useWebhook'] as unknown as boolean;
        if (!useWebhook) {
          return {};
        }
        return {
          webhookUrl: Property.ShortText({
            displayName: 'Webhook URL',
            description:
              'The URL to send the webhook to. This will trigger for crawl started (crawl.started), every page crawled (crawl.page) and when the crawl is completed (crawl.completed or crawl.failed).',
            required: true,
          }),
          webhookHeaders: Property.Json({
            displayName: 'Webhook Headers',
            description: 'Headers to send to the webhook URL.',
            required: false,
            defaultValue: {},
          }),
          webhookMetadata: Property.Json({
            displayName: 'Webhook Metadata',
            description: 'Custom metadata that will be included in all webhook payloads for this crawl.',
            required: false,
            defaultValue: {},
          }),
          webhookEvents: Property.Array({
            displayName: 'Webhook Events',
            description: 'Type of events that should be sent to the webhook URL. (default: all)',
            required: false,
            defaultValue: ['completed', 'page', 'failed', 'started'],
          }),
        };
      },
    }),
  },
  async run(context) {
    const { auth, propsValue } = context;

    const body: Record<string, unknown> = {
      url: propsValue.url,
      sitemap: 'include',
      crawlEntireDomain: false,
      maxDiscoveryDepth: 10,
    };

    // Optional inputs may arrive as undefined or null; skip both rather than sending null.
    if (propsValue.limit != null) {
      body['limit'] = propsValue.limit;
    }

    // A blank prompt carries no instruction, so it is omitted instead of being sent as ''.
    if (typeof propsValue.prompt === 'string' && propsValue.prompt.trim() !== '') {
      body['prompt'] = propsValue.prompt;
    }

    const format = propsValue.formats as string;
    const scrapeOptions: Record<string, unknown> = {};

    if (format === 'screenshot') {
      scrapeOptions['formats'] = [forScreenshotOutputFormat()];
    } else if (format === 'json') {
      const extractConfig = {
        mode: propsValue.extractMode?.['mode'],
        schema: propsValue.extractSchema,
      };
      const jsonFormat = forJsonOutputFormat(extractConfig);
      scrapeOptions['formats'] = [{ type: 'json', schema: jsonFormat.schema }];
    } else {
      scrapeOptions['formats'] = [forSimpleOutputFormat(format)];
    }

    if (propsValue.onlyMainContent != null) {
      scrapeOptions['onlyMainContent'] = propsValue.onlyMainContent;
    }
    scrapeOptions['maxAge'] = SCRAPE_MAX_AGE;

    // scrapeOptions always has at least `formats` and `maxAge`, so it is attached unconditionally.
    body['scrapeOptions'] = scrapeOptions;

    const webhook = webhookConfig(propsValue.useWebhook ?? false, propsValue.webhookProperties);
    if (webhook) {
      body['webhook'] = webhook;
    }

    const response = await httpClient.sendRequest({
      method: HttpMethod.POST,
      url: `${FIRECRAWL_API_BASE_URL}/crawl`,
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${auth}`,
      },
      body: body,
    });

    // Fail fast with a clear message instead of polling with an undefined job id.
    const jobId = response.body?.id;
    if (!jobId) {
      throw new Error(
        `Firecrawl did not return a crawl job id; response body: ${JSON.stringify(response.body)}`,
      );
    }

    // `||` is deliberate: an empty or 0 timeout falls back to the default.
    const timeoutSeconds = propsValue.timeout || DEFAULT_TIMEOUT_SECONDS;
    const result = await polling(jobId, auth, timeoutSeconds, 'crawl');

    if (format === 'screenshot') {
      await downloadAndSaveCrawlScreenshots(result, context);
    }

    return result;
  },
});

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/activepieces/activepieces'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

crawl.ts•10.2 KiB