// crawl-website.ts
import { createAction, Property } from '@activepieces/pieces-framework';
import { httpClient, HttpMethod } from '@activepieces/pieces-common';
import { dumplingAuth } from '../../index';
/**
 * Action that submits a crawl request to the Dumpling AI crawl endpoint and
 * returns the response body as-is.
 *
 * Props:
 * - `url` (required): the website URL to crawl.
 * - `limit` (optional, default 5): maximum number of pages to crawl.
 * - `depth` (optional, default 2): crawl depth.
 * - `format` (optional, default 'markdown'): one of 'markdown', 'text', 'raw'.
 *
 * Optional props are only included in the request body when they carry a
 * value, so the remote API can apply its own defaults for omitted fields.
 */
export const crawlWebsite = createAction({
  name: 'crawl_website',
  auth: dumplingAuth,
  displayName: 'Crawl Website',
  description: 'Crawl a website and return structured content from multiple pages.',
  props: {
    url: Property.ShortText({
      displayName: 'URL',
      required: true,
      description: 'The website URL to crawl.',
    }),
    limit: Property.Number({
      displayName: 'Page Limit',
      required: false,
      defaultValue: 5,
      description: 'Maximum number of pages to crawl.',
    }),
    depth: Property.Number({
      displayName: 'Crawl Depth',
      required: false,
      defaultValue: 2,
      description: 'Depth of crawling (distance between base URL path and sub paths).',
    }),
    format: Property.StaticDropdown({
      displayName: 'Output Format',
      required: false,
      defaultValue: 'markdown',
      options: {
        options: [
          { label: 'Markdown', value: 'markdown' },
          { label: 'Text', value: 'text' },
          { label: 'Raw', value: 'raw' },
        ],
      },
      description: 'Format of the output content.',
    }),
  },
  /**
   * Builds the JSON payload from the configured props, POSTs it to the crawl
   * endpoint with the connection's credential as a Bearer token, and returns
   * the response body without further processing.
   */
  async run(context) {
    const { url, limit, depth, format } = context.propsValue;

    // `unknown` instead of `any`: values are only stored here, never read back,
    // so there is no need to switch off type checking for the payload.
    const requestBody: Record<string, unknown> = { url };

    // Add optional parameters only if provided. `!= null` skips both
    // `undefined` and `null`, so a cleared field is omitted rather than being
    // serialized as an explicit JSON null. A legitimate 0 is still sent.
    if (limit != null) requestBody['limit'] = limit;
    if (depth != null) requestBody['depth'] = depth;
    // Truthiness check on purpose: an empty string is not a usable format.
    if (format) requestBody['format'] = format;

    const response = await httpClient.sendRequest({
      method: HttpMethod.POST,
      url: 'https://app.dumplingai.com/api/v1/crawl',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${context.auth}`,
      },
      body: requestBody,
    });

    return response.body;
  },
});