paginated-extraction.ts•4.89 kB
import { HttpMethod } from '@activepieces/pieces-common';
import { createAction, Property } from '@activepieces/pieces-framework';
import { airtopAuth } from '../common/auth';
import { airtopApiCall } from '../common/client';
import { sessionId, windowId } from '../common/props';
import { propsValidation } from '@activepieces/pieces-common';
import { z } from 'zod';
export const paginatedExtractionAction = createAction({
	name: 'paginated-extraction',
	auth: airtopAuth,
	displayName: 'Paginated Extraction',
	description: 'Extract content from paginated or dynamically loaded pages.',
	props: {
		sessionId: sessionId,
		windowId: windowId,
		prompt: Property.LongText({
			displayName: 'Prompt',
			description: 'Instructions on what to extract and how to paginate (e.g. "Navigate through 3 pages and extract titles and prices").',
			required: true,
		}),
		clientRequestId: Property.ShortText({
			displayName: 'Client Request ID',
			description: 'Optional ID to track this request.',
			required: false,
		}),
		outputSchema: Property.LongText({
			displayName: 'Output Schema (JSON)',
			description: 'JSON schema defining the structure of the output. Must be valid JSON schema format.',
			required: false,
		}),
		scrollWithin: Property.ShortText({
			displayName: 'Scroll Within',
			description: 'Describe the scrollable area (e.g. "results container in middle of page").',
			required: false,
		}),
		paginationMode: Property.StaticDropdown({
			displayName: 'How to Load More Content',
			description: 'Choose how to navigate through pages (default: auto)',
			defaultValue: 'auto',
			required: false,
			options: {
				options: [
					{ label: 'Auto (Recommended)', value: 'auto' },
					{ label: 'Click Next/Previous Links', value: 'paginated' },
					{ label: 'Infinite Scroll', value: 'infinite-scroll' },
				],
			},
		}),
		interactionMode: Property.StaticDropdown({
			displayName: 'Speed vs Accuracy',
			description: 'Balance between speed and accuracy (default: auto)',
			defaultValue: 'auto',
			required: false,
			options: {
				options: [
					{ label: 'Auto (Balanced)', value: 'auto' },
					{ label: 'More Accurate (Slower)', value: 'accurate' },
					{ label: 'Faster (Less Accurate)', value: 'cost-efficient' },
				],
			},
		}),
		optimizeUrls: Property.StaticDropdown({
			displayName: 'Optimize URLs',
			description: 'Improve scraping performance by optimizing URLs (default: auto)',
			defaultValue: 'auto',
			required: false,
			options: {
				options: [
					{ label: 'Auto (Default)', value: 'auto' },
					{ label: 'Enabled', value: 'enabled' },
					{ label: 'Disabled', value: 'disabled' },
				],
			},
		}),
		costThresholdCredits: Property.Number({
			displayName: 'Maximum Credits to Spend',
			description: 'Abort if the credit cost exceeds this amount. Set to 0 to disable.',
			required: false,
		}),
		timeThresholdSeconds: Property.Number({
			displayName: 'Maximum Time (Seconds)',
			description: 'Abort if the operation takes longer than this. Set to 0 to disable.',
			required: false,
		}),
	},
	async run({ propsValue, auth }) {
		const {
			sessionId,
			windowId,
			prompt,
			clientRequestId,
			costThresholdCredits,
			timeThresholdSeconds,
			outputSchema,
			scrollWithin,
			paginationMode,
			interactionMode,
			optimizeUrls,
		} = propsValue;
		await propsValidation.validateZod(propsValue, {
			costThresholdCredits: z.number().min(0).optional(),
			timeThresholdSeconds: z.number().min(0).optional(),
			outputSchema: z.string().refine((val) => {
				if (!val) return true;
				try {
					JSON.parse(val);
					return true;
				} catch {
					return false;
				}
			}, { message: 'Must be valid JSON format' }).optional(),
		});
		const configuration: Record<string, any> = {};
		if (outputSchema) {
			configuration['outputSchema'] = outputSchema;
		}
		if (scrollWithin) {
			configuration['experimental'] = {
				scrollWithin,
			};
		}
		if (paginationMode !== 'auto') {
			configuration['paginationMode'] = paginationMode;
		}
		if (interactionMode !== 'auto') {
			configuration['interactionMode'] = interactionMode;
		}
		if (optimizeUrls !== 'auto') {
			configuration['scrape'] = {
				optimizeUrls,
			};
		}
		if (typeof costThresholdCredits === 'number') {
			configuration['costThresholdCredits'] = costThresholdCredits;
		}
		if (typeof timeThresholdSeconds === 'number') {
			configuration['timeThresholdSeconds'] = timeThresholdSeconds;
		}
		const body: Record<string, any> = {
			prompt,
		};
		if (clientRequestId) {
			body['clientRequestId'] = clientRequestId;
		}
		if (Object.keys(configuration).length > 0) {
			body['configuration'] = configuration;
		}
		const response = await airtopApiCall({
			apiKey: auth,
			method: HttpMethod.POST,
			resourceUri: `/sessions/${sessionId}/windows/${windowId}/paginated-extraction`,
			body,
		});
		return response;
	},
});