mcp-google-sheets

Overview Schema Related Servers Score Discussions

extract-file-data.ts•11.8 KiB

import { createAction, Property } from '@activepieces/pieces-framework'; import { httpClient, HttpMethod } from '@activepieces/pieces-common'; import { extractaAiAuth } from '../common/auth'; const SUPPORTED_FILE_TYPES = ['pdf', 'docx', 'doc', 'txt', 'jpeg', 'jpg', 'png', 'tiff', 'bmp']; interface ExtractionOptions { hasTable: boolean; hasVisuals: boolean; handwrittenTextRecognition: boolean; checkboxRecognition: boolean; longDocument: boolean; splitPdfPages: boolean; specificPageProcessing: boolean; specificPageProcessingOptions?: { from: number; to: number; }; } export const extractFileData = createAction({ auth: extractaAiAuth, name: 'extract_file_data', displayName: 'Extract File Data', description: 'Upload a file and immediately receive extracted content', props: { file: Property.File({ displayName: 'File', description: 'The file to upload for extraction. Supported: PDF, images (JPEG, PNG, TIFF, BMP), Word docs (DOC, DOCX), text files (TXT)', required: true }), name: Property.ShortText({ displayName: 'Extraction Name', description: 'A descriptive name for the extraction', required: true }), language: Property.StaticDropdown({ displayName: 'Language', description: "Document's language for accurate extraction. Extracta.ai supports multiple languages with advanced AI algorithms designed to parse linguistic nuances.", required: true, defaultValue: 'English', options: { options: [ { label: 'Multi-Lingual', value: 'Multi-Lingual' }, { label: 'Arabic', value: 'Arabic' }, { label: 'Bangla', value: 'Bangla' }, { label: 'Bulgarian', value: 'Bulgarian' }, { label: 'Croatian', value: 'Croatian' }, { label: 'Czech', value: 'Czech' }, { label: 'English', value: 'English' }, { label: 'Filipino', value: 'Filipino' }, { label: 'French', value: 'French' }, { label: 'German', value: 'German' }, { label: 'Hindi', value: 'Hindi' }, { label: 'Hungarian', value: 'Hungarian' }, { label: 'Italian', value: 'Italian' }, { label: 'Nepali', value: 'Nepali' }, { label: 'Polish', value: 'Polish' }, { label: 'Portuguese', value: 'Portuguese' }, { label: 'Romanian', value: 'Romanian' }, { label: 'Russian', value: 'Russian' }, { label: 'Serbian', value: 'Serbian' }, { label: 'Spanish', value: 'Spanish' }, { label: 'Turkish', value: 'Turkish' }, { label: 'Ukrainian', value: 'Ukrainian' }, { label: 'Urdu', value: 'Urdu' }, { label: 'Vietnamese', value: 'Vietnamese' } ] } }), fields: Property.Array({ displayName: 'Fields to Extract', description: 'Define the data fields you want to extract from documents', required: true, properties: { key: Property.ShortText({ displayName: 'Field Key', description: 'Unique identifier for this field (e.g., "name", "email")', required: true }), type: Property.StaticDropdown({ displayName: 'Field Type', description: 'Type of data to extract', required: true, defaultValue: 'string', options: { options: [ { label: 'String', value: 'string' }, { label: 'Object', value: 'object' }, { label: 'Array of Strings', value: 'array_string' }, { label: 'Array of Objects', value: 'array_object' } ] } }), description: Property.LongText({ displayName: 'Description', description: 'Describe what this field represents', required: true }), example: Property.ShortText({ displayName: 'Example (Optional)', description: 'Sample value to help the AI understand the expected format', required: false }), objectProperties: Property.Json({ displayName: 'Object Properties', description: 'For object types: Define nested fields as an array. Example: [{"key":"name","type":"string","description":"Full name","example":"John Doe"}]', required: false }), arrayItemProperties: Property.Json({ displayName: 'Array Item Properties', description: 'For array of objects: Define the structure of each array item as an array. Example: [{"key":"company","type":"string","description":"Company name","example":"Tech Corp"}]', required: false }) } }), description: Property.LongText({ displayName: 'Description', description: 'A description for the extraction', required: false }), hasTable: Property.Checkbox({ displayName: 'Has Table', description: 'Whether the document contains tables', required: false, defaultValue: false }), hasVisuals: Property.Checkbox({ displayName: 'Has Visuals', description: 'Whether the document contains charts, graphs, or diagrams', required: false, defaultValue: false }), handwrittenTextRecognition: Property.Checkbox({ displayName: 'Handwritten Text Recognition', description: 'Whether to recognize handwritten text', required: false, defaultValue: false }), checkboxRecognition: Property.Checkbox({ displayName: 'Checkbox Recognition', description: 'Whether to recognize checkboxes and their states', required: false, defaultValue: false }), longDocument: Property.Checkbox({ displayName: 'Long Document', description: 'Enable for very large or complex documents', required: false, defaultValue: false }), splitPdfPages: Property.Checkbox({ displayName: 'Split PDF Pages', description: 'Treat each PDF page as a separate extraction unit', required: false, defaultValue: false }), specificPageProcessing: Property.Checkbox({ displayName: 'Specific Page Processing', description: 'Extract only a specified range of pages', required: false, defaultValue: false }), pageFrom: Property.Number({ displayName: 'Page From', description: 'Starting page number (required if Specific Page Processing is enabled)', required: false }), pageTo: Property.Number({ displayName: 'Page To', description: 'Ending page number (required if Specific Page Processing is enabled)', required: false }) }, async run(context) { const apiKey = context.auth; const file = context.propsValue.file; const fileExtension = file.filename.split('.').pop()?.toLowerCase(); if (!fileExtension || !SUPPORTED_FILE_TYPES.includes(fileExtension)) { throw new Error( `File type not supported. Supported types: ${SUPPORTED_FILE_TYPES.join(', ')}` ); } if (context.propsValue.specificPageProcessing) { if (!context.propsValue.pageFrom || !context.propsValue.pageTo) { throw new Error( 'Page From and Page To are required when Specific Page Processing is enabled' ); } if (context.propsValue.pageFrom > context.propsValue.pageTo) { throw new Error('Page From must be less than or equal to Page To'); } } const options: ExtractionOptions = { hasTable: context.propsValue.hasTable ?? false, hasVisuals: context.propsValue.hasVisuals ?? false, handwrittenTextRecognition: context.propsValue.handwrittenTextRecognition ?? false, checkboxRecognition: context.propsValue.checkboxRecognition ?? false, longDocument: context.propsValue.longDocument ?? false, splitPdfPages: context.propsValue.splitPdfPages ?? false, specificPageProcessing: context.propsValue.specificPageProcessing ?? false }; if ( context.propsValue.specificPageProcessing && context.propsValue.pageFrom && context.propsValue.pageTo ) { options.specificPageProcessingOptions = { from: context.propsValue.pageFrom, to: context.propsValue.pageTo }; } if (!context.propsValue.fields || context.propsValue.fields.length === 0) { throw new Error('At least one field must be defined'); } const fieldKeys = context.propsValue.fields.map((f: any) => f.key); const duplicates = fieldKeys.filter( (key: string, index: number) => fieldKeys.indexOf(key) !== index ); if (duplicates.length > 0) { throw new Error(`Duplicate field keys found: ${duplicates.join(', ')}`); } const apiFields = (context.propsValue.fields || []).map((field: any) => { const baseField: any = { key: field.key, description: field.description, type: field.type === 'array_string' || field.type === 'array_object' ? 'array' : field.type }; if (field.example) { baseField.example = field.example; } if (field.type === 'object' && field.objectProperties) { baseField.properties = field.objectProperties; } if (field.type === 'array_string') { baseField.items = { type: 'string', example: field.example || 'example' }; } if (field.type === 'array_object' && field.arrayItemProperties) { baseField.items = { type: 'object', properties: field.arrayItemProperties }; } return baseField; }); const extractionDetails: any = { name: context.propsValue.name, language: context.propsValue.language, fields: apiFields, options }; if (context.propsValue.description) { extractionDetails.description = context.propsValue.description; } try { const createResponse = await httpClient.sendRequest({ method: HttpMethod.POST, url: 'https://api.extracta.ai/api/v1/createExtraction', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` }, body: { extractionDetails } }); const extractionId = createResponse.body.extractionId; if (!extractionId) { throw new Error('No extractionId returned from createExtraction'); } const formData = new FormData(); formData.append('extractionId', extractionId); const blob = new Blob([Buffer.from(file.base64, 'base64')], { type: file.extension ? `application/${file.extension}` : 'application/octet-stream' }); formData.append('files', blob, file.filename); const uploadResponse = await httpClient.sendRequest({ method: HttpMethod.POST, url: 'https://api.extracta.ai/api/v1/uploadFiles', headers: { Authorization: `Bearer ${apiKey}` }, body: formData }); return { extractionId, uploadResult: uploadResponse.body }; } catch (error: any) { if (error.response) { const status = error.response.status; const body = error.response.body; switch (status) { case 401: throw new Error('Authentication failed. Please check your API key.'); case 403: throw new Error( 'Access denied. Your API key may not have permission for this operation.' ); case 429: throw new Error('Rate limit exceeded. Please try again later.'); case 400: throw new Error( `Invalid request: ${body.message || JSON.stringify(body)}` ); default: throw new Error( `API error (${status}): ${body.message || 'Unknown error'}` ); } } throw new Error(`Request failed: ${error.message}`); } } });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/activepieces/activepieces'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

extract-file-data.ts•11.8 KiB