ingestUrls
Extract and process content from multiple URLs with custom scraping options and metadata for efficient content management in the SourceSync.ai MCP Server.
Instructions
Ingests content from a list of URLs. Supports optional scraping options (include/exclude selectors), metadata, and a custom chunk configuration.
Input Schema
Name | Required | Description | Default |
---|---|---|---|
ingestConfig | Yes | ||
namespaceId | No | ||
tenantId | No | ||
Input Schema (JSON Schema)
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "properties": {
    "ingestConfig": {
      "type": "object",
      "properties": {
        "source": {
          "type": "string",
          "const": "URLS_LIST"
        },
        "config": {
          "type": "object",
          "properties": {
            "urls": {
              "type": "array",
              "items": { "type": "string" }
            },
            "scrapeOptions": {
              "type": "object",
              "properties": {
                "includeSelectors": {
                  "type": "array",
                  "items": { "type": "string" }
                },
                "excludeSelectors": {
                  "type": "array",
                  "items": { "type": "string" }
                }
              },
              "additionalProperties": false
            },
            "metadata": {
              "type": "object",
              "additionalProperties": {
                "anyOf": [
                  { "type": "string" },
                  {
                    "type": "array",
                    "items": { "type": "string" }
                  }
                ]
              }
            }
          },
          "required": ["urls"],
          "additionalProperties": false
        },
        "chunkConfig": {
          "type": "object",
          "description": "Optional Chunk config. When not passed, default chunk config will be used.",
          "properties": {
            "chunkSize": { "type": "number" },
            "chunkOverlap": { "type": "number" }
          },
          "required": ["chunkSize", "chunkOverlap"],
          "additionalProperties": false
        }
      },
      "required": ["source", "config"],
      "additionalProperties": false
    },
    "namespaceId": { "type": "string" },
    "tenantId": { "type": "string" }
  },
  "required": ["ingestConfig"],
  "additionalProperties": false
}