ingestSitemap
Extract and ingest website content using sitemap.xml. Apply path filtering, link limits, and custom chunking for structured data integration with AI models.
Instructions
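Ingests content from a website using its sitemap.xml. Supports path filtering (`includePaths` / `excludePaths`), a link limit (`maxLinks`), optional metadata, and an optional chunk configuration (`chunkSize`, `chunkOverlap`).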
Input Schema
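| Name | Required | Description | Default |
|---|---|---|---|
| ingestConfig | Yes | Object with `source` (must be `"SITEMAP"`), `config` (requires `url`; optional `includePaths`, `excludePaths`, `maxLinks`, `metadata`), and an optional `chunkConfig` (requires `chunkSize` and `chunkOverlap`). | |
| namespaceId | No | String. | |
| tenantId | No | String. | |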
Input Schema (JSON Schema)
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "additionalProperties": false,
  "properties": {
    "ingestConfig": {
      "additionalProperties": false,
      "properties": {
        "chunkConfig": {
          "additionalProperties": false,
          "description": "Optional Chunk config. When not passed, default chunk config will be used.",
          "properties": {
            "chunkOverlap": {
              "type": "number"
            },
            "chunkSize": {
              "type": "number"
            }
          },
          "required": [
            "chunkSize",
            "chunkOverlap"
          ],
          "type": "object"
        },
        "config": {
          "additionalProperties": false,
          "properties": {
            "excludePaths": {
              "items": {
                "type": "string"
              },
              "type": "array"
            },
            "includePaths": {
              "items": {
                "type": "string"
              },
              "type": "array"
            },
            "maxLinks": {
              "type": "number"
            },
            "metadata": {
              "additionalProperties": {
                "anyOf": [
                  {
                    "type": "string"
                  },
                  {
                    "items": {
                      "type": "string"
                    },
                    "type": "array"
                  }
                ]
              },
              "type": "object"
            },
            "url": {
              "type": "string"
            }
          },
          "required": [
            "url"
          ],
          "type": "object"
        },
        "source": {
          "const": "SITEMAP",
          "type": "string"
        }
      },
      "required": [
        "source",
        "config"
      ],
      "type": "object"
    },
    "namespaceId": {
      "type": "string"
    },
    "tenantId": {
      "type": "string"
    }
  },
  "required": [
    "ingestConfig"
  ],
  "type": "object"
}