Skip to main content
Glama

layout.batch_ingest

Process multiple URLs simultaneously for layout analysis with configurable concurrency and error handling options.

Instructions

Batch ingest multiple URLs for layout analysis. Processes URLs in parallel with configurable concurrency. Supports skip/abort modes for error handling.

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| urls | Yes | Array of URLs to ingest (1-100 items) | |
| options | No | Batch processing options | |

Implementation Reference

  • The main handler function `layoutBatchIngestHandler` which processes the batch ingest requests, manages concurrency, handles errors (skip/abort), and interacts with the database.
    /**
     * MCP handler for `layout.batch_ingest`: validates the raw input, ingests every
     * URL through `ingestSingleUrl`, and returns a per-URL result list together with
     * aggregate counters.
     *
     * Error-handling modes (`options.on_error`):
     * - `"skip"` (default): URLs are dispatched through `runWithConcurrencyLimit`
     *   with `options.concurrency` (default 5). Failed items are counted and kept in
     *   `results`, and the call still returns `success: true`.
     * - `"abort"`: URLs are ingested one at a time, so `options.concurrency` is not
     *   used. The first failure returns a `BATCH_ABORTED` error carrying the results
     *   collected so far.
     *
     * Logging is emitted only when `isDevelopment()` returns true.
     *
     * @param input - Unvalidated tool arguments; parsed with `layoutBatchIngestInputSchema`.
     * @returns A success payload (job id, counters, results, summary) or an error
     *   payload with code `VALIDATION_ERROR` or `BATCH_ABORTED`.
     * @throws Rethrows anything raised during parsing that is not a `ZodError`.
     */
    export async function layoutBatchIngestHandler(input: unknown): Promise<LayoutBatchIngestOutput> {
      const startedAt = Date.now();

      // Development-only trace of the incoming call.
      if (isDevelopment()) {
        const rawUrls = (input as Record<string, unknown>)?.urls;
        logger.info("[MCP Tool] layout.batch_ingest called", {
          urlCount: Array.isArray(rawUrls) ? rawUrls.length : 0,
        });
      }

      // Input validation: schema failures become a structured error response.
      let validated: LayoutBatchIngestInput;
      try {
        validated = layoutBatchIngestInputSchema.parse(input);
      } catch (parseFailure) {
        // Anything other than a schema violation is unexpected; let it propagate.
        if (!(parseFailure instanceof ZodError)) {
          throw parseFailure;
        }

        const hinted = createValidationErrorWithHints(parseFailure, "layout.batch_ingest");
        const detailedMessage = formatMultipleDetailedErrors(hinted.errors);
        const formattedErrors = formatZodError(parseFailure);

        if (isDevelopment()) {
          logger.error("[MCP Tool] layout.batch_ingest validation error", {
            errors: hinted.errors,
          });
        }

        return {
          success: false,
          error: {
            code: LAYOUT_MCP_ERROR_CODES.VALIDATION_ERROR,
            message: `Validation error:\n${detailedMessage}`,
            details: {
              errors: formattedErrors,
              detailedErrors: hinted.errors,
            },
          },
        };
      }

      // Resolve options, falling back to the defaults when a value is absent.
      const opts = validated.options;
      const concurrency = opts?.concurrency ?? 5;
      const onError = opts?.on_error ?? "skip";
      const saveToDb = opts?.save_to_db ?? true;
      const autoAnalyze = opts?.auto_analyze ?? true;

      const jobId = uuidv7();
      const total = validated.urls.length;
      const results: BatchIngestResultItem[] = [];
      let completed = 0;
      let failed = 0;
      let totalPatterns = 0;

      if (isDevelopment()) {
        logger.info("[MCP Tool] layout.batch_ingest starting", {
          jobId,
          urlCount: total,
          concurrency,
          onError,
          saveToDb,
          autoAnalyze,
        });
      }

      if (onError !== "abort") {
        // Skip mode: run every URL under the concurrency limit, then tally outcomes.
        const jobs = validated.urls.map(
          (targetUrl) => async (): Promise<BatchIngestResultItem> =>
            ingestSingleUrl(targetUrl, saveToDb, autoAnalyze),
        );

        const settled = await runWithConcurrencyLimit(jobs, concurrency);

        for (const item of settled) {
          results.push(item);
          if (item.status !== "success") {
            failed += 1;
            continue;
          }
          completed += 1;
          totalPatterns += item.patterns_extracted ?? 0;
        }
      } else {
        // Abort mode: strictly sequential so that processing stops at the first failure.
        for (const targetUrl of validated.urls) {
          const item = await ingestSingleUrl(targetUrl, saveToDb, autoAnalyze);
          results.push(item);

          if (item.status === "success") {
            completed += 1;
            totalPatterns += item.patterns_extracted ?? 0;
            continue;
          }

          // First failure: report what was done so far and stop.
          failed += 1;
          const elapsedMs = Date.now() - startedAt;

          if (isDevelopment()) {
            logger.warn("[MCP Tool] layout.batch_ingest aborted", {
              jobId,
              failedUrl: targetUrl,
              error: item.error,
              completed,
              failed,
              processingTimeMs: elapsedMs,
            });
          }

          return {
            success: false,
            error: {
              code: LAYOUT_MCP_ERROR_CODES.BATCH_ABORTED,
              message: `Batch processing aborted due to failure: ${targetUrl} - ${item.error}`,
              details: {
                job_id: jobId,
                completed,
                failed,
                results,
                processing_time_ms: elapsedMs,
              },
            },
          };
        }
      }

      const processingTimeMs = Date.now() - startedAt;
      // Percentage of successful URLs, rounded to two decimal places.
      const successRate = total > 0 ? Math.round((completed / total) * 10000) / 100 : 0;

      if (isDevelopment()) {
        logger.info("[MCP Tool] layout.batch_ingest completed", {
          jobId,
          total,
          completed,
          failed,
          successRate,
          totalPatterns,
          processingTimeMs,
        });
      }

      return {
        success: true,
        data: {
          job_id: jobId,
          total,
          completed,
          failed,
          results,
          summary: {
            success_rate: successRate,
            total_patterns: totalPatterns,
            processing_time_ms: processingTimeMs,
          },
        },
      };
    }
  • The MCP tool definition object `layoutBatchIngestToolDefinition`, which includes the schema and metadata for the `layout.batch_ingest` tool.
    // JSON Schema for the required `urls` argument: an array of 1-100 URI strings.
    const batchIngestUrlsJsonSchema = {
      type: "array",
      description: "Array of URLs to ingest (1-100 items)",
      items: { type: "string", format: "uri" },
      minItems: 1,
      maxItems: 100,
    };

    // JSON Schema for the optional `options` argument; every field declares a default.
    const batchIngestOptionsJsonSchema = {
      type: "object",
      description: "Batch processing options",
      properties: {
        concurrency: {
          type: "number",
          description: "Number of concurrent requests (1-10, default: 5)",
          minimum: 1,
          maximum: 10,
          default: 5,
        },
        on_error: {
          type: "string",
          enum: ["skip", "abort"],
          description:
            "Error handling mode: skip (continue on error) or abort (stop on first error). Default: skip",
          default: "skip",
        },
        save_to_db: {
          type: "boolean",
          description: "Save to WebPage table (default: true)",
          default: true,
        },
        auto_analyze: {
          type: "boolean",
          description: "Auto-analyze HTML and save SectionPattern with embeddings (default: true)",
          default: true,
        },
      },
    };

    /**
     * MCP tool definition for `layout.batch_ingest`: tool name, human-readable
     * description, behavior annotations, and the JSON Schema of the accepted input.
     * Only `urls` is required; `options` and all of its fields are optional.
     */
    export const layoutBatchIngestToolDefinition = {
      name: "layout.batch_ingest",
      description:
        "Batch ingest multiple URLs for layout analysis. Processes URLs in parallel with configurable concurrency. Supports skip/abort modes for error handling.",
      annotations: {
        title: "Layout Batch Ingest",
        readOnlyHint: false,
        idempotentHint: false,
        openWorldHint: true,
      },
      inputSchema: {
        type: "object" as const,
        properties: {
          urls: batchIngestUrlsJsonSchema,
          options: batchIngestOptionsJsonSchema,
        },
        required: ["urls"],
      },
    };

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/TKMD/reftrix-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.