layout.batch_ingest
Process multiple URLs simultaneously for layout analysis with configurable concurrency and error handling options.
Instructions
Batch ingest multiple URLs for layout analysis. Processes URLs in parallel with configurable concurrency. Supports skip/abort modes for error handling.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| urls | Yes | Array of URLs to ingest (1-100 items) | |
| options | No | Batch processing options | |
Implementation Reference
- The main handler function `layoutBatchIngestHandler`, which processes batch ingest requests, manages concurrency, handles errors (skip/abort), and interacts with the database.
/**
 * MCP handler for the `layout.batch_ingest` tool.
 *
 * Validates `input` with `layoutBatchIngestInputSchema`, ingests every URL via
 * `ingestSingleUrl`, and aggregates the per-URL results into one response.
 *
 * Error-handling modes (`options.on_error`, defaulting to `"skip"`):
 * - `"skip"`: one task per URL is handed to `runWithConcurrencyLimit` together
 *   with `options.concurrency` (default 5); failed items are counted and the
 *   batch still reports `success: true`.
 * - `"abort"`: URLs are awaited one at a time and the first non-success result
 *   ends the batch with a `BATCH_ABORTED` error; `options.concurrency` is not
 *   used on this path.
 *
 * @param input - Raw, unvalidated tool arguments.
 * @returns `{ success: true, data }` with the job id, counters, per-URL results
 *   and a summary (success rate as a percentage rounded to two decimals), or
 *   `{ success: false, error }` for a validation failure or an aborted batch.
 * @throws Re-throws anything other than a `ZodError` raised while parsing the
 *   input. Rejections from `ingestSingleUrl` / `runWithConcurrencyLimit` are
 *   not caught here and propagate to the caller.
 */
export async function layoutBatchIngestHandler(input: unknown): Promise<LayoutBatchIngestOutput> {
  const startTime = Date.now();

  // Development-only trace of the incoming call.
  if (isDevelopment()) {
    const rawUrls = (input as Record<string, unknown>)?.urls;
    logger.info("[MCP Tool] layout.batch_ingest called", {
      urlCount: Array.isArray(rawUrls) ? rawUrls.length : 0,
    });
  }

  // Input validation.
  let validated: LayoutBatchIngestInput;
  try {
    validated = layoutBatchIngestInputSchema.parse(input);
  } catch (error) {
    // Only schema violations become a structured response; everything else bubbles up.
    if (!(error instanceof ZodError)) {
      throw error;
    }

    const hinted = createValidationErrorWithHints(error, "layout.batch_ingest");
    const detailedMessage = formatMultipleDetailedErrors(hinted.errors);
    const formattedErrors = formatZodError(error);

    if (isDevelopment()) {
      logger.error("[MCP Tool] layout.batch_ingest validation error", {
        errors: hinted.errors,
      });
    }

    return {
      success: false,
      error: {
        code: LAYOUT_MCP_ERROR_CODES.VALIDATION_ERROR,
        message: `Validation error:\n${detailedMessage}`,
        details: {
          errors: formattedErrors,
          detailedErrors: hinted.errors,
        },
      },
    };
  }

  // Resolve options, falling back to the defaults.
  const opts = validated.options;
  const concurrency = opts?.concurrency ?? 5;
  const onError = opts?.on_error ?? "skip";
  const saveToDb = opts?.save_to_db ?? true;
  const autoAnalyze = opts?.auto_analyze ?? true;

  const jobId = uuidv7();
  const results: BatchIngestResultItem[] = [];
  let completed = 0;
  let failed = 0;
  let totalPatterns = 0;

  // Records one per-URL outcome and updates the running counters.
  const tally = (item: BatchIngestResultItem): void => {
    results.push(item);
    if (item.status !== "success") {
      failed += 1;
      return;
    }
    completed += 1;
    totalPatterns += item.patterns_extracted ?? 0;
  };

  if (isDevelopment()) {
    logger.info("[MCP Tool] layout.batch_ingest starting", {
      jobId,
      urlCount: validated.urls.length,
      concurrency,
      onError,
      saveToDb,
      autoAnalyze,
    });
  }

  if (onError !== "abort") {
    // on_error: 'skip' -> run the URLs through the concurrency limiter.
    const tasks = validated.urls.map(
      (url) => async (): Promise<BatchIngestResultItem> =>
        ingestSingleUrl(url, saveToDb, autoAnalyze),
    );

    for (const outcome of await runWithConcurrencyLimit(tasks, concurrency)) {
      tally(outcome);
    }
  } else {
    // on_error: 'abort' -> process sequentially so the batch can stop at the first failure.
    for (const url of validated.urls) {
      const outcome = await ingestSingleUrl(url, saveToDb, autoAnalyze);
      tally(outcome);

      if (outcome.status === "success") {
        continue;
      }

      // First failure in abort mode: report what was done so far and stop.
      const abortedAfterMs = Date.now() - startTime;

      if (isDevelopment()) {
        logger.warn("[MCP Tool] layout.batch_ingest aborted", {
          jobId,
          failedUrl: url,
          error: outcome.error,
          completed,
          failed,
          processingTimeMs: abortedAfterMs,
        });
      }

      return {
        success: false,
        error: {
          code: LAYOUT_MCP_ERROR_CODES.BATCH_ABORTED,
          message: `Batch processing aborted due to failure: ${url} - ${outcome.error}`,
          details: {
            job_id: jobId,
            completed,
            failed,
            results,
            processing_time_ms: abortedAfterMs,
          },
        },
      };
    }
  }

  const processingTimeMs = Date.now() - startTime;
  const total = validated.urls.length;

  // Percentage of successful URLs, rounded to two decimal places.
  let successRate = 0;
  if (total > 0) {
    successRate = Math.round((completed / total) * 10000) / 100;
  }

  if (isDevelopment()) {
    logger.info("[MCP Tool] layout.batch_ingest completed", {
      jobId,
      total,
      completed,
      failed,
      successRate,
      totalPatterns,
      processingTimeMs,
    });
  }

  return {
    success: true,
    data: {
      job_id: jobId,
      total,
      completed,
      failed,
      results,
      summary: {
        success_rate: successRate,
        total_patterns: totalPatterns,
        processing_time_ms: processingTimeMs,
      },
    },
  };
}

- The MCP tool definition object `layoutBatchIngestToolDefinition`, which includes the schema and metadata for the `layout.batch_ingest` tool.
/**
 * MCP tool definition for `layout.batch_ingest`: the tool name, human-readable
 * description, behavior annotations, and the JSON Schema describing its input.
 */
export const layoutBatchIngestToolDefinition = {
  name: "layout.batch_ingest",
  description:
    "Batch ingest multiple URLs for layout analysis. Processes URLs in parallel with configurable concurrency. Supports skip/abort modes for error handling.",

  // Behavior hints advertised to clients.
  annotations: {
    title: "Layout Batch Ingest",
    readOnlyHint: false,
    idempotentHint: false,
    openWorldHint: true,
  },

  inputSchema: {
    type: "object" as const,
    properties: {
      // The only required argument: between 1 and 100 URI strings.
      urls: {
        type: "array",
        description: "Array of URLs to ingest (1-100 items)",
        items: { type: "string", format: "uri" },
        minItems: 1,
        maxItems: 100,
      },

      // Optional tuning knobs; each one declares its own default.
      options: {
        type: "object",
        description: "Batch processing options",
        properties: {
          // NOTE(review): declared as "number", so fractional values satisfy this
          // schema — confirm whether "integer" is intended.
          concurrency: {
            type: "number",
            description: "Number of concurrent requests (1-10, default: 5)",
            minimum: 1,
            maximum: 10,
            default: 5,
          },
          on_error: {
            type: "string",
            enum: ["skip", "abort"],
            description:
              "Error handling mode: skip (continue on error) or abort (stop on first error). Default: skip",
            default: "skip",
          },
          save_to_db: {
            type: "boolean",
            description: "Save to WebPage table (default: true)",
            default: true,
          },
          auto_analyze: {
            type: "boolean",
            description:
              "Auto-analyze HTML and save SectionPattern with embeddings (default: true)",
            default: true,
          },
        },
      },
    },
    required: ["urls"],
  },
};