/**
 * Shape of the settings object consumed by the spider/crawler.
 *
 * Every field except `extractSelectors` is required. How each value is
 * enforced lives in the crawler implementation, not in this declaration, so
 * the notes below are hedged where the semantics cannot be read off this file.
 */
export interface SpiderConfig {
  /** Depth limit for a crawl (presumably link hops from the start URL; confirm against the crawler). */
  maxDepth: number;

  /** Cap on the number of pages handled in a crawl. */
  maxPages: number;

  /** Concurrency setting (presumably the number of simultaneous requests; confirm against the crawler). */
  concurrency: number;

  /** User-agent string identifying the crawler. */
  userAgent: string;

  /** Timeout value. NOTE(review): unit is not stated here; the default of 10000 suggests milliseconds. Confirm. */
  timeout: number;

  /** Number of retry attempts. NOTE(review): whether this counts the first try is not visible here. */
  retryAttempts: number;

  /** Cache time-to-live. The default of 86400000 is annotated as 24 hours, i.e. the value is in milliseconds. */
  cacheTTL: number;

  /** Flag controlling whether robots.txt is respected. */
  respectRobotsTxt: boolean;

  /** Patterns to include. NOTE(review): matching syntax (glob vs. substring) is decided by the consumer. */
  includePatterns: string[];

  /** Patterns to exclude. NOTE(review): matching syntax (glob vs. substring) is decided by the consumer. */
  excludePatterns: string[];

  /**
   * Optional selectors used for extraction; each entry may be omitted
   * independently. Presumably CSS selectors; verify against the extractor.
   */
  extractSelectors?: {
    /** Selector for the content region. */
    content?: string;
    /** Selector for the title. */
    title?: string;
    /** Selector for the navigation region. */
    navigation?: string;
  };
}
/**
 * Baseline crawler settings.
 *
 * `extractSelectors` is intentionally absent, so that optional field is
 * `undefined` on this object. Key order is kept stable because it is
 * observable through `Object.keys` / `JSON.stringify`.
 */
export const defaultConfig: SpiderConfig = {
  // Crawl limits.
  maxDepth: 3,
  maxPages: 100,
  concurrency: 5,

  // Request settings.
  userAgent: 'SpiderMCP/1.0 Documentation Crawler',
  timeout: 10 * 1000, // 10000; NOTE(review): presumably milliseconds (10 s) — confirm
  retryAttempts: 3,

  // Cache lifetime: 24 hours expressed in milliseconds (86400000).
  cacheTTL: 24 * 60 * 60 * 1000,

  respectRobotsTxt: true,

  // Empty include list; how an empty list is interpreted is up to the consumer.
  includePatterns: [],
  // Excluded by default: blog and news path segments, plus PDF and ZIP entries.
  excludePatterns: ['/blog/', '/news/', '*.pdf', '*.zip'],
};