{
"project": "crawl4ai-mcp",
"tasks": [
{
"id": "task-env-setup",
"title": "Environment Setup and Dependencies",
"description": "Set up Python virtual environment and install required dependencies for Crawl4AI MCP server",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "completed",
"success_criteria": [
"Python virtual environment created with Python 3.10+",
"FastMCP and Crawl4AI dependencies installed successfully",
"Playwright browser binaries downloaded and configured",
"Project directory structure established"
],
"important_files": [
"requirements.txt",
"pyproject.toml",
"crawl4ai_mcp_server.py"
],
"estimate": "1-2 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.171Z"
},
{
"id": "linter_task_active",
"title": "Fix Linter Errors - IMMEDIATE",
"description": "Fix 2 errors and 0 warnings found in recently edited files: test_tools.py",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "completed",
"important_files": [
"development/linter-errors.md",
"tests/test_tools.py"
],
"success_criteria": [
"All linter errors in edited files resolved",
"development/linter-errors.md shows no issues for edited files",
"Code passes linting without warnings or errors"
],
"created_at": "2025-07-29T23:08:18.568Z",
"is_linter_task": true,
"linter_summary": {
"total_violations": 2,
"errors": 2,
"warnings": 0,
"files_affected": 1
}
},
{
"id": "task-main-server",
"title": "Create Main MCP Server File",
"description": "Implement the basic FastMCP server boilerplate with proper logging configuration to prevent stdio corruption",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "completed",
"dependencies": [
"task-env-setup"
],
"success_criteria": [
"crawl4ai_mcp_server.py file created with FastMCP boilerplate",
"Python logging configured to direct output to stderr",
"Server starts without errors as empty MCP server",
"Proper async/await patterns implemented"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.171Z"
},
{
"id": "task-page-structure-tool",
"title": "Implement get_page_structure Tool",
"description": "Create the fundamental tool that fetches webpage structural content for client AI analysis",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "completed",
"dependencies": [
"task-main-server"
],
"success_criteria": [
"get_page_structure tool accepts url parameter",
"AsyncWebCrawler integration working correctly",
"Returns cleaned HTML or raw markdown as string",
"Proper error handling for failed requests",
"Tool documentation with clear parameter descriptions"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "3-4 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.171Z"
},
{
"id": "task-schema-crawler-tool",
"title": "Implement crawl_with_schema Tool",
"description": "Create precision scraping tool that executes AI-generated extraction schemas",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "completed",
"dependencies": [
"task-crawl-schema-extraction-research"
],
"success_criteria": [
"crawl_with_schema tool accepts url and extraction_schema parameters",
"JsonCssExtractionStrategy integration implemented",
"Returns extracted data as JSON string",
"Schema validation and error handling",
"Comprehensive tool documentation"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "4-5 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.171Z"
},
{
"id": "task-screenshot-tool",
"title": "Implement take_screenshot Tool",
"description": "Create media capture tool for webpage visual representation",
"mode": "DEVELOPMENT",
"priority": "medium",
"status": "completed",
"dependencies": [
"task-screenshot-image-processing-research"
],
"success_criteria": [
"take_screenshot tool accepts url parameter",
"CrawlerRunConfig configured with screenshot=True",
"Base64 screenshot decoded and returned as ImageContent",
"FastMCP native image handling working",
"Error handling for screenshot failures"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.171Z"
},
{
"id": "task-mcp-testing",
"title": "Interactive Testing with MCP Inspector",
"description": "Validate all tools using fastmcp dev command and MCP Inspector interface",
"mode": "TESTING",
"priority": "high",
"status": "completed",
"dependencies": [
"task-screenshot-tool"
],
"success_criteria": [
"fastmcp dev command runs server successfully",
"All three tools testable through MCP Inspector",
"get_page_structure returns valid HTML content",
"crawl_with_schema processes test schemas correctly",
"take_screenshot displays images in Inspector"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.171Z"
},
{
"id": "task-unit-tests",
"title": "Create Unit Test Suite",
"description": "Implement pytest-based automated testing using in-memory FastMCP client",
"mode": "TESTING",
"priority": "medium",
"status": "completed",
"dependencies": [
"task-mcp-testing"
],
"success_criteria": [
"pytest testing framework configured",
"Unit tests for all three tools implemented",
"In-memory fastmcp.Client used for testing",
"Mock responses for external web requests",
"Test coverage above 80% for core functionality"
],
"important_files": [
"test_crawl4ai_mcp.py",
"pytest.ini",
"crawl4ai_mcp_server.py"
],
"estimate": "3-4 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.172Z",
"subtasks": [
"subtask-pytest-setup-1753828755652",
"subtask-mock-infrastructure-17538287556521",
"subtask-unit-tests-tools-17538287556522",
"subtask-integration-tests-17538287556523"
]
},
{
"id": "task-documentation",
"title": "Finalize Documentation and Integration",
"description": "Create comprehensive documentation with integration instructions for Claude Desktop",
"mode": "DEVELOPMENT",
"priority": "medium",
"status": "completed",
"dependencies": [
"task-unit-tests"
],
"success_criteria": [
"All tool docstrings complete with Annotated and Field descriptions",
"README.md with setup instructions created",
"Claude Desktop JSON configuration snippet provided",
"Integration troubleshooting guide included",
"Example usage scenarios documented"
],
"important_files": [
"README.md",
"crawl4ai_mcp_server.py",
"INTEGRATION.md"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T20:03:48.172Z"
},
{
"id": "task-server-startup-validation",
"title": "Validate Server Startup and Basic Operations",
"description": "Ensure the MCP server starts without errors and all basic operations work correctly",
"mode": "TESTING",
"priority": "high",
"status": "completed",
"dependencies": [
"task-page-structure-tool"
],
"success_criteria": [
"Server starts without errors using stdio transport",
"server_status tool returns proper status information",
"get_page_structure tool can fetch and return webpage content",
"No import errors or runtime exceptions during startup",
"Logging configuration prevents stdio corruption"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "1-2 hours",
"requires_research": false,
"created_at": "2025-07-29T20:14:00.000Z"
},
{
"id": "task-crawl-schema-extraction-research",
"title": "Research Crawl4AI Schema-Based Extraction Patterns",
"description": "Investigate JsonCssExtractionStrategy and schema validation patterns for the crawl_with_schema tool",
"mode": "RESEARCH",
"priority": "high",
"status": "pending",
"dependencies": [
"task-server-startup-validation"
],
"success_criteria": [
"JsonCssExtractionStrategy usage patterns documented",
"Schema validation approaches researched and compared",
"Error handling strategies for invalid schemas identified",
"Performance considerations for large schema extractions analyzed",
"Research report created with implementation recommendations"
],
"important_files": [
"./development/research-reports/research-report-crawl-schema-extraction.md",
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": true,
"created_at": "2025-07-29T20:14:00.000Z"
},
{
"id": "task-screenshot-image-processing-research",
"title": "Research Screenshot and Image Processing Integration",
"description": "Investigate FastMCP ImageContent handling and base64 screenshot processing for take_screenshot tool",
"mode": "RESEARCH",
"priority": "medium",
"status": "pending",
"dependencies": [
"task-schema-crawler-tool"
],
"success_criteria": [
"FastMCP ImageContent usage patterns documented",
"Base64 image processing and validation methods researched",
"Screenshot configuration options in Crawl4AI analyzed",
"Error handling for screenshot failures investigated",
"Performance considerations for large image processing documented"
],
"important_files": [
"./development/research-reports/research-report-screenshot-processing.md",
"crawl4ai_mcp_server.py"
],
"estimate": "1-2 hours",
"requires_research": true,
"created_at": "2025-07-29T20:14:30.000Z"
},
{
"id": "task-fastmcp-integration-validation",
"title": "Validate FastMCP Integration and Tool Registration",
"description": "Ensure all tools are properly registered with FastMCP and work correctly with MCP protocol",
"mode": "TESTING",
"priority": "high",
"status": "pending",
"dependencies": [
"task-screenshot-tool"
],
"success_criteria": [
"All three tools (server_status, get_page_structure, crawl_with_schema, take_screenshot) register successfully",
"Tool parameter validation works correctly with Pydantic Field annotations",
"MCP Context integration functions properly for logging and progress",
"Error responses follow MCP protocol specifications",
"Tools can be invoked through MCP Inspector without issues"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T20:14:30.000Z"
},
{
"id": "task-performance-optimization",
"title": "Optimize Server Performance and Resource Usage",
"description": "Implement performance optimizations for web crawling and ensure proper resource cleanup",
"mode": "DEVELOPMENT",
"priority": "medium",
"status": "pending",
"dependencies": [
"task-fastmcp-integration-validation"
],
"success_criteria": [
"AsyncWebCrawler instances properly created and cleaned up",
"Connection pooling and timeout configurations optimized",
"Memory usage monitored and kept within reasonable bounds",
"Concurrent request handling implemented safely",
"Resource cleanup on server shutdown implemented"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T20:14:30.000Z"
},
{
"id": "quality-improvement-1753819225479",
"title": "Create Quality Improvement Tasks",
"description": "Analyze project quality issues and create specific tasks to reach 100% quality for all strikes",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "pending",
"prompt": "Project quality assessment shows issues preventing 100% strike success:\n\nQuality Status:\n- Strike 1 (Build): 100%\n- Strike 2 (Lint): 70% \n- Strike 3 (Tests): 100%\n\nIssues Found:\n- Lint and Code Quality: No ESLint configuration found\n\nTASK: Analyze these quality gaps and create specific improvement tasks to bring ALL strikes to 100% quality. Create tasks for:\n1. Build issues (missing dependencies, build failures, configuration)\n2. Code quality issues (lint errors, style violations, code standards)\n3. Testing issues (failing tests, missing coverage, test setup)\n\nInsert all improvement tasks BEFORE the strike review tasks. Strikes should always remain last in the task list.",
"success_criteria": [
"All quality issues identified and analyzed",
"Specific improvement tasks created for each quality gap",
"Tasks properly prioritized and ordered before strikes",
"Clear path to 100% quality established"
],
"important_files": [
"package.json",
"eslint.config.js",
"**/*.test.js"
],
"requires_research": true,
"is_quality_improvement_task": true,
"quality_analysis": {
"strike1": {
"name": "Build Verification",
"quality": 100,
"issues": []
},
"strike2": {
"name": "Lint and Code Quality",
"quality": 70,
"issues": [
"No ESLint configuration found"
]
},
"strike3": {
"name": "Test Coverage and Success",
"quality": 100,
"issues": []
},
"overallReady": false
}
},
{
"id": "review-strike-1",
"mode": "REVIEWER",
"description": "Review Strike 1: Ensure the project builds completely without errors",
"prompt": "Perform a comprehensive code review with focus on: Ensure the project builds completely without errors\n\nCheck the entire codebase and ensure this criterion is met.\n\n## CRITICAL FAILURE RESPONSE PROTOCOL\n\nIF BUILD FAILS: Create specific TASK CREATION tasks in TODO.json to fix build issues:\n- Missing dependencies installation tasks\n- Build configuration setup tasks \n- Compilation error resolution tasks\n- Environment setup tasks\n- Build script creation tasks\n\nCRITICAL: Use TaskManager API to add these tasks immediately when build failures are detected.\n\n## SUCCESS CRITERIA\n- Mark this review task as completed ONLY if the criterion is fully met\n- If criterion fails, you MUST create remediation tasks using TaskManager API before marking review as completed\n- All new tasks should be actionable, specific, and include proper dependencies/important_files parameters\n\n## TASK CREATION REQUIREMENT\nWhen creating remediation tasks, ensure each task includes:\n- Clear success criteria\n- Appropriate mode (DEVELOPMENT/TESTING/REFACTORING)\n- Specific file dependencies\n- Realistic time estimates\n- High priority for critical issues\n\nUse the task-creation.md guidelines for optimal task structure.",
"dependencies": [],
"important_files": [],
"status": "pending",
"requires_research": false,
"subtasks": [],
"is_review_task": true,
"strike_number": 1
},
{
"id": "review-strike-2",
"mode": "REVIEWER",
"description": "Review Strike 2: Verify no lint errors exist in the codebase",
"prompt": "Perform a comprehensive code review with focus on: Verify no lint errors exist in the codebase\n\nCheck the entire codebase and ensure this criterion is met.\n\n## CRITICAL FAILURE RESPONSE PROTOCOL\n\nIF LINT ERRORS FOUND: Create specific TASK CREATION tasks in TODO.json to achieve zero lint errors:\n- Linting tool setup and configuration tasks\n- Code style correction tasks\n- Import organization tasks\n- Naming convention fixes tasks\n- Dead code removal tasks\n\nCRITICAL: Use TaskManager API to add these tasks immediately when lint errors are detected.\n\n## SUCCESS CRITERIA\n- Mark this review task as completed ONLY if the criterion is fully met\n- If criterion fails, you MUST create remediation tasks using TaskManager API before marking review as completed\n- All new tasks should be actionable, specific, and include proper dependencies/important_files parameters\n\n## TASK CREATION REQUIREMENT\nWhen creating remediation tasks, ensure each task includes:\n- Clear success criteria\n- Appropriate mode (DEVELOPMENT/TESTING/REFACTORING)\n- Specific file dependencies\n- Realistic time estimates\n- High priority for critical issues\n\nUse the task-creation.md guidelines for optimal task structure.",
"dependencies": [],
"important_files": [],
"status": "pending",
"requires_research": false,
"subtasks": [],
"is_review_task": true,
"strike_number": 2
},
{
"id": "review-strike-3",
"mode": "REVIEWER",
"description": "Review Strike 3: Confirm test coverage is 100% on important modules and 90%+ on others, with all tests passing",
"prompt": "Perform a comprehensive code review with focus on: Confirm test coverage is 100% on important modules and 90%+ on others, with all tests passing\n\nCheck the entire codebase and ensure this criterion is met.\n\n## CRITICAL FAILURE RESPONSE PROTOCOL\n\nIF TEST COVERAGE INSUFFICIENT: Create specific TASK CREATION tasks in TODO.json to achieve required coverage:\n- Test framework setup tasks (Jest/Mocha/Vitest)\n- Unit test creation tasks for all modules\n- Integration test development tasks\n- Test coverage reporting setup tasks\n- CI/CD test integration tasks\n\nCRITICAL: Use TaskManager API to add these tasks immediately when coverage is below requirements.\n\n## SUCCESS CRITERIA\n- Mark this review task as completed ONLY if the criterion is fully met\n- If criterion fails, you MUST create remediation tasks using TaskManager API before marking review as completed\n- All new tasks should be actionable, specific, and include proper dependencies/important_files parameters\n\n## TASK CREATION REQUIREMENT\nWhen creating remediation tasks, ensure each task includes:\n- Clear success criteria\n- Appropriate mode (DEVELOPMENT/TESTING/REFACTORING)\n- Specific file dependencies\n- Realistic time estimates\n- High priority for critical issues\n\nUse the task-creation.md guidelines for optimal task structure.",
"dependencies": [],
"important_files": [],
"status": "pending",
"requires_research": false,
"subtasks": [],
"is_review_task": true,
"strike_number": 3
},
{
"id": "task-missing-dependencies-1753828182128",
"title": "Install Missing Python Dependencies",
"description": "Install required Python packages for Crawl4AI MCP server including fastmcp, crawl4ai, and pytest dependencies",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "pending",
"success_criteria": [
"requirements.txt file exists with all dependencies",
"All Python packages installed successfully",
"No import errors when running server",
"Development environment fully configured"
],
"important_files": [
"requirements.txt",
"crawl4ai_mcp_server.py"
],
"estimate": "1-2 hours",
"requires_research": false,
"created_at": "2025-07-29T22:29:42.129Z"
},
{
"id": "task-project-validation-1753828182129",
"title": "Validate Complete Project Functionality",
"description": "Run comprehensive validation of the Crawl4AI MCP server to ensure all components work correctly",
"mode": "TESTING",
"priority": "high",
"status": "pending",
"success_criteria": [
"MCP server starts without errors",
"All tools (get_page_structure, crawl_with_schema, take_screenshot) functional",
"MCP Inspector can interact with all tools successfully",
"No critical issues in development/linter-errors.md",
"Project ready for production use"
],
"important_files": [
"crawl4ai_mcp_server.py",
"development/linter-errors.md"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T22:29:42.129Z"
},
{
"id": "task-final-documentation-1753828182129",
"title": "Complete Project Documentation",
"description": "Finalize all documentation including README, integration guides, and troubleshooting information",
"mode": "DEVELOPMENT",
"priority": "medium",
"status": "pending",
"success_criteria": [
"README.md with complete setup instructions",
"Claude Desktop integration guide",
"Tool usage examples and troubleshooting",
"Development setup documentation",
"All documentation professionally formatted"
],
"important_files": [
"README.md",
"INTEGRATION.md",
"development/"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T22:29:42.129Z"
},
{
"id": "subtask-pytest-setup-1753828755652",
"title": "Set up pytest testing framework and configuration",
"description": "Configure pytest.ini, install test dependencies, and establish testing directory structure for the MCP server",
"mode": "TESTING",
"priority": "high",
"status": "completed",
"success_criteria": [
"pytest.ini configuration file created with proper settings",
"Test dependencies installed (pytest, pytest-asyncio, pytest-mock)",
"Testing directory structure established",
"pytest command runs without configuration errors",
"Test discovery working correctly"
],
"important_files": [
"pytest.ini",
"requirements.txt"
],
"estimate": "1 hour",
"requires_research": false,
"created_at": "2025-07-29T22:39:15.652Z"
},
{
"id": "subtask-mock-infrastructure-17538287556521",
"title": "Create mock infrastructure for external dependencies",
"description": "Build comprehensive mocking infrastructure for Crawl4AI, AsyncWebCrawler, and external web requests to enable isolated unit testing",
"mode": "TESTING",
"priority": "high",
"status": "completed",
"success_criteria": [
"Mock classes for AsyncWebCrawler and crawl results created",
"HTTP request mocking infrastructure established",
"Playwright browser mocking implemented",
"Test fixtures for common scenarios created",
"Mock data generation utilities implemented"
],
"important_files": [
"tests/conftest.py",
"tests/mocks.py"
],
"estimate": "2 hours",
"requires_research": false,
"created_at": "2025-07-29T22:39:15.652Z"
},
{
"id": "subtask-unit-tests-tools-17538287556522",
"title": "Implement unit tests for all MCP tools",
"description": "Create comprehensive unit tests for server_status, get_page_structure, crawl_with_schema, and take_screenshot tools using in-memory FastMCP client",
"mode": "TESTING",
"priority": "high",
"status": "in_progress",
"success_criteria": [
"Unit tests for server_status tool with all status scenarios",
"Unit tests for get_page_structure tool with HTML/markdown formats",
"Unit tests for crawl_with_schema tool with valid/invalid schemas",
"Unit tests for take_screenshot tool with success/failure cases",
"All tests use in-memory FastMCP client as specified",
"Test coverage above 80% for all tool functions"
],
"important_files": [
"tests/test_tools.py",
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T22:39:15.652Z"
},
{
"id": "subtask-integration-tests-17538287556523",
"title": "Create integration tests and coverage reporting",
"description": "Implement integration tests for MCP protocol compliance and establish comprehensive test coverage reporting",
"mode": "TESTING",
"priority": "medium",
"status": "pending",
"success_criteria": [
"Integration tests for MCP protocol compliance implemented",
"Test coverage reporting configured with pytest-cov",
"Coverage threshold enforcement set to 80% minimum",
"CI/CD compatible test output format configured",
"Test performance benchmarks established"
],
"important_files": [
"tests/test_integration.py",
"pytest.ini",
".coveragerc"
],
"estimate": "1-2 hours",
"requires_research": false,
"created_at": "2025-07-29T22:39:15.652Z"
},
{
"id": "task-linting-setup-1753833029261",
"title": "Set up Python linting configuration",
"description": "Configure ruff/black/isort for consistent Python code quality to address Strike 2 quality issues",
"mode": "DEVELOPMENT",
"priority": "high",
"status": "pending",
"success_criteria": [
"Python linting tools configured (ruff, black, isort)",
"pyproject.toml updated with linting configuration",
"All existing code passes linting checks",
"Linting integrated into development workflow",
"Strike 2 quality reaches 100%"
],
"important_files": [
"pyproject.toml",
"crawl4ai_mcp_server.py",
".ruff.toml"
],
"estimate": "1-2 hours",
"requires_research": false,
"created_at": "2025-07-29T23:50:29.261Z"
},
{
"id": "task-performance-monitoring-1753833029261",
"title": "Add performance monitoring and optimization",
"description": "Implement performance monitoring and optimize server resource usage for production readiness",
"mode": "DEVELOPMENT",
"priority": "medium",
"status": "pending",
"success_criteria": [
"Memory usage monitoring implemented",
"Request timeout optimization",
"Connection pooling configured",
"Performance benchmarks established",
"Resource cleanup on server shutdown"
],
"important_files": [
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T23:50:29.261Z"
},
{
"id": "task-deployment-readiness-1753833029261",
"title": "Validate deployment readiness and create deployment guide",
"description": "Ensure project is production-ready with deployment documentation and validation checklist",
"mode": "DEVELOPMENT",
"priority": "medium",
"status": "pending",
"success_criteria": [
"Complete deployment checklist created",
"Production deployment guide written",
"Security best practices documented",
"Performance benchmarks documented",
"All integration tests pass"
],
"important_files": [
"DEPLOYMENT.md",
"README.md",
"crawl4ai_mcp_server.py"
],
"estimate": "2-3 hours",
"requires_research": false,
"created_at": "2025-07-29T23:50:29.261Z"
}
],
"review_strikes": 0,
"strikes_completed_last_run": false,
"current_task_index": 0,
"last_mode": "RESEARCH",
"execution_count": 222,
"last_hook_activation": 1753833207351,
"__removedLinterTasks": {
"removedCount": 1,
"finalTaskCount": 24
}
}