"""
This type stub file was generated by pyright.
"""
import pathlib
import httpx
from datetime import timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, TYPE_CHECKING, Union
from .async_logger import AsyncLoggerBase
from .async_configs import SeedingConfig
"""
async_url_seeder.py
Fast async URL discovery for Crawl4AI
Features
--------
* Common-Crawl streaming via httpx.AsyncClient (HTTP/2, keep-alive)
* robots.txt → sitemap chain (.gz + nested indexes) via async httpx
* Per-domain CDX result cache on disk (~/.crawl4ai/<index>_<domain>_<hash>.jsonl)
* Optional HEAD-only liveness check
* Optional partial <head> download + meta parsing
* Global hits-per-second rate-limit via asyncio.Semaphore
* Concurrency in the thousands — fine on a single event-loop
"""
# Module-level values whose concrete definitions were elided by the stub
# generator ("..." placeholders); names are kept for type-checker visibility.

# Optional-dependency feature flags — presumably set by import probes for
# lxml / brotli / rank-bm25 in the implementation (TODO confirm).
LXML = ...
HAS_BROTLI = ...
HAS_BM25 = ...

if TYPE_CHECKING:
    # Type-checking-only imports were elided by the stub generator.
    ...

# Common Crawl collection-info endpoint and cache time-to-live — the stub
# elides the actual values (TTL is presumably a timedelta; verify against
# the implementation).
COLLINFO_URL = ...
TTL = ...

# The "_rx" suffix suggests pre-compiled regex patterns for lightweight
# <head> parsing (meta tags, charset, title, link tags) — values elided
# by the stub generator, so this cannot be confirmed from here.
_meta_rx = ...
_charset_rx = ...
_title_rx = ...
_link_rx = ...
class AsyncUrlSeeder:
    """
    Async version of UrlSeeder.

    Call pattern is await / async for / async with.

    Public coroutines
    -----------------
    await seed.urls(...)
        returns List[Dict[str, Any]] (url, status, head_data)
    await seed.many_urls(...)
        returns Dict[str, List[Dict[str, Any]]]
    await seed.close()
        closes the HTTP client if owned by seeder

    Usage examples
    --------------
    # Manual cleanup:
    seeder = AsyncUrlSeeder()
    try:
        urls = await seeder.urls("example.com", config)
    finally:
        await seeder.close()

    # Using async context manager (recommended):
    async with AsyncUrlSeeder() as seeder:
        urls = await seeder.urls("example.com", config)

    # Reusing existing client:
    client = httpx.AsyncClient()
    seeder = AsyncUrlSeeder(client=client)
    urls = await seeder.urls("example.com", config)
    # No need to close seeder, as it doesn't own the client
    """
    def __init__(self, ttl: timedelta = ..., client: Optional[httpx.AsyncClient] = ..., logger: Optional[AsyncLoggerBase] = ..., base_directory: Optional[Union[str, pathlib.Path]] = ..., cache_root: Optional[Union[str, Path]] = ...) -> None:
        ...

    async def urls(self, domain: str, config: SeedingConfig) -> List[Dict[str, Any]]:
        """
        Fetch URLs for a domain using configuration from SeedingConfig.

        Parameters
        ----------
        domain : str
            The domain to fetch URLs for (e.g., "example.com")
        config : SeedingConfig
            Configuration object containing all seeding parameters
        """
        ...

    async def many_urls(self, domains: Sequence[str], config: SeedingConfig) -> Dict[str, List[Dict[str, Any]]]:
        """
        Fetch URLs for many domains in parallel.

        Parameters
        ----------
        domains : Sequence[str]
            List of domains to fetch URLs for
        config : SeedingConfig
            Configuration object containing all seeding parameters

        Returns a {domain: urls-list} dict.
        """
        ...

    async def extract_head_for_urls(self, urls: List[str], config: Optional[SeedingConfig] = ..., concurrency: int = ..., timeout: int = ...) -> List[Dict[str, Any]]:
        """
        Extract head content for a custom list of URLs using URLSeeder's parallel processing.

        This method reuses URLSeeder's efficient parallel processing, caching, and head
        extraction logic to process a custom list of URLs rather than discovering URLs
        from sources.

        Parameters
        ----------
        urls : List[str]
            List of URLs to extract head content from
        config : SeedingConfig, optional
            Configuration object. If None, uses default settings for head extraction
        concurrency : int, default=10
            Number of concurrent requests
        timeout : int, default=5
            Timeout for each request in seconds

        Returns
        -------
        List[Dict[str, Any]]
            List of dictionaries containing url, status, head_data, and optional relevance_score
        """
        ...

    async def close(self) -> None:
        """Close the HTTP client if we own it."""
        ...

    # Return type inferred by pyright as Self; spelled as a forward reference
    # here because typing.Self is not imported by this stub.
    async def __aenter__(self) -> "AsyncUrlSeeder":
        """Async context manager entry."""
        ...

    # pyright inferred Literal[False] (exceptions are never suppressed);
    # widened to bool here because typing.Literal is not imported by this stub.
    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Async context manager exit."""
        ...