Skip to main content
Glama
crawler_factory.py (2.56 kB)
import os
from typing import Optional, Type, Dict

from base import AbstractCrawler
from utils import load_single_subclass_from_source, logger, singleton


class UnknownCrawlerError(ValueError, TypeError):
    """Raised when no crawler class is registered under the requested name.

    It also derives from ``TypeError`` because this situation used to surface
    as ``TypeError: 'NoneType' object is not callable``; handlers written
    against that behaviour keep working.
    """


@singleton
class CrawlerFactory:
    """Registry of crawler classes discovered next to this file.

    On construction, every sub-directory of this file's directory is scanned
    for a ``crawler.py`` that defines a subclass of ``AbstractCrawler``.

    NOTE(review): ``singleton`` comes from ``utils``; the module-level helpers
    below call ``CrawlerFactory()`` repeatedly and presumably rely on it
    returning one shared instance -- confirm against ``utils.singleton``.
    """

    def __init__(self) -> None:
        # Directory name -> crawler class loaded from <directory>/crawler.py.
        self.crawler_module_extensions: Dict[str, Type[AbstractCrawler]] = {}
        # Directory name -> flag; set to False when the crawler is discovered.
        self.crawler_setup_source: Dict[str, bool] = {}
        self.get_crawlers()

    def get_crawlers(self) -> None:
        """Scan sibling directories and register each crawler class found.

        Directories whose name starts with ``__`` are ignored. A directory is
        skipped (with a log entry) when it has no ``crawler.py`` or when no
        ``AbstractCrawler`` subclass could be loaded from that file.
        """
        base_module_name = "crawler"
        crawler_file_name = base_module_name + ".py"
        crawler_module_path = os.path.dirname(os.path.abspath(__file__))

        crawler_module_dir_paths = [
            os.path.join(crawler_module_path, crawler_module_dir)
            for crawler_module_dir in os.listdir(crawler_module_path)
            if not crawler_module_dir.startswith("__")
            and os.path.isdir(os.path.join(crawler_module_path, crawler_module_dir))
        ]

        for crawler_module_dir_path in crawler_module_dir_paths:
            crawler_module_name = os.path.basename(crawler_module_dir_path)

            if crawler_file_name not in os.listdir(crawler_module_dir_path):
                logger.info(f"Missing {base_module_name}.py file in {crawler_module_dir_path}, Skip.")
                continue

            py_path = os.path.join(crawler_module_dir_path, crawler_file_name)
            crawler_module_class = load_single_subclass_from_source(
                module_name=f"extension.{crawler_module_name}.{base_module_name}",
                script_path=py_path,
                parent_type=AbstractCrawler,
            )
            if not crawler_module_class:
                logger.warning(f"Missing module Provider Class that extends moduleProvider in {py_path}, Skip.")
                continue

            self.crawler_module_extensions[crawler_module_name] = crawler_module_class
            self.crawler_setup_source[crawler_module_name] = False


def create_crawler_instance(module_name: str) -> AbstractCrawler:
    """Instantiate the crawler registered under ``module_name``.

    Args:
        module_name: Name of the crawler directory the class was loaded from.

    Returns:
        A new instance of the registered crawler class, built with no arguments.

    Raises:
        UnknownCrawlerError: If no crawler class is registered under
            ``module_name``.
    """
    crawler_module_class: Optional[Type[AbstractCrawler]] = (
        CrawlerFactory().crawler_module_extensions.get(module_name)
    )
    if crawler_module_class is None:
        logger.error("Warning: Publish module not defined for this source...")
        # Fail explicitly here instead of falling through and calling None.
        raise UnknownCrawlerError(
            f"No crawler module registered under name {module_name!r}"
        )
    return crawler_module_class()


def get_crawler_setup_source() -> Dict[str, bool]:
    """Return the factory's mapping of crawler name to its setup flag."""
    return CrawlerFactory().crawler_setup_source


def get_crawler_reset_source() -> Dict[str, bool]:
    """Set every crawler's setup flag back to False and return the mapping."""
    setup_source = get_crawler_setup_source()
    # Only values are reassigned, so the dict size is stable while iterating.
    for key in setup_source:
        setup_source[key] = False
    return setup_source

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Cyanty/Arcs-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.