mcp_server_webcrawl.crawlers.html•17.1 kB
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>mcp_server_webcrawl.crawlers package — mcp-server-webcrawl documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=e59714d7" />
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="mcp_server_webcrawl.crawlers.base package" href="mcp_server_webcrawl.crawlers.base.html" />
<link rel="prev" title="mcp_server_webcrawl package" href="mcp_server_webcrawl.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
mcp-server-webcrawl
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="guides.html">Setup Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="prompts.html">Prompt Routines</a></li>
<li class="toctree-l1"><a class="reference internal" href="interactive.html">Interactive Mode</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="modules.html">mcp_server_webcrawl</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="mcp_server_webcrawl.html">mcp_server_webcrawl package</a></li>
</ul>
</li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">mcp-server-webcrawl</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="modules.html">mcp_server_webcrawl</a></li>
<li class="breadcrumb-item"><a href="mcp_server_webcrawl.html">mcp_server_webcrawl package</a></li>
<li class="breadcrumb-item active">mcp_server_webcrawl.crawlers package</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/mcp_server_webcrawl.crawlers.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="mcp-server-webcrawl-crawlers-package">
<h1>mcp_server_webcrawl.crawlers package<a class="headerlink" href="#mcp-server-webcrawl-crawlers-package" title="Link to this heading"></a></h1>
<section id="subpackages">
<h2>Subpackages<a class="headerlink" href="#subpackages" title="Link to this heading"></a></h2>
<div class="toctree-wrapper compound">
<ul>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html">mcp_server_webcrawl.crawlers.base package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#module-mcp_server_webcrawl.crawlers.base.adapter">mcp_server_webcrawl.crawlers.base.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#module-mcp_server_webcrawl.crawlers.base.api">mcp_server_webcrawl.crawlers.base.api module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#module-mcp_server_webcrawl.crawlers.base.crawler">mcp_server_webcrawl.crawlers.base.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#module-mcp_server_webcrawl.crawlers.base.indexed">mcp_server_webcrawl.crawlers.base.indexed module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#module-mcp_server_webcrawl.crawlers.base.tests">mcp_server_webcrawl.crawlers.base.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.base.html#module-mcp_server_webcrawl.crawlers.base">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.archivebox.html">mcp_server_webcrawl.crawlers.archivebox package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.archivebox.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.archivebox.html#module-mcp_server_webcrawl.crawlers.archivebox.adapter">mcp_server_webcrawl.crawlers.archivebox.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.archivebox.html#module-mcp_server_webcrawl.crawlers.archivebox.crawler">mcp_server_webcrawl.crawlers.archivebox.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.archivebox.html#module-mcp_server_webcrawl.crawlers.archivebox.tests">mcp_server_webcrawl.crawlers.archivebox.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.archivebox.html#module-mcp_server_webcrawl.crawlers.archivebox">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.httrack.html">mcp_server_webcrawl.crawlers.httrack package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.httrack.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.httrack.html#module-mcp_server_webcrawl.crawlers.httrack.adapter">mcp_server_webcrawl.crawlers.httrack.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.httrack.html#module-mcp_server_webcrawl.crawlers.httrack.crawler">mcp_server_webcrawl.crawlers.httrack.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.httrack.html#module-mcp_server_webcrawl.crawlers.httrack.tests">mcp_server_webcrawl.crawlers.httrack.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.httrack.html#module-mcp_server_webcrawl.crawlers.httrack">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.interrobot.html">mcp_server_webcrawl.crawlers.interrobot package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.interrobot.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.interrobot.html#module-mcp_server_webcrawl.crawlers.interrobot.adapter">mcp_server_webcrawl.crawlers.interrobot.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.interrobot.html#module-mcp_server_webcrawl.crawlers.interrobot.crawler">mcp_server_webcrawl.crawlers.interrobot.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.interrobot.html#module-mcp_server_webcrawl.crawlers.interrobot.tests">mcp_server_webcrawl.crawlers.interrobot.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.interrobot.html#module-mcp_server_webcrawl.crawlers.interrobot">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.katana.html">mcp_server_webcrawl.crawlers.katana package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.katana.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.katana.html#module-mcp_server_webcrawl.crawlers.katana.adapter">mcp_server_webcrawl.crawlers.katana.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.katana.html#module-mcp_server_webcrawl.crawlers.katana.crawler">mcp_server_webcrawl.crawlers.katana.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.katana.html#module-mcp_server_webcrawl.crawlers.katana.tests">mcp_server_webcrawl.crawlers.katana.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.katana.html#module-mcp_server_webcrawl.crawlers.katana">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.siteone.html">mcp_server_webcrawl.crawlers.siteone package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.siteone.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.siteone.html#module-mcp_server_webcrawl.crawlers.siteone.adapter">mcp_server_webcrawl.crawlers.siteone.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.siteone.html#module-mcp_server_webcrawl.crawlers.siteone.crawler">mcp_server_webcrawl.crawlers.siteone.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.siteone.html#module-mcp_server_webcrawl.crawlers.siteone.tests">mcp_server_webcrawl.crawlers.siteone.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.siteone.html#module-mcp_server_webcrawl.crawlers.siteone">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.warc.html">mcp_server_webcrawl.crawlers.warc package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.warc.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.warc.html#module-mcp_server_webcrawl.crawlers.warc.adapter">mcp_server_webcrawl.crawlers.warc.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.warc.html#module-mcp_server_webcrawl.crawlers.warc.crawler">mcp_server_webcrawl.crawlers.warc.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.warc.html#module-mcp_server_webcrawl.crawlers.warc.tests">mcp_server_webcrawl.crawlers.warc.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.warc.html#module-mcp_server_webcrawl.crawlers.warc">Module contents</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="mcp_server_webcrawl.crawlers.wget.html">mcp_server_webcrawl.crawlers.wget package</a><ul>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.wget.html#submodules">Submodules</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.wget.html#module-mcp_server_webcrawl.crawlers.wget.adapter">mcp_server_webcrawl.crawlers.wget.adapter module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.wget.html#module-mcp_server_webcrawl.crawlers.wget.crawler">mcp_server_webcrawl.crawlers.wget.crawler module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.wget.html#module-mcp_server_webcrawl.crawlers.wget.tests">mcp_server_webcrawl.crawlers.wget.tests module</a></li>
<li class="toctree-l2"><a class="reference internal" href="mcp_server_webcrawl.crawlers.wget.html#module-mcp_server_webcrawl.crawlers.wget">Module contents</a></li>
</ul>
</li>
</ul>
</div>
</section>
<section id="module-mcp_server_webcrawl.crawlers">
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers" title="Link to this heading"></a></h2>
<dl class="py function">
<dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.get_fixture_directory">
<span class="sig-name descname"><span class="pre">get_fixture_directory</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers.html#get_fixture_directory"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.get_fixture_directory" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.get_crawler">
<span class="sig-name descname"><span class="pre">get_crawler</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler_name</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers.html#get_crawler"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.get_crawler" title="Link to this definition"></a></dt>
<dd><p>lazy load crawler, some classes have additional package dependencies</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>crawler_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – </p>
</dd>
<dt class="field-even">Return type<span class="colon">:</span></dt>
<dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)">str</a> | None</p>
</dd>
</dl>
</dd></dl>
</section>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="mcp_server_webcrawl.html" class="btn btn-neutral float-left" title="mcp_server_webcrawl package" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="mcp_server_webcrawl.crawlers.base.html" class="btn btn-neutral float-right" title="mcp_server_webcrawl.crawlers.base package" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>© Copyright 2025, pragmar.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>