<!-- index.html•10.1 kB -->
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<!-- Document metadata: encoding plus a single viewport declaration
     (a second, duplicate viewport meta was removed). -->
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>mcp-server-webcrawl — mcp-server-webcrawl documentation</title>
<!-- Stylesheets: syntax-highlighting styles and the theme stylesheet. -->
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=e59714d7" />
<!-- Scripts are loaded synchronously in this order; the inline script at the
     end of the body uses the jQuery and SphinxRtdTheme globals. -->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<!-- Document relations: index, search, and the next page in reading order. -->
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="Installation" href="installation.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<!-- Side navigation: project home link, documentation search form, and the
     top-level table of contents. -->
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="#" class="icon icon-home">
mcp-server-webcrawl
</a>
<!-- Search form: submits the "q" text field, plus two hidden fields, to
     search.html via GET. The text input is labelled with aria-label only. -->
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<!-- Top-level pages; the same six links are repeated in the page body's
     "Contents:" list. -->
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="guides.html">Setup Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="prompts.html">Prompt Routines</a></li>
<li class="toctree-l1"><a class="reference internal" href="interactive.html">Interactive Mode</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules.html">mcp_server_webcrawl</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="#">mcp-server-webcrawl</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<!-- Breadcrumb trail: icon-only home link (named via aria-label), the current
     page title, and a link to the page's reStructuredText source. -->
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="#" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">mcp-server-webcrawl</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/index.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<a class="reference internal image-reference" href="_images/mcpswc.svg"><img alt="mcp-server-webcrawl heading" class="align-center" src="_images/mcpswc.svg" width="100%" /></a>
<div style="text-align: center; margin-bottom: 2em;">
<a href="https://pragmar.com/mcp-server-webcrawl/" style="margin: 0 4px;">Website</a> |
<a href="https://github.com/pragmar/mcp-server-webcrawl" style="margin: 0 4px;">GitHub</a> |
<a href="https://pragmar.github.io/mcp-server-webcrawl/" style="margin: 0 4px;">Docs</a> |
<a href="https://pypi.org/project/mcp-server-webcrawl/" style="margin: 0 4px;">PyPI</a>
</div><section id="mcp-server-webcrawl">
<h1>mcp-server-webcrawl<a class="headerlink" href="#mcp-server-webcrawl" title="Link to this heading"></a></h1>
<p>Advanced search and retrieval for web crawler data. With <strong>mcp-server-webcrawl</strong>, your AI client filters
and analyzes web content under your direction or autonomously. The server includes a full-text search
interface with boolean support, and resource filtering by type, HTTP status, and more.</p>
<p><strong>mcp-server-webcrawl</strong> provides the LLM a complete menu with which to search your web content, and works with
a variety of web crawlers:</p>
<!-- Supported crawlers: one row per crawler/format, linking to the upstream
     project and to its setup guide. Header cells carry scope="col" so
     assistive technology associates each data cell with its column header. -->
<table class="docutils align-default" id="id7">
<caption><span class="caption-text">Supported Crawlers</span><a class="headerlink" href="#id7" title="Link to this table"></a></caption>
<colgroup>
<col style="width: 30.0%" />
<col style="width: 50.0%" />
<col style="width: 20.0%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head" scope="col"><p>Crawler/Format</p></th>
<th class="head" scope="col"><p>Description</p></th>
<th class="head" scope="col"><p>Setup Guide</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><a class="reference external" href="https://archivebox.io">ArchiveBox</a></p></td>
<td><p>Self-hosted web archiving tool</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/archivebox.html">Setup Guide</a></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference external" href="https://www.httrack.com">HTTrack</a></p></td>
<td><p>GUI/CLI website mirroring tool</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/httrack.html">Setup Guide</a></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference external" href="https://interro.bot">InterroBot</a></p></td>
<td><p>GUI crawler and analyzer</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/interrobot.html">Setup Guide</a></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference external" href="https://github.com/projectdiscovery/katana">Katana</a></p></td>
<td><p>CLI security-focused crawler</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/katana.html">Setup Guide</a></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference external" href="https://crawler.siteone.io">SiteOne</a></p></td>
<td><p>GUI crawler and analyzer</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/siteone.html">Setup Guide</a></p></td>
</tr>
<tr class="row-odd"><td><p><a class="reference external" href="https://en.wikipedia.org/wiki/WARC_(file_format)">WARC</a></p></td>
<td><p>Standard web archive format</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/warc.html">Setup Guide</a></p></td>
</tr>
<tr class="row-even"><td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Wget">wget</a></p></td>
<td><p>CLI website mirroring tool</p></td>
<td><p><a class="reference external" href="https://pragmar.github.io/mcp-server-webcrawl/guides/wget.html">Setup Guide</a></p></td>
</tr>
</tbody>
</table>
<p><strong>mcp-server-webcrawl</strong> is free and open source, and requires Claude Desktop and Python (&gt;=3.10). It is installed on the command line, via pip install:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pip<span class="w"> </span>install<span class="w"> </span>mcp-server-webcrawl
</pre></div>
</div>
<!-- Embedded YouTube player followed by the in-page table of contents.
     The iframe carries a title so screen readers can announce the frame.
     NOTE(review): the title is generic; replace it with the video's actual
     subject once confirmed. -->
<iframe width="560" height="315" style="display: block;margin-bottom:1rem;" src="https://www.youtube.com/embed/Sid-GBxII1o" title="YouTube video player" frameborder="0" allowfullscreen></iframe><div class="toctree-wrapper compound">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="guides.html">Setup Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="prompts.html">Prompt Routines</a></li>
<li class="toctree-l1"><a class="reference internal" href="interactive.html">Interactive Mode</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules.html">mcp_server_webcrawl</a></li>
</ul>
</div>
<!-- Reference entry points: general index, module index, and the search page. -->
<section id="indices-and-tables">
<h2>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Link to this heading"></a></h2>
<ul class="simple">
<li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li>
<li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li>
<li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li>
</ul>
</section>
</section>
</div>
</div>
<!-- Page footer: "Next" button (accesskey "n") to installation.html, the
     copyright notice, and the Sphinx / Read the Docs theme attribution. -->
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="installation.html" class="btn btn-neutral float-right" title="Installation" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>© Copyright 2025, pragmar.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
// Passing a function to jQuery() registers it as a DOM-ready handler, so the
// theme navigation is enabled only after the document has been parsed.
// Relies on the jQuery and SphinxRtdTheme globals from the scripts in <head>.
// NOTE(review): the `true` argument presumably turns on the sticky sidebar;
// confirm against the theme's theme.js.
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>