<!-- re.html•52.7 kB -->
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
  <!-- Document metadata -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>re — mcp-server-webcrawl documentation</title>

  <!-- Stylesheets: syntax-highlighting palette first, then the theme -->
  <link rel="stylesheet" href="../_static/pygments.css?v=80d5e7a1">
  <link rel="stylesheet" href="../_static/css/theme.css?v=e59714d7">

  <!-- Scripts: loaded synchronously (no defer/async), in exactly this order -->
  <script src="../_static/jquery.js?v=5d32c60e"></script>
  <script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  <script src="../_static/documentation_options.js?v=5929fcd5"></script>
  <script src="../_static/doctools.js?v=888ff710"></script>
  <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="../_static/js/theme.js"></script>

  <!-- Related documents: general index and search page -->
  <link rel="index" title="Index" href="../genindex.html">
  <link rel="search" title="Search" href="../search.html">
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
  <div class="wy-side-scroll">
    <!-- Sidebar header: project home link and the documentation search form -->
    <div class="wy-side-nav-search">
      <a href="../index.html" class="icon icon-home">
        mcp-server-webcrawl
      </a>
      <div role="search">
        <!-- Submits a GET request to ../search.html with the query in "q" -->
        <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
          <input type="text" name="q" placeholder="Search docs" aria-label="Search docs">
          <input type="hidden" name="check_keywords" value="yes">
          <input type="hidden" name="area" value="default">
        </form>
      </div>
    </div>
    <!-- Top-level table of contents -->
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
      <!-- role="heading" requires aria-level; 2 matches the level assistive
           technology falls back to when the attribute is missing -->
      <p class="caption" role="heading" aria-level="2"><span class="caption-text">Contents:</span></p>
      <ul>
        <li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../guides.html">Setup Guides</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../usage.html">Usage</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../prompts.html">Prompt Routines</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../modules.html">mcp_server_webcrawl</a></li>
      </ul>
    </div>
  </div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
  <!-- Compact top bar (labelled "Mobile navigation menu"): menu icon plus a link to the docs home page -->
  <nav class="wy-nav-top" aria-label="Mobile navigation menu">
    <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
    <a href="../index.html">mcp-server-webcrawl</a>
  </nav>
<div class="wy-nav-content">
<div class="rst-content">
<!-- Breadcrumb trail: Home > Module code > re -->
<div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
    <li class="breadcrumb-item"><a href="index.html">Module code</a></li>
    <!-- Current page: plain text, not a link -->
    <li class="breadcrumb-item active">re</li>
    <!-- Aside slot; holds no links on this page -->
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>
  <hr>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for re</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Secret Labs' Regular Expression Engine</span>
<span class="c1">#</span>
<span class="c1"># re-compatible interface for the sre matching engine</span>
<span class="c1">#</span>
<span class="c1"># Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.</span>
<span class="c1">#</span>
<span class="c1"># This version of the SRE library can be redistributed under CNRI's</span>
<span class="c1"># Python 1.6 license. For any other use, please contact Secret Labs</span>
<span class="c1"># AB (info@pythonware.com).</span>
<span class="c1">#</span>
<span class="c1"># Portions of this engine have been developed in cooperation with</span>
<span class="c1"># CNRI. Hewlett-Packard provided funding for 1.6 integration and</span>
<span class="c1"># other compatibility work.</span>
<span class="c1">#</span>
<span class="sa">r</span><span class="sd">"""Support for regular expressions (RE).</span>
<span class="sd">This module provides regular expression matching operations similar to</span>
<span class="sd">those found in Perl. It supports both 8-bit and Unicode strings; both</span>
<span class="sd">the pattern and the strings being processed can contain null bytes and</span>
<span class="sd">characters outside the US ASCII range.</span>
<span class="sd">Regular expressions can contain both special and ordinary characters.</span>
<span class="sd">Most ordinary characters, like "A", "a", or "0", are the simplest</span>
<span class="sd">regular expressions; they simply match themselves. You can</span>
<span class="sd">concatenate ordinary characters, so last matches the string 'last'.</span>
<span class="sd">The special characters are:</span>
<span class="sd"> "." Matches any character except a newline.</span>
<span class="sd"> "^" Matches the start of the string.</span>
<span class="sd"> "$" Matches the end of the string or just before the newline at</span>
<span class="sd"> the end of the string.</span>
<span class="sd"> "*" Matches 0 or more (greedy) repetitions of the preceding RE.</span>
<span class="sd"> Greedy means that it will match as many repetitions as possible.</span>
<span class="sd"> "+" Matches 1 or more (greedy) repetitions of the preceding RE.</span>
<span class="sd"> "?" Matches 0 or 1 (greedy) of the preceding RE.</span>
<span class="sd"> *?,+?,?? Non-greedy versions of the previous three special characters.</span>
<span class="sd"> {m,n} Matches from m to n repetitions of the preceding RE.</span>
<span class="sd"> {m,n}? Non-greedy version of the above.</span>
<span class="sd"> "\\" Either escapes special characters or signals a special sequence.</span>
<span class="sd"> [] Indicates a set of characters.</span>
<span class="sd"> A "^" as the first character indicates a complementing set.</span>
<span class="sd"> "|" A|B, creates an RE that will match either A or B.</span>
<span class="sd"> (...) Matches the RE inside the parentheses.</span>
<span class="sd"> The contents can be retrieved or matched later in the string.</span>
<span class="sd"> (?aiLmsux) The letters set the corresponding flags defined below.</span>
<span class="sd"> (?:...) Non-grouping version of regular parentheses.</span>
<span class="sd"> (?P&lt;name&gt;...) The substring matched by the group is accessible by name.</span>
<span class="sd"> (?P=name) Matches the text matched earlier by the group named name.</span>
<span class="sd"> (?#...) A comment; ignored.</span>
<span class="sd"> (?=...) Matches if ... matches next, but doesn't consume the string.</span>
<span class="sd"> (?!...) Matches if ... doesn't match next.</span>
<span class="sd"> (?&lt;=...) Matches if preceded by ... (must be fixed length).</span>
<span class="sd"> (?&lt;!...) Matches if not preceded by ... (must be fixed length).</span>
<span class="sd"> (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,</span>
<span class="sd"> the (optional) no pattern otherwise.</span>
<span class="sd">The special sequences consist of "\\" and a character from the list</span>
<span class="sd">below. If the ordinary character is not on the list, then the</span>
<span class="sd">resulting RE will match the second character.</span>
<span class="sd"> \number Matches the contents of the group of the same number.</span>
<span class="sd"> \A Matches only at the start of the string.</span>
<span class="sd"> \Z Matches only at the end of the string.</span>
<span class="sd"> \b Matches the empty string, but only at the start or end of a word.</span>
<span class="sd"> \B Matches the empty string, but not at the start or end of a word.</span>
<span class="sd"> \d Matches any decimal digit; equivalent to the set [0-9] in</span>
<span class="sd"> bytes patterns or string patterns with the ASCII flag.</span>
<span class="sd"> In string patterns without the ASCII flag, it will match the whole</span>
<span class="sd"> range of Unicode digits.</span>
<span class="sd"> \D Matches any non-digit character; equivalent to [^\d].</span>
<span class="sd"> \s Matches any whitespace character; equivalent to [ \t\n\r\f\v] in</span>
<span class="sd"> bytes patterns or string patterns with the ASCII flag.</span>
<span class="sd"> In string patterns without the ASCII flag, it will match the whole</span>
<span class="sd"> range of Unicode whitespace characters.</span>
<span class="sd"> \S Matches any non-whitespace character; equivalent to [^\s].</span>
<span class="sd"> \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]</span>
<span class="sd"> in bytes patterns or string patterns with the ASCII flag.</span>
<span class="sd"> In string patterns without the ASCII flag, it will match the</span>
<span class="sd"> range of Unicode alphanumeric characters (letters plus digits</span>
<span class="sd"> plus underscore).</span>
<span class="sd"> With LOCALE, it will match the set [0-9_] plus characters defined</span>
<span class="sd"> as letters for the current locale.</span>
<span class="sd"> \W Matches the complement of \w.</span>
<span class="sd"> \\ Matches a literal backslash.</span>
<span class="sd">This module exports the following functions:</span>
<span class="sd"> match Match a regular expression pattern to the beginning of a string.</span>
<span class="sd"> fullmatch Match a regular expression pattern to all of a string.</span>
<span class="sd"> search Search a string for the presence of a pattern.</span>
<span class="sd"> sub Substitute occurrences of a pattern found in a string.</span>
<span class="sd"> subn Same as sub, but also return the number of substitutions made.</span>
<span class="sd"> split Split a string by the occurrences of a pattern.</span>
<span class="sd"> findall Find all occurrences of a pattern in a string.</span>
<span class="sd"> finditer Return an iterator yielding a Match object for each match.</span>
<span class="sd"> compile Compile a pattern into a Pattern object.</span>
<span class="sd"> purge Clear the regular expression cache.</span>
<span class="sd"> escape Backslash all non-alphanumerics in a string.</span>
<span class="sd">Each function other than purge and escape can take an optional 'flags' argument</span>
<span class="sd">consisting of one or more of the following module constants, joined by "|".</span>
<span class="sd">A, L, and U are mutually exclusive.</span>
<span class="sd"> A ASCII For string patterns, make \w, \W, \b, \B, \d, \D</span>
<span class="sd"> match the corresponding ASCII character categories</span>
<span class="sd"> (rather than the whole Unicode categories, which is the</span>
<span class="sd"> default).</span>
<span class="sd"> For bytes patterns, this flag is the only available</span>
<span class="sd"> behaviour and needn't be specified.</span>
<span class="sd"> I IGNORECASE Perform case-insensitive matching.</span>
<span class="sd"> L LOCALE Make \w, \W, \b, \B, dependent on the current locale.</span>
<span class="sd"> M MULTILINE "^" matches the beginning of lines (after a newline)</span>
<span class="sd"> as well as the string.</span>
<span class="sd"> "$" matches the end of lines (before a newline) as well</span>
<span class="sd"> as the end of the string.</span>
<span class="sd"> S DOTALL "." matches any character at all, including the newline.</span>
<span class="sd"> X VERBOSE Ignore whitespace and comments for nicer looking RE's.</span>
<span class="sd"> U UNICODE For compatibility only. Ignored for string patterns (it</span>
<span class="sd"> is the default), and forbidden for bytes patterns.</span>
<span class="sd">This module also defines an exception 'error'.</span>
<span class="sd">"""</span>
<span class="kn">import</span> <span class="nn">enum</span>
<span class="kn">import</span> <span class="nn">sre_compile</span>
<span class="kn">import</span> <span class="nn">sre_parse</span>
<span class="kn">import</span> <span class="nn">functools</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">_locale</span>
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
<span class="n">_locale</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># public symbols</span>
<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">"match"</span><span class="p">,</span> <span class="s2">"fullmatch"</span><span class="p">,</span> <span class="s2">"search"</span><span class="p">,</span> <span class="s2">"sub"</span><span class="p">,</span> <span class="s2">"subn"</span><span class="p">,</span> <span class="s2">"split"</span><span class="p">,</span>
<span class="s2">"findall"</span><span class="p">,</span> <span class="s2">"finditer"</span><span class="p">,</span> <span class="s2">"compile"</span><span class="p">,</span> <span class="s2">"purge"</span><span class="p">,</span> <span class="s2">"template"</span><span class="p">,</span> <span class="s2">"escape"</span><span class="p">,</span>
<span class="s2">"error"</span><span class="p">,</span> <span class="s2">"Pattern"</span><span class="p">,</span> <span class="s2">"Match"</span><span class="p">,</span> <span class="s2">"A"</span><span class="p">,</span> <span class="s2">"I"</span><span class="p">,</span> <span class="s2">"L"</span><span class="p">,</span> <span class="s2">"M"</span><span class="p">,</span> <span class="s2">"S"</span><span class="p">,</span> <span class="s2">"X"</span><span class="p">,</span> <span class="s2">"U"</span><span class="p">,</span>
<span class="s2">"ASCII"</span><span class="p">,</span> <span class="s2">"IGNORECASE"</span><span class="p">,</span> <span class="s2">"LOCALE"</span><span class="p">,</span> <span class="s2">"MULTILINE"</span><span class="p">,</span> <span class="s2">"DOTALL"</span><span class="p">,</span> <span class="s2">"VERBOSE"</span><span class="p">,</span>
<span class="s2">"UNICODE"</span><span class="p">,</span>
<span class="p">]</span>
<span class="n">__version__</span> <span class="o">=</span> <span class="s2">"2.2.1"</span>
<span class="k">class</span> <span class="nc">RegexFlag</span><span class="p">(</span><span class="n">enum</span><span class="o">.</span><span class="n">IntFlag</span><span class="p">):</span>
<span class="n">ASCII</span> <span class="o">=</span> <span class="n">A</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_ASCII</span> <span class="c1"># assume ascii "locale"</span>
<span class="n">IGNORECASE</span> <span class="o">=</span> <span class="n">I</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_IGNORECASE</span> <span class="c1"># ignore case</span>
<span class="n">LOCALE</span> <span class="o">=</span> <span class="n">L</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_LOCALE</span> <span class="c1"># assume current 8-bit locale</span>
<span class="n">UNICODE</span> <span class="o">=</span> <span class="n">U</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_UNICODE</span> <span class="c1"># assume unicode "locale"</span>
<span class="n">MULTILINE</span> <span class="o">=</span> <span class="n">M</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_MULTILINE</span> <span class="c1"># make anchors look for newline</span>
<span class="n">DOTALL</span> <span class="o">=</span> <span class="n">S</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_DOTALL</span> <span class="c1"># make dot match newline</span>
<span class="n">VERBOSE</span> <span class="o">=</span> <span class="n">X</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_VERBOSE</span> <span class="c1"># ignore whitespace and comments</span>
<span class="c1"># sre extensions (experimental, don't rely on these)</span>
<span class="n">TEMPLATE</span> <span class="o">=</span> <span class="n">T</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_TEMPLATE</span> <span class="c1"># disable backtracking</span>
<span class="n">DEBUG</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">SRE_FLAG_DEBUG</span> <span class="c1"># dump pattern after compilation</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_name_</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="sa">f</span><span class="s1">'re.</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_name_</span><span class="si">}</span><span class="s1">'</span>
<span class="n">value</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_value_</span>
<span class="n">members</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">negative</span> <span class="o">=</span> <span class="n">value</span> <span class="o">&lt;</span> <span class="mi">0</span>
<span class="k">if</span> <span class="n">negative</span><span class="p">:</span>
<span class="n">value</span> <span class="o">=</span> <span class="o">~</span><span class="n">value</span>
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="p">:</span>
<span class="k">if</span> <span class="n">value</span> <span class="o">&</span> <span class="n">m</span><span class="o">.</span><span class="n">_value_</span><span class="p">:</span>
<span class="n">value</span> <span class="o">&=</span> <span class="o">~</span><span class="n">m</span><span class="o">.</span><span class="n">_value_</span>
<span class="n">members</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s1">'re.</span><span class="si">{</span><span class="n">m</span><span class="o">.</span><span class="n">_name_</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
<span class="k">if</span> <span class="n">value</span><span class="p">:</span>
<span class="n">members</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">hex</span><span class="p">(</span><span class="n">value</span><span class="p">))</span>
<span class="n">res</span> <span class="o">=</span> <span class="s1">'|'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">members</span><span class="p">)</span>
<span class="k">if</span> <span class="n">negative</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">members</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
<span class="n">res</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'~(</span><span class="si">{</span><span class="n">res</span><span class="si">}</span><span class="s1">)'</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">res</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'~</span><span class="si">{</span><span class="n">res</span><span class="si">}</span><span class="s1">'</span>
<span class="k">return</span> <span class="n">res</span>
<span class="fm">__str__</span> <span class="o">=</span> <span class="nb">object</span><span class="o">.</span><span class="fm">__str__</span>
<span class="nb">globals</span><span class="p">()</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">RegexFlag</span><span class="o">.</span><span class="n">__members__</span><span class="p">)</span>
<span class="c1"># sre exception</span>
<span class="n">error</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">error</span>
<span class="c1"># --------------------------------------------------------------------</span>
<span class="c1"># public interface</span>
<span class="k">def</span> <span class="nf">match</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Try to apply the pattern at the start of the string, returning</span>
<span class="sd"> a Match object, or None if no match was found."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">string</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">fullmatch</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Try to apply the pattern to all of the string, returning</span>
<span class="sd"> a Match object, or None if no match was found."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">fullmatch</span><span class="p">(</span><span class="n">string</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">search</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Scan through string looking for a match to the pattern, returning</span>
<span class="sd"> a Match object, or None if no match was found."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">string</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">sub</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">repl</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">count</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Return the string obtained by replacing the leftmost</span>
<span class="sd"> non-overlapping occurrences of the pattern in string by the</span>
<span class="sd"> replacement repl. repl can be either a string or a callable;</span>
<span class="sd"> if a string, backslash escapes in it are processed. If it is</span>
<span class="sd"> a callable, it's passed the Match object and must return</span>
<span class="sd"> a replacement string to be used."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">repl</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">count</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">subn</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">repl</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">count</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Return a 2-tuple containing (new_string, number).</span>
<span class="sd"> new_string is the string obtained by replacing the leftmost</span>
<span class="sd"> non-overlapping occurrences of the pattern in the source</span>
<span class="sd"> string by the replacement repl. number is the number of</span>
<span class="sd"> substitutions that were made. repl can be either a string or a</span>
<span class="sd"> callable; if a string, backslash escapes in it are processed.</span>
<span class="sd"> If it is a callable, it's passed the Match object and must</span>
<span class="sd"> return a replacement string to be used."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">subn</span><span class="p">(</span><span class="n">repl</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">count</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">split</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">maxsplit</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Split the source string by the occurrences of the pattern,</span>
<span class="sd"> returning a list containing the resulting substrings. If</span>
<span class="sd"> capturing parentheses are used in pattern, then the text of all</span>
<span class="sd"> groups in the pattern are also returned as part of the resulting</span>
<span class="sd"> list. If maxsplit is nonzero, at most maxsplit splits occur,</span>
<span class="sd"> and the remainder of the string is returned as the final element</span>
<span class="sd"> of the list."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">string</span><span class="p">,</span> <span class="n">maxsplit</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">findall</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Return a list of all non-overlapping matches in the string.</span>
<span class="sd"> If one or more capturing groups are present in the pattern, return</span>
<span class="sd"> a list of groups; this will be a list of tuples if the pattern</span>
<span class="sd"> has more than one group.</span>
<span class="sd"> Empty matches are included in the result."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="n">string</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">finditer</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">string</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""Return an iterator over all non-overlapping matches in the</span>
<span class="sd"> string. For each match, the iterator returns a Match object.</span>
<span class="sd"> Empty matches are included in the result."""</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span><span class="o">.</span><span class="n">finditer</span><span class="p">(</span><span class="n">string</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="s2">"Compile a regular expression pattern, returning a Pattern object."</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">purge</span><span class="p">():</span>
<span class="s2">"Clear the regular expression caches"</span>
<span class="n">_cache</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
<span class="n">_compile_repl</span><span class="o">.</span><span class="n">cache_clear</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">template</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="s2">"Compile a template pattern, returning a Pattern object"</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="o">|</span><span class="n">T</span><span class="p">)</span>
<span class="c1"># SPECIAL_CHARS</span>
<span class="c1"># closing ')', '}' and ']'</span>
<span class="c1"># '-' (a range in character set)</span>
<span class="c1"># '&', '~', (extended character set operations)</span>
<span class="c1"># '#' (comment) and WHITESPACE (ignored) in verbose mode</span>
<span class="n">_special_chars_map</span> <span class="o">=</span> <span class="p">{</span><span class="n">i</span><span class="p">:</span> <span class="s1">'</span><span class="se">\\</span><span class="s1">'</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="sa">b</span><span class="s1">'()[]</span><span class="si">{}</span><span class="s1">?*+-|^$</span><span class="se">\\</span><span class="s1">.&~# </span><span class="se">\t\n\r\v\f</span><span class="s1">'</span><span class="p">}</span>
<span class="k">def</span> <span class="nf">escape</span><span class="p">(</span><span class="n">pattern</span><span class="p">):</span>
<span class="w"> </span><span class="sd">"""</span>
<span class="sd"> Escape special characters in a string.</span>
<span class="sd"> """</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">return</span> <span class="n">pattern</span><span class="o">.</span><span class="n">translate</span><span class="p">(</span><span class="n">_special_chars_map</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pattern</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="s1">'latin1'</span><span class="p">)</span>
<span class="k">return</span> <span class="n">pattern</span><span class="o">.</span><span class="n">translate</span><span class="p">(</span><span class="n">_special_chars_map</span><span class="p">)</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">'latin1'</span><span class="p">)</span>
<span class="n">Pattern</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">sre_compile</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span>
<span class="n">Match</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">sre_compile</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s1">''</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="s1">''</span><span class="p">))</span>
<span class="c1"># --------------------------------------------------------------------</span>
<span class="c1"># internals</span>
<span class="n">_cache</span> <span class="o">=</span> <span class="p">{}</span> <span class="c1"># ordered!</span>
<span class="n">_MAXCACHE</span> <span class="o">=</span> <span class="mi">512</span>
<span class="k">def</span> <span class="nf">_compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">):</span>
<span class="c1"># internal: compile pattern</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">flags</span><span class="p">,</span> <span class="n">RegexFlag</span><span class="p">):</span>
<span class="n">flags</span> <span class="o">=</span> <span class="n">flags</span><span class="o">.</span><span class="n">value</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">return</span> <span class="n">_cache</span><span class="p">[</span><span class="nb">type</span><span class="p">(</span><span class="n">pattern</span><span class="p">),</span> <span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">]</span>
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">Pattern</span><span class="p">):</span>
<span class="k">if</span> <span class="n">flags</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">"cannot process flags argument with a compiled pattern"</span><span class="p">)</span>
<span class="k">return</span> <span class="n">pattern</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">isstring</span><span class="p">(</span><span class="n">pattern</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"first argument must be string or compiled pattern"</span><span class="p">)</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">flags</span> <span class="o">&amp;</span> <span class="n">DEBUG</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">_cache</span><span class="p">)</span> <span class="o">>=</span> <span class="n">_MAXCACHE</span><span class="p">:</span>
<span class="c1"># Drop the oldest item</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">del</span> <span class="n">_cache</span><span class="p">[</span><span class="nb">next</span><span class="p">(</span><span class="nb">iter</span><span class="p">(</span><span class="n">_cache</span><span class="p">))]</span>
<span class="k">except</span> <span class="p">(</span><span class="ne">StopIteration</span><span class="p">,</span> <span class="ne">RuntimeError</span><span class="p">,</span> <span class="ne">KeyError</span><span class="p">):</span>
<span class="k">pass</span>
<span class="n">_cache</span><span class="p">[</span><span class="nb">type</span><span class="p">(</span><span class="n">pattern</span><span class="p">),</span> <span class="n">pattern</span><span class="p">,</span> <span class="n">flags</span><span class="p">]</span> <span class="o">=</span> <span class="n">p</span>
<span class="k">return</span> <span class="n">p</span>
<span class="nd">@functools</span><span class="o">.</span><span class="n">lru_cache</span><span class="p">(</span><span class="n">_MAXCACHE</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_compile_repl</span><span class="p">(</span><span class="n">repl</span><span class="p">,</span> <span class="n">pattern</span><span class="p">):</span>
<span class="c1"># internal: compile replacement pattern</span>
<span class="k">return</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">parse_template</span><span class="p">(</span><span class="n">repl</span><span class="p">,</span> <span class="n">pattern</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_expand</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">match</span><span class="p">,</span> <span class="n">template</span><span class="p">):</span>
<span class="c1"># internal: Match.expand implementation hook</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">parse_template</span><span class="p">(</span><span class="n">template</span><span class="p">,</span> <span class="n">pattern</span><span class="p">)</span>
<span class="k">return</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">expand_template</span><span class="p">(</span><span class="n">template</span><span class="p">,</span> <span class="n">match</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_subx</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span> <span class="n">template</span><span class="p">):</span>
<span class="c1"># internal: Pattern.sub/subn implementation helper</span>
<span class="n">template</span> <span class="o">=</span> <span class="n">_compile_repl</span><span class="p">(</span><span class="n">template</span><span class="p">,</span> <span class="n">pattern</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">template</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">template</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="c1"># literal replacement</span>
<span class="k">return</span> <span class="n">template</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">filter</span><span class="p">(</span><span class="n">match</span><span class="p">,</span> <span class="n">template</span><span class="o">=</span><span class="n">template</span><span class="p">):</span>
<span class="k">return</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">expand_template</span><span class="p">(</span><span class="n">template</span><span class="p">,</span> <span class="n">match</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">filter</span>
<span class="c1"># register myself for pickling</span>
<span class="kn">import</span> <span class="nn">copyreg</span>
<span class="k">def</span> <span class="nf">_pickle</span><span class="p">(</span><span class="n">p</span><span class="p">):</span>
<span class="k">return</span> <span class="n">_compile</span><span class="p">,</span> <span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">pattern</span><span class="p">,</span> <span class="n">p</span><span class="o">.</span><span class="n">flags</span><span class="p">)</span>
<span class="n">copyreg</span><span class="o">.</span><span class="n">pickle</span><span class="p">(</span><span class="n">Pattern</span><span class="p">,</span> <span class="n">_pickle</span><span class="p">,</span> <span class="n">_compile</span><span class="p">)</span>
<span class="c1"># --------------------------------------------------------------------</span>
<span class="c1"># experimental stuff (see python-dev discussions for details)</span>
<span class="k">class</span> <span class="nc">Scanner</span><span class="p">:</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lexicon</span><span class="p">,</span> <span class="n">flags</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">sre_constants</span> <span class="kn">import</span> <span class="n">BRANCH</span><span class="p">,</span> <span class="n">SUBPATTERN</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">flags</span><span class="p">,</span> <span class="n">RegexFlag</span><span class="p">):</span>
<span class="n">flags</span> <span class="o">=</span> <span class="n">flags</span><span class="o">.</span><span class="n">value</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lexicon</span> <span class="o">=</span> <span class="n">lexicon</span>
<span class="c1"># combine phrases into a compound pattern</span>
<span class="n">p</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">State</span><span class="p">()</span>
<span class="n">s</span><span class="o">.</span><span class="n">flags</span> <span class="o">=</span> <span class="n">flags</span>
<span class="k">for</span> <span class="n">phrase</span><span class="p">,</span> <span class="n">action</span> <span class="ow">in</span> <span class="n">lexicon</span><span class="p">:</span>
<span class="n">gid</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">opengroup</span><span class="p">()</span>
<span class="n">p</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sre_parse</span><span class="o">.</span><span class="n">SubPattern</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span>
<span class="p">(</span><span class="n">SUBPATTERN</span><span class="p">,</span> <span class="p">(</span><span class="n">gid</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">phrase</span><span class="p">,</span> <span class="n">flags</span><span class="p">))),</span>
<span class="p">]))</span>
<span class="n">s</span><span class="o">.</span><span class="n">closegroup</span><span class="p">(</span><span class="n">gid</span><span class="p">,</span> <span class="n">p</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="n">p</span> <span class="o">=</span> <span class="n">sre_parse</span><span class="o">.</span><span class="n">SubPattern</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[(</span><span class="n">BRANCH</span><span class="p">,</span> <span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">p</span><span class="p">))])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">scanner</span> <span class="o">=</span> <span class="n">sre_compile</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">scan</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">string</span><span class="p">):</span>
<span class="n">result</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">append</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">append</span>
<span class="n">match</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">scanner</span><span class="o">.</span><span class="n">scanner</span><span class="p">(</span><span class="n">string</span><span class="p">)</span><span class="o">.</span><span class="n">match</span>
<span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
<span class="n">m</span> <span class="o">=</span> <span class="n">match</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">m</span><span class="p">:</span>
<span class="k">break</span>
<span class="n">j</span> <span class="o">=</span> <span class="n">m</span><span class="o">.</span><span class="n">end</span><span class="p">()</span>
<span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="n">j</span><span class="p">:</span>
<span class="k">break</span>
<span class="n">action</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lexicon</span><span class="p">[</span><span class="n">m</span><span class="o">.</span><span class="n">lastindex</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">callable</span><span class="p">(</span><span class="n">action</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">match</span> <span class="o">=</span> <span class="n">m</span>
<span class="n">action</span> <span class="o">=</span> <span class="n">action</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">())</span>
<span class="k">if</span> <span class="n">action</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">append</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
<span class="n">i</span> <span class="o">=</span> <span class="n">j</span>
<span class="k">return</span> <span class="n">result</span><span class="p">,</span> <span class="n">string</span><span class="p">[</span><span class="n">i</span><span class="p">:]</span>
</pre></div>
</div>
</div>
<footer>
  <!-- Page footer: copyright notice followed by the Sphinx / Read the Docs theme credit. -->
  <hr>
  <div role="contentinfo">
    <p>© Copyright 2025, pragmar.</p>
  </div>
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a>
  using a <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
  provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
// Register a jQuery ready callback that switches on the theme's navigation
// via SphinxRtdTheme.Navigation.enable (provided by _static/js/theme.js).
// NOTE(review): the `true` argument presumably selects sticky navigation; confirm against theme.js.
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>