# $Id: nodes.py 10185 2025-07-28 20:10:46Z milde $
# Author: David Goodger <goodger@python.org>
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.
"""
Docutils document tree element class library.
The relationships and semantics of elements and attributes are documented in
`The Docutils Document Tree`__.
Classes in CamelCase are abstract base classes or auxiliary classes. The one
exception is `Text`, for a text (PCDATA) node; uppercase is used to
differentiate from element classes. Classes in lower_case_with_underscores
are element classes, matching the XML element generic identifiers in the DTD_.
The position of each node (the level at which it can occur) is significant and
is represented by abstract base classes (`Root`, `Structural`, `Body`,
`Inline`, etc.). Certain transformations will be easier because we can use
``isinstance(node, base_class)`` to determine the position of the node in the
hierarchy.
__ https://docutils.sourceforge.io/docs/ref/doctree.html
.. _DTD: https://docutils.sourceforge.io/docs/ref/docutils.dtd
"""
from __future__ import annotations
__docformat__ = 'reStructuredText'
import os
import re
import sys
import unicodedata
import warnings
from collections import Counter
# import xml.dom.minidom as dom # -> conditional import in Node.asdom()
# and document.asdom()
# import docutils.transforms # -> delayed import in document.__init__()
# `TYPE_CHECKING` is hard-coded to False, so nothing in the block below is
# executed at run time.  The imports and aliases exist only for annotations
# (presumably picked up by static type checkers, which conventionally treat
# a name `TYPE_CHECKING` as true -- confirm for the checker in use).
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import (Callable, Iterable, Iterator,
                                 Mapping, Sequence)
    from types import ModuleType
    from typing import Any, ClassVar, Final, Literal, Self, SupportsIndex

    from docutils.utils._typing import TypeAlias

    from xml.dom import minidom

    from docutils.frontend import Values
    from docutils.transforms import Transformer, Transform
    from docutils.utils import Reporter

    # Aliases describing `Element.content_model`: a tuple of
    # ``(category, quantifier)`` items.
    _ContentModelCategory: TypeAlias = tuple['Element' | tuple['Element', ...]]
    _ContentModelQuantifier = Literal['.', '?', '+', '*']
    _ContentModelItem: TypeAlias = tuple[_ContentModelCategory,
                                         _ContentModelQuantifier]
    _ContentModelTuple: TypeAlias = tuple[_ContentModelItem, ...]

    StrPath: TypeAlias = str | os.PathLike[str]
    """File system path. No bytes!"""

    # Signature of the ``update_fun`` argument of `Element.update_all_atts()`
    # (called there with the attribute name, the new value and `replace`).
    _UpdateFun: TypeAlias = Callable[[str, Any, bool], None]
# ==============================
# Functional Node Base Classes
# ==============================
class Node:
"""Abstract base class of nodes in a document tree."""
parent: Element | None = None
"""Back-reference to the Node immediately containing this Node."""
children: Sequence # defined in subclasses
"""List of child nodes (Elements or Text).
Override in subclass instances that are not terminal nodes.
"""
source: StrPath | None = None
"""Path or description of the input source which generated this Node."""
line: int | None = None
"""The line number (1-based) of the beginning of this Node in `source`."""
tagname: str # defined in subclasses
"""The element generic identifier."""
_document: document | None = None
@property
def document(self) -> document | None:
"""Return the `document` root node of the tree containing this Node.
"""
try:
return self._document or self.parent.document
except AttributeError:
return None
@document.setter
def document(self, value: document) -> None:
self._document = value
def __bool__(self) -> Literal[True]:
"""
Node instances are always true, even if they're empty. A node is more
than a simple container. Its boolean "truth" does not depend on
having one or more subnodes in the doctree.
Use `len()` to check node length.
"""
return True
def asdom(self,
dom: ModuleType | None = None,
) -> minidom.Document | minidom.Element | minidom.Text:
# TODO: minidom.Document is only returned by document.asdom()
# (which overwrites this base-class implementation)
"""Return a DOM **fragment** representation of this Node."""
if dom is None:
import xml.dom.minidom as dom
domroot = dom.Document()
return self._dom_node(domroot)
def pformat(self, indent: str = ' ', level: int = 0) -> str:
"""
Return an indented pseudo-XML representation, for test purposes.
Override in subclasses.
"""
raise NotImplementedError
def copy(self) -> Self:
"""Return a copy of self."""
raise NotImplementedError
def deepcopy(self) -> Self:
"""Return a deep copy of self (also copying children)."""
raise NotImplementedError
def astext(self) -> str:
"""Return a string representation of this Node."""
raise NotImplementedError
def setup_child(self, child) -> None:
child.parent = self
if self.document:
child.document = self.document
if child.source is None:
child.source = self.document.current_source
if child.line is None:
child.line = self.document.current_line
def walk(self, visitor: NodeVisitor) -> bool:
    """
    Traverse a tree of `Node` objects, calling the
    `dispatch_visit()` method of `visitor` when entering each
    node.  (The `walkabout()` method is similar, except it also
    calls the `dispatch_departure()` method before exiting each
    node.)

    This tree traversal supports limited in-place tree
    modifications.  Replacing one node with one or more nodes is
    OK, as is removing an element.  However, if the node removed
    or replaced occurs after the current node, the old node will
    still be traversed, and any new nodes will not.

    Within ``visit`` methods (and ``depart`` methods for
    `walkabout()`), `TreePruningException` subclasses may be raised
    (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

    Parameter `visitor`: A `NodeVisitor` object, containing a
    ``visit`` implementation for each `Node` subclass encountered.

    Return true if we should stop the traversal.
    """
    stop = False
    visitor.document.reporter.debug(
        'docutils.nodes.Node.walk calling dispatch_visit for %s'
        % self.__class__.__name__)
    try:
        try:
            visitor.dispatch_visit(self)
        except (SkipChildren, SkipNode):
            # Do not descend into this node; `stop` is still False here,
            # so the caller continues with the next sibling.
            return stop
        except SkipDeparture:  # not applicable; ignore
            pass
        children = self.children
        try:
            # Loop over a copy of the child list so that visitors may
            # modify `self.children` in place (see docstring).
            for child in children[:]:
                if child.walk(visitor):
                    # A descendant requested to stop: propagate upwards.
                    stop = True
                    break
        except SkipSiblings:
            # Raised while visiting a child: skip the remaining children.
            pass
    except StopTraversal:
        stop = True
    return stop
def walkabout(self, visitor: NodeVisitor) -> bool:
    """
    Perform a tree traversal similarly to `Node.walk()` (which
    see), except also call the `dispatch_departure()` method
    before exiting each node.

    Parameter `visitor`: A `NodeVisitor` object, containing a
    ``visit`` and ``depart`` implementation for each `Node`
    subclass encountered.

    Return true if we should stop the traversal.
    """
    call_depart = True
    stop = False
    visitor.document.reporter.debug(
        'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
        % self.__class__.__name__)
    try:
        try:
            visitor.dispatch_visit(self)
        except SkipNode:
            # Skip children *and* the departure call for this node.
            return stop
        except SkipDeparture:
            # Children are still traversed; only the departure is dropped.
            call_depart = False
        children = self.children
        try:
            # Loop over a copy of the child list so that visitors may
            # modify `self.children` in place.
            for child in children[:]:
                if child.walkabout(visitor):
                    # A descendant requested to stop: propagate upwards.
                    stop = True
                    break
        except SkipSiblings:
            # Raised while processing a child: skip the remaining children.
            pass
    except SkipChildren:
        # Unlike `SkipNode`, the departure call below still happens.
        pass
    except StopTraversal:
        stop = True
    if call_depart:
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_departure '
            'for %s' % self.__class__.__name__)
        visitor.dispatch_departure(self)
    return stop
def _fast_findall(self, cls: type) -> Iterator:
"""Return iterator that only supports instance checks."""
if isinstance(self, cls):
yield self
for child in self.children:
yield from child._fast_findall(cls)
def _superfast_findall(self) -> Iterator:
"""Return iterator that doesn't check for a condition."""
# This is different from ``iter(self)`` implemented via
# __getitem__() and __len__() in the Element subclass,
# which yields only the direct children.
yield self
for child in self.children:
yield from child._superfast_findall()
def findall(self,
            condition: type | Callable[[Node], bool] | None = None,
            include_self: bool = True,
            descend: bool = True,
            siblings: bool = False,
            ascend: bool = False,
            ) -> Iterator:
    """
    Return an iterator yielding nodes following `self`:

    * self (if `include_self` is true)
    * all descendants in tree traversal order (if `descend` is true)
    * the following siblings (if `siblings` is true) and their
      descendants (if also `descend` is true)
    * the following siblings of the parent (if `ascend` is true) and
      their descendants (if also `descend` is true), and so on.

    If `condition` is not None, the iterator yields only nodes
    for which ``condition(node)`` is true.  If `condition` is a
    type ``cls``, it is equivalent to a function consisting
    of ``return isinstance(node, cls)``.

    If `ascend` is true, assume `siblings` to be true as well.

    If the tree structure is modified during iteration, the result
    is undefined.

    For example, given the following tree::

        <paragraph>
            <emphasis>      <--- emphasis.findall() and
                <strong>    <--- strong.findall() are called.
                    Foo
                Bar
            <reference name="Baz" refid="baz">
                Baz

    Then tuple(emphasis.findall()) equals ::

        (<emphasis>, <strong>, <#text: Foo>, <#text: Bar>)

    and list(strong.findall(ascend=True)) equals ::

        [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
    """
    if ascend:
        siblings = True
    # Check for special argument combinations that allow using an
    # optimized version of findall()
    if include_self and descend and not siblings:
        if condition is None:
            yield from self._superfast_findall()
            return
        elif isinstance(condition, type):
            yield from self._fast_findall(condition)
            return
    # Check if `condition` is a class (check for TypeType for Python
    # implementations that use only new-style classes, like PyPy).
    if isinstance(condition, type):
        node_class = condition

        # Wrap the class in a predicate; the default argument binds the
        # class because the name `condition` is rebound by this ``def``.
        def condition(node, node_class=node_class):
            return isinstance(node, node_class)

    if include_self and (condition is None or condition(self)):
        yield self
    # The length check keeps nodes without children (e.g. `Text`, whose
    # `children` is an empty tuple) from being iterated over themselves.
    if descend and len(self.children):
        for child in self:
            yield from child.findall(condition=condition,
                                     include_self=True, descend=True,
                                     siblings=False, ascend=False)
    if siblings or ascend:
        node = self
        while node.parent:
            index = node.parent.index(node)
            # extra check since Text nodes have value-equality
            while node.parent[index] is not node:
                index = node.parent.index(node, index + 1)
            for sibling in node.parent[index+1:]:
                yield from sibling.findall(
                    condition=condition,
                    include_self=True, descend=descend,
                    siblings=False, ascend=False)
            if not ascend:
                break
            else:
                # Continue with the following siblings of the parent.
                node = node.parent
def traverse(self,
condition: type | Callable[[Node], bool] | None = None,
include_self: bool = True,
descend: bool = True,
siblings: bool = False,
ascend: bool = False,
) -> list:
"""Return list of nodes following `self`.
For looping, Node.findall() is faster and more memory efficient.
"""
# traverse() may be eventually removed:
warnings.warn('nodes.Node.traverse() is obsoleted by Node.findall().',
DeprecationWarning, stacklevel=2)
return list(self.findall(condition, include_self, descend,
siblings, ascend))
def next_node(self,
condition: type | Callable[[Node], bool] | None = None,
include_self: bool = False,
descend: bool = True,
siblings: bool = False,
ascend: bool = False,
) -> Node | None:
"""
Return the first node in the iterator returned by findall(),
or None if the iterable is empty.
Parameter list is the same as of `findall()`. Note that `include_self`
defaults to False, though.
"""
try:
return next(self.findall(condition, include_self,
descend, siblings, ascend))
except StopIteration:
return None
def validate(self, recursive: bool = True) -> None:
"""Raise ValidationError if this node is not valid.
Override in subclasses that define validity constraints.
"""
def validate_position(self) -> None:
"""Hook for additional checks of the parent's content model.
Raise ValidationError, if `self` is at an invalid position.
Override in subclasses with complex validity constraints. See
`subtitle.validate_position()` and `transition.validate_position()`.
"""
class Text(Node, str):  # NoQA: SLOT000 (Node doesn't define __slots__)
    """
    Terminal node (leaf) of the document tree holding text only.

    Text nodes have neither child nodes nor attributes.  Create one by
    passing a string to the constructor.

    ``str(<instance>)`` gives the raw (null-escaped) text,
    ``<instance>.astext()`` the unescaped text.
    """

    tagname: Final = '#text'

    children: Final = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data: str, rawsource: None = None) -> Self:
        """Create the node from the string `data`.

        Raise `TypeError` if `data` is a `bytes` instance.  Emit a
        `DeprecationWarning` if the ignored `rawsource` argument is used.
        """
        if isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        if rawsource is not None:
            warnings.warn('nodes.Text: initialization argument "rawsource" '
                          'is ignored and will be removed in Docutils 2.0.',
                          DeprecationWarning, stacklevel=2)
        return str.__new__(cls, data)

    def shortrepr(self, maxlen: int = 18) -> str:
        """Return a representation with the text cut down to `maxlen`."""
        shown = str(self)
        if len(shown) > maxlen:
            shown = shown[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, shown)

    def __repr__(self) -> str:
        """Return `shortrepr()` with a limit of 68 characters."""
        return self.shortrepr(maxlen=68)

    def astext(self) -> str:
        """Return the unescaped text as a plain `str`."""
        unescaped = unescape(self)
        return str(unescaped)

    def _dom_node(self, domroot: minidom.Document) -> minidom.Text:
        """Return a DOM text node (created by `domroot`) with the raw text."""
        raw = str(self)
        return domroot.createTextNode(raw)

    def copy(self) -> Self:
        """Return a new instance of the same class with the same text."""
        return self.__class__(str(self))

    def deepcopy(self) -> Self:
        """Same as `copy()`: there are no children to copy."""
        return self.copy()

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the text as indented lines (pseudo-XML output).

        If the document's ``detailed`` setting is true, return a
        ``<#text>`` tag line followed by the `repr()` of every line of
        the raw text.  Otherwise (also if the setting cannot be looked
        up) return the lines of `astext()`, each prefixed with
        ``indent * level``; the result is empty for empty text.
        """
        try:
            if self.document.settings.detailed:
                rows = ['%s%s' % (indent*level, '<#text>')]
                rows.extend(indent*(level+1) + repr(chunk)
                            for chunk in self.splitlines(True))
                return '\n'.join(rows) + '\n'
        except AttributeError:
            # No document or no "detailed" setting: use the plain format.
            pass
        prefix = indent * level
        rows = [prefix+row for row in self.astext().splitlines()]
        return '\n'.join(rows) + '\n' if rows else ''

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString.

    def rstrip(self, chars: str | None = None) -> Self:
        """Like `str.rstrip()`, but return an instance of this class."""
        stripped = str.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars: str | None = None) -> Self:
        """Like `str.lstrip()`, but return an instance of this class."""
        stripped = str.lstrip(self, chars)
        return self.__class__(stripped)
class Element(Node):
"""
`Element` is the superclass to all specific elements.
Elements contain attributes and child nodes.
They can be described as a cross between a list and a dictionary.
Elements emulate dictionaries for external [#]_ attributes, indexing by
attribute name (a string). To set the attribute 'att' to 'value', do::
element['att'] = 'value'
.. [#] External attributes correspond to the XML element attributes.
From its `Node` superclass, Element also inherits "internal"
class attributes that are accessed using the standard syntax, e.g.
``element.parent``.
There are two special attributes: 'ids' and 'names'. Both are
lists of unique identifiers: 'ids' conform to the regular expression
``[a-z](-?[a-z0-9]+)*`` (see the make_id() function for rationale and
details). 'names' serve as user-friendly interfaces to IDs; they are
case- and whitespace-normalized (see the fully_normalize_name() function).
Elements emulate lists for child nodes (element nodes and/or text
nodes), indexing by integer. To get the first child node, use::
element[0]
to iterate over the child nodes (without descending), use::
for child in element:
...
Elements may be constructed using the ``+=`` operator. To add one new
child node to element, do::
element += node
This is equivalent to ``element.append(node)``.
To add a list of multiple child nodes at once, use the same ``+=``
operator::
element += [node1, node2]
This is equivalent to ``element.extend([node1, node2])``.
"""
list_attributes: Final = ('ids', 'classes', 'names', 'dupnames')
"""Tuple of attributes that are initialized to empty lists.
NOTE: Derived classes should update this value when supporting
additional list attributes.
"""
valid_attributes: Final = list_attributes + ('source',)
"""Tuple of attributes that are valid for elements of this class.
NOTE: Derived classes should update this value when supporting
additional attributes.
"""
common_attributes: Final = valid_attributes
"""Tuple of `common attributes`__ known to all Doctree Element classes.
__ https://docutils.sourceforge.io/docs/ref/doctree.html#common-attributes
"""
known_attributes: Final = common_attributes
"""Alias for `common_attributes`. Will be removed in Docutils 2.0."""
basic_attributes: Final = list_attributes
"""Common list attributes. Deprecated. Will be removed in Docutils 2.0."""
local_attributes: Final = ('backrefs',)
"""Obsolete. Will be removed in Docutils 2.0."""
content_model: ClassVar[_ContentModelTuple] = ()
"""Python representation of the element's content model (cf. docutils.dtd).
A tuple of ``(category, quantifier)`` tuples with
:category: class or tuple of classes that are expected at this place(s)
in the list of children
:quantifier: string representation stating how many elements
of `category` are expected. Value is one of:
'.' (exactly one), '?' (zero or one),
'+' (one or more), '*' (zero or more).
NOTE: The default describes the empty element. Derived classes should
update this value to match their content model.
Provisional.
"""
tagname: str | None = None
"""The element generic identifier.
If None, it is set as an instance attribute to the name of the class.
"""
child_text_separator: Final = '\n\n'
"""Separator for child nodes, used by `astext()` method."""
def __init__(self,
rawsource: str = '',
*children,
**attributes: Any,
) -> None:
self.rawsource = rawsource
"""The raw text from which this element was constructed.
For informative and debugging purposes. Don't rely on its value!
NOTE: some elements do not set this value (default '').
"""
if isinstance(rawsource, Element):
raise TypeError('First argument "rawsource" must be a string.')
self.children: list = []
"""List of child nodes (elements and/or `Text`)."""
self.extend(children) # maintain parent info
self.attributes: dict[str, Any] = {}
"""Dictionary of attribute {name: value}."""
# Initialize list attributes.
for att in self.list_attributes:
self.attributes[att] = []
for att, value in attributes.items():
att = att.lower() # normalize attribute name
if att in self.list_attributes:
# lists are mutable; make a copy for this node
self.attributes[att] = value[:]
else:
self.attributes[att] = value
if self.tagname is None:
self.tagname: str = self.__class__.__name__
def _dom_node(self, domroot: minidom.Document) -> minidom.Element:
element = domroot.createElement(self.tagname)
for attribute, value in self.attlist():
if isinstance(value, list):
value = ' '.join(serial_escape('%s' % (v,)) for v in value)
element.setAttribute(attribute, '%s' % value)
for child in self.children:
element.appendChild(child._dom_node(domroot))
return element
def __repr__(self) -> str:
data = ''
for c in self.children:
data += c.shortrepr()
if len(data) > 60:
data = data[:56] + ' ...'
break
if self['names']:
return '<%s "%s": %s>' % (self.tagname,
'; '.join(self['names']), data)
else:
return '<%s: %s>' % (self.tagname, data)
def shortrepr(self) -> str:
if self['names']:
return '<%s "%s"...>' % (self.tagname, '; '.join(self['names']))
else:
return '<%s...>' % self.tagname
def __str__(self) -> str:
if self.children:
return '%s%s%s' % (self.starttag(),
''.join(str(c) for c in self.children),
self.endtag())
else:
return self.emptytag()
def starttag(self, quoteattr: Callable[[str], str] | None = None) -> str:
# the optional arg is used by the docutils_xml writer
if quoteattr is None:
quoteattr = pseudo_quoteattr
parts = [self.tagname]
for name, value in self.attlist():
if value is None: # boolean attribute
parts.append('%s="True"' % name)
continue
if isinstance(value, bool):
value = str(int(value))
if isinstance(value, list):
values = [serial_escape('%s' % (v,)) for v in value]
value = ' '.join(values)
else:
value = str(value)
value = quoteattr(value)
parts.append('%s=%s' % (name, value))
return '<%s>' % ' '.join(parts)
def endtag(self) -> str:
return '</%s>' % self.tagname
def emptytag(self) -> str:
attributes = ('%s="%s"' % (n, v) for n, v in self.attlist())
return '<%s/>' % ' '.join((self.tagname, *attributes))
def __len__(self) -> int:
return len(self.children)
def __contains__(self, key) -> bool:
# Test for both, children and attributes with operator ``in``.
if isinstance(key, str):
return key in self.attributes
return key in self.children
def __getitem__(self, key: str | int | slice) -> Any:
if isinstance(key, str):
return self.attributes[key]
elif isinstance(key, int):
return self.children[key]
elif isinstance(key, slice):
assert key.step in (None, 1), 'cannot handle slice with stride'
return self.children[key.start:key.stop]
else:
raise TypeError('element index must be an integer, a slice, or '
'an attribute name string')
def __setitem__(self, key, item) -> None:
if isinstance(key, str):
self.attributes[str(key)] = item
elif isinstance(key, int):
self.setup_child(item)
self.children[key] = item
elif isinstance(key, slice):
assert key.step in (None, 1), 'cannot handle slice with stride'
for node in item:
self.setup_child(node)
self.children[key.start:key.stop] = item
else:
raise TypeError('element index must be an integer, a slice, or '
'an attribute name string')
def __delitem__(self, key: str | int | slice) -> None:
if isinstance(key, str):
del self.attributes[key]
elif isinstance(key, int):
del self.children[key]
elif isinstance(key, slice):
assert key.step in (None, 1), 'cannot handle slice with stride'
del self.children[key.start:key.stop]
else:
raise TypeError('element index must be an integer, a simple '
'slice, or an attribute name string')
def __add__(self, other: list) -> list:
return self.children + other
def __radd__(self, other: list) -> list:
return other + self.children
def __iadd__(self, other) -> Self:
"""Append a node or a list of nodes to `self.children`."""
if isinstance(other, Node):
self.append(other)
elif other is not None:
self.extend(other)
return self
def astext(self) -> str:
return self.child_text_separator.join(
[child.astext() for child in self.children])
def non_default_attributes(self) -> dict[str, Any]:
atts = {key: value for key, value in self.attributes.items()
if self.is_not_default(key)}
return atts
def attlist(self) -> list[tuple[str, Any]]:
return sorted(self.non_default_attributes().items())
def get(self, key: str, failobj: Any | None = None) -> Any:
return self.attributes.get(key, failobj)
def hasattr(self, attr: str) -> bool:
return attr in self.attributes
def delattr(self, attr: str) -> None:
if attr in self.attributes:
del self.attributes[attr]
def setdefault(self, key: str, failobj: Any | None = None) -> Any:
return self.attributes.setdefault(key, failobj)
# Alias: `has_key()` is the same method as `hasattr()`.
has_key = hasattr
def get_language_code(self, fallback: str = '') -> str:
"""Return node's language tag.
Look iteratively in self and parents for a class argument
starting with ``language-`` and return the remainder of it
(which should be a `BCP49` language tag) or the `fallback`.
"""
for cls in self.get('classes', []):
if cls.startswith('language-'):
return cls.removeprefix('language-')
try:
return self.parent.get_language_code(fallback)
except AttributeError:
return fallback
def append(self, item) -> None:
self.setup_child(item)
self.children.append(item)
def extend(self, item: Iterable) -> None:
for node in item:
self.append(node)
def insert(self, index: SupportsIndex, item) -> None:
if isinstance(item, Node):
self.setup_child(item)
self.children.insert(index, item)
elif item is not None:
self[index:index] = item
def pop(self, i: int = -1):
return self.children.pop(i)
def remove(self, item) -> None:
self.children.remove(item)
def index(self, item, start: int = 0, stop: int = sys.maxsize) -> int:
return self.children.index(item, start, stop)
def previous_sibling(self):
"""Return preceding sibling node or ``None``."""
try:
i = self.parent.index(self)
except (AttributeError):
return None
return self.parent[i-1] if i > 0 else None
def section_hierarchy(self) -> list[section]:
    """Return the element's section hierarchy.

    Return a list of all <section> elements containing `self`
    (including `self` if it is a <section>), outermost first.

    List item ``[i]`` is the parent <section> of level i+1
    (1: section, 2: subsection, 3: subsubsection, ...).
    The length of the list is the element's section level.

    Provisional. May be changed or removed without warning.
    """
    innermost_first = []
    current = self
    # Follow the `parent` links up to the root, collecting sections.
    while current is not None:
        if isinstance(current, section):
            innermost_first.append(current)
        current = current.parent
    return innermost_first[::-1]
def is_not_default(self, key: str) -> bool:
if self[key] == [] and key in self.list_attributes:
return False
else:
return True
def update_basic_atts(self, dict_: Mapping[str, Any] | Element) -> None:
"""
Update basic attributes ('ids', 'names', 'classes',
'dupnames', but not 'source') from node or dictionary `dict_`.
Provisional.
"""
if isinstance(dict_, Node):
dict_ = dict_.attributes
for att in self.basic_attributes:
self.append_attr_list(att, dict_.get(att, []))
def append_attr_list(self, attr: str, values: Iterable[Any]) -> None:
"""
For each element in values, if it does not exist in self[attr], append
it.
NOTE: Requires self[attr] and values to be sequence type and the
former should specifically be a list.
"""
# List Concatenation
for value in values:
if value not in self[attr]:
self[attr].append(value)
def coerce_append_attr_list(
self, attr: str, value: list[Any] | Any) -> None:
"""
First, convert both self[attr] and value to a non-string sequence
type; if either is not already a sequence, convert it to a list of one
element. Then call append_attr_list.
NOTE: self[attr] and value both must not be None.
"""
# List Concatenation
if not isinstance(self.get(attr), list):
self[attr] = [self[attr]]
if not isinstance(value, list):
value = [value]
self.append_attr_list(attr, value)
def replace_attr(self, attr: str, value: Any, force: bool = True) -> None:
"""
If self[attr] does not exist or force is True or omitted, set
self[attr] to value, otherwise do nothing.
"""
# One or the other
if force or self.get(attr) is None:
self[attr] = value
def copy_attr_convert(
self, attr: str, value: Any, replace: bool = True) -> None:
"""
If attr is an attribute of self, set self[attr] to
[self[attr], value], otherwise set self[attr] to value.
NOTE: replace is not used by this function and is kept only for
compatibility with the other copy functions.
"""
if self.get(attr) is not value:
self.coerce_append_attr_list(attr, value)
def copy_attr_coerce(self, attr: str, value: Any, replace: bool) -> None:
"""
If attr is an attribute of self and either self[attr] or value is a
list, convert all non-sequence values to a sequence of 1 element and
then concatenate the two sequence, setting the result to self[attr].
If both self[attr] and value are non-sequences and replace is True or
self[attr] is None, replace self[attr] with value. Otherwise, do
nothing.
"""
if self.get(attr) is not value:
if isinstance(self.get(attr), list) or \
isinstance(value, list):
self.coerce_append_attr_list(attr, value)
else:
self.replace_attr(attr, value, replace)
def copy_attr_concatenate(
self, attr: str, value: Any, replace: bool) -> None:
"""
If attr is an attribute of self and both self[attr] and value are
lists, concatenate the two sequences, setting the result to
self[attr]. If either self[attr] or value are non-sequences and
replace is True or self[attr] is None, replace self[attr] with value.
Otherwise, do nothing.
"""
if self.get(attr) is not value:
if isinstance(self.get(attr), list) and \
isinstance(value, list):
self.append_attr_list(attr, value)
else:
self.replace_attr(attr, value, replace)
def copy_attr_consistent(
self, attr: str, value: Any, replace: bool) -> None:
"""
If replace is True or self[attr] is None, replace self[attr] with
value. Otherwise, do nothing.
"""
if self.get(attr) is not value:
self.replace_attr(attr, value, replace)
def update_all_atts(self,
                    dict_: Mapping[str, Any] | Element,
                    update_fun: _UpdateFun = copy_attr_consistent,
                    replace: bool = True,
                    and_source: bool = False,
                    ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_, the two values are
    merged based on the value of update_fun.  Generally, when replace is
    True, the values in self are replaced or merged with the values in
    dict_; otherwise, the values in self may be preserved or merged.  When
    and_source is True, the 'source' attribute is included in the copy.

    `update_fun` is called as ``update_fun(self, att, value, replace)``
    for every selected attribute.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list depending
          on the value of update_fun.
    NOTE: It is easier to call the update-specific methods than to pass
          the update_fun method to this function.
    """
    if isinstance(dict_, Node):
        dict_ = dict_.attributes

    # Include the source attribute when copying?
    selected = (self.is_not_list_attribute if and_source
                else self.is_not_known_attribute)

    # Copy the basic attributes
    self.update_basic_atts(dict_)

    # Update the other attributes of dict_ that pass the filter.
    # (All basic attributes should be copied already)
    for att in dict_:
        if selected(att):
            update_fun(self, att, dict_[att], replace)
def update_all_atts_consistantly(self,
                                 dict_: Mapping[str, Any] | Element,
                                 replace: bool = True,
                                 and_source: bool = False,
                                 ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `copy_attr_consistent()` as
    update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended; for all other
    attributes in dict_, the value in self is replaced with the value
    from dict_ if replace is True, otherwise the value in self is
    preserved.  When and_source is True, the 'source' attribute is
    included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute.
    """
    update_fun = Element.copy_attr_consistent
    self.update_all_atts(dict_, update_fun, replace, and_source)
def update_all_atts_concatenating(self,
                                  dict_: Mapping[str, Any] | Element,
                                  replace: bool = True,
                                  and_source: bool = False,
                                  ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `copy_attr_concatenate()` as
    update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended; for all other
    attributes in dict_, the values are concatenated if the values in
    self and dict_ are both lists; if they are not both lists, the
    value in self is replaced with the value from dict_ if replace is
    True and preserved otherwise.  When and_source is True, the
    'source' attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    update_fun = Element.copy_attr_concatenate
    self.update_all_atts(dict_, update_fun, replace, and_source)
def update_all_atts_coercion(self,
                             dict_: Mapping[str, Any] | Element,
                             replace: bool = True,
                             and_source: bool = False,
                             ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Shortcut for `update_all_atts()` with `copy_attr_coerce()` as
    update function: the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') are appended; for all other
    attributes in dict_, if either of the values in self and dict_ is a
    list, any non-list is converted to a 1-element list and the two
    lists are concatenated; if neither is a list, the value in self is
    replaced with the value from dict_ if replace is True and
    preserved otherwise.  When and_source is True, the 'source'
    attribute is included in the copy.

    NOTE: When replace is False, and self contains a 'source' attribute,
          'source' is not replaced even when dict_ has a 'source'
          attribute, though it may still be merged into a list.
    """
    update_fun = Element.copy_attr_coerce
    self.update_all_atts(dict_, update_fun, replace, and_source)
def update_all_atts_convert(self,
                            dict_: Mapping[str, Any] | Element,
                            and_source: bool = False,
                            ) -> None:
    """
    Updates all attributes from node or dictionary `dict_`.

    Thin wrapper: calls `update_all_atts()` with
    `Element.copy_attr_convert` as update function.

    Appends the basic attributes ('ids', 'names', 'classes',
    'dupnames', but not 'source') and then, for all other attributes in
    dict_, updates the same attribute in self.  When attributes with the
    same identifier appear in both self and dict_ then first any non-lists
    are converted to 1-element lists and then the two lists are
    concatenated and the result stored in self; otherwise, the values in
    self are preserved.  When and_source is True, the 'source' attribute
    is included in the copy.

    NOTE: This method has no `replace` parameter and passes none on;
          the default of `update_all_atts()` applies.  A 'source'
          attribute may still be merged into a list by the update
          function (here: `Element.copy_attr_convert`).
    """
    self.update_all_atts(dict_, Element.copy_attr_convert,
                         and_source=and_source)
def clear(self) -> None:
    """Drop all children by binding `self.children` to a new, empty list.

    The former child nodes themselves are not touched.
    """
    self.children = []
def replace(self, old, new) -> None:
    """Substitute the child `old` by `new`.

    `new` may be a single `Node`, a sequence of nodes that is spliced in
    at the position of `old`, or None (then nothing is changed).
    """
    position = self.index(old)
    if isinstance(new, Node):
        self.setup_child(new)
        self[position] = new
        return
    if new is None:
        return
    self[position:position+1] = new
def replace_self(self, new) -> None:
    """
    Put `new` (a node or a list of nodes) in the place of `self`.

    The basic attributes of `self` are handed to `new` (or to the first
    item, if `new` is a list) with `update_basic_atts()`.

    Provisional: the handling of node attributes will be revised.
    """
    if isinstance(new, Node):
        heir = new
    else:
        # `new` is a list; its first item takes over the attributes.
        try:
            heir = new[0]
        except IndexError:
            heir = None
    if isinstance(heir, Element):
        heir.update_basic_atts(self)
    else:
        # `heir` is a Text node or `new` is an empty list.
        # Assert that we aren't losing any attributes.
        for name in self.basic_attributes:
            assert not self[name], \
                'Losing "%s" attribute: %s' % (name, self[name])
    self.parent.replace(self, new)
def first_child_matching_class(self,
                               childclass: type[Element] | type[Text]
                               | tuple[type[Element] | type[Text], ...],
                               start: int = 0,
                               end: int = sys.maxsize,
                               ) -> int | None:
    """
    Return the index of the first child that is an instance of `childclass`.

    The test uses `isinstance()`: instances of subclasses match, too.
    Return None if no child in the given range matches.

    Parameters:

    - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
      classes. If a tuple, any of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    if not isinstance(childclass, tuple):
        childclass = (childclass,)
    # `isinstance()` accepts a tuple of classes, no inner loop required.
    for index in range(start, min(len(self), end)):
        if isinstance(self[index], childclass):
            return index
    return None
def first_child_not_matching_class(
        self,
        childclass: type[Element] | type[Text]
        | tuple[type[Element] | type[Text], ...],
        start: int = 0,
        end: int = sys.maxsize,
        ) -> int | None:
    """
    Return the index of the first child that is no instance of `childclass`.

    Return None if every child in the given range is an instance.

    Parameters:

    - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
      classes. If a tuple, none of the classes may match.
    - `start`: Initial index to check.
    - `end`: Initial index to *not* check.
    """
    skipped = childclass if isinstance(childclass, tuple) else (childclass,)
    stop = min(len(self), end)
    candidates = (position for position in range(start, stop)
                  if not isinstance(self.children[position], skipped))
    return next(candidates, None)
def pformat(self, indent: str = ' ', level: int = 0) -> str:
    """Return an indented pseudo-XML representation of the element.

    The start tag is written on its own line, prefixed with `indent`
    repeated `level` times; the children follow one level deeper.
    """
    pieces = ['%s%s\n' % (indent*level, self.starttag())]
    for child in self.children:
        pieces.append(child.pformat(indent, level+1))
    return ''.join(pieces)
def copy(self) -> Self:
    """Return a new instance of the same class without children.

    `rawsource` and the attributes are passed to the constructor;
    `_document`, `source`, and `line` are carried over afterwards.
    """
    clone = self.__class__(rawsource=self.rawsource, **self.attributes)
    for name in ('_document', 'source', 'line'):
        setattr(clone, name, getattr(self, name))
    return clone
def deepcopy(self) -> Self:
    """Return a copy of the element with deep copies of all children."""
    clone = self.copy()
    # Copy all children first, then attach them in one go.
    descendants = [node.deepcopy() for node in self.children]
    clone.extend(descendants)
    return clone
def note_referenced_by(self,
                       name: str | None = None,
                       id: str | None = None,
                       ) -> None:
    """Mark this Element as referenced by its name `name` or id `id`.

    Nodes registered for `name` or `id` in the optional
    ``expect_referenced_by_*`` dictionaries are marked as well.
    """
    self.referenced = True
    # Element.expect_referenced_by_* dictionaries map names or ids
    # to nodes whose ``referenced`` attribute is set to true as
    # soon as this node is referenced by the given name or id.
    # Needed for target propagation.
    lookups = (('expect_referenced_by_name', name),
               ('expect_referenced_by_id', id))
    for registry, key in lookups:
        dependent = getattr(self, registry, {}).get(key)
        if dependent:
            assert key is not None
            dependent.referenced = True
@classmethod
def is_not_list_attribute(cls, attr: str) -> bool:
    """
    Return True if and only if `attr` is NOT in `cls.list_attributes`.

    `list_attributes` holds the names of the list-valued attributes of
    the class; subclasses may extend it (`BackLinkable` adds 'backrefs').
    """
    return attr not in cls.list_attributes
@classmethod
def is_not_known_attribute(cls, attr: str) -> bool:
    """
    Return True if `attr` is NOT defined for all Element instances.

    The test is a membership check against `cls.common_attributes`.

    Provisional. May be removed in Docutils 2.0.
    """
    return attr not in cls.common_attributes
def validate_attributes(self) -> None:
    """Normalize and validate the attributes of this element.

    Every attribute value is passed through its `ATTRIBUTE_VALIDATORS`
    function and replaced by the result (conversion to the expected
    datatype, normalization).  Attributes whose name starts with
    "internal:" are skipped.

    Raise `ValidationError` listing all invalid attributes or values.

    Provisional.
    """
    problems = []
    for name, raw in self.attributes.items():
        if name.startswith('internal:'):
            continue  # see docs/user/config.html#expose-internals
        if name in self.valid_attributes:
            try:
                self.attributes[name] = ATTRIBUTE_VALIDATORS[name](raw)
            except (ValueError, TypeError, KeyError) as err:
                problems.append(
                    f'Attribute "{name}" has invalid value "{raw}".\n {err}')
        else:
            allowed = '", "'.join(self.valid_attributes)
            problems.append(f'Attribute "{name}" not one of "{allowed}".')
    if not problems:
        return
    raise ValidationError(f'Element {self.starttag()} invalid:\n '
                          + '\n '.join(problems),
                          problematic_element=self)
def validate_content(self,
                     model: _ContentModelTuple | None = None,
                     elements: Sequence | None = None,
                     ) -> list:
    """Test compliance of `elements` with `model`.

    :model: content model description, default `self.content_model`,
            a sequence of ``(category, quantifier)`` pairs with the
            quantifiers '.', '?', '+', and '*'.
    :elements: list of doctree elements, default `self.children`.

    Return list of children that do not fit in the model or raise
    `ValidationError` if the content does not comply with the `model`.

    Provisional.
    """
    if model is None:
        model = self.content_model
    if elements is None:
        elements = self.children
    # Walk model and children in parallel;
    # `child` is the next child not yet matched (None: no children left).
    ichildren = iter(elements)
    child = next(ichildren, None)
    for category, quantifier in model:
        if not isinstance(child, category):
            if quantifier in ('.', '+'):
                # required child is missing or of the wrong category
                raise ValidationError(self._report_child(child, category),
                                      problematic_element=child)
            else:  # quantifier in ('?', '*') -> optional child
                continue  # try same child with next part of content model
        else:
            # Check additional placement constraints (if applicable):
            child.validate_position()
        # advance:
        if quantifier in ('.', '?'):  # go to next element
            child = next(ichildren, None)
        else:  # if quantifier in ('*', '+'):  # pass all matching elements
            for child in ichildren:
                if not isinstance(child, category):
                    break
                # NOTE(review): the first matching child (above) is
                # checked without this guard; an AttributeError raised
                # *inside* `validate_position()` is silenced here, too.
                try:
                    child.validate_position()
                except AttributeError:
                    pass
            else:
                # iterator exhausted: all remaining children matched
                child = None
    # The first non-matching child and everything after it are leftovers.
    return [] if child is None else [child, *ichildren]
def _report_child(self,
child,
category: Element | Iterable[Element],
) -> str:
# Return a str reporting a missing child or child of wrong category.
try:
_type = category.__name__
except AttributeError:
_type = '> or <'.join(c.__name__ for c in category)
msg = f'Element {self.starttag()} invalid:\n'
if child is None:
return f'{msg} Missing child of type <{_type}>.'
if isinstance(child, Text):
return (f'{msg} Expecting child of type <{_type}>, '
f'not text data "{child.astext()}".')
return (f'{msg} Expecting child of type <{_type}>, '
f'not {child.starttag()}.')
def validate(self, recursive: bool = True) -> None:
    """Validate Docutils Document Tree element ("doctree").

    Check the attributes and the content of the element.
    Raise ValidationError if there are violations.
    If `recursive` is True, validate also the element's descendants.

    See `The Docutils Document Tree`__ for details of the
    Docutils Document Model.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html

    Provisional (work in progress).
    """
    self.validate_attributes()
    leftovers = self.validate_content()
    if leftovers:
        # Only the first child that does not fit is reported.
        surplus = leftovers[0]
        if isinstance(surplus, Text):
            raise ValidationError(f'Element {self.starttag()} invalid:\n'
                                  f' Spurious text: "{surplus.astext()}".',
                                  problematic_element=self)
        raise ValidationError(f'Element {self.starttag()} invalid:\n'
                              f' Child element {surplus.starttag()} '
                              'not allowed at this position.',
                              problematic_element=surplus)
    if not recursive:
        return
    for child in self:
        child.validate(recursive=recursive)
# ====================
# Element Categories
# ====================
#
# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-hierarchy.
# The category classes below are mixins for the element classes.
# Their use with ``isinstance(node, base_class)`` is described in the
# module docstring.

class Root:
    """Element at the root of a document tree."""


class Structural:
    """`Structural elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-elements
    """


class SubStructural:
    """`Structural subelements`__ are children of `Structural` elements.

    Most Structural elements accept only specific `SubStructural` elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #structural-subelements
    """


class Bibliographic:
    """`Bibliographic Elements`__ (displayed document meta-data).

    __ https://docutils.sourceforge.io/docs/ref/doctree.html
       #bibliographic-elements
    """


class Body:
    """`Body elements`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-elements
    """


class Admonition(Body):
    """Admonitions (distinctive and self-contained notices)."""
    # one or more body elements:
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class Sequential(Body):
    """List-like body elements."""


class General(Body):
    """Miscellaneous body elements."""


class Special(Body):
    """Special internal body elements."""


class Part:
    """`Body Subelements`__ always occur within specific parent elements.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#body-subelements
    """


class Decorative:
    """Decorative elements (`header` and `footer`).

    Children of `decoration`.
    """
    # one or more body elements:
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class Inline:
    """Inline elements contain text data and possibly other inline elements.
    """
# Orthogonal categories and Mixins
# ================================
class PreBibliographic:
    """Elements which may occur before Bibliographic Elements."""


class Invisible(Special, PreBibliographic):
    """Internal elements that don't appear in output."""


class Labeled:
    """Contains a `label` as its first element."""


class Resolvable:
    """Mixin providing the `resolved` flag (class default: False).

    Base class of `Referential` and `Targetable`.
    """
    resolved: bool = False


class BackLinkable:
    """Mixin for Elements that accept a "backrefs" attribute."""

    list_attributes: Final = Element.list_attributes + ('backrefs',)
    valid_attributes: Final = Element.valid_attributes + ('backrefs',)

    def add_backref(self: Element, refid: str) -> None:
        """Append `refid` to the "backrefs" attribute value."""
        # presumably 'backrefs' is pre-set to a list because it is
        # named in `list_attributes` -- verify in `Element.__init__()`.
        self['backrefs'].append(refid)


class Referential(Resolvable):
    """Elements holding a cross-reference (outgoing hyperlink)."""


class Targetable(Resolvable):
    """Cross-reference targets (incoming hyperlink)."""
    # NOTE(review): annotated as `int`, but
    # `Element.note_referenced_by()` assigns ``True``.
    referenced: int = 0

    indirect_reference_name: str | None = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    This was required for MoinMoin <= 1.9 compatibility.
    Deprecated, will be removed in Docutils 1.0.
    """


class Titular:
    """Title, sub-title, or informal heading (rubric)."""
class TextElement(Element):
    """
    An element which directly contains text.

    All children are `Text` or `Inline` subclass nodes.  To find out
    whether an element's context is inline, check whether its immediate
    parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    When passing children to `__init__()`, set `text` to ``''`` or some
    other suitable value.
    """
    content_model: Final = (((Text, Inline), '*'),)
    # (#PCDATA | %inline.elements;)*

    child_text_separator: Final = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        # A non-empty `text` becomes the first child (a `Text` node).
        leading = (Text(text),) if text else ()
        Element.__init__(self, rawsource, *leading, *children,
                         **attributes)
class FixedTextElement(TextElement):
    """An element which directly contains preformatted text.

    The "xml:space" attribute is set to "preserve" on every instance.
    """

    valid_attributes: Final = Element.valid_attributes + ('xml:space',)

    def __init__(self,
                 rawsource: str = '',
                 text: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        super().__init__(rawsource, text, *children, **attributes)
        # Set after the base initialisation: a value passed in
        # `attributes` is overwritten.
        self.attributes['xml:space'] = 'preserve'
class PureTextElement(TextElement):
    """An element which only contains text, no children.

    The content model allows at most one `Text` node.
    """
    content_model: Final = ((Text, '?'),)  # (#PCDATA)
# =================================
# Concrete Document Tree Elements
# =================================
#
# See https://docutils.sourceforge.io/docs/ref/doctree.html#element-reference
# Decorative Elements
# ===================
# `Decorative` comes first in the bases, so its `content_model`
# (one or more body elements) applies to both classes.
class header(Decorative, Element): pass
class footer(Decorative, Element): pass
# Structural Subelements
# ======================
class title(Titular, PreBibliographic, SubStructural, TextElement):
    """Title of `document`, `section`, `topic` and generic `admonition`.

    Accepts the attributes "auto" and "refid" in addition to the
    attributes valid for all elements.
    """
    valid_attributes: Final = Element.valid_attributes + ('auto', 'refid')
class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
    """Sub-title of `document`, `section` and `sidebar`."""

    def validate_position(self) -> None:
        """Ensure that the subtitle is not the first child of its parent.

        Raise `ValidationError` otherwise: a subtitle must follow a title.
        """
        parent = self.parent
        if not parent or parent.index(self) != 0:
            return
        raise ValidationError(f'Element {parent.starttag()} invalid:'
                              '\n <subtitle> only allowed after <title>.',
                              problematic_element=self)
class meta(PreBibliographic, SubStructural, Element):
    """Container for "invisible" bibliographic data, or meta-data."""
    # attributes accepted in addition to the common ones:
    valid_attributes: Final = Element.valid_attributes + (
        'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')


class docinfo(SubStructural, Element):
    """Container for displayed document meta-data."""
    # one or more `Bibliographic` elements:
    content_model: Final = ((Bibliographic, '+'),)
    # (%bibliographic.elements;)+
class decoration(PreBibliographic, SubStructural, Element):
    """Container for `header` and `footer`."""
    content_model: Final = ((header, '?'),  # Empty element doesn't make sense,
                            (footer, '?'),  # but is simpler to define.
                            )
    # (header?, footer?)

    def get_header(self) -> header:
        """Return the `header` child; insert a new one first, if missing."""
        kids = self.children
        if not (kids and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self) -> footer:
        """Return the `footer` child; append a new one first, if missing."""
        kids = self.children
        if not (kids and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]
class transition(SubStructural, Element):
    """Transitions__ are breaks between untitled text parts.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
    """

    def validate_position(self) -> None:
        """Check additional constraints on `transition` placement.

        A transition may not begin or end a section or document,
        nor may two transitions be immediately adjacent.

        Raise `ValidationError` listing all violated constraints.
        """
        heading = f'Element {self.parent.starttag()} invalid:'
        problems = []
        before = self.previous_sibling()
        # A transition following title, subtitle, meta, or decoration
        # still counts as "at the beginning of a document or section".
        at_start = (before is None  # index == 0
                    or isinstance(before,
                                  (title, subtitle, meta, decoration)))
        if at_start:
            problems.append(
                '<transition> may not begin a section or document.')
        if self.parent.index(self) == len(self.parent) - 1:
            problems.append('<transition> may not end a section or document.')
        if isinstance(before, transition):
            problems.append(
                '<transition> may not directly follow another transition.')
        if problems:
            raise ValidationError('\n '.join([heading, *problems]),
                                  problematic_element=self)
# Structural Elements
# ===================
class topic(Structural, Element):
    """
    Topics__ are non-recursive, mini-sections.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#topic
    """
    # optional title, followed by one or more body elements:
    content_model: Final = ((title, '?'), (Body, '+'))
    # (title?, (%body.elements;)+)


class sidebar(Structural, Element):
    """
    Sidebars__ are like parallel documents providing related material.

    A sidebar is typically offset by a border and "floats" to the side
    of the page

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#sidebar
    """
    # The model itself would accept a subtitle without title, see below.
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            ((topic, Body), '+'),
                            )
    # ((title, subtitle?)?, (%body.elements; | topic)+)
    # "subtitle only after title" is ensured in `subtitle.validate_position()`.
class section(Structural, Element):
    """Document section__. The main unit of hierarchy.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
    """
    # recursive content model, see below


# The content model refers to `section` itself, therefore it is
# assigned after the class definition.
section.content_model = ((title, '.'),
                         (subtitle, '?'),
                         ((Body, topic, sidebar, transition), '*'),
                         ((section, transition), '*'),
                         )
# (title, subtitle?, %structure.model;)
# Correct transition placement is ensured in `transition.validate_position()`.
# Root Element
# ============
class document(Root, Element):
    """
    The document root element.

    Do not instantiate this class directly; use
    `docutils.utils.new_document()` instead.
    """
    valid_attributes: Final = Element.valid_attributes + ('title',)
    content_model: Final = ((title, '?'),
                            (subtitle, '?'),
                            (meta, '*'),
                            (decoration, '?'),
                            (docinfo, '?'),
                            (transition, '?'),
                            ((Body, topic, sidebar, transition), '*'),
                            ((section, transition), '*'),
                            )
    # ( (title, subtitle?)?,
    #   meta*,
    #   decoration?,
    #   (docinfo, transition?)?,
    #   %structure.model; )
    # Additional restrictions for `subtitle` and `transition` are tested
    # with the respective `validate_position()` methods.

    def __init__(self,
                 settings: Values,
                 reporter: Reporter,
                 *args,
                 **kwargs: Any,
                 ) -> None:
        # `args` and `kwargs` are passed on to `Element.__init__()`.
        Element.__init__(self, *args, **kwargs)

        self.current_source: StrPath | None = None
        """Path to or description of the input source being processed."""

        self.current_line: int | None = None
        """Line number (1-based) of `current_source`."""

        self.settings: Values = settings
        """Runtime settings data record."""

        self.reporter: Reporter = reporter
        """System message generator."""

        self.indirect_targets: list[target] = []
        """List of indirect target nodes."""

        self.substitution_defs: dict[str, substitution_definition] = {}
        """Mapping of substitution names to substitution_definition nodes."""

        self.substitution_names: dict[str, str] = {}
        """Mapping of case-normalized to case-sensitive substitution names."""

        self.refnames: dict[str, list[Element]] = {}
        """Mapping of names to lists of referencing nodes."""

        self.refids: dict[str, list[Element]] = {}
        """Mapping of ids to lists of referencing nodes."""

        # The value None marks a "dupname", see `set_name_id_map()`.
        self.nameids: dict[str, str | None] = {}
        """Mapping of names to unique id's."""

        self.nametypes: dict[str, bool] = {}
        """Mapping of names to hyperlink type. True: explicit, False: implicit.
        """

        self.ids: dict[str, Element] = {}
        """Mapping of ids to nodes."""

        self.footnote_refs: dict[str, list[footnote_reference]] = {}
        """Mapping of footnote labels to lists of footnote_reference nodes."""

        self.citation_refs: dict[str, list[citation_reference]] = {}
        """Mapping of citation labels to lists of citation_reference nodes."""

        self.autofootnotes: list[footnote] = []
        """List of auto-numbered footnote nodes."""

        self.autofootnote_refs: list[footnote_reference] = []
        """List of auto-numbered footnote_reference nodes."""

        self.symbol_footnotes: list[footnote] = []
        """List of symbol footnote nodes."""

        self.symbol_footnote_refs: list[footnote_reference] = []
        """List of symbol footnote_reference nodes."""

        self.footnotes: list[footnote] = []
        """List of manually-numbered footnote nodes."""

        self.citations: list[citation] = []
        """List of citation nodes."""

        self.autofootnote_start: int = 1
        """Initial auto-numbered footnote number."""

        self.symbol_footnote_start: int = 0
        """Initial symbol footnote symbol index."""

        # The keys are ID prefixes (strings), see `set_id()`.
        self.id_counter: Counter[str] = Counter()
        """Numbers added to otherwise identical IDs."""

        self.parse_messages: list[system_message] = []
        """System messages generated while parsing."""

        self.transform_messages: list[system_message] = []
        """System messages generated while applying transforms."""

        # delayed import, see the comment near the module-level imports
        import docutils.transforms
        self.transformer: Transformer = docutils.transforms.Transformer(self)
        """Storage for transforms to be applied to this document."""

        self.include_log: list[tuple[StrPath, tuple]] = []
        """The current source's parents (to detect inclusion loops)."""

        self.decoration: decoration | None = None
        """Document's `decoration` node."""

        self._document: document = self

    def __getstate__(self) -> dict[str, Any]:
        """
        Return dict with unpicklable references removed.

        The entries 'reporter' and 'transformer' are set to None in a
        (shallow) copy of the instance dictionary.
        """
        state = self.__dict__.copy()
        state['reporter'] = None
        state['transformer'] = None
        return state

    def asdom(self, dom: ModuleType | None = None) -> minidom.Document:
        """Return a DOM representation of this document.

        `dom` is the DOM implementation module to use,
        default: `xml.dom.minidom` (imported on demand).
        """
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        domroot.appendChild(self._dom_node(domroot))
        return domroot

    def set_id(self,
               node: Element,
               msgnode: Element | None = None,
               suggested_prefix: str = '',
               ) -> str:
        """Register the IDs of `node` or generate, set, and register one.

        If `node` has IDs, register them in `self.ids`, report an error
        for every ID already used by another node (the message is also
        appended to `msgnode`, if given), and return the last ID.

        Otherwise derive an ID from the first of the node's names that
        yields a non-empty, unused ID; if there is none, build one from
        a prefix and a counter.  Append the new ID to ``node['ids']``,
        register it, and return it.
        """
        if node['ids']:
            # register and check for duplicates
            for id in node['ids']:
                self.ids.setdefault(id, node)
                if self.ids[id] is not node:
                    msg = self.reporter.error(f'Duplicate ID: "{id}" used by '
                                              f'{self.ids[id].starttag()} '
                                              f'and {node.starttag()}',
                                              base_node=node)
                    if msgnode is not None:
                        msgnode += msg
            return id
        # generate and set id
        id_prefix = self.settings.id_prefix
        auto_id_prefix = self.settings.auto_id_prefix
        base_id = ''
        id = ''
        for name in node['names']:
            if id_prefix:  # allow names starting with numbers
                base_id = make_id('x'+name)[1:]
            else:
                base_id = make_id(name)
            # TODO: normalize id-prefix? (would make code simpler)
            id = id_prefix + base_id
            if base_id and id not in self.ids:
                break
        else:
            # no name-derived ID is usable -> number the ID
            if base_id and auto_id_prefix.endswith('%'):
                # disambiguate name-derived ID
                # TODO: remove second condition after announcing change
                prefix = id + '-'
            else:
                prefix = id_prefix + auto_id_prefix
                if prefix.endswith('%'):
                    prefix = f"""{prefix[:-1]}{suggested_prefix
                                 or make_id(node.tagname)}-"""
            while True:
                self.id_counter[prefix] += 1
                id = f'{prefix}{self.id_counter[prefix]}'
                if id not in self.ids:
                    break
        node['ids'].append(id)
        self.ids[id] = node
        return id

    def set_name_id_map(self,
                        node: Element,
                        id: str,
                        msgnode: Element | None = None,
                        explicit: bool = False,
                        ) -> None:
        """
        Update the name/id mappings.

        `self.nameids` maps names to IDs. The value ``None`` indicates
        that the name is a "dupname" (i.e. there are already at least
        two targets with the same name and type).

        `self.nametypes` maps names to booleans representing
        hyperlink target type (True==explicit, False==implicit).

        The following state transition table shows how `self.nameids` items
        ("id") and `self.nametypes` items ("type") change with new input
        (a call to this method), and what actions are performed:

        ========  ====  ========  ====  ========  ========  =======  ======
        Input     Old State       New State       Action             Notes
        --------  --------------  --------------  -----------------  ------
        type      id    type      id    type      dupname   report
        ========  ====  ========  ====  ========  ========  =======  ======
        explicit                  new   explicit
        implicit                  new   implicit
        explicit  old   explicit  None  explicit  new,old   WARNING  [#ex]_
        implicit  old   explicit  old   explicit  new       INFO     [#ex]_
        explicit  old   implicit  new   explicit  old       INFO     [#ex]_
        implicit  old   implicit  None  implicit  new,old   INFO     [#ex]_
        explicit  None  explicit  None  explicit  new       WARNING
        implicit  None  explicit  None  explicit  new       INFO
        explicit  None  implicit  new   explicit
        implicit  None  implicit  None  implicit  new       INFO
        ========  ====  ========  ====  ========  ========  =======  ======

        .. [#ex] Do not clear the name-to-id map or invalidate the old
           target if both old and new targets refer to identical URIs or
           reference names.  The new target is invalidated regardless.

        Provisional. There will be changes to prefer explicit reference names
        as base for an element's ID.
        """
        # Iterate over a copy (see the "attention" comment below).
        for name in tuple(node['names']):
            if name in self.nameids:
                self.set_duplicate_name_id(node, id, name, msgnode, explicit)
                # attention: modifies node['names']
            else:
                self.nameids[name] = id
                self.nametypes[name] = explicit

    def set_duplicate_name_id(self,
                              node: Element,
                              id: str,
                              name: str,
                              msgnode: Element | None,
                              explicit: bool,
                              ) -> None:
        """Handle a `name` that is already registered in `self.nameids`.

        Update `self.nameids` and `self.nametypes`, pass the affected
        node(s) to `dupname()`, and generate a system message as
        described by the state transition table in `set_name_id_map()`.

        The message is appended to `msgnode` (if given) but detached
        again, if `msgnode` no longer validates with it.
        """
        old_id = self.nameids[name]  # None if name is only dupname
        old_explicit = self.nametypes[name]
        old_node = self.ids.get(old_id)
        level = 0  # system message level: 1-info, 2-warning

        self.nametypes[name] = old_explicit or explicit

        if old_id is not None and (
            'refname' in node and node['refname'] == old_node.get('refname')
            or 'refuri' in node and node['refuri'] == old_node.get('refuri')
           ):
            # indirect targets with same reference -> keep old target
            level = 1
            ref = node.get('refuri') or node.get('refname')
            s = f'Duplicate name "{name}" for external target "{ref}".'
            dupname(node, name)
        elif explicit:
            if old_explicit:
                level = 2
                s = f'Duplicate explicit target name: "{name}".'
                dupname(node, name)
                if old_id is not None:
                    dupname(old_node, name)
                    self.nameids[name] = None
            else:  # new explicit, old implicit -> override
                self.nameids[name] = id
                if old_id is not None:
                    level = 1
                    s = f'Target name overrides implicit target name "{name}".'
                    dupname(old_node, name)
        else:  # new name is implicit
            level = 1
            s = f'Duplicate implicit target name: "{name}".'
            dupname(node, name)
            if old_id is not None and not old_explicit:
                dupname(old_node, name)
                self.nameids[name] = None

        # `s` is only bound in the branches that also set `level`.
        if level:
            backrefs = [id]
            # don't add backref id for empty targets (not shown in output)
            if isinstance(node, target) and 'refuri' in node:
                backrefs = []
            msg = self.reporter.system_message(level, s,
                                               backrefs=backrefs,
                                               base_node=node)
            # try appending near to the problem:
            if msgnode is not None:
                msgnode += msg
                try:
                    msgnode.validate(recursive=False)
                except ValidationError:
                    # detach -> will be handled by `Messages` transform
                    msgnode.pop()
                    msg.parent = None

    def has_name(self, name: str) -> bool:
        """Return True if `name` is a key in `self.nameids`."""
        return name in self.nameids

    # "note" here is an imperative verb: "take note of".
    def note_implicit_target(
            self, target: Element, msgnode: Element | None = None) -> None:
        """Set an ID for `target` and register its names as implicit."""
        # TODO: Postpone ID creation. Register reference name instead of ID
        # to allow for IDs based on explicit target pointing to the same
        # element. https://github.com/sphinx-doc/sphinx/issues/1961
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=False)

    def note_explicit_target(
            self, target: Element, msgnode: Element | None = None) -> None:
        """Set an ID for `target` and register its names as explicit."""
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=True)

    def note_refname(self, node: Element) -> None:
        """Add `node` to the `self.refnames` list for its "refname"."""
        self.refnames.setdefault(node['refname'], []).append(node)

    def note_refid(self, node: Element) -> None:
        """Add `node` to the `self.refids` list for its "refid"."""
        self.refids.setdefault(node['refid'], []).append(node)

    def note_indirect_target(self, target: target) -> None:
        """Append `target` to `self.indirect_targets`.

        If the target has names, register also its "refname".
        """
        self.indirect_targets.append(target)
        if target['names']:
            self.note_refname(target)

    def note_anonymous_target(self, target: target) -> None:
        """Set an ID for the anonymous `target`."""
        self.set_id(target)

    def note_autofootnote(self, footnote: footnote) -> None:
        """Set an ID for `footnote`; append it to `self.autofootnotes`."""
        self.set_id(footnote)
        self.autofootnotes.append(footnote)

    def note_autofootnote_ref(self, ref: footnote_reference) -> None:
        """Set an ID for `ref`; append it to `self.autofootnote_refs`."""
        self.set_id(ref)
        self.autofootnote_refs.append(ref)

    def note_symbol_footnote(self, footnote: footnote) -> None:
        """Set an ID for `footnote`; append it to `self.symbol_footnotes`."""
        self.set_id(footnote)
        self.symbol_footnotes.append(footnote)

    def note_symbol_footnote_ref(self, ref: footnote_reference) -> None:
        """Set an ID for `ref`; append it to `self.symbol_footnote_refs`."""
        self.set_id(ref)
        self.symbol_footnote_refs.append(ref)

    def note_footnote(self, footnote: footnote) -> None:
        """Set an ID for `footnote`; append it to `self.footnotes`."""
        self.set_id(footnote)
        self.footnotes.append(footnote)

    def note_footnote_ref(self, ref: footnote_reference) -> None:
        """Set an ID for `ref` and register it by its "refname".

        The reference is added to `self.footnote_refs` and
        to `self.refnames`.
        """
        self.set_id(ref)
        self.footnote_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_citation(self, citation: citation) -> None:
        """Append `citation` to `self.citations` (no ID is set here)."""
        self.citations.append(citation)

    def note_citation_ref(self, ref: citation_reference) -> None:
        """Set an ID for `ref` and register it by its "refname".

        The reference is added to `self.citation_refs` and
        to `self.refnames`.
        """
        self.set_id(ref)
        self.citation_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_substitution_def(self,
                              subdef: substitution_definition,
                              def_name: str,
                              msgnode: Element | None = None,
                              ) -> None:
        """Register the substitution definition `subdef` as `def_name`.

        The name is whitespace-normalized.  If it is already defined,
        report an error (appended to `msgnode`, if given) and pass the
        old definition to `dupname()`; the new definition replaces it.
        """
        name = whitespace_normalize_name(def_name)
        if name in self.substitution_defs:
            msg = self.reporter.error(
                  'Duplicate substitution definition name: "%s".' % name,
                  base_node=subdef)
            if msgnode is not None:
                msgnode += msg
            oldnode = self.substitution_defs[name]
            dupname(oldnode, name)
        # keep only the last definition:
        self.substitution_defs[name] = subdef
        # case-insensitive mapping:
        self.substitution_names[fully_normalize_name(name)] = name

    def note_substitution_ref(self,
                              subref: substitution_reference,
                              refname: str,
                              ) -> None:
        """Store the whitespace-normalized `refname` in ``subref['refname']``.
        """
        subref['refname'] = whitespace_normalize_name(refname)

    def note_pending(
            self, pending: pending, priority: int | None = None) -> None:
        """Pass `pending` and `priority` to `self.transformer.add_pending()`.
        """
        self.transformer.add_pending(pending, priority)

    def note_parse_message(self, message: system_message) -> None:
        """Append `message` to `self.parse_messages`."""
        self.parse_messages.append(message)

    def note_transform_message(self, message: system_message) -> None:
        """Append `message` to `self.transform_messages`."""
        self.transform_messages.append(message)

    def note_source(self,
                    source: StrPath | None,
                    offset: int | None,
                    ) -> None:
        """Update `self.current_source` and `self.current_line`.

        `offset` is incremented by one to get the line number;
        None is stored unchanged.
        """
        # A false `source` (e.g. None) is stored as-is.
        self.current_source = source and os.fspath(source)
        if offset is None:
            self.current_line = offset
        else:
            self.current_line = offset + 1

    def copy(self) -> Self:
        """Return a new document with the same settings, reporter,
        attributes, `source`, and `line` but without children.

        Other instance state (e.g. the mappings set up in `__init__()`)
        is not carried over.
        """
        obj = self.__class__(self.settings, self.reporter,
                             **self.attributes)
        obj.source = self.source
        obj.line = self.line
        return obj

    def get_decoration(self) -> decoration:
        """Return the document's `decoration` node; create it, if missing.

        A new node is inserted before the first child that is neither
        `Titular` nor `meta` (or appended, if there is no such child).
        """
        if not self.decoration:
            self.decoration: decoration = decoration()
            index = self.first_child_not_matching_class((Titular, meta))
            if index is None:
                self.append(self.decoration)
            else:
                self.insert(index, self.decoration)
        return self.decoration
# Bibliographic Elements
# ======================
# Text elements in the `Bibliographic` category
# (displayed document meta-data, accepted as children of `docinfo`).
class author(Bibliographic, TextElement): pass
class organization(Bibliographic, TextElement): pass
# `address` derives from `FixedTextElement` (preformatted text).
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass  # NoQA: A001 (builtin name)
class authors(Bibliographic, Element):
    """Container for author information for documents with multiple authors.
    """
    content_model: Final = ((author, '+'),
                            (organization, '?'),
                            (address, '?'),
                            (contact, '?'),
                            )
    # (author, organization?, address?, contact?)+

    def validate_content(self,
                         model: _ContentModelTuple | None = None,
                         elements: Sequence | None = None,
                         ) -> list:
        """Repeatedly test for children matching the content model.

        The content model describes one group of children.  It is
        applied again to the children left over by the previous pass
        until no children remain.

        :model: content model description, default `self.content_model`,
        :elements: list of doctree elements, default `self.children`.

        Return the list of children that do not fit in the model (empty
        with the default model) or raise `ValidationError` if the content
        does not comply with the model.

        Provisional.
        """
        # Pass on `model` and `elements` (they used to be ignored).
        relics = super().validate_content(model, elements)
        while relics:
            remaining = super().validate_content(model, elements=relics)
            if len(remaining) >= len(relics):
                # No child consumed (possible with a custom model that
                # has only optional parts): stop instead of looping forever.
                return remaining
            relics = remaining
        return relics
# Body Elements
# =============
#
# General
# -------
#
# Miscellaneous Body Elements and related Body Subelements (Part)
class paragraph(General, TextElement): pass
class rubric(Titular, General, TextElement): pass


class compound(General, Element):
    # one or more body elements:
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class container(General, Element):
    # one or more body elements:
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+


class attribution(Part, TextElement):
    """Visible reference to the source of a `block_quote`."""


class block_quote(General, Element):
    """An extended quotation, set off from the main text."""
    # one or more body elements, optionally followed by an attribution:
    content_model: Final = ((Body, '+'), (attribution, '?'))
    # ((%body.elements;)+, attribution?)


# `reference` is both a `General` body element and an `Inline` element.
class reference(General, Inline, Referential, TextElement):
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'name', 'refid', 'refname', 'refuri')
# Lists
# -----
#
# Lists (Sequential) and related Body Subelements (Part)
class list_item(Part, Element):
content_model: Final = ((Body, '*'),) # (%body.elements;)*
class bullet_list(Sequential, Element):
valid_attributes: Final = Element.valid_attributes + ('bullet',)
content_model: Final = ((list_item, '+'),) # (list_item+)
class enumerated_list(Sequential, Element):
valid_attributes: Final = Element.valid_attributes + (
'enumtype', 'prefix', 'suffix', 'start')
content_model: Final = ((list_item, '+'),) # (list_item+)
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass
class definition(Part, Element):
"""Definition of a `term` in a `definition_list`."""
content_model: Final = ((Body, '+'),) # (%body.elements;)+
class definition_list_item(Part, Element):
    # Children: exactly one `term`, then any number of `classifier` or
    # `term` elements (in any order), then exactly one `definition`.
    content_model: Final = ((term, '.'),
                            ((classifier, term), '*'),
                            (definition, '.'),
                            )
    # ((term, classifier*)+, definition)
class definition_list(Sequential, Element):
    """List of terms and their definitions.

    Can be used for glossaries or dictionaries, to describe or
    classify things, for dialogues, or to itemize subtopics.
    """
    # Children: one or more `definition_list_item` elements.
    content_model: Final = ((definition_list_item, '+'),)
    # (definition_list_item+)
# Text element; required first child of a `field`.
class field_name(Part, TextElement): pass
class field_body(Part, Element):
    # Children: any number of `Body` elements (may be empty).
    content_model: Final = ((Body, '*'),)  # (%body.elements;)*
class field(Part, Bibliographic, Element):
    # Children: exactly one `field_name` followed by exactly one
    # `field_body`.
    content_model: Final = ((field_name, '.'), (field_body, '.'))
    # (field_name, field_body)
class field_list(Sequential, Element):
    """List of label & data pairs.

    Typically rendered as a two-column list.
    Also used for extension syntax or special processing.
    """
    # Children: one or more `field` elements.
    content_model: Final = ((field, '+'),)  # (field+)
class option_string(Part, PureTextElement):
    """A literal command-line option. Typically monospaced."""
    # Required first child of an `option`.
class option_argument(Part, PureTextElement):
    """Placeholder text for option arguments."""

    # Adds the "delimiter" attribute to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('delimiter',)

    def astext(self) -> str:
        """Return the text content, prefixed with the delimiter.

        The prefix is the value of the "delimiter" attribute,
        or a single space if the attribute is not set.
        """
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)
class option(Part, Element):
    """Option element in an `option_list_item`.

    Groups an option string with zero or more option argument placeholders.
    """
    # Empty separator between the text of the children.
    child_text_separator: Final = ''
    # Children: exactly one `option_string`, then any number of
    # `option_argument` elements.
    content_model: Final = ((option_string, '.'), (option_argument, '*'))
    # (option_string, option_argument*)
class option_group(Part, Element):
    """Groups together one or more `option` elements, all synonyms."""
    # Comma + space as separator between the text of the children.
    child_text_separator: Final = ', '
    # Children: one or more `option` elements.
    content_model: Final = ((option, '+'),)  # (option+)
class description(Part, Element):
    """Description of a command-line option."""
    # Children: one or more `Body` elements.
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class option_list_item(Part, Element):
    """Container for a pair of `option_group` and `description` elements.
    """
    # Separator between the text of the option group and the description.
    child_text_separator: Final = ' '
    # Children: exactly one `option_group` followed by exactly one
    # `description`.
    content_model: Final = ((option_group, '.'), (description, '.'))
    # (option_group, description)
class option_list(Sequential, Element):
    """Two-column list of command-line options and descriptions."""
    # Children: one or more `option_list_item` elements.
    content_model: Final = ((option_list_item, '+'),)  # (option_list_item+)
# Pre-formatted text blocks
# -------------------------
# General body elements based on `FixedTextElement`.
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass
class math_block(General, FixedTextElement, PureTextElement):
    """Mathematical notation (display formula)."""
    # General body element combining `FixedTextElement` and
    # `PureTextElement`.
class line(Part, TextElement):
    """Single line of text in a `line_block`."""
    # Class-level default; None unless a value is assigned elsewhere.
    indent: str | None = None
class line_block(General, Element):
    """Sequence of lines and nested line blocks.
    """


# recursive content model: (line | line_block)+
# Assigned after the class statement because the model refers to
# `line_block` itself.
line_block.content_model = (((line, line_block), '+'),)
# Admonitions
# -----------
# distinctive and self-contained notices
# Specific admonition types.  They differ only in the class name;
# everything else is inherited from `Admonition` and `Element`.
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass
class admonition(Admonition, Element):
    # Children: exactly one `title` followed by one or more `Body`
    # elements.
    content_model: Final = ((title, '.'), (Body, '+'))
    # (title, (%body.elements;)+)
# Footnote and citation
# ---------------------
class label(Part, PureTextElement):
    """Visible identifier for footnotes and citations."""
    # First child in the content models of `footnote` (optional) and
    # `citation` (required).
class footnote(General, BackLinkable, Element, Labeled, Targetable):
    """Labelled note providing additional context (footnote or endnote)."""
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('auto', 'backrefs')
    # Children: an optional `label` followed by one or more `Body`
    # elements.
    content_model: Final = ((label, '?'), (Body, '+'))
    # (label?, (%body.elements;)+)
    # The label will become required in Docutils 1.0.
class citation(General, BackLinkable, Element, Labeled, Targetable):
    # Children: exactly one `label` followed by one or more `Body`
    # elements.
    content_model: Final = ((label, '.'), (Body, '+'))
    # (label, (%body.elements;)+)
# Graphical elements
# ------------------
class image(General, Inline, Element):
    """Reference to an image resource.

    May be body element or inline element.
    """

    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')

    def astext(self) -> str:
        """Return the value of the "alt" attribute (default: '')."""
        alternative_text = self.get('alt', '')
        return alternative_text
# Text element; optional child of a `figure`.
class caption(Part, TextElement): pass
class legend(Part, Element):
    """A wrapper for text accompanying a `figure` that is not the caption."""
    # Children: one or more `Body` elements.
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+
class figure(General, Element):
    """A formal figure, generally an illustration, with a title."""
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('align', 'width')
    # Children: exactly one `image` or `reference`, then an optional
    # `caption`, then an optional `legend`.
    content_model: Final = (((image, reference), '.'),
                            (caption, '?'),
                            (legend, '?'),
                            )
    # (image, ((caption, legend?) | legend))
    # TODO: According to the DTD, a caption or legend is required
    # but rST allows "bare" figures which are formatted differently from
    # images (floating in LaTeX, nested in a <figure> in HTML). [bugs: #489]
# Tables
# ------
class entry(Part, Element):
    """An entry in a `row` (a table cell)."""
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
        'morerows', 'namest', 'nameend', 'rowsep', 'valign')
    # Children: any number of `Body` elements (may be empty).
    content_model: Final = ((Body, '*'),)
    # %tbl.entry.mdl -> (%body.elements;)*
class row(Part, Element):
    """Row of table cells."""
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('rowsep', 'valign')
    # Children: one or more `entry` elements.
    content_model: Final = ((entry, '+'),)  # (%tbl.row.mdl;) -> entry+
class colspec(Part, Element):
    """Specifications for a column in a `tgroup`."""

    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'char', 'charoff', 'colname', 'colnum',
        'colsep', 'colwidth', 'rowsep', 'stub')

    def propwidth(self) -> int|float:
        """Return numerical value of "colwidth__" attribute. Default 1.

        The attribute value (or the empty string, if it is missing)
        is handed to `validate_colwidth()`.

        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.

        Provisional.

        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
        """
        # Move current implementation of validate_colwidth() here
        # in Docutils 1.0
        raw_width = self.get('colwidth', '')
        return validate_colwidth(raw_width)
class thead(Part, Element):
    """Row(s) that form the head of a `tgroup`."""
    # Adds the "valign" attribute to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    # Children: one or more `row` elements.
    content_model: Final = ((row, '+'),)  # (row+)
class tbody(Part, Element):
    """Body of a `tgroup`."""
    # Adds the "valign" attribute to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('valign',)
    # Children: one or more `row` elements.
    content_model: Final = ((row, '+'),)  # (row+)
class tgroup(Part, Element):
    """A portion of a table. Most tables have just one `tgroup`."""
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'cols', 'colsep', 'rowsep')
    # Children: any number of `colspec` elements, an optional `thead`,
    # and exactly one `tbody`.
    content_model: Final = ((colspec, '*'), (thead, '?'), (tbody, '.'))
    # (colspec*, thead?, tbody)
class table(General, Element):
    """A data arrangement with rows and columns."""
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
    # Children: an optional `title` followed by one or more `tgroup`
    # elements.
    content_model: Final = ((title, '?'), (tgroup, '+'))
    # (title?, tgroup+)
# Special purpose elements
# ------------------------
# Body elements for internal use or special requests.
class comment(Invisible, FixedTextElement, PureTextElement):
    """Author notes, hidden from the output."""
    # `Invisible` element combining `FixedTextElement` and
    # `PureTextElement`.
class substitution_definition(Invisible, TextElement):
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('ltrim', 'rtrim')
class target(Invisible, Inline, TextElement, Targetable):
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'anonymous', 'refid', 'refname', 'refuri')
class system_message(Special, BackLinkable, PreBibliographic, Element):
    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    # Attributes accepted in addition to those of `BackLinkable`.
    valid_attributes: Final = BackLinkable.valid_attributes + (
        'level', 'line', 'type')
    # Children: one or more `Body` elements.
    content_model: Final = ((Body, '+'),)  # (%body.elements;)+

    def __init__(self,
                 message: str | None = None,
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Set up the element; a non-empty `message` becomes the first child.

        The message text is wrapped in a `paragraph` that is placed in
        front of `children`.  A "rawsource" keyword argument is removed
        from `attributes` and passed on as the element's raw source.
        """
        rawsource = attributes.pop('rawsource', '')
        if message:
            children = (paragraph('', message), *children)
        try:
            Element.__init__(self, rawsource, *children, **attributes)
        except:  # NoQA: E722 (catchall)
            # Show the offending children, then propagate the error.
            print('system_message: children=%r' % (children,))
            raise

    def astext(self) -> str:
        """Return "source:line: (type/level) text" for this message.

        The "line" attribute is optional; "source", "type", and "level"
        are looked up with ``self[...]``.
        """
        line = self.get('line', '')
        source = self['source']
        msg_type = self['type']
        level = self['level']
        return '%s:%s: (%s/%s) %s' % (source, line, msg_type, level,
                                      Element.astext(self))
class pending(Invisible, Element):
    """
    Placeholder for pending operations.

    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self,
                 transform: Transform,
                 details: Mapping[str, Any] | None = None,
                 rawsource: str = '',
                 *children,
                 **attributes: Any,
                 ) -> None:
        """Store `transform` and `details` next to the regular element data."""
        Element.__init__(self, rawsource, *children, **attributes)

        # The `docutils.transforms.Transform` class implementing the
        # pending operation.
        self.transform: Transform = transform

        # Detail data (dictionary) required by the pending operation;
        # an empty dict if nothing (or an empty mapping) was passed.
        self.details: Mapping[str, Any] = details or {}

    def pformat(self, indent: str = ' ', level: int = 0) -> str:
        """Return the element's pformat output plus the internal attributes.

        The transform's module and name and the `details` items (sorted)
        are appended as extra lines.  Detail values that are a `Node`,
        or a non-empty list starting with a `Node`, are shown via their
        own ``pformat()``; all other values via ``repr()``.
        """
        internals = [
            '.. internal attributes:',
            ' .transform: %s.%s' % (self.transform.__module__,
                                    self.transform.__name__),
            ' .details:',
        ]
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                nodes = [value]
            elif (value
                  and isinstance(value, list)
                  and isinstance(value[0], Node)):
                nodes = value
            else:
                internals.append('%7s%s: %r' % ('', key, value))
                continue
            internals.append('%7s%s:' % ('', key))
            for node in nodes:
                internals.extend('%9s%s' % ('', line)
                                 for line in node.pformat().splitlines())
        formatted = Element.pformat(self, indent, level)
        prefix = indent * level
        return formatted + ''.join(' %s%s\n' % (prefix, line)
                                   for line in internals)

    def copy(self) -> Self:
        """Return a copy without children.

        Transform, details, raw source, and attributes are passed to
        the constructor; document, source, and line are taken over
        from this node.
        """
        clone = self.__class__(self.transform, self.details, self.rawsource,
                               **self.attributes)
        for name in ('_document', 'source', 'line'):
            setattr(clone, name, getattr(self, name))
        return clone
class raw(Special, Inline, PreBibliographic,
          FixedTextElement, PureTextElement):
    """Raw data that is to be passed untouched to the Writer.

    Can be used as Body element or Inline element.
    """
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'format', 'xml:space')
# Inline Elements
# ===============
# Simple inline elements with text content.  They define neither
# attributes nor a content model of their own.
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class emphasis(Inline, TextElement): pass
class generated(Inline, TextElement): pass
class inline(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass
class footnote_reference(Inline, Referential, PureTextElement):
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'auto', 'refid', 'refname')
class citation_reference(Inline, Referential, PureTextElement):
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('refid', 'refname')
class substitution_reference(Inline, TextElement):
    # Adds the "refname" attribute to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + ('refname',)
class math(Inline, PureTextElement):
    """Mathematical notation in running text."""
    # Inline counterpart of `math_block`.
class problematic(Inline, TextElement):
    # Attributes accepted in addition to the common `Element` attributes.
    valid_attributes: Final = Element.valid_attributes + (
        'refid', 'refname', 'refuri')
# ========================================
# Auxiliary Classes, Functions, and Data
# ========================================
# Whitespace-separated names, split into a list of `str`.
# The list is passed to `_add_node_class_names()` to create the
# ``visit_...``/``depart_...`` methods of `GenericNodeVisitor` and
# `SparseNodeVisitor`.
node_class_names: Sequence[str] = """
Text
abbreviation acronym address admonition attention attribution author
authors
block_quote bullet_list
caption caution citation citation_reference classifier colspec comment
compound contact container copyright
danger date decoration definition definition_list definition_list_item
description docinfo doctest_block document
emphasis entry enumerated_list error
field field_body field_list field_name figure footer
footnote footnote_reference
generated
header hint
image important inline
label legend line line_block list_item literal literal_block
math math_block meta
note
option option_argument option_group option_list option_list_item
option_string organization
paragraph pending problematic
raw reference revision row rubric
section sidebar status strong subscript substitution_definition
substitution_reference subtitle superscript system_message
table target tbody term tgroup thead tip title title_reference topic
transition
version
warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:
    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods must be implemented for *all* compulsory node types encountered
    (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions (except for optional nodes).

    For sparse traversals, where only certain node types are of interest, use
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Tuple containing node class names (as strings).
    #
    # No exception will be raised if writers do not implement visit
    # or departure functions for these node classes.
    #
    # Used to ensure transitional compatibility with existing 3rd-party
    # writers.
    optional: ClassVar[tuple[str, ...]] = ('meta',)

    def __init__(self, document: document, /) -> None:
        """Remember the `document` (positional-only) to be traversed."""
        self.document: document = document

    def dispatch_visit(self, node) -> None:
        """
        Call the method named "``visit_`` + node class name" with `node`
        as argument and return its result.

        Fall back to `unknown_visit()` if no such method exists.
        The call is announced via the document reporter's ``debug()``.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, class_name))
        return handler(node)

    def dispatch_departure(self, node) -> None:
        """
        Call the method named "``depart_`` + node class name" with `node`
        as argument and return its result.

        Fall back to `unknown_departure()` if no such method exists.
        The call is announced via the document reporter's ``debug()``.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, class_name))
        return handler(node)

    def unknown_visit(self, node) -> None:
        """
        Called when entering unknown `Node` types.

        Raise NotImplementedError unless the node class is listed in
        `optional` and the "strict_visitor" setting is off.
        """
        if (not self.document.settings.strict_visitor
                and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node) -> None:
        """
        Called before exiting unknown `Node` types.

        Raise NotImplementedError unless the node class is listed in
        `optional` and the "strict_visitor" setting is off.
        """
        if (not self.document.settings.strict_visitor
                and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))
class SparseNodeVisitor(NodeVisitor):
    """
    Base class for sparse traversals, where only certain node types are of
    interest. When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.
    """
    # The do-nothing ``visit_...`` and ``depart_...`` methods are attached
    # to this class by `_add_node_class_names()`.
class GenericNodeVisitor(NodeVisitor):
    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """
    # The forwarding ``visit_...`` and ``depart_...`` methods are attached
    # to this class by `_add_node_class_names()`.

    def default_visit(self, node):
        """Override for generic, uniform traversals.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError
# Installed as every ``visit_<node class name>`` method of
# `GenericNodeVisitor` (see `_add_node_class_names()`):
# forwards `node` to ``self.default_visit()``.
def _call_default_visit(self: GenericNodeVisitor, node) -> None:
    self.default_visit(node)
# Installed as every ``depart_<node class name>`` method of
# `GenericNodeVisitor` (see `_add_node_class_names()`):
# forwards `node` to ``self.default_departure()``.
def _call_default_departure(self: GenericNodeVisitor, node) -> None:
    self.default_departure(node)
# Installed as every ``visit_...`` and ``depart_...`` method of
# `SparseNodeVisitor` (see `_add_node_class_names()`): does nothing.
def _nop(self: SparseNodeVisitor, node) -> None:
    pass
def _add_node_class_names(names) -> None:
    """Save typing with dynamic assignments.

    For every name in `names`, attach ``visit_<name>`` and
    ``depart_<name>`` methods to the visitor base classes:
    forwarding methods to `GenericNodeVisitor`,
    do-nothing methods to `SparseNodeVisitor`.
    """
    for node_name in names:
        visit_attr = 'visit_' + node_name
        depart_attr = 'depart_' + node_name
        setattr(GenericNodeVisitor, visit_attr, _call_default_visit)
        setattr(GenericNodeVisitor, depart_attr, _call_default_departure)
        setattr(SparseNodeVisitor, visit_attr, _nop)
        setattr(SparseNodeVisitor, depart_attr, _nop)


_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):
    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document: document) -> None:
        """Start with an empty list as acting parent and an empty stack."""
        super().__init__(document)
        self.parent_stack: list[list] = []
        self.parent: list = []

    def get_tree_copy(self):
        """Return the first item appended to the acting parent."""
        return self.parent[0]

    def default_visit(self, node) -> None:
        """Copy the current node, and make it the new acting parent."""
        acting_parent = self.parent
        duplicate = node.copy()
        acting_parent.append(duplicate)
        # Remember the old parent so `default_departure()` can restore it.
        self.parent_stack.append(acting_parent)
        self.parent = duplicate

    def default_departure(self, node) -> None:
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()
# Custom Exceptions
# =================
class ValidationError(ValueError):
    """Invalid Docutils Document Tree Element."""

    def __init__(self,
                 msg: str,
                 problematic_element: Element | None = None,
                 ) -> None:
        """Set the exception message and remember the offending element.

        `msg` is passed on to `ValueError`.  `problematic_element` is
        stored in the attribute of the same name; it is None if no
        element was given.
        """
        super().__init__(msg)
        self.problematic_element = problematic_element
class TreePruningException(Exception):
    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.
    """


class SkipChildren(TreePruningException):
    """
    Do not visit any children of the current node.  The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):
    """
    Do not visit any more siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):
    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):
    """
    Do not call the current node's ``depart_...`` method.  The current node's
    children and siblings are not affected.
    """


class NodeFound(TreePruningException):
    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the traversal
    code.
    """


class StopTraversal(TreePruningException):
    """
    Stop the traversal altogether.  The current node's ``depart_...`` method
    is not affected.  The parent nodes' ``depart_...`` methods are also called
    as usual.  No other nodes are visited.  This is an alternative to
    `NodeFound` that does not cause exception handling to trickle up to the
    caller.
    """
# definition moved here from `utils` to avoid circular import dependency
def unescape(text: str,
             restore_backslashes: bool = False,
             respect_whitespace: bool = False,
             ) -> str:
    """
    Return `text` with null characters removed or restored to backslashes.

    If `restore_backslashes` is true, every null character is replaced
    by a backslash.  Otherwise, null characters are removed, together
    with a space or newline directly following them
    (i.e. backslash-escaped whitespace is dropped).

    `respect_whitespace` has no effect.
    """
    # `respect_whitespace` is ignored (since introduction 2016-12-16)
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Remove null + whitespace pairs before the remaining lone nulls.
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
def make_id(string: str) -> str:
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ requires ID and NAME tokens to begin with a
      letter ([A-Za-z]); they may be followed by letters, digits, hyphens,
      underscores, colons, and periods.

    - The "nmchar" rule of the `CSS1 spec`_ does not include underscores,
      colons, or periods, so these characters are replaced with hyphens.

    Conversion steps: lowercase, replace selected non-ASCII letters and
    digraphs by ASCII look-alikes, drop remaining non-ASCII characters
    (after compatibility decomposition), replace each run of other
    characters by one hyphen, and strip leading hyphens/digits and
    trailing hyphens.

    .. _HTML 4.01 spec: https://www.w3.org/TR/html401
    .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    # Digraphs first, then single letters without ASCII decomposition.
    ident = ident.translate(_non_id_translate_digraphs)
    ident = ident.translate(_non_id_translate)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    words = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', words)
    return _non_id_at_ends.sub('', ident)


# Runs of characters that are not allowed in an identifier.
_non_id_chars: re.Pattern[str] = re.compile('[^a-z0-9]+')
# Leading hyphens/digits and trailing hyphens.
_non_id_at_ends: re.Pattern[str] = re.compile('^[-0-9]+|-+$')
# Translation table (code point -> ASCII letter) for `str.translate()`.
_non_id_translate: dict[int, str] = {
    0x00f8: 'o',  # o with stroke
    0x0111: 'd',  # d with stroke
    0x0127: 'h',  # h with stroke
    0x0131: 'i',  # dotless i
    0x0142: 'l',  # l with stroke
    0x0167: 't',  # t with stroke
    0x0180: 'b',  # b with stroke
    0x0183: 'b',  # b with topbar
    0x0188: 'c',  # c with hook
    0x018c: 'd',  # d with topbar
    0x0192: 'f',  # f with hook
    0x0199: 'k',  # k with hook
    0x019a: 'l',  # l with bar
    0x019e: 'n',  # n with long right leg
    0x01a5: 'p',  # p with hook
    0x01ab: 't',  # t with palatal hook
    0x01ad: 't',  # t with hook
    0x01b4: 'y',  # y with hook
    0x01b6: 'z',  # z with stroke
    0x01e5: 'g',  # g with stroke
    0x0225: 'z',  # z with hook
    0x0234: 'l',  # l with curl
    0x0235: 'n',  # n with curl
    0x0236: 't',  # t with curl
    0x0237: 'j',  # dotless j
    0x023c: 'c',  # c with stroke
    0x023f: 's',  # s with swash tail
    0x0240: 'z',  # z with swash tail
    0x0247: 'e',  # e with stroke
    0x0249: 'j',  # j with stroke
    0x024b: 'q',  # q with hook tail
    0x024d: 'r',  # r with stroke
    0x024f: 'y',  # y with stroke
}
# Translation table (code point -> two ASCII letters).
_non_id_translate_digraphs: dict[int, str] = {
    0x00df: 'sz',  # ligature sz
    0x00e6: 'ae',  # ae
    0x0153: 'oe',  # ligature oe
    0x0238: 'db',  # db digraph
    0x0239: 'qp',  # qp digraph
}
def dupname(node: Element, name: str) -> None:
    """Move `name` from the "names" to the "dupnames" attribute of `node`.

    Also sets ``node.referenced`` to True.
    """
    # Append before removing: "dupnames" is updated even if `name` is
    # missing from "names" (in which case `list.remove()` raises
    # ValueError).
    node['dupnames'].append(name)
    node['names'].remove(name)
    # Assume that `node` is referenced, even though it isn't;
    # we don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name: str) -> str:
    """Return a case- and whitespace-normalized name.

    The name is lowercased; leading and trailing whitespace is removed
    and each internal whitespace run is replaced by a single space.
    """
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name: str) -> str:
    """Return a whitespace-normalized name.

    Leading and trailing whitespace is removed and each internal
    whitespace run is replaced by a single space.  Case is kept.
    """
    words = name.split()
    return ' '.join(words)
def serial_escape(value: str) -> str:
    """Escape string values that are elements of a list, for serialization.

    Backslashes are doubled and spaces are prefixed with a backslash.
    """
    # Single pass; equivalent to doubling the backslashes first and
    # escaping the spaces afterwards.
    escapes = str.maketrans({'\\': r'\\', ' ': r'\ '})
    return value.translate(escapes)
def split_name_list(s: str) -> list[str]:
    r"""Split a string at non-escaped spaces.

    Backslashes escape internal spaces (cf. `serial_escape()`).
    Return list of "names" (after removing escaping backslashes).

    >>> split_name_list(r'a\ n\ame two\\ n\\ames')
    ['a name', 'two\\', 'n\\ames']

    Provisional.
    """
    nul = '\x00'
    marked = s.replace('\\', nul)               # escape with NULL char
    marked = marked.replace(nul + nul, '\\')    # unescape backslashes
    marked = marked.replace(nul + ' ', nul + nul)  # escaped spaces -> NULL NULL
    names = []
    for part in marked.split(' '):
        # restore internal spaces, drop other escaping characters
        names.append(part.replace(nul + nul, ' ').replace(nul, ''))
    return names
def pseudo_quoteattr(value: str) -> str:
    """Quote attributes for pseudo-xml

    The value is wrapped in double quotes; no characters are escaped.
    """
    template = '"%s"'
    return template % value
def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
                  ) -> tuple[int|float, str]:
    """Parse a measure__, return value + unit.

    `unit_pattern` is a regular expression describing recognized units.
    The default is suited for (but not limited to) CSS3 units and SI units.
    It matches runs of ASCII letters or Greek mu, a single percent sign,
    or no unit.

    The value is returned as `int` if possible, else as `float`.
    Spaces between number and unit are allowed.

    Raise ValueError if `measure` does not match "number + unit"
    or if the number cannot be converted.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure

    Provisional.
    """
    pattern = f'(-?[0-9.]+) *({unit_pattern})'
    match = re.fullmatch(pattern, measure)
    try:
        # AttributeError here means there was no match at all.
        number = match.group(1)
        try:
            value = int(number)
        except ValueError:
            value = float(number)
        unit = match.group(2)
    except (AttributeError, ValueError):
        raise ValueError(f'"{measure}" is no valid measure.')
    return value, unit
# Methods to validate `Element attribute`__ values.
# Ensure the expected Python `data type`__, normalize, and check for
# restrictions.
#
# The functions can be used to convert `str` values (e.g. from an XML
# representation) or to validate an existing document tree or node.
#
# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
# and the `attribute_validating_functions` mapping below.
#
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
def create_keyword_validator(*keywords: str) -> Callable[[str], str]:
    """
    Return a function that validates a `str` against given `keywords`.

    The returned function returns its argument unchanged if it is one
    of the `keywords` and raises ValueError otherwise.

    Provisional.
    """
    def validate_keywords(value: str) -> str:
        if value in keywords:
            return value
        allowed = '", \"'.join(keywords)
        raise ValueError(f'"{value}" is not one of "{allowed}".')

    return validate_keywords
def validate_identifier(value: str) -> str:
    """
    Validate identifier key or class name.

    Return `value` if `make_id()` leaves it unchanged,
    raise ValueError otherwise.

    Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type

    Provisional.
    """
    if value == make_id(value):
        return value
    raise ValueError(f'"{value}" is no valid id or class name.')
def validate_identifier_list(value: str | list[str]) -> list[str]:
    """
    A (space-separated) list of ids or class names.

    `value` may be a `list` or a `str` with space separated
    ids or class names (cf. `validate_identifier()`).
    A `str` is split at whitespace; a `list` is returned as is.
    Raise ValueError at the first invalid token.

    Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
    __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_identifier(token)
    return tokens
def validate_measure(measure: str) -> str:
    """
    Validate a measure__ (number + optional unit).  Return normalized `str`.

    The normalized form is the parsed number directly followed by the
    unit (spaces between them are dropped).

    See `parse_measure()` for a function returning a "number + unit" tuple.

    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
    or the empty string.  Case is preserved.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
    """
    number, unit = parse_measure(measure)
    return str(number) + unit
def validate_colwidth(measure: str|int|float) -> int|float:
    """Validate the "colwidth__" attribute.

    Numbers are taken as is; the strings "*" and "" stand for 1;
    other strings must be a number with optional unit "*".
    Raise ValueError if the result is zero or negative or if the
    string cannot be parsed.

    Provisional:
    `measure` must be a `str` and will be returned as normalized `str`
    (with unit "*" for proportional values) in Docutils 1.0.

    The default unit will change to "pt" in Docutils 2.0.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
    """
    if isinstance(measure, (int, float)):
        width = measure
    elif measure in ('*', ''):  # short for '1*'
        width = 1
    else:
        try:
            width = parse_measure(measure, unit_pattern='[*]?')[0]
        except ValueError:
            # Let the range check below report the problem.
            width = -1
    if width <= 0:
        raise ValueError(f'"{measure}" is no proportional measure.')
    return width
def validate_NMTOKEN(value: str) -> str:
    """
    Validate a "name token": a `str` of ASCII letters, digits, and [-._].

    Return `value` unchanged; raise ValueError if it is empty or
    contains other characters.

    Provisional.
    """
    if re.fullmatch('[-._A-Za-z0-9]+', value) is None:
        raise ValueError(f'"{value}" is no NMTOKEN.')
    return value
def validate_NMTOKENS(value: str | list[str]) -> list[str]:
    """
    Validate a list of "name tokens".

    A `str` is split at whitespace; a `list` is returned as is.
    Raise ValueError at the first token that is no NMTOKEN.

    Provisional.
    """
    tokens = value.split() if isinstance(value, str) else value
    for token in tokens:
        validate_NMTOKEN(token)
    return tokens
def validate_refname_list(value: str | list[str]) -> list[str]:
    """
    Validate a list of `reference names`__.

    Reference names may contain all characters;
    whitespace is normalized (cf. `whitespace_normalize_name()`).

    `value` may be either a `list` of names or a `str` with
    space separated names (with internal spaces backslash escaped
    and literal backslashes doubled, cf. `serial_escape()`).

    Return a list of whitespace-normalized, unescaped reference names.

    Provisional.

    __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
    """
    names = split_name_list(value) if isinstance(value, str) else value
    return [whitespace_normalize_name(name) for name in names]
def validate_yesorno(value: str | int | bool) -> bool:
"""Validate a `%yesorno`__ (flag) value.
The string literal "0" evaluates to ``False``, all other
values are converterd with `bool()`.
__ https://docutils.sourceforge.io/docs/ref/doctree.html#yesorno
"""
if value == "0":
return False
return bool(value)
# Lookup table: attribute name -> callable that validates/converts a value.
#
# * Plain `str` and `int` entries merely convert the value to that type.
# * `validate_*()` entries check the value and return it (possibly
#   normalized); e.g. `validate_NMTOKEN()` raises `ValueError` on failure.
# * `create_keyword_validator(...)` is called once per entry at import time;
#   presumably it returns a validator accepting only the listed keywords
#   (TODO confirm against its definition).
#
# Entries marked "from CALS" stem from the Exchange Table Model.
ATTRIBUTE_VALIDATORS: dict[str, Callable[[str], Any]] = {
    'alt': str,  # CDATA
    'align': str,
    'anonymous': validate_yesorno,
    'auto': str,  # CDATA (only '1' or '*' are used in rST)
    'backrefs': validate_identifier_list,
    'bullet': str,  # CDATA (only '-', '+', or '*' are used in rST)
    'classes': validate_identifier_list,
    'char': str,  # from Exchange Table Model (CALS), currently ignored
    'charoff': validate_NMTOKEN,  # from CALS, currently ignored
    'colname': validate_NMTOKEN,  # from CALS, currently ignored
    'colnum': int,  # from CALS, currently ignored
    'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
    'colsep': validate_yesorno,
    'colwidth': validate_colwidth,  # see docstring for pending changes
    'content': str,  # <meta>
    'delimiter': str,
    'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>
    'dupnames': validate_refname_list,
    'enumtype': create_keyword_validator('arabic', 'loweralpha', 'lowerroman',
                                         'upperalpha', 'upperroman'),
    'format': str,  # CDATA (space separated format names)
    'frame': create_keyword_validator('top', 'bottom', 'topbot', 'all',
                                      'sides', 'none'),  # from CALS, ignored
    'height': validate_measure,
    'http-equiv': str,  # <meta>
    'ids': validate_identifier_list,
    'lang': str,  # <meta>
    'level': int,
    'line': int,
    'ltrim': validate_yesorno,
    'loading': create_keyword_validator('embed', 'link', 'lazy'),
    'media': str,  # <meta>
    'morecols': int,
    'morerows': int,
    # A dict holds only one validator per key: the entry for "name" is the
    # one for <reference>; the <meta> variant is kept as a comment only.
    'name': whitespace_normalize_name,  # in <reference> (deprecated)
    # 'name': node_attributes.validate_NMTOKEN,  # in <meta>
    'names': validate_refname_list,
    'namest': validate_NMTOKEN,  # start of span, from CALS, currently ignored
    'nameend': validate_NMTOKEN,  # end of span, from CALS, currently ignored
    'pgwide': validate_yesorno,  # from CALS, currently ignored
    'prefix': str,
    'refid': validate_identifier,
    'refname': whitespace_normalize_name,
    'refuri': str,
    'rowsep': validate_yesorno,
    'rtrim': validate_yesorno,
    'scale': int,
    'scheme': str,
    'source': str,
    'start': int,
    'stub': validate_yesorno,
    'suffix': str,
    'title': str,
    'type': validate_NMTOKEN,
    'uri': str,
    'valign': create_keyword_validator('top', 'middle', 'bottom'),  # from CALS
    'width': validate_measure,
    'xml:space': create_keyword_validator('default', 'preserve'),
}
"""
Mapping of `attribute names`__ to validating functions.
Provisional.
__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
"""