CCR/.venv/lib/python3.12/site-packages/parsimonious/nodes.py

326 lines
13 KiB
Python

"""Nodes that make up parse trees
Parsing spits out a tree of these, which you can then tell to walk itself and
spit out a useful value. Or you can walk it yourself; the structural attributes
are public.
"""
# TODO: If this is slow, think about using cElementTree or something.
from inspect import isfunction
from sys import version_info, exc_info
from parsimonious.exceptions import VisitationError, UndefinedLabel
class Node(object):
    """A single node of a parse tree.

    Treat instances as read-only after construction. The cache saves memory
    by handing out the same ``Node`` object more than once, so one parse tree
    may contain several references to a single node; mutating it through one
    reference shows up at every other place it appears.

    Nodes are deliberately representation-agnostic: they record *what*
    matched *where*, not what the final output (a rendered wiki page, a
    compiler IR, ...) should look like. Parse once, then derive as many
    representations from the tree as you need.
    """
    # Subclassing list was tried and abandoned: it required building invalid
    # instances and patching them up afterwards, among other problems.
    __slots__ = [
        'expr',       # The expression that generated me
        'full_text',  # The full text fed to the parser
        'start',      # The position in the text where that expr started
                      # matching
        'end',        # The position after start where the expr first didn't
                      # match. [start:end] follow Python slice conventions.
        'children',   # List of child parse tree nodes
    ]

    def __init__(self, expr, full_text, start, end, children=None):
        """Store the matching expression, the source text and the match span.

        :arg expr: The expression that produced this node
        :arg full_text: The complete text that was fed to the parser
        :arg start: Index in ``full_text`` where the match begins
        :arg end: Index in ``full_text`` just past the match
        :arg children: Child nodes; any falsy value becomes a new empty list
        """
        self.expr = expr
        self.full_text = full_text
        self.start = start
        self.end = end
        self.children = children if children else []

    @property
    def expr_name(self):
        """Name of the generating expression (kept for backwards
        compatibility)."""
        return self.expr.name

    def __iter__(self):
        """Iterate over the child nodes.

        This is what makes ``for child in node`` and tuple-unpacking a node
        (for instance in a visitor method's argument list) work.
        """
        return iter(self.children)

    @property
    def text(self):
        """The slice of ``full_text`` that this node matched."""
        return self.full_text[self.start:self.end]

    # Everything below exists for testing and debugging.

    def prettily(self, error=None):
        """Return an indented, one-node-per-line rendering of this subtree.

        :arg error: The node to highlight because an error occurred there
        """
        # TODO: The highlight is chosen by identity, so a Node that occurs
        # several times in the tree gets flagged at every occurrence.
        def indent(text):
            return '\n'.join([' ' + line for line in text.splitlines()])

        if self.expr_name:
            label = ' called "%s"' % self.expr_name
        else:
            label = ''
        marker = ' <-- *** We were here. ***' if error is self else ''

        lines = [u'<%s%s matching "%s">%s' % (
            self.__class__.__name__, label, self.text, marker)]
        lines.extend(indent(child.prettily(error=error)) for child in self)
        return '\n'.join(lines)

    def __str__(self):
        """Return the pretty-printed form produced by :meth:`prettily`."""
        return self.prettily()

    def __eq__(self, other):
        """Compare two nodes field by field (deeply, via ``children``).

        Returns ``NotImplemented`` when ``other`` is not a ``Node``.
        """
        if not isinstance(other, Node):
            return NotImplemented
        return (self.expr == other.expr
                and self.full_text == other.full_text
                and self.start == other.start
                and self.end == other.end
                and self.children == other.children)

    def __ne__(self, other):
        """Negation of ``==``."""
        return not (self == other)

    def __repr__(self, top_level=True):
        """Return code-like text (statements, not one expression) that would
        rebuild this node.

        :arg top_level: When true, emit the leading ``s = <full_text>``
            assignment that the constructor calls refer to. Nested calls pass
            ``False`` so the assignment appears only once.
        """
        # %r already escapes non-ASCII text, so no explicit encoding step is
        # needed afterwards.
        lines = []
        if top_level:
            lines.append("s = %r" % self.full_text)

        if self.children:
            nested = ', '.join(
                [child.__repr__(top_level=False) for child in self.children])
            children_part = ', children=[%s]' % nested
        else:
            children_part = ''

        lines.append("%s(%r, s, %s, %s%s)" % (
            self.__class__.__name__,
            self.expr,
            self.start,
            self.end,
            children_part))
        return '\n'.join(lines)
class RegexNode(Node):
    """Node returned from a ``Regex`` expression

    Grants access to the ``re.Match`` object, in case you want to access
    capturing groups, etc.

    ``match`` is not a parameter of ``Node.__init__``, so it has to be
    assigned on the instance after construction (presumably by the ``Regex``
    expression that builds the node -- that code is not in this module).
    """
    # One extra slot on top of the ones inherited from ``Node``.
    __slots__ = ['match']
class RuleDecoratorMeta(type):
    """Metaclass that builds a default grammar from ``@rule`` methods.

    While a class is being created, every plain function in its namespace
    that carries a ``_rule`` attribute is collected. If there are any, their
    rule strings are joined into one grammar definition, and the resulting
    ``Grammar`` is stored in the class namespace under ``grammar``.
    """

    def __new__(metaclass, name, bases, namespace):
        """Create the class, adding a ``grammar`` entry if rules are present.

        :arg name: Name of the class being created
        :arg bases: Tuple of its base classes
        :arg namespace: The class body's namespace; a ``grammar`` key is added
            to it when at least one ``@rule``-decorated function is found
        :returns: The newly created class
        """
        def unvisit(name):
            """Remove any leading "visit_" from a method name."""
            return name[6:] if name.startswith('visit_') else name

        # Only real functions count; other objects that merely happen to
        # have a ``_rule`` attribute are ignored.
        methods = [value for value in namespace.values()
                   if hasattr(value, '_rule') and isfunction(value)]
        if methods:
            from parsimonious.grammar import Grammar  # circular import dodge

            # Order rules by where they appear in the source. This module
            # already requires Python 3 syntax, so ``__code__`` is always
            # the right attribute (``func_code`` was its Python 2 name).
            methods.sort(key=lambda method: method.__code__.co_firstlineno)

            # Possible enhancement: once we get the Grammar extensibility
            # story solidified, we can have @rules *add* to the default
            # grammar rather than pave over it.
            namespace['grammar'] = Grammar(
                '\n'.join('{name} = {expr}'.format(name=unvisit(m.__name__),
                                                   expr=m._rule)
                          for m in methods))
        return super(RuleDecoratorMeta,
                     metaclass).__new__(metaclass, name, bases, namespace)
class NodeVisitor(object, metaclass=RuleDecoratorMeta):
    """Base class for turning a parse tree into something useful.

    The tree is walked depth-first. To use it, subclass, add one
    ``visit_<rule name>`` method per expression you care about, create an
    instance and call ``visit(top_node_of_parse_tree)``; the return value is
    your result. The API closely resembles ``ast.NodeVisitor``.

    The methods could all have been static, but that would be at least as
    awkward at the call site as the ``()`` needed to instantiate, and having
    instances lets subclasses carry state such as options or a symbol table.

    The parse tree is never modified in place, because...

    * The same ``Node`` object is likely referenced from several places in
      the tree, so changing it through one reference would cause surprises
      elsewhere.
    * Error reporting would become impossible: the "error" arrow would point
      into a half-transformed jumble of nodes -- and that assumes the output
      is a tree at all rather than, say, a string.
    """

    #: The :term:`default grammar`: the one recommended for use with this
    #: visitor. If you populate this, you will be able to call
    #: :meth:`NodeVisitor.parse()` as a shortcut.
    grammar = None

    #: Classes of exceptions you actually intend to raise during visitation
    #: and which should propagate out of the visitor. These will not be
    #: wrapped in a VisitationError when they arise.
    unwrapped_exceptions = ()

    # TODO: If this needs optimizing, subclasses could go back to being
    # responsible for visiting children (they know when it is pointless), or
    # the grammar could mark nodes as not worth descending into.
    def visit(self, node):
        """Walk a parse tree and convert it into another representation.

        Children are visited first; then the method named after the grammar
        rule that produced ``node`` is called with the node and the list of
        its children's results. For a rule such as ... ::

            bold = '<b>'

        ...that method is ``visit_bold()``. When no such method exists,
        :meth:`generic_visit` is used instead. Writing the ``visit_*``
        methods is up to the subclass.

        Exceptions raised along the way are wrapped in ``VisitationError``
        (which records the offending node), except for ``VisitationError``
        and ``UndefinedLabel`` themselves and anything listed in
        ``unwrapped_exceptions``.
        """
        handler = getattr(self, 'visit_' + node.expr_name, self.generic_visit)

        try:
            visited_children = [self.visit(child) for child in node]
            return handler(node, visited_children)
        except (VisitationError, UndefinedLabel):
            # Already wrapped (or deliberately unwrapped): pass it through.
            raise
        except Exception as exc:
            # Implementors may declare exception classes that must reach the
            # caller untouched.
            if isinstance(exc, self.unwrapped_exceptions):
                raise
            # Everything else gets the parse tree attached so it is easier to
            # see where things went wrong.
            raise VisitationError(exc, type(exc), node) from exc

    def generic_visit(self, node, visited_children):
        """Fallback used when no ``visit_*`` method matches a node.

        :arg node: The node we're visiting
        :arg visited_children: The results of visiting the children of that
            node, in a list

        No implementation seems sensible across all (or even most) use cases,
        so this always raises ``NotImplementedError``; subclasses are expected
        to override it.
        """
        rule_text = node.expr.as_rule()
        raise NotImplementedError(
            'No visitor method was defined for this expression: %s' %
            rule_text)

    # Convenience methods:

    def parse(self, text, pos=0):
        """Parse ``text`` with the default grammar, then visit the result.

        ``SomeVisitor().parse('some_string')`` is a shortcut for
        ``SomeVisitor().visit(some_grammar.parse('some_string'))``.
        """
        return self._parse_or_match(text, pos, 'parse')

    def match(self, text, pos=0):
        """Like :meth:`parse`, but without insisting that the whole text be
        consumed.

        ``SomeVisitor().match('some_string')`` is a shortcut for
        ``SomeVisitor().visit(some_grammar.match('some_string'))``.
        """
        return self._parse_or_match(text, pos, 'match')

    # Internal convenience methods to help you write your own visitors:

    def lift_child(self, node, children):
        """Return the only visited child, so that it replaces ``node``.

        Unpacking fails with ``ValueError`` unless ``children`` holds exactly
        one item.
        """
        (only_child,) = children
        return only_child

    # Private methods:

    def _parse_or_match(self, text, pos, method_name):
        """Run ``parse`` or ``match`` on the default grammar and visit the
        resulting tree.

        :arg text: The text to parse
        :arg pos: Passed to the grammar method as its ``pos`` keyword
        :arg method_name: Name of the grammar method to call -- ``'parse'``
            or ``'match'``

        Raise RuntimeError if there is no default grammar specified.
        """
        if not self.grammar:
            message = (
                "The {cls}.{method}() shortcut won't work because {cls} was "
                "never associated with a specific grammar. Fill out its "
                "`grammar` attribute, and try again.")
            raise RuntimeError(message.format(cls=self.__class__.__name__,
                                              method=method_name))

        grammar_method = getattr(self.grammar, method_name)
        return self.visit(grammar_method(text, pos=pos))
def rule(rule_string):
    """Decorator factory that attaches a grammar rule to a ``visit_*`` method.

    With the following, ``visit_digit`` receives the results of the
    ``~"[0-9]"`` parse rule::

        @rule('~"[0-9]"')
        def visit_digit(self, node, visited_children):
            ...

    There is no ``digit = `` prefix in the rule string; the rule's name is
    derived from the method name.

    When only one kind of visitor cares about a grammar, ``@rule`` spares you
    from flipping between the visitor and a separate grammar definition.

    Under the hood, all ``@rule`` rules of a class are stitched together into
    a :class:`~parsimonious.Grammar` that becomes the NodeVisitor's
    :term:`default grammar`.

    The default rule of that grammar is normally the obvious one: whichever
    ``@rule`` comes first in the class. It can get surprising when the
    ``@rule`` calls are spread across subclasses. For now "first" is decided
    purely by line number, so the method on the smallest-numbered line wins.
    A future release is meant to pick the first ``@rule`` call on the
    basemost class that has one, so that a subclass which does not override
    the default rule's ``visit_*`` method cannot change the default by
    accident.

    :arg rule_string: The right-hand side of the grammar rule
    :returns: A decorator that tags the method and returns it unchanged
    """
    def decorator(method):
        # XXX: Registering rules on a class variable instead might let a
        # subclass override a @rule'd visitor method without blowing away
        # the rule string that comes with it.
        setattr(method, '_rule', rule_string)
        return method

    return decorator