# Source: CCR/.venv/lib/python3.12/site-packages/parsimonious/tests/test_expressions.py (336 lines, 14 KiB, Python)

# coding=utf-8
from unittest import TestCase
from parsimonious.exceptions import ParseError, IncompleteParseError
from parsimonious.expressions import (Literal, Regex, Sequence, OneOf, Not,
Quantifier, Optional, ZeroOrMore, OneOrMore, Expression)
from parsimonious.grammar import Grammar, rule_grammar
from parsimonious.nodes import Node
class LengthTests(TestCase):
    """Tests for returning the right lengths

    I wrote these before parse tree generation was implemented. They're
    partially redundant with TreeTests.

    """

    def len_eq(self, node, length):
        """Assert that the span matched by ``node`` is ``length`` characters.

        Makes tests shorter and lets them omit positional stuff they don't care
        about. A ``node`` of ``None`` is treated as having a length of ``None``.

        """
        node_length = None if node is None else node.end - node.start
        # A TestCase assertion (rather than a bare ``assert``) keeps the check
        # alive under ``python -O`` and reports both values on failure.
        self.assertEqual(node_length, length)

    def test_regex(self):
        """Leaf expressions (``Literal``, ``Regex``) match the expected span."""
        self.len_eq(Literal('hello').match('ehello', 1), 5)  # simple
        self.len_eq(Regex('hello*').match('hellooo'), 7)  # *
        self.assertRaises(ParseError, Regex('hello*').match, 'goodbye')  # no match
        self.len_eq(Regex('hello', ignore_case=True).match('HELLO'), 5)

    def test_sequence(self):
        """``Sequence`` matches the combined span of all its members."""
        self.len_eq(Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo')).match('hiiiilobingo1234'), 12)  # succeed
        self.assertRaises(ParseError, Sequence(Regex('hi*'), Literal('lo'),
                                               Regex('.ingo')).match, 'hiiiilobing')  # don't
        self.len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)  # non-0 pos

    def test_one_of(self):
        """``OneOf`` matches the span of whichever alternative succeeds."""
        self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('aaa'), 3)  # first alternative
        self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('bbaaa'), 2)  # second
        self.assertRaises(ParseError, OneOf(Literal('aaa'), Literal('bb')).match, 'aa')  # no match

    def test_not(self):
        """``Not`` yields a zero-length match or raises ``ParseError``."""
        self.len_eq(Not(Regex('.')).match(''), 0)  # match
        self.assertRaises(ParseError, Not(Regex('.')).match, 'Hi')  # don't

    def test_optional(self):
        """``Optional`` matches its member's span, or zero length without it."""
        self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('b'), 1)  # contained expr fails
        self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('ab'), 2)  # contained expr succeeds
        self.len_eq(Optional(Literal('a')).match('aa'), 1)
        self.len_eq(Optional(Literal('a')).match('bb'), 0)

    def test_zero_or_more(self):
        """``ZeroOrMore`` handles zero, several, and zero-length repetitions."""
        self.len_eq(ZeroOrMore(Literal('b')).match(''), 0)  # zero
        self.len_eq(ZeroOrMore(Literal('b')).match('bbb'), 3)  # more

        self.len_eq(Regex('^').match(''), 0)  # Validate the next test.

        # Try to make it loop infinitely using a zero-length contained expression:
        self.len_eq(ZeroOrMore(Regex('^')).match(''), 0)

    def test_one_or_more(self):
        """``OneOrMore`` and ``Quantifier`` respect their ``min``/``max`` bounds."""
        self.len_eq(OneOrMore(Literal('b')).match('b'), 1)  # one
        self.len_eq(OneOrMore(Literal('b')).match('bbb'), 3)  # more
        self.len_eq(OneOrMore(Literal('b'), min=3).match('bbb'), 3)  # with custom min; success
        self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbb'), 4)  # with custom min and max; success
        self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbbbb'), 5)  # with custom min and max; success
        self.assertRaises(ParseError, OneOrMore(Literal('b'), min=3).match, 'bb')  # with custom min; failure
        self.assertRaises(ParseError, Quantifier(Literal('b'), min=3, max=5).match, 'bb')  # with custom min and max; failure
        self.len_eq(OneOrMore(Regex('^')).match('bb'), 0)  # attempt infinite loop
class TreeTests(TestCase):
    """Tests for building the right trees

    Only successful matches need checking here; the failure cases are
    covered above.

    """

    def test_simple_node(self):
        """A leaf expression such as ``Literal`` should yield a matching node."""
        greeting = Literal('hello', name='greeting')
        actual = greeting.match('hello')
        expected = Node(greeting, 'hello', 0, 5)
        self.assertEqual(actual, expected)

    def test_sequence_nodes(self):
        """A ``Sequence`` node should carry one child per member, in order."""
        dwarf = Sequence(
            Literal('heigh', name='greeting1'),
            Literal('ho', name='greeting2'),
            name='dwarf',
        )
        text = 'heighho'
        actual = dwarf.match(text)
        expected_children = [
            Node(dwarf.members[0], text, 0, 5),
            Node(dwarf.members[1], text, 5, 7),
        ]
        self.assertEqual(actual, Node(dwarf, text, 0, 7, children=expected_children))

    def test_one_of(self):
        """``OneOf`` should produce its own node around the winning child."""
        choice = OneOf(Literal('a', name='lit'), name='one_of')
        text = 'aa'
        actual = choice.match(text)
        winner = Node(choice.members[0], text, 0, 1)
        self.assertEqual(actual, Node(choice, text, 0, 1, children=[winner]))

    def test_optional(self):
        """``Optional`` should produce its own node around a matched child."""
        maybe = Optional(Literal('a', name='lit'), name='opt')

        text = 'a'
        actual = maybe.match(text)
        child = Node(maybe.members[0], text, 0, 1)
        self.assertEqual(actual, Node(maybe, text, 0, 1, children=[child]))

        # Now let the inner Literal fail; LengthTests.test_optional is
        # ambiguous about that case.
        text = ''
        actual = maybe.match(text)
        self.assertEqual(actual, Node(maybe, text, 0, 0))

    def test_zero_or_more_zero(self):
        """With zero repetitions ``ZeroOrMore`` should still give back a node."""
        repeated = ZeroOrMore(Literal('a'), name='zero')
        text = ''
        actual = repeated.match(text)
        self.assertEqual(actual, Node(repeated, text, 0, 0))

    def test_one_or_more_one(self):
        """With one repetition ``OneOrMore`` should give a node with one child."""
        repeated = OneOrMore(Literal('a', name='lit'), name='one')
        text = 'a'
        actual = repeated.match(text)
        only_child = Node(repeated.members[0], text, 0, 1)
        self.assertEqual(actual, Node(repeated, text, 0, 1, children=[only_child]))

    # Features added after Grammar was implemented are exercised by the
    # integration tests in test_grammar.
class ParseTests(TestCase):
    """Tests for the ``parse()`` method"""

    def test_parse_success(self):
        """``parse()`` should hand back the whole tree when it succeeds.

        Everything else about tree construction has already been vetted by
        the tests above.

        """
        repeated = OneOrMore(Literal('a', name='lit'), name='more')
        text = 'aa'
        actual = repeated.parse(text)
        literal = repeated.members[0]
        expected_children = [
            Node(literal, text, 0, 1),
            Node(literal, text, 1, 2),
        ]
        self.assertEqual(actual, Node(repeated, text, 0, 2, children=expected_children))
class ErrorReportingTests(TestCase):
    """Tests for reporting parse errors

    Every test uses ``assertRaises`` as a context manager so that it fails
    if the expected error is not raised at all, rather than passing without
    running any assertion.

    Grammar literals are written flush-left so their text is unchanged.

    """

    def test_inner_rule_succeeding(self):
        """Make sure ``parse()`` fails and blames the
        rightward-progressing-most named Expression when an Expression isn't
        satisfied.

        Make sure ParseErrors have nice Unicode representations.

        """
        grammar = Grammar("""
bold_text = open_parens text close_parens
open_parens = "(("
text = ~"[a-zA-Z]+"
close_parens = "))"
""")
        text = '((fred!!'
        with self.assertRaises(ParseError) as caught:
            grammar.parse(text)
        error = caught.exception
        self.assertEqual(error.pos, 6)
        self.assertEqual(error.expr, grammar['close_parens'])
        self.assertEqual(error.text, text)
        self.assertEqual(str(error), "Rule 'close_parens' didn't match at '!!' (line 1, column 7).")

    def test_rewinding(self):
        """Make sure rewinding the stack and trying an alternative (which
        progresses farther) from a higher-level rule can blame an expression
        within the alternative on failure.

        There's no particular reason I suspect this wouldn't work, but it's a
        more real-world example than the no-alternative cases already tested.

        """
        grammar = Grammar("""
formatted_text = bold_text / weird_text
bold_text = open_parens text close_parens
weird_text = open_parens text "!!" bork
bork = "bork"
open_parens = "(("
text = ~"[a-zA-Z]+"
close_parens = "))"
""")
        text = '((fred!!'
        with self.assertRaises(ParseError) as caught:
            grammar.parse(text)
        error = caught.exception
        self.assertEqual(error.pos, 8)
        self.assertEqual(error.expr, grammar['bork'])
        self.assertEqual(error.text, text)

    def test_no_named_rule_succeeding(self):
        """Make sure ParseErrors have sane printable representations even if we
        never succeeded in matching any named expressions."""
        grammar = Grammar('''bork = "bork"''')
        with self.assertRaises(ParseError) as caught:
            grammar.parse('snork')
        error = caught.exception
        self.assertEqual(error.pos, 0)
        self.assertEqual(error.expr, grammar['bork'])
        self.assertEqual(error.text, 'snork')

    def test_parse_with_leftovers(self):
        """Make sure ``parse()`` reports where we started failing to match,
        even if a partial match was successful."""
        grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
        with self.assertRaises(IncompleteParseError) as caught:
            grammar.parse('chitty bangbang')
        self.assertEqual(str(
            caught.exception), "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12).")

    def test_favoring_named_rules(self):
        """Named rules should be used in error messages in favor of anonymous
        ones, even if those are rightward-progressing-more, and even if the
        failure starts at position 0."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        with self.assertRaises(ParseError) as caught:
            grammar.parse('burp')
        self.assertEqual(str(caught.exception), "Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1).")

    def test_line_and_column(self):
        """Make sure we got the line and column computation right."""
        grammar = Grammar(r"""
whee_lah = whee "\n" lah "\n"
whee = "whee"
lah = "lah"
""")
        with self.assertRaises(ParseError) as caught:
            grammar.parse('whee\nlahGOO')
        # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
        # didn't match". That's not the greatest. Fix that, then fix this.
        self.assertTrue(str(caught.exception).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
class RepresentationTests(TestCase):
    """Tests for str(), unicode(), and repr() of expressions"""

    def test_unicode_crash(self):
        """Stringifying a node that matched non-ASCII text must not crash."""
        tree = Grammar(r'string = ~r"\S+"u').parse('中文')
        str(tree)

    def test_unicode(self):
        """Smoke-test turning expressions back into rule text.

        The actual values get a somewhat more thorough check in
        ``GrammarTests.test_unicode``.

        """
        str(rule_grammar)

    def test_unicode_keep_parens(self):
        """Stringifying a grammar must keep the parentheses it needs."""
        cases = (
            # ZeroOrMore
            ('foo = "bar" ("baz" "eggs")* "spam"',
             "foo = 'bar' ('baz' 'eggs')* 'spam'"),

            # Quantifiers
            ('foo = "bar" ("baz" "eggs"){2,4} "spam"',
             "foo = 'bar' ('baz' 'eggs'){2,4} 'spam'"),
            ('foo = "bar" ("baz" "eggs"){2,} "spam"',
             "foo = 'bar' ('baz' 'eggs'){2,} 'spam'"),
            ('foo = "bar" ("baz" "eggs"){1,} "spam"',
             "foo = 'bar' ('baz' 'eggs')+ 'spam'"),
            ('foo = "bar" ("baz" "eggs"){,4} "spam"',
             "foo = 'bar' ('baz' 'eggs'){,4} 'spam'"),
            ('foo = "bar" ("baz" "eggs"){0,1} "spam"',
             "foo = 'bar' ('baz' 'eggs')? 'spam'"),
            ('foo = "bar" ("baz" "eggs"){0,} "spam"',
             "foo = 'bar' ('baz' 'eggs')* 'spam'"),

            # OneOf
            ('foo = "bar" ("baz" / "eggs") "spam"',
             "foo = 'bar' ('baz' / 'eggs') 'spam'"),

            # Lookahead
            ('foo = "bar" &("baz" "eggs") "spam"',
             "foo = 'bar' &('baz' 'eggs') 'spam'"),

            # Multiple sequences
            ('foo = ("bar" "baz") / ("baff" "bam")',
             "foo = ('bar' 'baz') / ('baff' 'bam')"),
        )
        # Check the cases in order; the first mismatch stops the test.
        for rule_text, rendered in cases:
            self.assertEqual(str(Grammar(rule_text)), rendered)

    def test_unicode_surrounding_parens(self):
        """
        Parentheses wrapping a rule's whole right-hand side are unnecessary
        and should be dropped when the grammar is stringified.

        """
        rendered = str(Grammar('foo = ("foo" ("bar" "baz"))'))
        self.assertEqual(rendered, "foo = 'foo' ('bar' 'baz')")
class SlotsTests(TestCase):
    """Tests to do with __slots__"""

    def test_subclassing(self):
        """A subclass of a class lacking ``__slots__`` can still add slots.

        The language docs say this shouldn't work:

            When inheriting from a class without __slots__, the __dict__
            attribute of that class will always be accessible, so a __slots__
            definition in the subclass is meaningless.

        It does anyway.

        """
        class Smoo(Quantifier):
            __slots__ = ['smoo']

            def __init__(self):
                self.smoo = 'smoo'

        instance = Smoo()
        # There is a __dict__, but the smoo attribute did not land in it...
        self.assertEqual(instance.__dict__, {})
        # ...because it was stored in the slot instead.
        self.assertEqual(instance.smoo, 'smoo')