336 lines
14 KiB
Python
336 lines
14 KiB
Python
# coding=utf-8
|
|
from unittest import TestCase
|
|
|
|
from parsimonious.exceptions import ParseError, IncompleteParseError
|
|
from parsimonious.expressions import (Literal, Regex, Sequence, OneOf, Not,
|
|
Quantifier, Optional, ZeroOrMore, OneOrMore, Expression)
|
|
from parsimonious.grammar import Grammar, rule_grammar
|
|
from parsimonious.nodes import Node
|
|
|
|
|
|
class LengthTests(TestCase):
|
|
"""Tests for returning the right lengths
|
|
|
|
I wrote these before parse tree generation was implemented. They're
|
|
partially redundant with TreeTests.
|
|
|
|
"""
|
|
|
|
def len_eq(self, node, length):
|
|
"""Return whether the match lengths of 2 nodes are equal.
|
|
|
|
Makes tests shorter and lets them omit positional stuff they don't care
|
|
about.
|
|
|
|
"""
|
|
node_length = None if node is None else node.end - node.start
|
|
assert node_length == length
|
|
|
|
def test_regex(self):
|
|
self.len_eq(Literal('hello').match('ehello', 1), 5) # simple
|
|
self.len_eq(Regex('hello*').match('hellooo'), 7) # *
|
|
self.assertRaises(ParseError, Regex('hello*').match, 'goodbye') # no match
|
|
self.len_eq(Regex('hello', ignore_case=True).match('HELLO'), 5)
|
|
|
|
def test_sequence(self):
|
|
self.len_eq(Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo')).match('hiiiilobingo1234'), 12) # succeed
|
|
self.assertRaises(ParseError, Sequence(Regex('hi*'), Literal('lo'),
|
|
Regex('.ingo')).match, 'hiiiilobing') # don't
|
|
self.len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5) # non-0 pos
|
|
|
|
def test_one_of(self):
|
|
self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('aaa'), 3) # first alternative
|
|
self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('bbaaa'), 2) # second
|
|
self.assertRaises(ParseError, OneOf(Literal('aaa'), Literal('bb')).match, 'aa') # no match
|
|
|
|
def test_not(self):
|
|
self.len_eq(Not(Regex('.')).match(''), 0) # match
|
|
self.assertRaises(ParseError, Not(Regex('.')).match, 'Hi') # don't
|
|
|
|
def test_optional(self):
|
|
self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('b'), 1) # contained expr fails
|
|
self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('ab'), 2) # contained expr succeeds
|
|
self.len_eq(Optional(Literal('a')).match('aa'), 1)
|
|
self.len_eq(Optional(Literal('a')).match('bb'), 0)
|
|
|
|
def test_zero_or_more(self):
|
|
self.len_eq(ZeroOrMore(Literal('b')).match(''), 0) # zero
|
|
self.len_eq(ZeroOrMore(Literal('b')).match('bbb'), 3) # more
|
|
|
|
self.len_eq(Regex('^').match(''), 0) # Validate the next test.
|
|
|
|
# Try to make it loop infinitely using a zero-length contained expression:
|
|
self.len_eq(ZeroOrMore(Regex('^')).match(''), 0)
|
|
|
|
def test_one_or_more(self):
|
|
self.len_eq(OneOrMore(Literal('b')).match('b'), 1) # one
|
|
self.len_eq(OneOrMore(Literal('b')).match('bbb'), 3) # more
|
|
self.len_eq(OneOrMore(Literal('b'), min=3).match('bbb'), 3) # with custom min; success
|
|
self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbb'), 4) # with custom min and max; success
|
|
self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbbbb'), 5) # with custom min and max; success
|
|
self.assertRaises(ParseError, OneOrMore(Literal('b'), min=3).match, 'bb') # with custom min; failure
|
|
self.assertRaises(ParseError, Quantifier(Literal('b'), min=3, max=5).match, 'bb') # with custom min and max; failure
|
|
self.len_eq(OneOrMore(Regex('^')).match('bb'), 0) # attempt infinite loop
|
|
|
|
|
|
class TreeTests(TestCase):
|
|
"""Tests for building the right trees
|
|
|
|
We have only to test successes here; failures (None-returning cases) are
|
|
covered above.
|
|
|
|
"""
|
|
|
|
def test_simple_node(self):
|
|
"""Test that leaf expressions like ``Literal`` make the right nodes."""
|
|
h = Literal('hello', name='greeting')
|
|
self.assertEqual(h.match('hello'), Node(h, 'hello', 0, 5))
|
|
|
|
def test_sequence_nodes(self):
|
|
"""Assert that ``Sequence`` produces nodes with the right children."""
|
|
s = Sequence(Literal('heigh', name='greeting1'),
|
|
Literal('ho', name='greeting2'), name='dwarf')
|
|
text = 'heighho'
|
|
self.assertEqual(s.match(text), Node(s, text, 0, 7, children=[Node(s.members[0], text, 0, 5),
|
|
Node(s.members[1], text, 5, 7)]))
|
|
|
|
def test_one_of(self):
|
|
"""``OneOf`` should return its own node, wrapping the child that succeeds."""
|
|
o = OneOf(Literal('a', name='lit'), name='one_of')
|
|
text = 'aa'
|
|
self.assertEqual(o.match(text), Node(o, text, 0, 1, children=[
|
|
Node(o.members[0], text, 0, 1)]))
|
|
|
|
def test_optional(self):
|
|
"""``Optional`` should return its own node wrapping the succeeded child."""
|
|
expr = Optional(Literal('a', name='lit'), name='opt')
|
|
|
|
text = 'a'
|
|
self.assertEqual(expr.match(text), Node(expr, text, 0, 1, children=[
|
|
Node(expr.members[0], text, 0, 1)]))
|
|
|
|
# Test failure of the Literal inside the Optional; the
|
|
# LengthTests.test_optional is ambiguous for that.
|
|
text = ''
|
|
self.assertEqual(expr.match(text), Node(expr, text, 0, 0))
|
|
|
|
def test_zero_or_more_zero(self):
|
|
"""Test the 0 case of ``ZeroOrMore``; it should still return a node."""
|
|
expr = ZeroOrMore(Literal('a'), name='zero')
|
|
text = ''
|
|
self.assertEqual(expr.match(text), Node(expr, text, 0, 0))
|
|
|
|
def test_one_or_more_one(self):
|
|
"""Test the 1 case of ``OneOrMore``; it should return a node with a child."""
|
|
expr = OneOrMore(Literal('a', name='lit'), name='one')
|
|
text = 'a'
|
|
self.assertEqual(expr.match(text), Node(expr, text, 0, 1, children=[
|
|
Node(expr.members[0], text, 0, 1)]))
|
|
|
|
# Things added since Grammar got implemented are covered in integration
|
|
# tests in test_grammar.
|
|
|
|
|
|
class ParseTests(TestCase):
|
|
"""Tests for the ``parse()`` method"""
|
|
|
|
def test_parse_success(self):
|
|
"""Make sure ``parse()`` returns the tree on success.
|
|
|
|
There's not much more than that to test that we haven't already vetted
|
|
above.
|
|
|
|
"""
|
|
expr = OneOrMore(Literal('a', name='lit'), name='more')
|
|
text = 'aa'
|
|
self.assertEqual(expr.parse(text), Node(expr, text, 0, 2, children=[
|
|
Node(expr.members[0], text, 0, 1),
|
|
Node(expr.members[0], text, 1, 2)]))
|
|
|
|
|
|
class ErrorReportingTests(TestCase):
|
|
"""Tests for reporting parse errors"""
|
|
|
|
def test_inner_rule_succeeding(self):
|
|
"""Make sure ``parse()`` fails and blames the
|
|
rightward-progressing-most named Expression when an Expression isn't
|
|
satisfied.
|
|
|
|
Make sure ParseErrors have nice Unicode representations.
|
|
|
|
"""
|
|
grammar = Grammar("""
|
|
bold_text = open_parens text close_parens
|
|
open_parens = "(("
|
|
text = ~"[a-zA-Z]+"
|
|
close_parens = "))"
|
|
""")
|
|
text = '((fred!!'
|
|
try:
|
|
grammar.parse(text)
|
|
except ParseError as error:
|
|
self.assertEqual(error.pos, 6)
|
|
self.assertEqual(error.expr, grammar['close_parens'])
|
|
self.assertEqual(error.text, text)
|
|
self.assertEqual(str(error), "Rule 'close_parens' didn't match at '!!' (line 1, column 7).")
|
|
|
|
def test_rewinding(self):
|
|
"""Make sure rewinding the stack and trying an alternative (which
|
|
progresses farther) from a higher-level rule can blame an expression
|
|
within the alternative on failure.
|
|
|
|
There's no particular reason I suspect this wouldn't work, but it's a
|
|
more real-world example than the no-alternative cases already tested.
|
|
|
|
"""
|
|
grammar = Grammar("""
|
|
formatted_text = bold_text / weird_text
|
|
bold_text = open_parens text close_parens
|
|
weird_text = open_parens text "!!" bork
|
|
bork = "bork"
|
|
open_parens = "(("
|
|
text = ~"[a-zA-Z]+"
|
|
close_parens = "))"
|
|
""")
|
|
text = '((fred!!'
|
|
try:
|
|
grammar.parse(text)
|
|
except ParseError as error:
|
|
self.assertEqual(error.pos, 8)
|
|
self.assertEqual(error.expr, grammar['bork'])
|
|
self.assertEqual(error.text, text)
|
|
|
|
def test_no_named_rule_succeeding(self):
|
|
"""Make sure ParseErrors have sane printable representations even if we
|
|
never succeeded in matching any named expressions."""
|
|
grammar = Grammar('''bork = "bork"''')
|
|
try:
|
|
grammar.parse('snork')
|
|
except ParseError as error:
|
|
self.assertEqual(error.pos, 0)
|
|
self.assertEqual(error.expr, grammar['bork'])
|
|
self.assertEqual(error.text, 'snork')
|
|
|
|
def test_parse_with_leftovers(self):
|
|
"""Make sure ``parse()`` reports where we started failing to match,
|
|
even if a partial match was successful."""
|
|
grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
|
|
try:
|
|
grammar.parse('chitty bangbang')
|
|
except IncompleteParseError as error:
|
|
self.assertEqual(str(
|
|
error), "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12).")
|
|
|
|
def test_favoring_named_rules(self):
|
|
"""Named rules should be used in error messages in favor of anonymous
|
|
ones, even if those are rightward-progressing-more, and even if the
|
|
failure starts at position 0."""
|
|
grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
|
|
try:
|
|
grammar.parse('burp')
|
|
except ParseError as error:
|
|
self.assertEqual(str(error), "Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1).")
|
|
|
|
def test_line_and_column(self):
|
|
"""Make sure we got the line and column computation right."""
|
|
grammar = Grammar(r"""
|
|
whee_lah = whee "\n" lah "\n"
|
|
whee = "whee"
|
|
lah = "lah"
|
|
""")
|
|
try:
|
|
grammar.parse('whee\nlahGOO')
|
|
except ParseError as error:
|
|
# TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
|
|
# didn't match". That's not the greatest. Fix that, then fix this.
|
|
self.assertTrue(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
|
|
|
|
|
|
class RepresentationTests(TestCase):
|
|
"""Tests for str(), unicode(), and repr() of expressions"""
|
|
|
|
def test_unicode_crash(self):
|
|
"""Make sure matched unicode strings don't crash ``__str__``."""
|
|
grammar = Grammar(r'string = ~r"\S+"u')
|
|
str(grammar.parse('中文'))
|
|
|
|
def test_unicode(self):
|
|
"""Smoke-test the conversion of expressions to bits of rules.
|
|
|
|
A slightly more comprehensive test of the actual values is in
|
|
``GrammarTests.test_unicode``.
|
|
|
|
"""
|
|
str(rule_grammar)
|
|
|
|
def test_unicode_keep_parens(self):
|
|
"""Make sure converting an expression to unicode doesn't strip
|
|
parenthesis.
|
|
|
|
"""
|
|
# ZeroOrMore
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs")* "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs')* 'spam'")
|
|
|
|
# Quantifiers
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){2,4} "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs'){2,4} 'spam'")
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){2,} "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs'){2,} 'spam'")
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){1,} "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs')+ 'spam'")
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){,4} "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs'){,4} 'spam'")
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){0,1} "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs')? 'spam'")
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){0,} "spam"')),
|
|
"foo = 'bar' ('baz' 'eggs')* 'spam'")
|
|
|
|
# OneOf
|
|
self.assertEqual(str(Grammar('foo = "bar" ("baz" / "eggs") "spam"')),
|
|
"foo = 'bar' ('baz' / 'eggs') 'spam'")
|
|
|
|
# Lookahead
|
|
self.assertEqual(str(Grammar('foo = "bar" &("baz" "eggs") "spam"')),
|
|
"foo = 'bar' &('baz' 'eggs') 'spam'")
|
|
|
|
# Multiple sequences
|
|
self.assertEqual(str(Grammar('foo = ("bar" "baz") / ("baff" "bam")')),
|
|
"foo = ('bar' 'baz') / ('baff' 'bam')")
|
|
|
|
def test_unicode_surrounding_parens(self):
|
|
"""
|
|
Make sure there are no surrounding parens around the entire
|
|
right-hand side of an expression (as they're unnecessary).
|
|
|
|
"""
|
|
self.assertEqual(str(Grammar('foo = ("foo" ("bar" "baz"))')),
|
|
"foo = 'foo' ('bar' 'baz')")
|
|
|
|
|
|
class SlotsTests(TestCase):
|
|
"""Tests to do with __slots__"""
|
|
|
|
def test_subclassing(self):
|
|
"""Make sure a subclass of a __slots__-less class can introduce new
|
|
slots itself.
|
|
|
|
This isn't supposed to work, according to the language docs:
|
|
|
|
When inheriting from a class without __slots__, the __dict__
|
|
attribute of that class will always be accessible, so a __slots__
|
|
definition in the subclass is meaningless.
|
|
|
|
But it does.
|
|
|
|
"""
|
|
class Smoo(Quantifier):
|
|
__slots__ = ['smoo']
|
|
|
|
def __init__(self):
|
|
self.smoo = 'smoo'
|
|
|
|
smoo = Smoo()
|
|
self.assertEqual(smoo.__dict__, {}) # has a __dict__ but with no smoo in it
|
|
self.assertEqual(smoo.smoo, 'smoo') # The smoo attr ended up in a slot.
|