95 lines
3.1 KiB
Python
95 lines
3.1 KiB
Python
"""Benchmarks for Parsimonious
|
|
|
|
Run these with ``python parsimonious/tests/benchmarks.py``. They don't run during
normal test runs because they're not tests--they don't assert anything. Also,
they're a bit slow.

These differ from the ones in test_benchmarks in that these are meant to be
compared from revision to revision of Parsimonious to make sure we're not
getting slower. test_benchmarks simply makes sure our choices among
implementation alternatives remain valid.
|
|
|
|
"""
|
|
from __future__ import print_function
|
|
import gc
|
|
from timeit import repeat
|
|
|
|
from parsimonious.grammar import Grammar
|
|
|
|
|
|
def test_not_really_json_parsing():
    """Time a parse of a synthetic JSON document and print the throughput.

    This is a speed baseline intended only for comparing one revision of
    Parsimonious against another. JSON is not claimed to be a representative
    or revealing grammar, and the grammar used here is naive, unoptimized,
    and incorrect, so the figures are no basis for comparison with other
    parsers.

    Nothing is asserted and nothing is returned; the result is printed.

    """
    one_father = """{
        "id" : 1,
        "married" : true,
        "name" : "Larry Lopez",
        "sons" : null,
        "daughters" : [
          {
            "age" : 26,
            "name" : "Sandra"
            },
          {
            "age" : 25,
            "name" : "Margaret"
            },
          {
            "age" : 6,
            "name" : "Mary"
            }
          ]
        }"""

    # Sixty copies of the record inside one enclosing object make up the
    # document that gets parsed.
    all_fathers = ','.join([one_father] * 60)
    document = '{"fathers" : [' + all_fathers + ']}'

    json_grammar = Grammar(r"""
        value = space (string / number / object / array / true_false_null)
                space

        object = "{" members "}"
        members = (pair ("," pair)*)?
        pair = string ":" value
        array = "[" elements "]"
        elements = (value ("," value)*)?
        true_false_null = "true" / "false" / "null"

        string = space "\"" chars "\"" space
        chars = ~"[^\"]*" # TODO implement the real thing
        number = (int frac exp) / (int exp) / (int frac) / int
        int = "-"? ((digit1to9 digits) / digit)
        frac = "." digits
        exp = e digits
        digits = digit+
        e = "e+" / "e-" / "e" / "E+" / "E-" / "E"

        digit1to9 = ~"[1-9]"
        digit = ~"[0-9]"
        space = ~"\s*"
        """)

    def parse_document():
        """Parse the whole document once; this is the statement being timed."""
        return json_grammar.parse(document)

    # With these settings, results seem to stay within 5% of the difference
    # between min and max. Timing several single-parse runs and keeping the
    # fastest has been more consistent than raising the per-run count and
    # hoping averaging smooths out the outliers.
    parses_per_run = 1
    runs = 5

    # timeit switches the garbage collector off while timing; re-enabling it
    # in the setup step means the measurement includes how we treat the GC.
    timings = repeat(parse_document,
                     gc.enable,
                     repeat=runs,
                     number=parses_per_run)
    fastest_run = min(timings)
    seconds_per_parse = fastest_run / parses_per_run

    kilobytes = len(document) / 1024.0
    kilobytes_per_second = kilobytes / seconds_per_parse
    print('Took %.3fs to parse %.1fKB: %.0fKB/s.' % (seconds_per_parse,
                                                     kilobytes,
                                                     kilobytes_per_second))
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: run the benchmark once and let it print its result.
    test_not_really_json_parsing()