# CCR/.venv/lib/python3.12/site-packages/parsimonious/tests/benchmarks.py

# 95 lines
# 3.1 KiB
# Python

"""Benchmarks for Parsimonious
Run these with ``python parsimonious/tests/benchmarks.py``. They don't run during
normal test runs because they're not tests--they don't assert anything. Also,
they're a bit slow.
These differ from the ones in test_benchmarks in that these are meant to be
compared from revision to revision of Parsimonious to make sure we're not
getting slower. test_benchmarks simply makes sure our choices among
implementation alternatives remain valid.
"""
from __future__ import print_function
import gc
from timeit import repeat
from parsimonious.grammar import Grammar
def test_not_really_json_parsing():
    """Time one Parsimonious parse of a synthetic JSON document and print it.

    The document is a single "father" record repeated 60 times inside a
    ``fathers`` array. JSON is not claimed to be a representative workload,
    and the grammar below is naive, unoptimized and incorrect, so the printed
    figure is only useful for comparing one revision of Parsimonious against
    another -- never against other parsers.

    Nothing is asserted or returned; the result is a single line on stdout.
    """
    # One parse per sample, five samples, report the fastest. Many short
    # single-parse samples with min() gave steadier numbers than raising the
    # per-sample count and averaging the outliers away.
    parses_per_sample = 1
    sample_count = 5

    json_grammar = Grammar(r"""
value = space (string / number / object / array / true_false_null)
space
object = "{" members "}"
members = (pair ("," pair)*)?
pair = string ":" value
array = "[" elements "]"
elements = (value ("," value)*)?
true_false_null = "true" / "false" / "null"
string = space "\"" chars "\"" space
chars = ~"[^\"]*" # TODO implement the real thing
number = (int frac exp) / (int exp) / (int frac) / int
int = "-"? ((digit1to9 digits) / digit)
frac = "." digits
exp = e digits
digits = digit+
e = "e+" / "e-" / "e" / "E+" / "E-" / "E"
digit1to9 = ~"[1-9]"
digit = ~"[0-9]"
space = ~"\s*"
""")

    record = """{
"id" : 1,
"married" : true,
"name" : "Larry Lopez",
"sons" : null,
"daughters" : [
{
"age" : 26,
"name" : "Sandra"
},
{
"age" : 25,
"name" : "Margaret"
},
{
"age" : 6,
"name" : "Mary"
}
]
}"""
    joined_records = ','.join(record for _ in range(60))
    document = ''.join(('{"fathers" : [', joined_records, ']}'))

    def parse_document():
        return json_grammar.parse(document)

    # timeit turns the garbage collector off while timing; using gc.enable
    # as the setup step switches it back on so that the way the parser
    # treats the GC is part of what gets measured.
    samples = repeat(parse_document,
                     gc.enable,
                     repeat=sample_count,
                     number=parses_per_sample)
    fastest_sample = min(samples)

    seconds_per_parse = fastest_sample / parses_per_sample
    size_kb = len(document) / 1024.0
    throughput = size_kb / seconds_per_parse
    print('Took %.3fs to parse %.1fKB: %.0fKB/s.' % (seconds_per_parse,
                                                     size_kb,
                                                     throughput))
if __name__ == "__main__":
    # Executed directly as a script: run the single benchmark, which prints
    # its timing line to stdout.
    test_not_really_json_parsing()