From c9e20291e2f4cfb5d8f1ac630530cc52d3e64256 Mon Sep 17 00:00:00 2001 From: Julian Berman Date: Fri, 19 Jan 2024 15:14:47 -0500 Subject: [PATCH 1/2] perf -> bench --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2bead0e86..2c611278b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -222,7 +222,7 @@ def docs_style(session): for each in BENCHMARKS.glob("[!_]*.py") ], ) -def perf(session, benchmark): +def bench(session, benchmark): """ Run a performance benchmark. """ From aa53fd004b172d811e26b8f61926135e4481b2a5 Mon Sep 17 00:00:00 2001 From: Julian Berman Date: Fri, 19 Jan 2024 15:45:22 -0500 Subject: [PATCH 2/2] Slightly speed up the contains keyword. Avoids some unnecessary repeated validator re-creation while validating arrays. In a quick benchmark (added here) and on my local machine (an M2 Mini) this goes from: ``` baseline: Mean +- std dev: 3.55 us +- 0.04 us beginning: Mean +- std dev: 3.37 ms +- 0.02 ms middle: Mean +- std dev: 3.37 ms +- 0.03 ms end: Mean +- std dev: 3.36 ms +- 0.02 ms invalid: Mean +- std dev: 3.40 ms +- 0.02 ms ``` to: ``` baseline: Mean +- std dev: 4.27 us +- 0.05 us beginning: Mean +- std dev: 2.65 ms +- 0.01 ms middle: Mean +- std dev: 2.66 ms +- 0.02 ms end: Mean +- std dev: 2.67 ms +- 0.02 ms invalid: Mean +- std dev: 2.70 ms +- 0.02 ms ``` on the included example (synthetic of course, but not ridiculously so). (The lack of difference in timing regardless of how far into the array we get before finding a match seems interesting, but investigating it probably requires a benchmark with a more interesting subschema to match on.) 
--- CHANGELOG.rst | 5 +++++ jsonschema/_keywords.py | 4 +++- jsonschema/benchmarks/contains.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 jsonschema/benchmarks/contains.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 592c941b8..b91e8827b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,8 @@ +v4.21.1 +======= + +* Slightly speed up the ``contains`` keyword by removing some unnecessary validator (re-)creation. + v4.21.0 ======= diff --git a/jsonschema/_keywords.py b/jsonschema/_keywords.py index 69d7580b1..f30f95419 100644 --- a/jsonschema/_keywords.py +++ b/jsonschema/_keywords.py @@ -95,8 +95,10 @@ def contains(validator, contains, instance, schema): min_contains = schema.get("minContains", 1) max_contains = schema.get("maxContains", len(instance)) + contains_validator = validator.evolve(schema=contains) + for each in instance: - if validator.evolve(schema=contains).is_valid(each): + if contains_validator.is_valid(each): matches += 1 if matches > max_contains: yield ValidationError( diff --git a/jsonschema/benchmarks/contains.py b/jsonschema/benchmarks/contains.py new file mode 100644 index 000000000..739cd044c --- /dev/null +++ b/jsonschema/benchmarks/contains.py @@ -0,0 +1,28 @@ +""" +A benchmark for validation of the `contains` keyword. 
+""" + +from pyperf import Runner + +from jsonschema import Draft202012Validator + +schema = { + "type": "array", + "contains": {"const": 37}, +} +validator = Draft202012Validator(schema) + +size = 1000 +beginning = [37] + [0] * (size - 1) +middle = [0] * (size // 2) + [37] + [0] * (size // 2) +end = [0] * (size - 1) + [37] +invalid = [0] * size + + +if __name__ == "__main__": + runner = Runner() + runner.bench_func("baseline", lambda: validator.is_valid([])) + runner.bench_func("beginning", lambda: validator.is_valid(beginning)) + runner.bench_func("middle", lambda: validator.is_valid(middle)) + runner.bench_func("end", lambda: validator.is_valid(end)) + runner.bench_func("invalid", lambda: validator.is_valid(invalid))