From 20ad6a4539d1b9d429d08e8f9e3977c5c66ad7a3 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 12 Feb 2025 16:27:11 +0100 Subject: [PATCH 01/17] Added microbenchmark for relative performance factor --- tests/test_benchmark.py | 85 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 tests/test_benchmark.py diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 0000000..9b10714 --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,85 @@ +import numpy as np +from time import perf_counter +import pytest + +@pytest.mark.skip +def get_performance_factor(repeats=3): + """Run microbenchmarks to indicate how much faster / slower this system is compared to the reference.""" + + def cpu_1(): + """Matrix multiplication""" + A = np.random.random((1000, 1000)) + B = np.random.random((1000, 1000)) + return np.dot(A, B) + + def cpu_2(): + """Element-wise arithmetic""" + A = np.random.random(10**6) + B = np.random.random(10**6) + return A + B + + def cpu_3(): + """Addition""" + N = 10**6 + return [i + i for i in range(N)] + + def cpu_4(): + """Multiplication""" + N = 10**6 + return [i * i for i in range(N)] + + def cpu_5(): + """Division""" + N = 10**6 + return [i / i for i in range(1, N+1)] + + def mem_1(): + """Array copying""" + A = np.random.random(10**6) + return np.copy(A) + + def mem_2(): + """Array slicing""" + A = np.random.random(10**6) + return A[::2] + + def mem_3(): + """Dictionary lookup""" + N = 10**3 + keys = list(range(N)) + values = list(range(N)) + lst = list(zip(keys, values)) + return [next((v for k, v in lst if k == i), None) for i in range(N)] + + def cache_1(): + """Sequential array sum""" + A = np.random.random(10**6) + return np.sum(A) + + def cache_2(): + """Strided array sum""" + A = np.random.random(10**6) + return np.sum(A[::2]) + + # run the benchmarks + benchmarks = [cpu_1, cpu_2, cpu_3, cpu_4, cpu_5, mem_1, mem_2, mem_3, cache_1, cache_2] + raw_data = [list() for _ in 
range(repeats)] + for i in range(repeats): + for f in benchmarks: + start = perf_counter() + f() + duration = perf_counter() - start + raw_data[i].append(duration) + + # calculate statistics + benchmark_data = np.array(raw_data) + benchmark_mean = benchmark_data.mean(axis=0) + relative_std = (benchmark_data.std(axis=0) / np.abs(benchmark_mean)) + mean_relative_std = max(np.mean(relative_std), 0.025) + + # calculate the performance factor relative to the reference + reference_benchmark_mean = np.array([0.03569697, 0.04690351, 0.1586863, 0.13609187, 0.13637274, 0.01238605, 0.01072952, 0.07484022, 0.01054054, 0.01030138]) + performance_factor: float = np.mean(benchmark_mean / reference_benchmark_mean) + return performance_factor, mean_relative_std + +performance_factor, mean_relative_std = get_performance_factor() From 2cf9f5e668a60119c09f6c66d9dac6dedd76467f Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 12 Feb 2025 16:28:00 +0100 Subject: [PATCH 02/17] Added pytest-benchmark, updated dependencies --- poetry.lock | 155 ++++++++++++++++++++++++++++++------------------- pyproject.toml | 1 + 2 files changed, 97 insertions(+), 59 deletions(-) diff --git a/poetry.lock b/poetry.lock index f4d5a42..1332c01 100644 --- a/poetry.lock +++ b/poetry.lock @@ -181,68 +181,74 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] [[package]] name = "coverage" -version = "7.6.11" +version = "7.6.12" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.6.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eafea49da254a8289bed3fab960f808b322eda5577cb17a3733014928bbfbebd"}, - {file = "coverage-7.6.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a3f7cbbcb4ad95067a6525f83a6fc78d9cbc1e70f8abaeeaeaa72ef34f48fc3"}, - {file = "coverage-7.6.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:de6b079b39246a7da9a40cfa62d5766bd52b4b7a88cf5a82ec4c45bf6e152306"}, - {file = "coverage-7.6.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:60d4ad09dfc8c36c4910685faafcb8044c84e4dae302e86c585b3e2e7778726c"}, - {file = "coverage-7.6.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e433b6e3a834a43dae2889adc125f3fa4c66668df420d8e49bc4ee817dd7a70"}, - {file = "coverage-7.6.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ac5d92e2cc121a13270697e4cb37e1eb4511ac01d23fe1b6c097facc3b46489e"}, - {file = "coverage-7.6.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5128f3ba694c0a1bde55fc480090392c336236c3e1a10dad40dc1ab17c7675ff"}, - {file = "coverage-7.6.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:397489c611b76302dfa1d9ea079e138dddc4af80fc6819d5f5119ec8ca6c0e47"}, - {file = "coverage-7.6.11-cp310-cp310-win32.whl", hash = "sha256:c7719a5e1dc93883a6b319bc0374ecd46fb6091ed659f3fbe281ab991634b9b0"}, - {file = "coverage-7.6.11-cp310-cp310-win_amd64.whl", hash = "sha256:c27df03730059118b8a923cfc8b84b7e9976742560af528242f201880879c1da"}, - {file = "coverage-7.6.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:532fe139691af134aa8b54ed60dd3c806aa81312d93693bd2883c7b61592c840"}, - {file = "coverage-7.6.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0b0f272901a5172090c0802053fbc503cdc3fa2612720d2669a98a7384a7bec"}, - {file = "coverage-7.6.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4bda710139ea646890d1c000feb533caff86904a0e0638f85e967c28cb8eec50"}, - {file = "coverage-7.6.11-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a165b09e7d5f685bf659063334a9a7b1a2d57b531753d3e04bd442b3cfe5845b"}, - {file = "coverage-7.6.11-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:ff136607689c1c87f43d24203b6d2055b42030f352d5176f9c8b204d4235ef27"}, - {file = "coverage-7.6.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:050172741de03525290e67f0161ae5f7f387c88fca50d47fceb4724ceaa591d2"}, - {file = "coverage-7.6.11-cp311-cp311-win32.whl", hash = "sha256:27700d859be68e4fb2e7bf774cf49933dcac6f81a9bc4c13bd41735b8d26a53b"}, - {file = "coverage-7.6.11-cp311-cp311-win_amd64.whl", hash = "sha256:cd4839813b09ab1dd1be1bbc74f9a7787615f931f83952b6a9af1b2d3f708bf7"}, - {file = "coverage-7.6.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:dbb1a822fd858d9853333a7c95d4e70dde9a79e65893138ce32c2ec6457d7a36"}, - {file = "coverage-7.6.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:61c834cbb80946d6ebfddd9b393a4c46bec92fcc0fa069321fcb8049117f76ea"}, - {file = "coverage-7.6.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a46d56e99a31d858d6912d31ffa4ede6a325c86af13139539beefca10a1234ce"}, - {file = "coverage-7.6.11-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b48db06f53d1864fea6dbd855e6d51d41c0f06c212c3004511c0bdc6847b297"}, - {file = "coverage-7.6.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b6ff5be3b1853e0862da9d349fe87f869f68e63a25f7c37ce1130b321140f963"}, - {file = "coverage-7.6.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be05bde21d5e6eefbc3a6de6b9bee2b47894b8945342e8663192809c4d1f08ce"}, - {file = "coverage-7.6.11-cp312-cp312-win32.whl", hash = "sha256:e3b746fa0ffc5b6b8856529de487da8b9aeb4fb394bb58de6502ef45f3434f12"}, - {file = "coverage-7.6.11-cp312-cp312-win_amd64.whl", hash = "sha256:ac476e6d0128fb7919b3fae726de72b28b5c9644cb4b579e4a523d693187c551"}, - {file = "coverage-7.6.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c86f4c7a6d1a54a24d804d9684d96e36a62d3ef7c0d7745ae2ea39e3e0293251"}, - {file = "coverage-7.6.11-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:7eb0504bb307401fd08bc5163a351df301438b3beb88a4fa044681295bbefc67"}, - {file = "coverage-7.6.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca95d40900cf614e07f00cee8c2fad0371df03ca4d7a80161d84be2ec132b7a4"}, - {file = "coverage-7.6.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db4b1a69976b1b02acda15937538a1d3fe10b185f9d99920b17a740a0a102e06"}, - {file = "coverage-7.6.11-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cf96beb05d004e4c51cd846fcdf9eee9eb2681518524b66b2e7610507944c2f"}, - {file = "coverage-7.6.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:08e5fb93576a6b054d3d326242af5ef93daaac9bb52bc25f12ccbc3fa94227cd"}, - {file = "coverage-7.6.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:25575cd5a7d2acc46b42711e8aff826027c0e4f80fb38028a74f31ac22aae69d"}, - {file = "coverage-7.6.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8fa4fffd90ee92f62ff7404b4801b59e8ea8502e19c9bf2d3241ce745b52926c"}, - {file = "coverage-7.6.11-cp313-cp313-win32.whl", hash = "sha256:0d03c9452d9d1ccfe5d3a5df0427705022a49b356ac212d529762eaea5ef97b4"}, - {file = "coverage-7.6.11-cp313-cp313-win_amd64.whl", hash = "sha256:fd2fffc8ce8692ce540103dff26279d2af22d424516ddebe2d7e4d6dbb3816b2"}, - {file = "coverage-7.6.11-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:5e7ac966ab110bd94ee844f2643f196d78fde1cd2450399116d3efdd706e19f5"}, - {file = "coverage-7.6.11-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ba27a0375c5ef4d2a7712f829265102decd5ff78b96d342ac2fa555742c4f4f"}, - {file = "coverage-7.6.11-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2778be4f574b39ec9dcd9e5e13644f770351ee0990a0ecd27e364aba95af89b"}, - {file = "coverage-7.6.11-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:5edc16712187139ab635a2e644cc41fc239bc6d245b16124045743130455c652"}, - {file = "coverage-7.6.11-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6ff122a0a10a30121d9f0cb3fbd03a6fe05861e4ec47adb9f25e9245aabc19"}, - {file = "coverage-7.6.11-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ff562952f15eff27247a4c4b03e45ce8a82e3fb197de6a7c54080f9d4ba07845"}, - {file = "coverage-7.6.11-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4f21e3617f48d683f30cf2a6c8b739c838e600cb1454fe6b2eb486ac2bce8fbd"}, - {file = "coverage-7.6.11-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6d60577673ba48d8ae8e362e61fd4ad1a640293ffe8991d11c86f195479100b7"}, - {file = "coverage-7.6.11-cp313-cp313t-win32.whl", hash = "sha256:13100f98497086b359bf56fc035a762c674de8ef526daa389ac8932cb9bff1e0"}, - {file = "coverage-7.6.11-cp313-cp313t-win_amd64.whl", hash = "sha256:2c81e53782043b323bd34c7de711ed9b4673414eb517eaf35af92185b873839c"}, - {file = "coverage-7.6.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ff52b4e2ac0080c96e506819586c4b16cdbf46724bda90d308a7330a73cc8521"}, - {file = "coverage-7.6.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f4679fcc9eb9004fdd1b00231ef1ec7167168071bebc4d66327e28c1979b4449"}, - {file = "coverage-7.6.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90de4e9ca4489e823138bd13098af9ac8028cc029f33f60098b5c08c675c7bda"}, - {file = "coverage-7.6.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c96a142057d83ee993eaf71629ca3fb952cda8afa9a70af4132950c2bd3deb9"}, - {file = "coverage-7.6.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:476f29a258b9cd153f2be5bf5f119d670d2806363595263917bddc167d6e5cce"}, - {file = "coverage-7.6.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:09d03f48d9025b8a6a116cddcb6c7b8ce80e4fb4c31dd2e124a7c377036ad58e"}, 
- {file = "coverage-7.6.11-cp39-cp39-win32.whl", hash = "sha256:bb35ae9f134fbd9cf7302a9654d5a1e597c974202678082dcc569eb39a8cde03"}, - {file = "coverage-7.6.11-cp39-cp39-win_amd64.whl", hash = "sha256:f382004fa4c93c01016d9226b9d696a08c53f6818b7ad59b4e96cb67e863353a"}, - {file = "coverage-7.6.11-pp39.pp310-none-any.whl", hash = "sha256:adc2d941c0381edfcf3897f94b9f41b1e504902fab78a04b1677f2f72afead4b"}, - {file = "coverage-7.6.11-py3-none-any.whl", hash = "sha256:f0f334ae844675420164175bf32b04e18a81fe57ad8eb7e0cfd4689d681ffed7"}, - {file = "coverage-7.6.11.tar.gz", hash = "sha256:e642e6a46a04e992ebfdabed79e46f478ec60e2c528e1e1a074d63800eda4286"}, + {file = "coverage-7.6.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8"}, + {file = "coverage-7.6.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879"}, + {file = "coverage-7.6.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06097c7abfa611c91edb9e6920264e5be1d6ceb374efb4986f38b09eed4cb2fe"}, + {file = "coverage-7.6.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:220fa6c0ad7d9caef57f2c8771918324563ef0d8272c94974717c3909664e674"}, + {file = "coverage-7.6.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3688b99604a24492bcfe1c106278c45586eb819bf66a654d8a9a1433022fb2eb"}, + {file = "coverage-7.6.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d1a987778b9c71da2fc8948e6f2656da6ef68f59298b7e9786849634c35d2c3c"}, + {file = "coverage-7.6.12-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:cec6b9ce3bd2b7853d4a4563801292bfee40b030c05a3d29555fd2a8ee9bd68c"}, + {file = "coverage-7.6.12-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ace9048de91293e467b44bce0f0381345078389814ff6e18dbac8fdbf896360e"}, + {file = 
"coverage-7.6.12-cp310-cp310-win32.whl", hash = "sha256:ea31689f05043d520113e0552f039603c4dd71fa4c287b64cb3606140c66f425"}, + {file = "coverage-7.6.12-cp310-cp310-win_amd64.whl", hash = "sha256:676f92141e3c5492d2a1596d52287d0d963df21bf5e55c8b03075a60e1ddf8aa"}, + {file = "coverage-7.6.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e18aafdfb3e9ec0d261c942d35bd7c28d031c5855dadb491d2723ba54f4c3015"}, + {file = "coverage-7.6.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66fe626fd7aa5982cdebad23e49e78ef7dbb3e3c2a5960a2b53632f1f703ea45"}, + {file = "coverage-7.6.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ef01d70198431719af0b1f5dcbefc557d44a190e749004042927b2a3fed0702"}, + {file = "coverage-7.6.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07e92ae5a289a4bc4c0aae710c0948d3c7892e20fd3588224ebe242039573bf0"}, + {file = "coverage-7.6.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e695df2c58ce526eeab11a2e915448d3eb76f75dffe338ea613c1201b33bab2f"}, + {file = "coverage-7.6.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d74c08e9aaef995f8c4ef6d202dbd219c318450fe2a76da624f2ebb9c8ec5d9f"}, + {file = "coverage-7.6.12-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e995b3b76ccedc27fe4f477b349b7d64597e53a43fc2961db9d3fbace085d69d"}, + {file = "coverage-7.6.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b1f097878d74fe51e1ddd1be62d8e3682748875b461232cf4b52ddc6e6db0bba"}, + {file = "coverage-7.6.12-cp311-cp311-win32.whl", hash = "sha256:1f7ffa05da41754e20512202c866d0ebfc440bba3b0ed15133070e20bf5aeb5f"}, + {file = "coverage-7.6.12-cp311-cp311-win_amd64.whl", hash = "sha256:e216c5c45f89ef8971373fd1c5d8d1164b81f7f5f06bbf23c37e7908d19e8558"}, + {file = "coverage-7.6.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:b172f8e030e8ef247b3104902cc671e20df80163b60a203653150d2fc204d1ad"}, + {file = "coverage-7.6.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:641dfe0ab73deb7069fb972d4d9725bf11c239c309ce694dd50b1473c0f641c3"}, + {file = "coverage-7.6.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e549f54ac5f301e8e04c569dfdb907f7be71b06b88b5063ce9d6953d2d58574"}, + {file = "coverage-7.6.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:959244a17184515f8c52dcb65fb662808767c0bd233c1d8a166e7cf74c9ea985"}, + {file = "coverage-7.6.12-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bda1c5f347550c359f841d6614fb8ca42ae5cb0b74d39f8a1e204815ebe25750"}, + {file = "coverage-7.6.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1ceeb90c3eda1f2d8c4c578c14167dbd8c674ecd7d38e45647543f19839dd6ea"}, + {file = "coverage-7.6.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f16f44025c06792e0fb09571ae454bcc7a3ec75eeb3c36b025eccf501b1a4c3"}, + {file = "coverage-7.6.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b076e625396e787448d27a411aefff867db2bffac8ed04e8f7056b07024eed5a"}, + {file = "coverage-7.6.12-cp312-cp312-win32.whl", hash = "sha256:00b2086892cf06c7c2d74983c9595dc511acca00665480b3ddff749ec4fb2a95"}, + {file = "coverage-7.6.12-cp312-cp312-win_amd64.whl", hash = "sha256:7ae6eabf519bc7871ce117fb18bf14e0e343eeb96c377667e3e5dd12095e0288"}, + {file = "coverage-7.6.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:488c27b3db0ebee97a830e6b5a3ea930c4a6e2c07f27a5e67e1b3532e76b9ef1"}, + {file = "coverage-7.6.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d1095bbee1851269f79fd8e0c9b5544e4c00c0c24965e66d8cba2eb5bb535fd"}, + {file = "coverage-7.6.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0533adc29adf6a69c1baa88c3d7dbcaadcffa21afbed3ca7a225a440e4744bf9"}, + {file 
= "coverage-7.6.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53c56358d470fa507a2b6e67a68fd002364d23c83741dbc4c2e0680d80ca227e"}, + {file = "coverage-7.6.12-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64cbb1a3027c79ca6310bf101014614f6e6e18c226474606cf725238cf5bc2d4"}, + {file = "coverage-7.6.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:79cac3390bfa9836bb795be377395f28410811c9066bc4eefd8015258a7578c6"}, + {file = "coverage-7.6.12-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9b148068e881faa26d878ff63e79650e208e95cf1c22bd3f77c3ca7b1d9821a3"}, + {file = "coverage-7.6.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8bec2ac5da793c2685ce5319ca9bcf4eee683b8a1679051f8e6ec04c4f2fd7dc"}, + {file = "coverage-7.6.12-cp313-cp313-win32.whl", hash = "sha256:200e10beb6ddd7c3ded322a4186313d5ca9e63e33d8fab4faa67ef46d3460af3"}, + {file = "coverage-7.6.12-cp313-cp313-win_amd64.whl", hash = "sha256:2b996819ced9f7dbb812c701485d58f261bef08f9b85304d41219b1496b591ef"}, + {file = "coverage-7.6.12-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:299cf973a7abff87a30609879c10df0b3bfc33d021e1adabc29138a48888841e"}, + {file = "coverage-7.6.12-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4b467a8c56974bf06e543e69ad803c6865249d7a5ccf6980457ed2bc50312703"}, + {file = "coverage-7.6.12-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2458f275944db8129f95d91aee32c828a408481ecde3b30af31d552c2ce284a0"}, + {file = "coverage-7.6.12-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a9d8be07fb0832636a0f72b80d2a652fe665e80e720301fb22b191c3434d924"}, + {file = "coverage-7.6.12-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:14d47376a4f445e9743f6c83291e60adb1b127607a3618e3185bbc8091f0467b"}, + {file = "coverage-7.6.12-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b95574d06aa9d2bd6e5cc35a5bbe35696342c96760b69dc4287dbd5abd4ad51d"}, + {file = "coverage-7.6.12-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:ecea0c38c9079570163d663c0433a9af4094a60aafdca491c6a3d248c7432827"}, + {file = "coverage-7.6.12-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2251fabcfee0a55a8578a9d29cecfee5f2de02f11530e7d5c5a05859aa85aee9"}, + {file = "coverage-7.6.12-cp313-cp313t-win32.whl", hash = "sha256:eb5507795caabd9b2ae3f1adc95f67b1104971c22c624bb354232d65c4fc90b3"}, + {file = "coverage-7.6.12-cp313-cp313t-win_amd64.whl", hash = "sha256:f60a297c3987c6c02ffb29effc70eadcbb412fe76947d394a1091a3615948e2f"}, + {file = "coverage-7.6.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e7575ab65ca8399c8c4f9a7d61bbd2d204c8b8e447aab9d355682205c9dd948d"}, + {file = "coverage-7.6.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8161d9fbc7e9fe2326de89cd0abb9f3599bccc1287db0aba285cb68d204ce929"}, + {file = "coverage-7.6.12-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a1e465f398c713f1b212400b4e79a09829cd42aebd360362cd89c5bdc44eb87"}, + {file = "coverage-7.6.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f25d8b92a4e31ff1bd873654ec367ae811b3a943583e05432ea29264782dc32c"}, + {file = "coverage-7.6.12-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a936309a65cc5ca80fa9f20a442ff9e2d06927ec9a4f54bcba9c14c066323f2"}, + {file = "coverage-7.6.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:aa6f302a3a0b5f240ee201297fff0bbfe2fa0d415a94aeb257d8b461032389bd"}, + {file = "coverage-7.6.12-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f973643ef532d4f9be71dd88cf7588936685fdb576d93a79fe9f65bc337d9d73"}, + {file = 
"coverage-7.6.12-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:78f5243bb6b1060aed6213d5107744c19f9571ec76d54c99cc15938eb69e0e86"}, + {file = "coverage-7.6.12-cp39-cp39-win32.whl", hash = "sha256:69e62c5034291c845fc4df7f8155e8544178b6c774f97a99e2734b05eb5bed31"}, + {file = "coverage-7.6.12-cp39-cp39-win_amd64.whl", hash = "sha256:b01a840ecc25dce235ae4c1b6a0daefb2a203dba0e6e980637ee9c2f6ee0df57"}, + {file = "coverage-7.6.12-pp39.pp310-none-any.whl", hash = "sha256:7e39e845c4d764208e7b8f6a21c541ade741e2c41afabdfa1caa28687a3c98cf"}, + {file = "coverage-7.6.12-py3-none-any.whl", hash = "sha256:eb8668cfbc279a536c633137deeb9435d2962caec279c3f8cf8b91fff6ff8953"}, + {file = "coverage-7.6.12.tar.gz", hash = "sha256:48cfc4641d95d34766ad41d9573cc0f22a48aa88d22657a1fe01dca0dbae4de2"}, ] [package.dependencies] @@ -650,6 +656,17 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +description = "Get CPU info with pure Python" +optional = false +python-versions = "*" +files = [ + {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"}, + {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"}, +] + [[package]] name = "pygments" version = "2.19.1" @@ -686,6 +703,26 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-benchmark" +version = "5.1.0" +description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest-benchmark-5.1.0.tar.gz", hash = "sha256:9ea661cdc292e8231f7cd4c10b0319e56a2118e2c09d9f50e1b3d150d2aca105"}, + {file = "pytest_benchmark-5.1.0-py3-none-any.whl", hash = "sha256:922de2dfa3033c227c96da942d1878191afa135a29485fb942e85dff1c592c89"}, +] + +[package.dependencies] +py-cpuinfo = "*" +pytest = ">=8.1" + +[package.extras] +aspect = ["aspectlib"] +elasticsearch = ["elasticsearch"] +histogram = ["pygal", "pygaljs", "setuptools"] + [[package]] name = "pytest-cov" version = "6.0.0" @@ -1071,4 +1108,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.15" -content-hash = "ac25afc7dd34ee5361620f30a1602d9112d674d4c8f00a2aaac85f34f826e3f9" +content-hash = "ea292d92d9b2e84e7875683b8144dea8124bc26c86212c9b2f75086467afa803" diff --git a/pyproject.toml b/pyproject.toml index 03ef635..2df1344 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ sphinx-pyproject = "^0.3.0" optional = true [tool.poetry.group.test.dependencies] pytest = "^8.3.3" +pytest-benchmark = "^5.1.0" pytest-cov = "^6.0.0" nox = "^2024.10.9" ruff = "^0.7.2" From 6d555cce4d30477419c1bafcd036fcb1c7e7c63b Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Wed, 12 Feb 2025 18:29:26 +0100 Subject: [PATCH 03/17] Added MicroHH kernel and comparison to reference --- tests/test_benchmark.py | 56 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 9b10714..ea2baaa 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,6 +1,24 @@ import numpy as np from time import perf_counter import pytest +from constraint import Problem + + +# reference times (using A4000 on DAS6) +reference_microbenchmark_mean = np.array([0.03569697, 0.04690351, 0.1586863, 0.13609187, 0.13637274, 0.01238605, 0.01072952, 0.07484022, 0.01054054, 0.01030138]) # noqa E501 +reference_results = { + 
"microhh": 1.1565620 +} +# device properties (for A4000 on DAS6 using get_opencl_device_info.cpp) +dev = { + "max_threads": 1024, + "max_threads_per_sm": 1024, + "max_threads_per_block": 1536, + "max_shared_memory_per_block": 49152, + "max_shared_memory": 102400, + "max_wi_size": [1024, 1024, 64], + "max_wg_size": 1024, +} @pytest.mark.skip def get_performance_factor(repeats=3): @@ -78,8 +96,42 @@ def cache_2(): mean_relative_std = max(np.mean(relative_std), 0.025) # calculate the performance factor relative to the reference - reference_benchmark_mean = np.array([0.03569697, 0.04690351, 0.1586863, 0.13609187, 0.13637274, 0.01238605, 0.01072952, 0.07484022, 0.01054054, 0.01030138]) - performance_factor: float = np.mean(benchmark_mean / reference_benchmark_mean) + performance_factor: float = np.mean(benchmark_mean / reference_microbenchmark_mean) return performance_factor, mean_relative_std performance_factor, mean_relative_std = get_performance_factor() + + +def test_microhh(benchmark): + cta_padding = 0 # default argument + + # setup the tunable parameters + problem = Problem() + problem.addVariable("STATIC_STRIDES", [0]) + problem.addVariable("TILING_STRATEGY", [0]) + problem.addVariable("REWRITE_INTERP", [0]) + problem.addVariable("BLOCK_SIZE_X", [1, 2, 4, 8, 16, 32, 128, 256, 512, 1024]) + problem.addVariable("BLOCK_SIZE_Y", [1, 2, 4, 8, 16, 32]) + problem.addVariable("BLOCK_SIZE_Z", [1, 2, 4]) + problem.addVariable("TILING_FACTOR_X", [1, 2, 4, 8]) + problem.addVariable("TILING_FACTOR_Y", [1, 2, 4]) + problem.addVariable("TILING_FACTOR_Z", [1, 2, 4]) + problem.addVariable("LOOP_UNROLL_FACTOR_X",[1, 2, 4, 8]) + problem.addVariable("LOOP_UNROLL_FACTOR_Y", [1, 2, 4]) + problem.addVariable("LOOP_UNROLL_FACTOR_Z", [1, 2, 4]) + problem.addVariable("BLOCKS_PER_MP", [0, 1, 2, 3, 4]) + + # setup the restrictions + problem.addConstraint([ + f"BLOCK_SIZE_X * BLOCK_SIZE_Y * BLOCK_SIZE_Z * BLOCKS_PER_MP <= {dev['max_threads_per_sm']}", + f"32 <= BLOCK_SIZE_X * BLOCK_SIZE_Y * 
BLOCK_SIZE_Z <= {dev['max_threads_per_block']}", + "LOOP_UNROLL_FACTOR_X == 0 or TILING_FACTOR_X % LOOP_UNROLL_FACTOR_X == 0", + "LOOP_UNROLL_FACTOR_Y == 0 or TILING_FACTOR_Y % LOOP_UNROLL_FACTOR_Y == 0", + "LOOP_UNROLL_FACTOR_Z == 0 or TILING_FACTOR_Z % LOOP_UNROLL_FACTOR_Z == 0", + f"BLOCK_SIZE_X * TILING_FACTOR_X > {cta_padding}", + f"BLOCK_SIZE_Y * TILING_FACTOR_Y > {cta_padding}", + f"BLOCK_SIZE_Z * TILING_FACTOR_Z > {cta_padding}", + ]) + + benchmark(problem.getSolutions) + assert benchmark.stats.stats.mean <= reference_results["microhh"] * (performance_factor + mean_relative_std) From 545c60a61f5a47fd72ecabf83aad3c97535fcbf6 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Feb 2025 08:21:14 +0100 Subject: [PATCH 04/17] Implemented support for saving benchmark data and passing OS name as argument --- .github/workflows/build-test-python-package.yml | 2 +- .gitignore | 1 + noxfile.py | 8 +++++++- tests/test_benchmark.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index 62da4f6..b7e5ffa 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -25,7 +25,7 @@ jobs: - uses: actions/checkout@v4 - uses: fjwillemsen/setup-nox2@v3.0.0 - run: | - nox + nox -- ${{ runner.os }} - name: Report to Coveralls uses: coverallsapp/github-action@v2 with: diff --git a/.gitignore b/.gitignore index c8e2636..01fce1b 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports +.benchmarks htmlcov/ .tox/ .coverage diff --git a/noxfile.py b/noxfile.py index 0cab3a0..9dc6481 100644 --- a/noxfile.py +++ b/noxfile.py @@ -35,13 +35,19 @@ def lint(session: Session) -> None: # do not forget check / set the versions with `pyenv global`, or `pyenv local` in case of virtual environment def tests(session: Session) -> None: """Run the 
tests for the specified Python versions.""" + # get command line arguments + if session.posargs: + os_name = session.posargs[0] + else: + os_name = 'local' + # install the dev-dependencies and build the package session.install("poetry") session.run("poetry", "install", "--with", "dev,test", external=True) # session.poetry.installroot(distribution_format="sdist") # run pytest on the package with C-extensions, disable required coverage percentage - session.run("pytest", "--no-cov") + session.run("pytest", "--no-cov", "--benchmark-json" f".benchmarks/benchmark_{os_name}_{session.python}.json") # for the last Python version session: if session.python == python_versions_to_test[-1]: diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index ea2baaa..6fec275 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -134,4 +134,4 @@ def test_microhh(benchmark): ]) benchmark(problem.getSolutions) - assert benchmark.stats.stats.mean <= reference_results["microhh"] * (performance_factor + mean_relative_std) + assert benchmark.stats.stats.mean - benchmark.stats.stats.std <= reference_results["microhh"] * (performance_factor + mean_relative_std) From 0ea091f3e06c0b0d220c3bde1bf80c78d65ce5c8 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 09:46:05 +0100 Subject: [PATCH 05/17] Made Numpy-free implementation of microbenchmarks to avoid adding Numpy dependency --- tests/test_benchmark.py | 81 ++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 6fec275..85116da 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,11 +1,12 @@ -import numpy as np +from random import random from time import perf_counter import pytest from constraint import Problem +from math import sqrt # reference times (using A4000 on DAS6) -reference_microbenchmark_mean = np.array([0.03569697, 0.04690351, 0.1586863, 0.13609187, 0.13637274, 
0.01238605, 0.01072952, 0.07484022, 0.01054054, 0.01030138]) # noqa E501 +reference_microbenchmark_mean = [0.03569697, 0.04690351, 0.1586863, 0.13609187, 0.13637274, 0.01238605, 0.01072952, 0.07484022, 0.01054054, 0.01030138] # noqa E501 reference_results = { "microhh": 1.1565620 } @@ -22,19 +23,22 @@ @pytest.mark.skip def get_performance_factor(repeats=3): - """Run microbenchmarks to indicate how much faster / slower this system is compared to the reference.""" + """Run microbenchmarks to indicate how much slower this system is compared to the reference.""" def cpu_1(): """Matrix multiplication""" - A = np.random.random((1000, 1000)) - B = np.random.random((1000, 1000)) - return np.dot(A, B) + size = 100 + A = [[random() for _ in range(size)] for _ in range(size)] + B = [[random() for _ in range(size)] for _ in range(size)] + result = [[sum(A[i][k] * B[k][j] for k in range(size)) for j in range(size)] for i in range(size)] + return result def cpu_2(): """Element-wise arithmetic""" - A = np.random.random(10**6) - B = np.random.random(10**6) - return A + B + N = 10**6 + A = [random() for _ in range(N)] + B = [random() for _ in range(N)] + return [A[i] + B[i] for i in range(N)] def cpu_3(): """Addition""" @@ -53,12 +57,14 @@ def cpu_5(): def mem_1(): """Array copying""" - A = np.random.random(10**6) - return np.copy(A) - + N = 10**6 + A = [random() for _ in range(N)] + return A.copy() + def mem_2(): """Array slicing""" - A = np.random.random(10**6) + N = 10**6 + A = [random() for _ in range(N)] return A[::2] def mem_3(): @@ -71,13 +77,15 @@ def mem_3(): def cache_1(): """Sequential array sum""" - A = np.random.random(10**6) - return np.sum(A) + N = 10**6 + A = [random() for _ in range(N)] + return sum(A) def cache_2(): """Strided array sum""" - A = np.random.random(10**6) - return np.sum(A[::2]) + N = 10**6 + A = [random() for _ in range(N)] + return sum(A[::2]) # run the benchmarks benchmarks = [cpu_1, cpu_2, cpu_3, cpu_4, cpu_5, mem_1, mem_2, mem_3, cache_1, 
cache_2] @@ -89,14 +97,37 @@ def cache_2(): duration = perf_counter() - start raw_data[i].append(duration) - # calculate statistics - benchmark_data = np.array(raw_data) - benchmark_mean = benchmark_data.mean(axis=0) - relative_std = (benchmark_data.std(axis=0) / np.abs(benchmark_mean)) - mean_relative_std = max(np.mean(relative_std), 0.025) + # # below is the non-Numpy equivalent of the following statistics calculation + # benchmark_data = np.array(raw_data) + # np_benchmark_mean = benchmark_data.mean(axis=0) + # np_relative_std = (benchmark_data.std(axis=0) / abs(np_benchmark_mean)) + # np_mean_relative_std = max(np.mean(np_relative_std), 0.025) + # # calculate the performance factor relative to the reference + # np_performance_factor: float = np.mean(np_benchmark_mean / reference_microbenchmark_mean) + + + # Transpose the raw_data to get columns as rows + transposed_data = list(zip(*raw_data)) + + # Calculate mean along axis=0 (column-wise) + benchmark_mean = [sum(column) / len(column) for column in transposed_data] + + # Calculate standard deviation along axis=0 (column-wise) + def stddev(column, mean): + variance = sum((x - mean) ** 2 for x in column) / len(column) + return sqrt(variance) + + benchmark_std = [stddev(column, mean) for column, mean in zip(transposed_data, benchmark_mean)] + + # Calculate relative standard deviation + relative_std = [(s / abs(m)) if m != 0 else 0 for s, m in zip(benchmark_std, benchmark_mean)] + + # Calculate mean relative standard deviation and apply threshold + mean_relative_std = max(sum(relative_std) / len(relative_std), 0.025) - # calculate the performance factor relative to the reference - performance_factor: float = np.mean(benchmark_mean / reference_microbenchmark_mean) + # Calculate performance factor + performance_factor = sum(bm / rm for bm, rm in zip(benchmark_mean, reference_microbenchmark_mean)) / len(benchmark_mean) + raise ValueError(benchmark_mean) return performance_factor, mean_relative_std performance_factor, 
mean_relative_std = get_performance_factor() @@ -134,4 +165,4 @@ def test_microhh(benchmark): ]) benchmark(problem.getSolutions) - assert benchmark.stats.stats.mean - benchmark.stats.stats.std <= reference_results["microhh"] * (performance_factor + mean_relative_std) + assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["microhh"] * (performance_factor + mean_relative_std) From 263e22f99d2c1f95023ebf30b5b79fb8e4a1f909 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 09:47:11 +0100 Subject: [PATCH 06/17] Register benchmarks folder and path --- noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 9dc6481..6faa52c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -7,6 +7,7 @@ import nox from nox import Session, session +from pathlib import Path # from nox_poetry import Session, session # nox_poetry is a better option, but <=1.0.3 has a bug with filename-URLs @@ -21,6 +22,9 @@ nox.options.stop_on_first_error = True nox.options.error_on_missing_interpreters = True +# create the benchmark folder +Path(".benchmarks").mkdir(exist_ok=True) + # Test code quality: linting @session @@ -47,7 +51,7 @@ def tests(session: Session) -> None: # session.poetry.installroot(distribution_format="sdist") # run pytest on the package with C-extensions, disable required coverage percentage - session.run("pytest", "--no-cov", "--benchmark-json" f".benchmarks/benchmark_{os_name}_{session.python}.json") + session.run("pytest", "--no-cov", "--benchmark-json", f".benchmarks/benchmark_{os_name}_{session.python}.json") # for the last Python version session: if session.python == python_versions_to_test[-1]: From 337449e1364a8827c35d8e99a0a33ff0ac7b6deb Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Feb 2025 09:53:58 +0100 Subject: [PATCH 07/17] Updated reference microbenchmark times, improved comments --- tests/test_benchmark.py | 27 ++++++++------------------- 1 file changed, 8 
insertions(+), 19 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 85116da..75bac1f 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -6,7 +6,7 @@ # reference times (using A4000 on DAS6) -reference_microbenchmark_mean = [0.03569697, 0.04690351, 0.1586863, 0.13609187, 0.13637274, 0.01238605, 0.01072952, 0.07484022, 0.01054054, 0.01030138] # noqa E501 +reference_microbenchmark_mean = [0.3784186691045761, 0.4737640768289566, 0.10726054509480794, 0.10744890073935191, 0.10979799057046573, 0.15360217044750848, 0.14483965436617532, 0.054416230569283165, 0.13835338006416956, 0.1371802551050981] # noqa E501 reference_results = { "microhh": 1.1565620 } @@ -97,37 +97,26 @@ def cache_2(): duration = perf_counter() - start raw_data[i].append(duration) - # # below is the non-Numpy equivalent of the following statistics calculation - # benchmark_data = np.array(raw_data) - # np_benchmark_mean = benchmark_data.mean(axis=0) - # np_relative_std = (benchmark_data.std(axis=0) / abs(np_benchmark_mean)) - # np_mean_relative_std = max(np.mean(np_relative_std), 0.025) - # # calculate the performance factor relative to the reference - # np_performance_factor: float = np.mean(np_benchmark_mean / reference_microbenchmark_mean) + # non-Numpy implementation of statistics calculation + transposed_data = list(zip(*raw_data)) # transpose the raw_data to get columns as rows - - # Transpose the raw_data to get columns as rows - transposed_data = list(zip(*raw_data)) - - # Calculate mean along axis=0 (column-wise) + # calculate mean along axis=0 (column-wise) (`benchmark_data.mean(axis=0)`) benchmark_mean = [sum(column) / len(column) for column in transposed_data] - # Calculate standard deviation along axis=0 (column-wise) + # calculate standard deviation along axis=0 (column-wise) def stddev(column, mean): variance = sum((x - mean) ** 2 for x in column) / len(column) return sqrt(variance) + # calculate relative standard deviation 
(`(benchmark_data.std(axis=0) / abs(np_benchmark_mean))`) benchmark_std = [stddev(column, mean) for column, mean in zip(transposed_data, benchmark_mean)] - - # Calculate relative standard deviation relative_std = [(s / abs(m)) if m != 0 else 0 for s, m in zip(benchmark_std, benchmark_mean)] - # Calculate mean relative standard deviation and apply threshold + # calculate mean relative standard deviation and apply threshold (`max(np.mean(np_relative_std), 0.025)``) mean_relative_std = max(sum(relative_std) / len(relative_std), 0.025) - # Calculate performance factor + # calculate performance factor (`np.mean(np_benchmark_mean / reference_microbenchmark_mean)``) performance_factor = sum(bm / rm for bm, rm in zip(benchmark_mean, reference_microbenchmark_mean)) / len(benchmark_mean) - raise ValueError(benchmark_mean) return performance_factor, mean_relative_std performance_factor, mean_relative_std = get_performance_factor() From 60c97514186f0ec9b331e87818efab7f6b697104 Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Feb 2025 10:03:33 +0100 Subject: [PATCH 08/17] Added the dedispersion benchmark and reference performance --- tests/test_benchmark.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 75bac1f..cf5479f 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -8,7 +8,8 @@ # reference times (using A4000 on DAS6) reference_microbenchmark_mean = [0.3784186691045761, 0.4737640768289566, 0.10726054509480794, 0.10744890073935191, 0.10979799057046573, 0.15360217044750848, 0.14483965436617532, 0.054416230569283165, 0.13835338006416956, 0.1371802551050981] # noqa E501 reference_results = { - "microhh": 1.1565620 + "microhh": 1.1565620, + "dedispersion": 0.1171140, } # device properties (for A4000 on DAS6 using get_opencl_device_info.cpp) dev = { @@ -123,6 +124,8 @@ def stddev(column, mean): def test_microhh(benchmark): + """Based on the 
MicroHH search space in the paper.""" + cta_padding = 0 # default argument # setup the tunable parameters @@ -153,5 +156,36 @@ def test_microhh(benchmark): f"BLOCK_SIZE_Z * TILING_FACTOR_Z > {cta_padding}", ]) + # run the benchmark and check for performance degradation benchmark(problem.getSolutions) assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["microhh"] * (performance_factor + mean_relative_std) + + +def test_dedispersion(benchmark): + """Based on the Dedispersion search space in the paper.""" + + # setup the tunable parameters + problem = Problem() + problem.addVariable("block_size_x", [1, 2, 4, 8] + [16 * i for i in range(1, 3)]) + problem.addVariable("block_size_y", [8 * i for i in range(4, 33)]) + problem.addVariable("block_size_z", [1]) + problem.addVariable("tile_size_x", [i for i in range(1, 5)]) + problem.addVariable("tile_size_y", [i for i in range(1, 9)]) + problem.addVariable("tile_stride_x", [0, 1]) + problem.addVariable("tile_stride_y", [0, 1]) + problem.addVariable("loop_unroll_factor_channel", [ + 0 + ]) # + [i for i in range(1,nr_channels+1) if nr_channels % i == 0] #[i for i in range(nr_channels+1)] + # tune_params["loop_unroll_factor_x", [0] #[i for i in range(1,max(tune_params["tile_size_x"]))] + # tune_params["loop_unroll_factor_y", [0] #[i for i in range(1,max(tune_params["tile_size_y"]))] + # tune_params["blocks_per_sm", [i for i in range(5)] + + # setup the restrictions + check_block_size = "32 <= block_size_x * block_size_y <= 1024" + check_tile_stride_x = "tile_size_x > 1 or tile_stride_x == 0" + check_tile_stride_y = "tile_size_y > 1 or tile_stride_y == 0" + problem.addConstraint([check_block_size, check_tile_stride_x, check_tile_stride_y]) + + # run the benchmark and check for performance degradation + benchmark(problem.getSolutions) + assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["dedispersion"] * (performance_factor + mean_relative_std) From 
dd1bb3ad11969632b6c3c2b59a77187cbc8440ad Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Feb 2025 10:12:05 +0100 Subject: [PATCH 09/17] Added the Hotspot benchmark and reference performance --- tests/test_benchmark.py | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index cf5479f..124c7a1 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -10,6 +10,7 @@ reference_results = { "microhh": 1.1565620, "dedispersion": 0.1171140, + "hotspot": 2.6839208, } # device properties (for A4000 on DAS6 using get_opencl_device_info.cpp) dev = { @@ -175,10 +176,7 @@ def test_dedispersion(benchmark): problem.addVariable("tile_stride_y", [0, 1]) problem.addVariable("loop_unroll_factor_channel", [ 0 - ]) # + [i for i in range(1,nr_channels+1) if nr_channels % i == 0] #[i for i in range(nr_channels+1)] - # tune_params["loop_unroll_factor_x", [0] #[i for i in range(1,max(tune_params["tile_size_x"]))] - # tune_params["loop_unroll_factor_y", [0] #[i for i in range(1,max(tune_params["tile_size_y"]))] - # tune_params["blocks_per_sm", [i for i in range(5)] + ]) # setup the restrictions check_block_size = "32 <= block_size_x * block_size_y <= 1024" @@ -189,3 +187,34 @@ def test_dedispersion(benchmark): # run the benchmark and check for performance degradation benchmark(problem.getSolutions) assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["dedispersion"] * (performance_factor + mean_relative_std) + +def test_hotspot(benchmark): + """Based on the Hotspot search space in the paper.""" + # constants + temporal_tiling_factor = [i for i in range(1, 11)] + max_tfactor = max(temporal_tiling_factor) + + # setup the tunable parameters + problem = Problem() + problem.addVariable("block_size_x", [1, 2, 4, 8, 16] + [32 * i for i in range(1, 33)]) + problem.addVariable("block_size_y", [2**i for i in range(6)]) + 
problem.addVariable("tile_size_x", [i for i in range(1, 11)]) + problem.addVariable("tile_size_y", [i for i in range(1, 11)]) + problem.addVariable("temporal_tiling_factor", temporal_tiling_factor) + problem.addVariable("max_tfactor", [max_tfactor]) + problem.addVariable("loop_unroll_factor_t", [i for i in range(1, max_tfactor + 1)]) + problem.addVariable("sh_power", [0, 1]) + problem.addVariable("blocks_per_sm", [0, 1, 2, 3, 4]) + + # setup the restrictions + problem.addConstraint([ + "block_size_x*block_size_y >= 32", + "temporal_tiling_factor % loop_unroll_factor_t == 0", + f"block_size_x*block_size_y <= {dev['max_threads']}", + f"(block_size_x*tile_size_x + temporal_tiling_factor * 2) * (block_size_y*tile_size_y + temporal_tiling_factor * 2) * (2+sh_power) * 4 <= {dev['max_shared_memory_per_block']}", + f"blocks_per_sm == 0 or (((block_size_x*tile_size_x + temporal_tiling_factor * 2) * (block_size_y*tile_size_y + temporal_tiling_factor * 2) * (2+sh_power) * 4) * blocks_per_sm <= {dev['max_shared_memory']})", + ]) + + # run the benchmark and check for performance degradation + benchmark(problem.getSolutions) + assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["hotspot"] * (performance_factor + mean_relative_std) From eaf9aa9a29760bc73c746cb1d1b4290350e0864d Mon Sep 17 00:00:00 2001 From: fjwillemsen Date: Thu, 13 Feb 2025 10:18:40 +0100 Subject: [PATCH 10/17] Benchmark outcomes are validated --- tests/test_benchmark.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 124c7a1..9d15f34 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -157,8 +157,9 @@ def test_microhh(benchmark): f"BLOCK_SIZE_Z * TILING_FACTOR_Z > {cta_padding}", ]) - # run the benchmark and check for performance degradation - benchmark(problem.getSolutions) + # run the benchmark and check for valid outcome and performance degradation + solutions = 
benchmark(problem.getSolutions) + assert len(solutions) == 138600 assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["microhh"] * (performance_factor + mean_relative_std) @@ -184,8 +185,9 @@ def test_dedispersion(benchmark): check_tile_stride_y = "tile_size_y > 1 or tile_stride_y == 0" problem.addConstraint([check_block_size, check_tile_stride_x, check_tile_stride_y]) - # run the benchmark and check for performance degradation - benchmark(problem.getSolutions) + # run the benchmark and check for valid outcome and performance degradation + solutions = benchmark(problem.getSolutions) + assert len(solutions) == 11130 assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["dedispersion"] * (performance_factor + mean_relative_std) def test_hotspot(benchmark): @@ -215,6 +217,7 @@ def test_hotspot(benchmark): f"blocks_per_sm == 0 or (((block_size_x*tile_size_x + temporal_tiling_factor * 2) * (block_size_y*tile_size_y + temporal_tiling_factor * 2) * (2+sh_power) * 4) * blocks_per_sm <= {dev['max_shared_memory']})", ]) - # run the benchmark and check for performance degradation - benchmark(problem.getSolutions) + # run the benchmark and check for valid outcome and performance degradation + solutions = benchmark(problem.getSolutions) + assert len(solutions) == 349853 assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["hotspot"] * (performance_factor + mean_relative_std) From dd9b71d430efea9574a2b3273199153b8e2bdcc1 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 10:54:29 +0100 Subject: [PATCH 11/17] Benchmark results collection and printing --- tests/test_benchmark.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 9d15f34..742ae1d 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -22,6 +22,8 @@ "max_wi_size": [1024, 1024, 
64], "max_wg_size": 1024, } +# collect benchmark times +benchmark_results = dict() @pytest.mark.skip def get_performance_factor(repeats=3): @@ -114,18 +116,20 @@ def stddev(column, mean): benchmark_std = [stddev(column, mean) for column, mean in zip(transposed_data, benchmark_mean)] relative_std = [(s / abs(m)) if m != 0 else 0 for s, m in zip(benchmark_std, benchmark_mean)] - # calculate mean relative standard deviation and apply threshold (`max(np.mean(np_relative_std), 0.025)``) + # calculate mean relative standard deviation and apply threshold (`max(np.mean(np_relative_std), 0.025)`) mean_relative_std = max(sum(relative_std) / len(relative_std), 0.025) - # calculate performance factor (`np.mean(np_benchmark_mean / reference_microbenchmark_mean)``) + # calculate performance factor (`np.mean(np_benchmark_mean / reference_microbenchmark_mean)`) performance_factor = sum(bm / rm for bm, rm in zip(benchmark_mean, reference_microbenchmark_mean)) / len(benchmark_mean) return performance_factor, mean_relative_std performance_factor, mean_relative_std = get_performance_factor() +print(f"\nSystem performance factor: {round(performance_factor, 3)}") def test_microhh(benchmark): """Based on the MicroHH search space in the paper.""" + benchmark_name = "microhh" cta_padding = 0 # default argument @@ -159,12 +163,17 @@ def test_microhh(benchmark): # run the benchmark and check for valid outcome and performance degradation solutions = benchmark(problem.getSolutions) + reference_result = reference_results[benchmark_name] + benchmark_result = benchmark.stats.stats.mean + benchmark_results[benchmark_name] = benchmark_result assert len(solutions) == 138600 - assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["microhh"] * (performance_factor + mean_relative_std) + assert benchmark_result - benchmark.stats.stats.stddev <= reference_result * (performance_factor + mean_relative_std) + print(f"Reference: {round(reference_result, 3)}, benchmark: 
{round(benchmark_result, 3)}, expected: {round(reference_result * performance_factor, 3)}") def test_dedispersion(benchmark): """Based on the Dedispersion search space in the paper.""" + benchmark_name = "dedispersion" # setup the tunable parameters problem = Problem() @@ -187,11 +196,18 @@ def test_dedispersion(benchmark): # run the benchmark and check for valid outcome and performance degradation solutions = benchmark(problem.getSolutions) + reference_result = reference_results[benchmark_name] + benchmark_result = benchmark.stats.stats.mean + benchmark_results[benchmark_name] = benchmark_result assert len(solutions) == 11130 - assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["dedispersion"] * (performance_factor + mean_relative_std) + assert benchmark_results[benchmark_name] - benchmark.stats.stats.stddev <= reference_results["dedispersion"] * (performance_factor + mean_relative_std) + print(f"Reference: {round(reference_result, 3)}, benchmark: {round(benchmark_result, 3)}, expected: {round(reference_result * performance_factor, 3)}") + def test_hotspot(benchmark): """Based on the Hotspot search space in the paper.""" + benchmark_name = "hotspot" + # constants temporal_tiling_factor = [i for i in range(1, 11)] max_tfactor = max(temporal_tiling_factor) @@ -219,5 +235,9 @@ def test_hotspot(benchmark): # run the benchmark and check for valid outcome and performance degradation solutions = benchmark(problem.getSolutions) + reference_result = reference_results[benchmark_name] + benchmark_result = benchmark.stats.stats.mean + benchmark_results[benchmark_name] = benchmark_result assert len(solutions) == 349853 - assert benchmark.stats.stats.mean - benchmark.stats.stats.stddev <= reference_results["hotspot"] * (performance_factor + mean_relative_std) + assert benchmark_results[benchmark_name] - benchmark.stats.stats.stddev <= reference_results[benchmark_name] * (performance_factor + mean_relative_std) + print(f"Reference: 
{round(reference_result, 3)}, benchmark: {round(benchmark_result, 3)}, expected: {round(reference_result * performance_factor, 3)}") From 1f25e9a62db9eb535a8cb772badbecf66fbf8450 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 11:20:45 +0100 Subject: [PATCH 12/17] Improved and standardized performance check and printing --- tests/test_benchmark.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 742ae1d..8cb07a3 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -116,8 +116,8 @@ def stddev(column, mean): benchmark_std = [stddev(column, mean) for column, mean in zip(transposed_data, benchmark_mean)] relative_std = [(s / abs(m)) if m != 0 else 0 for s, m in zip(benchmark_std, benchmark_mean)] - # calculate mean relative standard deviation and apply threshold (`max(np.mean(np_relative_std), 0.025)`) - mean_relative_std = max(sum(relative_std) / len(relative_std), 0.025) + # calculate mean relative standard deviation and apply threshold (`max(np.mean(np_relative_std), 0.125)`) + mean_relative_std = max(sum(relative_std) / len(relative_std), 0.125) # calculate performance factor (`np.mean(np_benchmark_mean / reference_microbenchmark_mean)`) performance_factor = sum(bm / rm for bm, rm in zip(benchmark_mean, reference_microbenchmark_mean)) / len(benchmark_mean) @@ -126,6 +126,13 @@ def stddev(column, mean): performance_factor, mean_relative_std = get_performance_factor() print(f"\nSystem performance factor: {round(performance_factor, 3)}") +@pytest.mark.skip +def check_benchmark_performance(benchmark_name, mean, std): + """Utility function to check whether the performance of a benchmark is within the expected range and print information.""" + reference_result = reference_results[benchmark_name] + assert mean - std * 2 <= reference_result * (performance_factor + mean_relative_std * 2) + print(f"Reference: {round(reference_result, 3)}, 
benchmark: {round(mean, 3)}, expected: {round(reference_result * performance_factor, 3)}") + def test_microhh(benchmark): """Based on the MicroHH search space in the paper.""" @@ -167,8 +174,7 @@ def test_microhh(benchmark): benchmark_result = benchmark.stats.stats.mean benchmark_results[benchmark_name] = benchmark_result assert len(solutions) == 138600 - assert benchmark_result - benchmark.stats.stats.stddev <= reference_result * (performance_factor + mean_relative_std) - print(f"Reference: {round(reference_result, 3)}, benchmark: {round(benchmark_result, 3)}, expected: {round(reference_result * performance_factor, 3)}") + check_benchmark_performance(benchmark_name, benchmark_result, benchmark.stats.stats.stddev) def test_dedispersion(benchmark): @@ -200,8 +206,7 @@ def test_dedispersion(benchmark): benchmark_result = benchmark.stats.stats.mean benchmark_results[benchmark_name] = benchmark_result assert len(solutions) == 11130 - assert benchmark_results[benchmark_name] - benchmark.stats.stats.stddev <= reference_results["dedispersion"] * (performance_factor + mean_relative_std) - print(f"Reference: {round(reference_result, 3)}, benchmark: {round(benchmark_result, 3)}, expected: {round(reference_result * performance_factor, 3)}") + check_benchmark_performance(benchmark_name, benchmark_result, benchmark.stats.stats.stddev) def test_hotspot(benchmark): @@ -239,5 +244,4 @@ def test_hotspot(benchmark): benchmark_result = benchmark.stats.stats.mean benchmark_results[benchmark_name] = benchmark_result assert len(solutions) == 349853 - assert benchmark_results[benchmark_name] - benchmark.stats.stats.stddev <= reference_results[benchmark_name] * (performance_factor + mean_relative_std) - print(f"Reference: {round(reference_result, 3)}, benchmark: {round(benchmark_result, 3)}, expected: {round(reference_result * performance_factor, 3)}") + check_benchmark_performance(benchmark_name, benchmark_result, benchmark.stats.stats.stddev) From 
6743764bed911df03c50e594379c9d724b026e6a Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 14:27:20 +0100 Subject: [PATCH 13/17] Added GH benchmark actions --- .github/workflows/build-test-python-package.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index b7e5ffa..f6914ba 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -26,6 +26,11 @@ jobs: - uses: fjwillemsen/setup-nox2@v3.0.0 - run: | nox -- ${{ runner.os }} + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: "pytest" + output-file-path: .benchmarks/benchmark_{${{ runner.os }}}_3.13.json - name: Report to Coveralls uses: coverallsapp/github-action@v2 with: From 5b11a8721b506b21d86546e883e3b1d43c3c3133 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 14:52:43 +0100 Subject: [PATCH 14/17] Fixed an error with the benchmark output file path --- .github/workflows/build-test-python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index f6914ba..46d30e8 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -30,7 +30,7 @@ jobs: uses: benchmark-action/github-action-benchmark@v1 with: tool: "pytest" - output-file-path: .benchmarks/benchmark_{${{ runner.os }}}_3.13.json + output-file-path: .benchmarks/benchmark_${{ runner.os }}_3.13.json - name: Report to Coveralls uses: coverallsapp/github-action@v2 with: From d909bffbda00e144af3ff68111e35e41c3e8527e Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 15:15:00 +0100 Subject: [PATCH 15/17] Created docs folder for benchmarks, point GH action to main instead of gh-pages branch --- 
.github/workflows/build-test-python-package.yml | 2 ++ docs/benchmarks/.gitkeep | 0 2 files changed, 2 insertions(+) create mode 100644 docs/benchmarks/.gitkeep diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index 46d30e8..1a12d8e 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -31,6 +31,8 @@ jobs: with: tool: "pytest" output-file-path: .benchmarks/benchmark_${{ runner.os }}_3.13.json + gh-pages-branch: main + benchmark-data-dir-path: docs/benchmarks - name: Report to Coveralls uses: coverallsapp/github-action@v2 with: diff --git a/docs/benchmarks/.gitkeep b/docs/benchmarks/.gitkeep new file mode 100644 index 0000000..e69de29 From 31640bb4f34506156e9961cccef82d64193dfbf0 Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 15:32:35 +0100 Subject: [PATCH 16/17] Added token to save benchmark results --- .github/workflows/build-test-python-package.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index 1a12d8e..5938f92 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -33,6 +33,12 @@ jobs: output-file-path: .benchmarks/benchmark_${{ runner.os }}_3.13.json gh-pages-branch: main benchmark-data-dir-path: docs/benchmarks + fail-on-alert: true + # GitHub API token to make a commit comment + github-token: ${{ secrets.GITHUB_TOKEN }} + # Enable alert commit comment + comment-on-alert: true + # alert-comment-cc-users: mention a GitHub user in the comment - name: Report to Coveralls uses: coverallsapp/github-action@v2 with: From 9fd0036e43b3bd01fc35de877049dff326b0963d Mon Sep 17 00:00:00 2001 From: Floris-Jan Willemsen Date: Thu, 13 Feb 2025 16:00:38 +0100 Subject: [PATCH 17/17] Minor update to GH action build & test workflow --- 
.github/workflows/build-test-python-package.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-test-python-package.yml b/.github/workflows/build-test-python-package.yml index 5938f92..b739f70 100644 --- a/.github/workflows/build-test-python-package.yml +++ b/.github/workflows/build-test-python-package.yml @@ -36,11 +36,12 @@ jobs: fail-on-alert: true # GitHub API token to make a commit comment github-token: ${{ secrets.GITHUB_TOKEN }} - # Enable alert commit comment comment-on-alert: true - # alert-comment-cc-users: mention a GitHub user in the comment + comment-always: true + # alert-comment-cc-users: '@fjwillemsen' mention a GitHub user in the comment - name: Report to Coveralls uses: coverallsapp/github-action@v2 with: file: coverage.xml format: cobertura + fail-on-error: false