From ecbfbf061504568851639efdc1498cb6eb455097 Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Mon, 16 Dec 2024 22:44:22 +0800
Subject: [PATCH 1/4] Parallelize benchmarking venv setup

---
 pyperformance/run.py | 85 +++++++++++++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 29 deletions(-)

diff --git a/pyperformance/run.py b/pyperformance/run.py
index e0df48d7..a810a62a 100644
--- a/pyperformance/run.py
+++ b/pyperformance/run.py
@@ -4,6 +4,9 @@
 import sys
 import time
 import traceback
+import concurrent.futures
+import io
+import contextlib
 
 import pyperformance
 from . import _utils, _python, _pythoninfo
@@ -67,47 +70,40 @@ def get_loops_from_file(filename):
     return loops
 
 
-def run_benchmarks(should_run, python, options):
-    if options.same_loops is not None:
-        loops = get_loops_from_file(options.same_loops)
-    else:
-        loops = {}
-
-    to_run = sorted(should_run)
+def setup_single_venv(args):
+    (i, num_benchmarks, python, options, bench) = args
 
-    info = _pythoninfo.get_info(python)
-    runid = get_run_id(info)
+    stdout = io.StringIO()
+    with contextlib.redirect_stdout(stdout):
+        info = _pythoninfo.get_info(python)
+        runid = get_run_id(info)
 
-    unique = getattr(options, 'unique_venvs', False)
-    if not unique:
-        common = VenvForBenchmarks.ensure(
-            _venv.get_venv_root(runid.name, python=info),
-            info,
-            upgrade='oncreate',
-            inherit_environ=options.inherit_environ,
-        )
-
-    benchmarks = {}
-    venvs = set()
-    for i, bench in enumerate(to_run):
+        unique = getattr(options, 'unique_venvs', False)
+        if not unique:
+            common = VenvForBenchmarks.ensure(
+                _venv.get_venv_root(runid.name, python=info),
+                info,
+                upgrade='oncreate',
+                inherit_environ=options.inherit_environ,
+            )
         bench_runid = runid._replace(bench=bench)
         assert bench_runid.name, (bench, bench_runid)
         name = bench_runid.name
         venv_root = _venv.get_venv_root(name, python=info)
+        bench_status = f'({i+1:>2}/{num_benchmarks})'
         print()
         print('='*50)
-        print(f'({i+1:>2}/{len(to_run)}) creating venv for benchmark ({bench.name})')
+        print(f'{bench_status} creating venv for benchmark ({bench.name})')
         print()
         if not unique:
-            print('(trying common venv first)')
+            print(f'{bench_status} (trying common venv first)')
             # Try the common venv first.
             try:
                 common.ensure_reqs(bench)
             except _venv.RequirementsInstallationFailedError:
-                print('(falling back to unique venv)')
+                print(f'{bench_status} (falling back to unique venv)')
             else:
-                benchmarks[bench] = (common, bench_runid)
-                continue
+                return (bench, None, common, bench_runid, stdout.getvalue())
         try:
             venv = VenvForBenchmarks.ensure(
                 venv_root,
@@ -118,12 +114,43 @@ def run_benchmarks(should_run, python, options):
             # XXX Do not override when there is a requirements collision.
             venv.ensure_reqs(bench)
         except _venv.RequirementsInstallationFailedError:
-            print('(benchmark will be skipped)')
+            print(f'{bench_status} (benchmark will be skipped)')
             print()
             venv = None
+    print(f'{bench_status} done')
+    return (bench, venv_root, venv, bench_runid, stdout.getvalue())
+
+def run_benchmarks(should_run, python, options):
+    if options.same_loops is not None:
+        loops = get_loops_from_file(options.same_loops)
+    else:
+        loops = {}
+
+    to_run = sorted(should_run)
+    benchmarks = {}
+    venvs = set()
+
+    # Set up the first venv on its own to create the common
+    # requirements without threading issues.
+    bench, venv_root, venv, bench_runid, cons_output = setup_single_venv((0, len(to_run), python, options, to_run[0]))
+    if venv_root is not None:
         venvs.add(venv_root)
-        benchmarks[bench] = (venv, bench_runid)
-    print()
+    benchmarks[bench] = (venv, bench_runid)
+    print(cons_output)
+
+    # Parallelize the rest.
+    executor_input = [(i+1, len(to_run), python, options, bench)
+                      for i, bench in enumerate(to_run[1:])]
+    # It's fine to set a higher worker count, because this is I/O-bound anyway.
+    with concurrent.futures.ProcessPoolExecutor(max_workers=len(to_run)-1) as executor:
+        for bench, venv_root, venv, bench_runid, cons_output in executor.map(setup_single_venv, executor_input):
+            if venv_root is not None:
+                venvs.add(venv_root)
+            benchmarks[bench] = (venv, bench_runid)
+            print(cons_output)
+
+    print("Completed venv installation. Now sleeping 15s to stabilize thermals.")
+    time.sleep(15)
 
     suite = None
     run_count = str(len(to_run))

From db29d34b0710e4eceded138fb11528f610864cd7 Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Mon, 16 Dec 2024 22:51:41 +0800
Subject: [PATCH 2/4] Fix max_workers so it is always at least 1

---
 pyperformance/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyperformance/run.py b/pyperformance/run.py
index a810a62a..280f8b03 100644
--- a/pyperformance/run.py
+++ b/pyperformance/run.py
@@ -142,7 +142,7 @@ def run_benchmarks(should_run, python, options):
     executor_input = [(i+1, len(to_run), python, options, bench)
                       for i, bench in enumerate(to_run[1:])]
     # It's fine to set a higher worker count, because this is I/O-bound anyway.
-    with concurrent.futures.ProcessPoolExecutor(max_workers=len(to_run)-1) as executor:
+    with concurrent.futures.ProcessPoolExecutor(max_workers=max(1, len(to_run))) as executor:
         for bench, venv_root, venv, bench_runid, cons_output in executor.map(setup_single_venv, executor_input):
             if venv_root is not None:
                 venvs.add(venv_root)

From aefde59d7861677a5c7fe88869cbf59fac5144fc Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Mon, 16 Dec 2024 23:11:39 +0800
Subject: [PATCH 3/4] Use threads because Windows is unhappy

---
 pyperformance/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyperformance/run.py b/pyperformance/run.py
index 280f8b03..9d7d629c 100644
--- a/pyperformance/run.py
+++ b/pyperformance/run.py
@@ -142,8 +142,8 @@ def run_benchmarks(should_run, python, options):
     executor_input = [(i+1, len(to_run), python, options, bench)
                       for i, bench in enumerate(to_run[1:])]
     # It's fine to set a higher worker count, because this is I/O-bound anyway.
-    with concurrent.futures.ProcessPoolExecutor(max_workers=max(1, len(to_run))) as executor:
-        for bench, venv_root, venv, bench_runid, cons_output in executor.map(setup_single_venv, executor_input):
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(to_run))) as executor:
+        for bench, venv_root, venv, bench_runid, cons_output in list(executor.map(setup_single_venv, executor_input)):
             if venv_root is not None:
                 venvs.add(venv_root)
             benchmarks[bench] = (venv, bench_runid)

From 0cfc548a64824eb29283878ee6273bce2f23908d Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Mon, 16 Dec 2024 23:15:33 +0800
Subject: [PATCH 4/4] Switch back to ProcessPoolExecutor

---
 pyperformance/run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyperformance/run.py b/pyperformance/run.py
index 9d7d629c..f280f4a7 100644
--- a/pyperformance/run.py
+++ b/pyperformance/run.py
@@ -142,7 +142,7 @@ def run_benchmarks(should_run, python, options):
     executor_input = [(i+1, len(to_run), python, options, bench)
                       for i, bench in enumerate(to_run[1:])]
     # It's fine to set a higher worker count, because this is I/O-bound anyway.
-    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(to_run))) as executor:
+    with concurrent.futures.ProcessPoolExecutor() as executor:
         for bench, venv_root, venv, bench_runid, cons_output in list(executor.map(setup_single_venv, executor_input)):
             if venv_root is not None:
                 venvs.add(venv_root)
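
The sketch below is an illustrative aside, not part of the patches and not pyperformance's actual API: the names setup_one and setup_all are made up, and a short sleep stands in for the real venv creation and pip installs. It condenses the pattern the series converges on, namely fanning independent setup steps out to a concurrent.futures pool, capturing each worker's console output with contextlib.redirect_stdout, and printing it from the parent so the log stays readable.

import concurrent.futures
import contextlib
import io
import time


def setup_one(args):
    """Do one unit of setup and return its captured console output."""
    index, total, name = args
    out = io.StringIO()
    with contextlib.redirect_stdout(out):
        print(f'({index + 1:>2}/{total}) setting up {name}')
        time.sleep(0.1)  # stand-in for venv creation / pip installs
        print(f'({index + 1:>2}/{total}) done')
    return name, out.getvalue()


def setup_all(names):
    results = {}
    # The work is I/O-bound, so a generous worker count is fine, but it must
    # be at least 1 or the executor refuses to start (the patch 2 fix).
    workers = max(1, len(names))
    with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
        tasks = [(i, len(names), name) for i, name in enumerate(names)]
        # executor.map yields results in input order, so the printed log
        # stays deterministic even though the work runs concurrently.
        for name, output in executor.map(setup_one, tasks):
            results[name] = output
            print(output, end='')
    return results


if __name__ == '__main__':  # required for ProcessPoolExecutor on spawn platforms
    setup_all(['alpha', 'beta', 'gamma'])

The sketch follows the final patch in using ProcessPoolExecutor. Note that contextlib.redirect_stdout swaps the process-wide sys.stdout, so separate worker processes each capture only their own output, whereas threads sharing one interpreter could capture each other's.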