Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b6926e8

Browse files
committed
Attempt to partition files to use all possible cores
1 parent ec0ed8a commit b6926e8

2 files changed

Lines changed: 48 additions & 12 deletions

File tree

pre_commit/xargs.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from __future__ import absolute_import
2+
from __future__ import division
23
from __future__ import unicode_literals
34

45
import contextlib
6+
import math
57
import multiprocessing.pool
68
import sys
79

@@ -37,8 +39,13 @@ class ArgumentTooLongError(RuntimeError):
3739
pass
3840

3941

40-
def partition(cmd, varargs, _max_length=None):
42+
def partition(cmd, varargs, target_concurrency, _max_length=None):
4143
_max_length = _max_length or _get_platform_max_length()
44+
45+
# Generally, we try to partition evenly into at least `target_concurrency`
46+
# partitions, but we don't want a bunch of tiny partitions.
47+
max_args = max(4, math.ceil(len(varargs) / target_concurrency))
48+
4249
cmd = tuple(cmd)
4350
ret = []
4451

@@ -51,7 +58,10 @@ def partition(cmd, varargs, _max_length=None):
5158
arg = varargs.pop()
5259

5360
arg_length = _command_length(arg) + 1
54-
if total_length + arg_length <= _max_length:
61+
if (
62+
total_length + arg_length <= _max_length
63+
and len(ret_cmd) < max_args
64+
):
5565
ret_cmd.append(arg)
5666
total_length += arg_length
5767
elif not ret_cmd:
@@ -94,9 +104,7 @@ def xargs(cmd, varargs, **kwargs):
94104
except parse_shebang.ExecutableNotFoundError as e:
95105
return e.to_output()
96106

97-
# TODO: teach partition to intelligently target our desired concurrency
98-
# while still respecting max_length.
99-
partitions = partition(cmd, varargs, **kwargs)
107+
partitions = partition(cmd, varargs, target_concurrency, **kwargs)
100108

101109
def run_cmd_partition(run_cmd):
102110
return cmd_output(*run_cmd, encoding=None, retcode=None)

tests/xargs_test.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ def linux_mock():
3636

3737

3838
def test_partition_trivial():
39-
assert xargs.partition(('cmd',), ()) == (('cmd',),)
39+
assert xargs.partition(('cmd',), (), 1) == (('cmd',),)
4040

4141

4242
def test_partition_simple():
43-
assert xargs.partition(('cmd',), ('foo',)) == (('cmd', 'foo'),)
43+
assert xargs.partition(('cmd',), ('foo',), 1) == (('cmd', 'foo'),)
4444

4545

4646
def test_partition_limits():
@@ -54,6 +54,7 @@ def test_partition_limits():
5454
'.' * 5,
5555
'.' * 6,
5656
),
57+
1,
5758
_max_length=20,
5859
)
5960
assert ret == (
@@ -68,34 +69,61 @@ def test_partition_limit_win32_py3(win32_py3_mock):
6869
cmd = ('ninechars',)
6970
# counted as half because of utf-16 encode
7071
varargs = ('😑' * 5,)
71-
ret = xargs.partition(cmd, varargs, _max_length=20)
72+
ret = xargs.partition(cmd, varargs, 1, _max_length=20)
7273
assert ret == (cmd + varargs,)
7374

7475

7576
def test_partition_limit_win32_py2(win32_py2_mock):
7677
cmd = ('ninechars',)
7778
varargs = ('😑' * 5,) # 4 bytes * 5
78-
ret = xargs.partition(cmd, varargs, _max_length=30)
79+
ret = xargs.partition(cmd, varargs, 1, _max_length=30)
7980
assert ret == (cmd + varargs,)
8081

8182

8283
def test_partition_limit_linux(linux_mock):
8384
cmd = ('ninechars',)
8485
varargs = ('😑' * 5,)
85-
ret = xargs.partition(cmd, varargs, _max_length=30)
86+
ret = xargs.partition(cmd, varargs, 1, _max_length=30)
8687
assert ret == (cmd + varargs,)
8788

8889

8990
def test_argument_too_long_with_large_unicode(linux_mock):
9091
cmd = ('ninechars',)
9192
varargs = ('😑' * 10,) # 4 bytes * 10
9293
with pytest.raises(xargs.ArgumentTooLongError):
93-
xargs.partition(cmd, varargs, _max_length=20)
94+
xargs.partition(cmd, varargs, 1, _max_length=20)
95+
96+
97+
def test_partition_target_concurrency():
98+
ret = xargs.partition(
99+
('foo',), ('A',) * 22,
100+
4,
101+
_max_length=50,
102+
)
103+
assert ret == (
104+
('foo',) + ('A',) * 6,
105+
('foo',) + ('A',) * 6,
106+
('foo',) + ('A',) * 6,
107+
('foo',) + ('A',) * 4,
108+
)
109+
110+
111+
def test_partition_target_concurrency_wont_make_tiny_partitions():
112+
ret = xargs.partition(
113+
('foo',), ('A',) * 10,
114+
4,
115+
_max_length=50,
116+
)
117+
assert ret == (
118+
('foo',) + ('A',) * 4,
119+
('foo',) + ('A',) * 4,
120+
('foo',) + ('A',) * 2,
121+
)
94122

95123

96124
def test_argument_too_long():
97125
with pytest.raises(xargs.ArgumentTooLongError):
98-
xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10)
126+
xargs.partition(('a' * 5,), ('a' * 5,), 1, _max_length=10)
99127

100128

101129
def test_xargs_smoke():

0 commit comments

Comments
 (0)