Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e099d05

Browse files
authored
Merge pull request #29621 from math-hiyoko/enh/extend_unique_benchmark
ENH: Extend coverage for benchmark of np.unique
2 parents c8e2005 + 598f2df commit e099d05

File tree

1 file changed

+47
-12
lines changed

1 file changed

+47
-12
lines changed

benchmarks/benchmarks/bench_lib.py

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Benchmarks for `numpy.lib`."""
22

3+
import string
34

45
import numpy as np
56

@@ -119,37 +120,71 @@ def time_nanpercentile(self, array_size, percent_nans):
119120
class Unique(Benchmark):
120121
"""Benchmark for np.unique with np.nan values."""
121122

122-
param_names = ["array_size", "percent_nans"]
123+
param_names = ["array_size", "percent_nans", "percent_unique_values", "dtype"]
123124
params = [
124125
# sizes of the 1D arrays
125126
[200, int(2e5)],
126127
# percent of np.nan in arrays
127-
[0, 0.1, 2., 50., 90.],
128+
[10., 90.],
129+
# percent of unique values in arrays
130+
[0.2, 20.],
131+
# dtypes of the arrays
132+
[np.float64, np.complex128, np.dtypes.StringDType(na_object=np.nan)],
128133
]
129134

130-
def setup(self, array_size, percent_nans):
131-
np.random.seed(123)
135+
def setup(self, array_size, percent_nans, percent_unique_values, dtype):
136+
rng = np.random.default_rng(123)
132137
# produce a randomly shuffled array with the
133138
# approximate desired percentage np.nan content
134-
base_array = np.random.uniform(size=array_size)
135-
n_nan = int(percent_nans * array_size)
136-
nan_indices = np.random.choice(np.arange(array_size), size=n_nan)
139+
unique_values_size = max(int(percent_unique_values / 100. * array_size), 2)
140+
match dtype:
141+
case np.float64:
142+
unique_array = rng.uniform(size=unique_values_size).astype(dtype)
143+
case np.complex128:
144+
unique_array = np.array(
145+
[
146+
complex(*rng.uniform(size=2))
147+
for _ in range(unique_values_size)
148+
],
149+
dtype=dtype,
150+
)
151+
case np.dtypes.StringDType():
152+
chars = string.ascii_letters + string.digits
153+
unique_array = np.array(
154+
[
155+
''.join(rng.choice(list(chars), size=rng.integers(4, 8)))
156+
for _ in range(unique_values_size)
157+
],
158+
dtype=dtype,
159+
)
160+
case _:
161+
raise ValueError(f"Unsupported dtype {dtype}")
162+
163+
base_array = np.resize(unique_array, array_size)
164+
rng.shuffle(base_array)
165+
# insert nans in random places
166+
n_nan = int(percent_nans / 100. * array_size)
167+
nan_indices = rng.choice(np.arange(array_size), size=n_nan, replace=False)
137168
base_array[nan_indices] = np.nan
138169
self.arr = base_array
139170

140-
def time_unique_values(self, array_size, percent_nans):
171+
def time_unique_values(self, array_size, percent_nans,
172+
percent_unique_values, dtype):
141173
np.unique(self.arr, return_index=False,
142174
return_inverse=False, return_counts=False)
143175

144-
def time_unique_counts(self, array_size, percent_nans):
176+
def time_unique_counts(self, array_size, percent_nans,
177+
percent_unique_values, dtype):
145178
np.unique(self.arr, return_index=False,
146-
return_inverse=False, return_counts=True)
179+
return_inverse=False, return_counts=True,)
147180

148-
def time_unique_inverse(self, array_size, percent_nans):
181+
def time_unique_inverse(self, array_size, percent_nans,
182+
percent_unique_values, dtype):
149183
np.unique(self.arr, return_index=False,
150184
return_inverse=True, return_counts=False)
151185

152-
def time_unique_all(self, array_size, percent_nans):
186+
def time_unique_all(self, array_size, percent_nans,
187+
percent_unique_values, dtype):
153188
np.unique(self.arr, return_index=True,
154189
return_inverse=True, return_counts=True)
155190

0 commit comments

Comments
 (0)