|
1 | 1 | """Benchmarks for `numpy.lib`."""
|
2 | 2 |
|
| 3 | +import string |
3 | 4 |
|
4 | 5 | import numpy as np
|
5 | 6 |
|
@@ -119,37 +120,71 @@ def time_nanpercentile(self, array_size, percent_nans):
|
119 | 120 | class Unique(Benchmark):
|
120 | 121 | """Benchmark for np.unique with np.nan values."""
|
121 | 122 |
|
122 |
| - param_names = ["array_size", "percent_nans"] |
| 123 | + param_names = ["array_size", "percent_nans", "percent_unique_values", "dtype"] |
123 | 124 | params = [
|
124 | 125 | # sizes of the 1D arrays
|
125 | 126 | [200, int(2e5)],
|
126 | 127 | # percent of np.nan in arrays
|
127 |
| - [0, 0.1, 2., 50., 90.], |
| 128 | + [10., 90.], |
| 129 | + # percent of unique values in arrays |
| 130 | + [0.2, 20.], |
| 131 | + # dtypes of the arrays |
| 132 | + [np.float64, np.complex128, np.dtypes.StringDType(na_object=np.nan)], |
128 | 133 | ]
|
129 | 134 |
|
130 |
| - def setup(self, array_size, percent_nans): |
131 |
| - np.random.seed(123) |
| 135 | + def setup(self, array_size, percent_nans, percent_unique_values, dtype): |
| 136 | + rng = np.random.default_rng(123) |
132 | 137 | # produce a randomly shuffled array with the
|
133 | 138 | # approximate desired percentage np.nan content
|
134 |
| - base_array = np.random.uniform(size=array_size) |
135 |
| - n_nan = int(percent_nans * array_size) |
136 |
| - nan_indices = np.random.choice(np.arange(array_size), size=n_nan) |
| 139 | + unique_values_size = max(int(percent_unique_values / 100. * array_size), 2) |
| 140 | + match dtype: |
| 141 | + case np.float64: |
| 142 | + unique_array = rng.uniform(size=unique_values_size).astype(dtype) |
| 143 | + case np.complex128: |
| 144 | + unique_array = np.array( |
| 145 | + [ |
| 146 | + complex(*rng.uniform(size=2)) |
| 147 | + for _ in range(unique_values_size) |
| 148 | + ], |
| 149 | + dtype=dtype, |
| 150 | + ) |
| 151 | + case np.dtypes.StringDType(): |
| 152 | + chars = string.ascii_letters + string.digits |
| 153 | + unique_array = np.array( |
| 154 | + [ |
| 155 | + ''.join(rng.choice(list(chars), size=rng.integers(4, 8))) |
| 156 | + for _ in range(unique_values_size) |
| 157 | + ], |
| 158 | + dtype=dtype, |
| 159 | + ) |
| 160 | + case _: |
| 161 | + raise ValueError(f"Unsupported dtype {dtype}") |
| 162 | + |
| 163 | + base_array = np.resize(unique_array, array_size) |
| 164 | + rng.shuffle(base_array) |
| 165 | + # insert nans in random places |
| 166 | + n_nan = int(percent_nans / 100. * array_size) |
| 167 | + nan_indices = rng.choice(np.arange(array_size), size=n_nan, replace=False) |
137 | 168 | base_array[nan_indices] = np.nan
|
138 | 169 | self.arr = base_array
|
139 | 170 |
|
140 |
| - def time_unique_values(self, array_size, percent_nans): |
| 171 | + def time_unique_values(self, array_size, percent_nans, |
| 172 | + percent_unique_values, dtype): |
141 | 173 | np.unique(self.arr, return_index=False,
|
142 | 174 | return_inverse=False, return_counts=False)
|
143 | 175 |
|
144 |
| - def time_unique_counts(self, array_size, percent_nans): |
| 176 | + def time_unique_counts(self, array_size, percent_nans, |
| 177 | + percent_unique_values, dtype): |
145 | 178 | np.unique(self.arr, return_index=False,
|
146 |
| - return_inverse=False, return_counts=True) |
| 179 | + return_inverse=False, return_counts=True,) |
147 | 180 |
|
148 |
| - def time_unique_inverse(self, array_size, percent_nans): |
| 181 | + def time_unique_inverse(self, array_size, percent_nans, |
| 182 | + percent_unique_values, dtype): |
149 | 183 | np.unique(self.arr, return_index=False,
|
150 | 184 | return_inverse=True, return_counts=False)
|
151 | 185 |
|
152 |
| - def time_unique_all(self, array_size, percent_nans): |
| 186 | + def time_unique_all(self, array_size, percent_nans, |
| 187 | + percent_unique_values, dtype): |
153 | 188 | np.unique(self.arr, return_index=True,
|
154 | 189 | return_inverse=True, return_counts=True)
|
155 | 190 |
|
|
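For context, a minimal sketch of the kind of array the new StringDType case feeds into the benchmarked np.unique calls. It assumes NumPy 2.x, where np.dtypes.StringDType(na_object=np.nan) lets np.nan be stored as a missing-value sentinel; the tiny array and literal strings below are illustrative only, not part of the benchmark itself.

import numpy as np

# Illustrative only: a small StringDType array with np.nan as the
# missing-value sentinel, mirroring what setup() builds at a larger scale.
dt = np.dtypes.StringDType(na_object=np.nan)
arr = np.array(["pear", "apple", np.nan, "apple", np.nan, "pear"], dtype=dt)

# The call that time_unique_counts() measures (return_index and
# return_inverse are left at their default of False).
values, counts = np.unique(arr, return_counts=True)
print(values, counts)

With a standard airspeed velocity (asv) setup, the parametrized cases added here can be selected by regex, e.g. asv run --bench Unique.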