Thanks to visit codestin.com
Credit goes to github.com

Skip to content

ENH: Speed up trim_zeros #16911

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Aug 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2dd352c
Added a benchmark for `trim_zeros()`
Jul 11, 2020
59e7cde
Updated the benchmark with `param_names` and `params`
Jul 11, 2020
b6fa460
Improve the performance of `np.trim_zeros()`
Jul 11, 2020
5f85a1c
Fixed a string-representation
Jul 11, 2020
af728c8
docstring update; fixed an issue with the try/except approach in 'B'
Jul 20, 2020
3da3f3c
Removed a sentence which is redundant since 2a2d9d622e9bdeafd5cfafc9f…
Jul 20, 2020
e3da361
Reverted a redundant documentation change
Jul 20, 2020
ca5d462
Improve readability: use ndarray.any() to check for empty arrays
Jul 20, 2020
f4ae522
Revert "Improve readability: use ndarray.any() to check for empty arr…
Jul 20, 2020
8aade1b
The performance increase of 997bdc793234d6c1c75738035ba63730d686b356 …
Jul 20, 2020
189e808
Increase the variety of the tests
Jul 20, 2020
7a9d0e2
BUG: check if array-length is non-zero before calling `argmax()`
Jul 20, 2020
75ad305
Implement https://github.com/numpy/numpy/pull/16911#discussion_r45733…
Jul 20, 2020
9d1662b
Implement https://github.com/numpy/numpy/pull/16911#discussion_r45735…
Jul 20, 2020
ccf1c21
Implemented https://github.com/numpy/numpy/pull/16911#discussion_r457…
Jul 20, 2020
4d91311
Implemented https://github.com/numpy/numpy/pull/16911#issuecomment-66…
Jul 20, 2020
eae7275
TST: Avoid the use of `warnings.simplefilter('ignore', ...)
Jul 21, 2020
20829cf
MAINT: Gramar fix
Jul 21, 2020
a25620e
TST,MAINT: Fixed an incorrect exception name
Jul 21, 2020
25d2d23
STY: remove trailing whitespaces
Jul 31, 2020
16d5072
DEP: Added a date/version deprecation comment
Jul 31, 2020
ab313d5
MAINT: Typo fix; added a missing space
Jul 31, 2020
8b99f60
TST,DEP: Added a deprecation testcase
Jul 31, 2020
2392fe3
DEP,REL: Added a deprecation release note
Jul 31, 2020
dea951f
TST,DEP: Moved leftovers from a deprecation test
Aug 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions benchmarks/benchmarks/bench_trim_zeros.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from .common import Benchmark

import numpy as np

_FLOAT = np.dtype('float64')
_COMPLEX = np.dtype('complex128')
_INT = np.dtype('int64')
_BOOL = np.dtype('bool')


class TrimZeros(Benchmark):
    """Benchmark ``np.trim_zeros`` on arrays padded with zeros on both ends.

    Each benchmark array consists of three equally sized segments:
    zeros, non-zero values, zeros.  It is parametrized over the array
    dtype and the (approximate) total number of elements.
    """

    param_names = ["dtype", "size"]
    params = [
        [_INT, _FLOAT, _COMPLEX, _BOOL],
        [3000, 30_000, 300_000],
    ]

    def setup(self, dtype, size):
        """Build the zero-padded input array for the given dtype and size."""
        n = size // 3
        self.array = np.hstack([
            np.zeros(n),
            # Draw from [1, 2) rather than the default [0, 1): values below 1
            # are truncated to 0 when cast to an integer dtype, which would
            # turn the integer benchmark into an all-zero array.
            np.random.uniform(low=1, high=2, size=n),
            np.zeros(n),
        ]).astype(dtype)

    def time_trim_zeros(self, dtype, size):
        """Time a default (front and back) trim of the prepared array."""
        np.trim_zeros(self.array)
7 changes: 7 additions & 0 deletions doc/release/upcoming_changes/16911.deprecation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
``trim_zeros`` now requires a 1D array compatible with ``ndarray.astype(bool)``
-------------------------------------------------------------------------------
The ``trim_zeros`` function will, in the future, require an array with the
following two properties:

* It must be 1D.
* It must be convertible into a boolean array.
20 changes: 19 additions & 1 deletion numpy/core/tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ def test_deprecated(self):
self.assert_deprecated(round, args=(scalar,))
self.assert_deprecated(round, args=(scalar, 0))
self.assert_deprecated(round, args=(scalar,), kwargs={'ndigits': 0})

def test_not_deprecated(self):
for scalar_type in self.not_deprecated_types:
scalar = scalar_type(0)
Expand Down Expand Up @@ -706,3 +706,21 @@ def test_deprecated(self):
# And when it is an assignment into a lower dimensional subarray:
self.assert_deprecated(lambda: np.array([arr, [0]], dtype=np.float64))
self.assert_deprecated(lambda: np.array([[0], arr], dtype=np.float64))


class TestTrimZeros(_DeprecationTestCase):
    # Numpy 1.20.0, 2020-07-31
    @pytest.mark.parametrize(
        "arr",
        [
            np.random.rand(10, 10).tolist(),
            np.random.rand(10).astype(str),
        ],
    )
    def test_deprecated(self, arr):
        """Check the deprecation path of ``np.trim_zeros``.

        With ``DeprecationWarning`` escalated to an error, the call must
        raise one whose ``__cause__`` is a ``ValueError``.  Afterwards the
        legacy implementation is expected to hand `arr` back unchanged.
        """
        with warnings.catch_warnings():
            # Turn the deprecation warning into an exception so that it
            # (and the exception attached to it) can be inspected
            warnings.simplefilter('error', DeprecationWarning)
            try:
                np.trim_zeros(arr)
            except DeprecationWarning as warning:
                cause = warning.__cause__
                assert_(isinstance(cause, ValueError))
            else:
                raise AssertionError("No error raised during function call")

        result = np.lib.function_base._trim_zeros_old(arr)
        assert_array_equal(arr, result)
67 changes: 59 additions & 8 deletions numpy/lib/function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def asarray_chkfinite(a, dtype=None, order=None):
By default, the data-type is inferred from the input data.
order : {'C', 'F', 'A', 'K'}, optional
Memory layout. 'A' and 'K' depend on the order of input array a.
'C' row-major (C-style),
'C' row-major (C-style),
'F' column-major (Fortran-style) memory representation.
'A' (any) means 'F' if `a` is Fortran contiguous, 'C' otherwise
'K' (keep) preserve input order
Expand Down Expand Up @@ -1624,6 +1624,57 @@ def trim_zeros(filt, trim='fb'):
>>> np.trim_zeros([0, 1, 2, 0])
[1, 2]

"""
try:
return _trim_zeros_new(filt, trim)
except Exception as ex:
# Numpy 1.20.0, 2020-07-31
warning = DeprecationWarning(
"in the future trim_zeros will require a 1-D array as input "
"that is compatible with ndarray.astype(bool)"
)
warning.__cause__ = ex
warnings.warn(warning, stacklevel=3)

# Fall back to the old implementation if an exception is encountered
# Note that the same exception may or may not be raised here as well
return _trim_zeros_old(filt, trim)


def _trim_zeros_new(filt, trim='fb'):
"""Newer optimized implementation of ``trim_zeros()``."""
arr = np.asanyarray(filt).astype(bool, copy=False)

if arr.ndim != 1:
raise ValueError('trim_zeros requires an array of exactly one dimension')
elif not len(arr):
return filt

trim_upper = trim.upper()
first = last = None

if 'F' in trim_upper:
first = arr.argmax()
# If `arr[first] is False` then so are all other elements
if not arr[first]:
return filt[:0]

if 'B' in trim_upper:
last = len(arr) - arr[::-1].argmax()
# If `arr[last - 1] is False` then so are all other elements
if not arr[last - 1]:
return filt[:0]

return filt[first:last]


def _trim_zeros_old(filt, trim='fb'):
"""
Older unoptimized implementation of ``trim_zeros()``.

Used as fallback in case an exception is encountered
in ``_trim_zeros_new()``.

"""
first = 0
trim = trim.upper()
Expand Down Expand Up @@ -2546,11 +2597,11 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
for backwards compatibility with previous versions of this function. These
arguments had no effect on the return values of the function and can be
safely ignored in this and previous versions of numpy.

Examples
--------
--------
In this example we generate two random arrays, ``xarr`` and ``yarr``, and
compute the row-wise and column-wise Pearson correlation coefficients,
compute the row-wise and column-wise Pearson correlation coefficients,
``R``. Since ``rowvar`` is true by default, we first find the row-wise
Pearson correlation coefficients between the variables of ``xarr``.

Expand All @@ -2566,11 +2617,11 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
array([[ 1. , 0.99256089, -0.68080986],
[ 0.99256089, 1. , -0.76492172],
[-0.68080986, -0.76492172, 1. ]])
If we add another set of variables and observations ``yarr``, we can

If we add another set of variables and observations ``yarr``, we can
compute the row-wise Pearson correlation coefficients between the
variables in ``xarr`` and ``yarr``.

>>> yarr = rng.random((3, 3))
>>> yarr
array([[0.45038594, 0.37079802, 0.92676499],
Expand All @@ -2592,7 +2643,7 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue):
1. ]])

Finally if we use the option ``rowvar=False``, the columns are now
being treated as the variables and we will find the column-wise Pearson
being treated as the variables and we will find the column-wise Pearson
correlation coefficients between variables in ``xarr`` and ``yarr``.

>>> R3 = np.corrcoef(xarr, yarr, rowvar=False)
Expand Down
46 changes: 34 additions & 12 deletions numpy/lib/tests/test_function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1166,25 +1166,47 @@ def test_subclass(self):

class TestTrimZeros:

"""
Only testing for integer splits.
a = np.array([0, 0, 1, 0, 2, 3, 4, 0])
b = a.astype(float)
c = a.astype(complex)
d = np.array([None, [], 1, False, 'b', 3.0, range(4), b''], dtype=object)

"""
def values(self):
attr_names = ('a', 'b', 'c', 'd')
return (getattr(self, name) for name in attr_names)

def test_basic(self):
a = np.array([0, 0, 1, 2, 3, 4, 0])
res = trim_zeros(a)
assert_array_equal(res, np.array([1, 2, 3, 4]))
slc = np.s_[2:-1]
for arr in self.values():
res = trim_zeros(arr)
assert_array_equal(res, arr[slc])

def test_leading_skip(self):
a = np.array([0, 0, 1, 0, 2, 3, 4, 0])
res = trim_zeros(a)
assert_array_equal(res, np.array([1, 0, 2, 3, 4]))
slc = np.s_[:-1]
for arr in self.values():
res = trim_zeros(arr, trim='b')
assert_array_equal(res, arr[slc])

def test_trailing_skip(self):
a = np.array([0, 0, 1, 0, 2, 3, 0, 4, 0])
res = trim_zeros(a)
assert_array_equal(res, np.array([1, 0, 2, 3, 0, 4]))
slc = np.s_[2:]
for arr in self.values():
res = trim_zeros(arr, trim='F')
assert_array_equal(res, arr[slc])

def test_all_zero(self):
for _arr in self.values():
arr = np.zeros_like(_arr, dtype=_arr.dtype)

res1 = trim_zeros(arr, trim='B')
assert len(res1) == 0

res2 = trim_zeros(arr, trim='f')
assert len(res2) == 0

def test_size_zero(self):
arr = np.zeros(0)
res = trim_zeros(arr)
assert_array_equal(arr, res)


class TestExtins:
Expand Down