Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Restore histogram consistency #7667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions numpy/lib/function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,9 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
# Pre-compute histogram scaling factor
norm = bins / (mx - mn)

# Compute the bin edges for potential correction.
bin_edges = linspace(mn, mx, bins + 1, endpoint=True)

# We iterate over blocks here for two reasons: the first is that for
# large arrays, it is actually faster (for example for a 10^8 array it
# is 2x as fast) and it results in a memory footprint 3x lower in the
Expand All @@ -657,14 +660,22 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
tmp_a = tmp_a[keep]
if tmp_w is not None:
tmp_w = tmp_w[keep]
tmp_a = tmp_a.astype(float)
tmp_a -= mn
tmp_a_data = tmp_a.astype(float)
tmp_a = tmp_a_data - mn
tmp_a *= norm

# Compute the bin indices, and for values that lie exactly on mx we
# need to subtract one
indices = tmp_a.astype(np.intp)
indices[indices == bins] -= 1
equals_endpoint = (indices == bins)
indices[equals_endpoint] -= 1

# The index computation is not guaranteed to give exactly
# consistent results within ~1 ULP of the bin edges.
decrement = tmp_a_data < bin_edges[indices]
indices[decrement] -= 1
increment = (tmp_a_data >= bin_edges[indices + 1]) & ~equals_endpoint
indices[increment] += 1

# We now compute the histogram using bincount
if ntype.kind == 'c':
Expand All @@ -673,8 +684,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
else:
n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype)

# We now compute the bin edges since these are returned
bins = linspace(mn, mx, bins + 1, endpoint=True)
# Rename the bin edges for return.
bins = bin_edges
else:
bins = asarray(bins)
if (np.diff(bins) < 0).any():
Expand Down
11 changes: 11 additions & 0 deletions numpy/lib/tests/test_function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,17 @@ def test_finite_range(self):
assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])

def test_bin_edge_cases(self):
# Ensure that floating-point computations correctly place edge cases.
arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
mask = hist > 0
left_edges = edges[:-1][mask]
right_edges = edges[1:][mask]
for x, left, right in zip(arr, left_edges, right_edges):
self.assertGreaterEqual(x, left)
self.assertLess(x, right)


class TestHistogramOptimBinNums(TestCase):
"""
Expand Down