Commit 690c770

khchansolvents authored and committed

Merging for pep8 compliance for mlab's GaussianKDE

1 parent 91a33e8, commit 690c770

File tree

2 files changed: +118 -107 lines changed


lib/matplotlib/axes/_axes.py

Lines changed: 7 additions & 6 deletions
@@ -6852,14 +6852,15 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
         # Render violins
         for data, pos, width in zip(dataset, positions, widths):
             # Calculate the kernel density
-            kde = mlab.ksdensity(data)
-            min_val = kde['xmin']
-            max_val = kde['xmax']
-            mean = kde['mean']
-            median = kde['median']
-            vals = kde['result']
+            kde = mlab.GaussianKDE(data)
+            min_val = kde.dataset.min()
+            max_val = kde.dataset.max()
+            mean = np.mean(kde.dataset)
+            median = np.median(kde.dataset)
             coords = np.arange(min_val, max_val, (max_val - min_val)/points)

+            vals = kde.evaluate(coords)
+
             # Since each data point p is plotted from v-p to v+p,
             # we need to scale it by an additional 0.5 factor so that we get
             # correct width in the end.

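For reference, a minimal usage sketch of the API change above (not part of the commit): the old mlab.ksdensity returned a dict of precomputed values, while the new mlab.GaussianKDE object exposes the raw dataset plus an evaluate method, so the caller chooses the evaluation grid. The sample data and grid size below are invented for illustration, assuming the version of mlab introduced by this commit.

    import numpy as np
    from matplotlib import mlab

    data = np.random.normal(size=100)    # invented sample data

    kde = mlab.GaussianKDE(data)         # was: kde = mlab.ksdensity(data)
    min_val = kde.dataset.min()          # was: kde['xmin']
    max_val = kde.dataset.max()          # was: kde['xmax']

    points = 100                         # grid resolution, as in violinplot
    coords = np.arange(min_val, max_val, (max_val - min_val) / points)
    vals = kde.evaluate(coords)          # was: kde['result']
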
lib/matplotlib/mlab.py

Lines changed: 111 additions & 101 deletions
@@ -3656,12 +3656,12 @@ def stineman_interp(xi,x,y,yp=None):
                                   1/(dy1+dy2),))
     return yi

-def ksdensity(dataset, bw_method=None):
+class GaussianKDE(object):
     """
     Representation of a kernel-density estimate using Gaussian kernels.

     Call signature::
-        kde_dict = ksdensity(dataset, 'silverman')
+        kde = gaussian_kde(dataset, 'silverman')

     Parameters
     ----------
@@ -3676,10 +3676,10 @@ def ksdensity(dataset, bw_method=None):
     Attributes
     ----------
     dataset : ndarray
-        The dataset with which `ksdensity` was initialized.
-    d : int
+        The dataset with which `gaussian_kde` was initialized.
+    dim : int
         Number of dimensions.
-    n : int
+    num_dp : int
         Number of datapoints.
     factor : float
         The bandwidth factor, obtained from `kde.covariance_factor`, with which
@@ -3690,117 +3690,127 @@
     inv_cov : ndarray
         The inverse of `covariance`.

-    Returns
+    Methods
     -------
-    A dictionary mapping each various aspects of the computed KDE.
-    The dictionary has the following keys:
-
-        xmin : number
-            The min of the input dataset
-        xmax : number
-            The max of the input dataset
-        mean : number
-            The mean of the result
-        median: number
-            The median of the result
-        result: (# of points,)-array
-            The array of the evaluated PDF estimation
-
-    Raises
-    ------
-    ValueError : if the dimensionality of the input points is different than
-                 the dimensionality of the KDE.
+    kde.evaluate(points) : ndarray
+        Evaluate the estimated pdf on a provided set of points.
+    kde(points) : ndarray
+        Same as kde.evaluate(points)

     """

     # This implementation with minor modification was too good to pass up.
     # from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py

-    dataset = np.array(np.atleast_2d(dataset))
-    xmin = dataset.min()
-    xmax = dataset.max()
+    def __init__(self, dataset, bw_method=None):
+        self.dataset = np.atleast_2d(dataset)
+        if not np.array(self.dataset).size > 1:
+            raise ValueError("`dataset` input should have multiple elements.")
+
+        self.dim, self.num_dp = np.array(self.dataset).shape
+
+        if bw_method is None:
+            pass
+        elif bw_method == 'scott':
+            self.covariance_factor = self.scotts_factor
+        elif bw_method == 'silverman':
+            self.covariance_factor = self.silverman_factor
+        elif np.isscalar(bw_method):
+            if not isinstance(bw_method, six.string_types):
+                self._bw_method = 'use constant'
+                self.covariance_factor = lambda: bw_method
+        elif callable(bw_method):
+            self._bw_method = bw_method
+            self.covariance_factor = lambda: self._bw_method(self)
+        else:
+            msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
+                  "or a callable."
+            raise ValueError(msg)
+
+        # Computes the covariance matrix for each Gaussian kernel using
+        # covariance_factor().
+
+        self.factor = self.covariance_factor()
+        # Cache covariance and inverse covariance of the data
+        if not hasattr(self, '_data_inv_cov'):
+            self.data_covariance = np.atleast_2d(
+                np.cov(
+                    self.dataset,
+                    rowvar=1,
+                    bias=False))
+            self.data_inv_cov = np.linalg.inv(self.data_covariance)
+
+        self.covariance = self.data_covariance * self.factor ** 2
+        self.inv_cov = self.data_inv_cov / self.factor ** 2
+        self.norm_factor = np.sqrt(
+            np.linalg.det(
+                2 * np.pi * self.covariance)) * self.num_dp
+
+    def scotts_factor(self):
+        return np.power(self.num_dp, -1. / (self.dim + 4))
+
+    def silverman_factor(self):
+        return np.power(
+            self.num_dp * (self.dim + 2.0) / 4.0, -1. / (self.dim + 4))
+
+    # Default method to calculate bandwidth, can be overwritten by subclass
+    covariance_factor = scotts_factor

-    if not dataset.size > 1:
-        raise ValueError("`dataset` input should have multiple elements.")
+    def evaluate(self, points):
+        """Evaluate the estimated pdf on a set of points.

-    dim, num_dp = dataset.shape
+        Parameters
+        ----------
+        points : (# of dimensions, # of points)-array
+            Alternatively, a (# of dimensions,) vector can be passed in and
+            treated as a single point.

-    # ----------------------------------------------
-    # Set Bandwidth, defaulted to Scott's Factor
-    # ----------------------------------------------
-    scotts_factor = lambda: np.power(num_dp, -1./(dim+4))
-    silverman_factor = lambda: np.power(num_dp*(dim+2.0)/4.0, -1./(dim+4))
+        Returns
+        -------
+        values : (# of points,)-array
+            The values at each point.

-    # Default method to calculate bandwidth, can be overwritten by subclass
-    covariance_factor = scotts_factor
+        Raises
+        ------
+        ValueError : if the dimensionality of the input points is different
+            than the dimensionality of the KDE.

-    if bw_method is None:
-        pass
-    elif bw_method == 'scott':
-        covariance_factor = scotts_factor
-    elif bw_method == 'silverman':
-        covariance_factor = silverman_factor
-    elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
-        covariance_factor = lambda: bw_method
-    else:
-        msg = "`bw_method` should be 'scott', 'silverman', or a scalar"
-        raise ValueError(msg)
-
-    # ---------------------------------------------------------------
-    # Computes covariance matrix for each Gaussian kernel with factor
-    # ---------------------------------------------------------------
-    factor = covariance_factor()
-
-    # Cache covariance and inverse covariance of the data
-    data_covariance = np.atleast_2d(np.cov(dataset, rowvar=1, bias=False))
-    data_inv_cov = np.linalg.inv(data_covariance)
-
-    covariance = data_covariance * factor**2
-    inv_cov = data_inv_cov / factor**2
-    norm_factor = np.sqrt(np.linalg.det(2*np.pi*covariance)) * num_dp
-
-    # ----------------------------------------------
-    # Evaluate the estimated pdf on a set of points.
-    # ----------------------------------------------
-    points = np.atleast_2d(np.arange(xmin, xmax, (xmax-xmin)/100.))
-
-    dim_pts, num_dp_pts = np.array(points).shape
-    if dim_pts != dim:
-        if dim_pts == 1 and num_dp_pts == num_dp:
-            # points was passed in as a row vector
-            points = np.reshape(points, (dim, 1))
-            num_dp_pts = 1
+        """
+        points = np.atleast_2d(points)
+
+        dim, num_m = np.array(points).shape
+        if dim != self.dim:
+            if dim == 1 and num_m == self.dim:
+                # points was passed in as a row vector
+                points = np.reshape(points, (self.dim, 1))
+                num_m = 1
+            else:
+                msg = "points have dimension %s, dataset has dimension %s" % (
+                    dim, self.dim)
+                raise ValueError(msg)
+
+        result = np.zeros((num_m,), dtype=np.float)
+
+        if num_m >= self.num_dp:
+            # there are more points than data, so loop over data
+            for i in range(self.num_dp):
+                diff = self.dataset[:, i, np.newaxis] - points
+                tdiff = np.dot(self.inv_cov, diff)
+                energy = np.sum(diff * tdiff, axis=0) / 2.0
+                result = result + np.exp(-energy)
         else:
-            msg = "points have dimension %s,\
-                dataset has dimension %s" % (dim_pts, dim)
-            raise ValueError(msg)
+            # loop over points
+            for i in range(num_m):
+                diff = self.dataset - points[:, i, np.newaxis]
+                tdiff = np.dot(self.inv_cov, diff)
+                energy = np.sum(diff * tdiff, axis=0) / 2.0
+                result[i] = np.sum(np.exp(-energy), axis=0)

-    result = np.zeros((num_dp_pts,), dtype=np.float)
+        result = result / self.norm_factor

-    if num_dp_pts >= num_dp:
-        # there are more points than data, so loop over data
-        for i in range(num_dp):
-            diff = dataset[:, i, np.newaxis] - points
-            tdiff = np.dot(inv_cov, diff)
-            energy = np.sum(diff*tdiff, axis=0) / 2.0
-            result = result + np.exp(-energy)
-    else:
-        # loop over points
-        for i in range(num_dp_pts):
-            diff = dataset - points[:, i, np.newaxis]
-            tdiff = np.dot(inv_cov, diff)
-            energy = np.sum(diff * tdiff, axis=0) / 2.0
-            result[i] = np.sum(np.exp(-energy), axis=0)
-
-    result = result / norm_factor
-
-    return {
-        'xmin': xmin,
-        'xmax': xmax,
-        'mean': np.mean(dataset),
-        'median': np.median(dataset),
-        'result': result
-    }
+        return result
+
+    __call__ = evaluate

 ##################################################
 # Code related to things in and around polygons

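A short usage sketch of the GaussianKDE class added in this file (not part of the commit, assuming the version of matplotlib.mlab introduced here; the sample arrays are invented). It exercises the 'silverman' bandwidth rule, the evaluate method, the __call__ alias, and a plain scalar bandwidth.

    import numpy as np
    from matplotlib import mlab

    np.random.seed(0)
    samples = np.concatenate([np.random.normal(-2.0, 1.0, 200),
                              np.random.normal(3.0, 0.5, 200)])

    # Silverman's rule derives the bandwidth factor from num_dp and dim.
    kde = mlab.GaussianKDE(samples, bw_method='silverman')
    print(kde.dim, kde.num_dp, kde.factor)

    grid = np.linspace(samples.min(), samples.max(), 200)
    density = kde.evaluate(grid)     # estimated pdf on the grid
    density2 = kde(grid)             # __call__ is an alias for evaluate

    # A non-string scalar is used directly as the bandwidth factor.
    kde_narrow = mlab.GaussianKDE(samples, bw_method=0.1)
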
0 commit comments
