Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
afb4335
Changed name n_components to n_connected_components in base class
scouvreur Mar 10, 2019
e3e267c
Fixed line which exceeded PEP8 max of 79 chars
scouvreur Mar 10, 2019
c9adc5f
Fixed line 818 which exceeded PEP8 max of 79 chars
scouvreur Mar 10, 2019
abe4617
Added try and except to provide deprecation warning if passed
scouvreur Mar 12, 2019
b51a058
Updated deprecation and removal version numbers
scouvreur Mar 12, 2019
ff8b930
Added deprecation of n_components using @property generator
scouvreur Mar 13, 2019
1ccb7c6
Makes FeatureAgglomeration class inherit n_connected_components_ attr…
scouvreur Mar 18, 2019
ca31d0d
Added test for DeprecationWarning when trying to access n_components
scouvreur Mar 18, 2019
238c4ac
Removed @property generator causing linting error
scouvreur Mar 18, 2019
cb73b10
Fixed typo in test
scouvreur Mar 18, 2019
0e5aeb4
Fixed flake8 error due to single line between 2 functions
scouvreur Mar 18, 2019
54fd143
Test fix attempt
scouvreur Mar 18, 2019
852e55e
Edited test function docstring
scouvreur Mar 18, 2019
260a15b
Corrected n_components deprecation test docstring
scouvreur Mar 19, 2019
5d30766
Fixed line continuation issue in AgglomerativeClustering base class
scouvreur Mar 19, 2019
1cbdf5c
Added deprecation message as part of the @deprecated decorator
scouvreur Mar 20, 2019
61caa9a
Merge branch 'master' into AgglomerativeClustering-n_components_-Rena…
Mar 20, 2019
5626fbc
Added attribute deprecation information in the Attributes section of…
scouvreur Mar 21, 2019
14ac792
Added test for deprecation warning message
scouvreur Mar 21, 2019
17aded9
Added attribute deprecation information in the Attributes section of…
scouvreur Mar 21, 2019
822eaa5
Fixed test issue and added longer match string
scouvreur Mar 21, 2019
bdbb14a
Edited n_components_ deprecation message to add double backticks
scouvreur Mar 21, 2019
993fc35
Fixed match string to reflect deprecation message change in test
scouvreur Mar 21, 2019
ce8f02a
Added name to list of contributors
scouvreur Mar 21, 2019
31354c3
Documented information in v0.21 changelog
scouvreur Mar 21, 2019
4cbe2c1
Added cluster parent folder to documentation in v0.21 changelog
scouvreur Mar 21, 2019
7faf58a
Removed myself from list of core contributors
scouvreur Mar 22, 2019
7642f64
Moved |API| subsection to the end of the list, and changed reference …
scouvreur Mar 22, 2019
6c76d7c
Removed n_components deprecation documentation from FeatureAgglomerat…
scouvreur Mar 24, 2019
98a18df
Fix indentation on _fix_connectivity function call
scouvreur Mar 24, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats_new/v0.21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ Support for Python 3.4 and below has been officially dropped.
to set and that scales better, by :user:`Shane <espg>`,
:user:`Adrin Jalali <adrinjalali>`, and :user:`Erich Schubert <kno10>`.

- |API| The ``n_components_`` attribute in :class:`cluster.AgglomerativeClustering`
and :class:`cluster.FeatureAgglomeration` has been renamed to
``n_connected_components_``.
:issue:`13427` by :user:`Stephane Couvreur <scouvreur>`.

:mod:`sklearn.datasets`
.......................

Expand Down
64 changes: 37 additions & 27 deletions sklearn/cluster/hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
from ._feature_agglomeration import AgglomerationTransform
from ..utils.fast_dict import IntFloatDict
from ..utils.fixes import _astype_copy_false
from ..utils import deprecated

###############################################################################
# For non fully-connected graphs


def _fix_connectivity(X, connectivity, affinity):
"""
Fixes the connectivity matrix
Expand All @@ -54,15 +54,15 @@ def _fix_connectivity(X, connectivity, affinity):
connectivity = connectivity.tolil()

# Compute the number of nodes
n_components, labels = connected_components(connectivity)
n_connected_components, labels = connected_components(connectivity)

if n_components > 1:
if n_connected_components > 1:
warnings.warn("the number of connected components of the "
"connectivity matrix is %d > 1. Completing it to avoid "
"stopping the tree early." % n_components,
"stopping the tree early." % n_connected_components,
stacklevel=2)
# XXX: Can we do without completing the matrix?
for i in range(n_components):
for i in range(n_connected_components):
idx_i = np.where(labels == i)[0]
Xi = X[idx_i]
for j in range(i):
Expand All @@ -75,11 +75,11 @@ def _fix_connectivity(X, connectivity, affinity):
connectivity[idx_i[ii], idx_j[jj]] = True
connectivity[idx_j[jj], idx_i[ii]] = True

return connectivity, n_components
return connectivity, n_connected_components


def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,
n_components, return_distance):
n_connected_components, return_distance):
"""
Perform single linkage clustering on sparse data via the minimum
spanning tree from scipy.sparse.csgraph, then using union-find to label.
Expand Down Expand Up @@ -125,8 +125,8 @@ def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,

if return_distance:
distances = single_linkage_tree[:, 2]
return children_, n_components, n_samples, parent, distances
return children_, n_components, n_samples, parent
return children_, n_connected_components, n_samples, parent, distances
return children_, n_connected_components, n_samples, parent


###############################################################################
Expand Down Expand Up @@ -177,7 +177,7 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
at the i-th iteration, children[i][0] and children[i][1]
are merged to form node `n_samples + i`

n_components : int
n_connected_components : int
The number of connected components in the graph.

n_leaves : int
Expand Down Expand Up @@ -239,8 +239,9 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
else:
return children_, 1, n_samples, None

connectivity, n_components = _fix_connectivity(X, connectivity,
affinity='euclidean')
connectivity, n_connected_components = _fix_connectivity(
X, connectivity,
affinity='euclidean')
if n_clusters is None:
n_nodes = 2 * n_samples - 1
else:
Expand Down Expand Up @@ -333,9 +334,9 @@ def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
if return_distance:
# 2 is scaling factor to compare w/ unstructured version
distances = np.sqrt(2. * distances)
return children, n_components, n_leaves, parent, distances
return children, n_connected_components, n_leaves, parent, distances
else:
return children, n_components, n_leaves, parent
return children, n_connected_components, n_leaves, parent


# single average and complete linkage
Expand Down Expand Up @@ -396,7 +397,7 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
at the i-th iteration, children[i][0] and children[i][1]
are merged to form node `n_samples + i`

n_components : int
n_connected_components : int
The number of connected components in the graph.

n_leaves : int
Expand Down Expand Up @@ -467,9 +468,9 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
return children_, 1, n_samples, None, distances
return children_, 1, n_samples, None

connectivity, n_components = _fix_connectivity(X, connectivity,
affinity=affinity)

connectivity, n_connected_components = _fix_connectivity(
X, connectivity,
affinity=affinity)
connectivity = connectivity.tocoo()
# Put the diagonal to zero
diag_mask = (connectivity.row != connectivity.col)
Expand Down Expand Up @@ -497,7 +498,8 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',

if linkage == 'single':
return _single_linkage_tree(connectivity, n_samples, n_nodes,
n_clusters, n_components, return_distance)
n_clusters, n_connected_components,
return_distance)

if return_distance:
distances = np.empty(n_nodes - n_samples)
Expand Down Expand Up @@ -567,8 +569,8 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
children = np.array(children)[:, ::-1]

if return_distance:
return children, n_components, n_leaves, parent, distances
return children, n_components, n_leaves, parent
return children, n_connected_components, n_leaves, parent, distances
return children, n_connected_components, n_leaves, parent


# Matching names to tree-building strategies
Expand Down Expand Up @@ -717,7 +719,7 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
n_leaves_ : int
Number of leaves in the hierarchical tree.

n_components_ : int
n_connected_components_ : int
The estimated number of connected components in the graph.

children_ : array-like, shape (n_samples-1, 2)
Expand Down Expand Up @@ -756,6 +758,13 @@ def __init__(self, n_clusters=2, affinity="euclidean",
self.affinity = affinity
self.pooling_func = pooling_func

# Backward-compatible, read-only alias for the renamed
# ``n_connected_components_`` attribute. ``@deprecated`` decorates the getter
# and ``@property`` wraps the result, so the message below is tied to reading
# ``n_components_`` on an instance (old name deprecated in 0.21, to be removed
# in 0.23, as stated in the message itself).
@property
@deprecated("The ``n_components_`` attribute was deprecated "
"in favor of ``n_connected_components_`` in 0.21 "
"and will be removed in 0.23.")
def n_components_(self):
# Forward to the new attribute name; it is assigned by ``fit``.
return self.n_connected_components_

def fit(self, X, y=None):
"""Fit the hierarchical clustering on the data

Expand Down Expand Up @@ -819,10 +828,11 @@ def fit(self, X, y=None):
if self.linkage != 'ward':
kwargs['linkage'] = self.linkage
kwargs['affinity'] = self.affinity
self.children_, self.n_components_, self.n_leaves_, parents = \
memory.cache(tree_builder)(X, connectivity,
n_clusters=n_clusters,
**kwargs)
(self.children_, self.n_connected_components_, self.n_leaves_,
parents) = memory.cache(tree_builder)(X, connectivity,
n_clusters=n_clusters,
**kwargs)

# Cut the tree
if compute_full_tree:
self.labels_ = _hc_cut(self.n_clusters, self.children_,
Expand Down Expand Up @@ -902,7 +912,7 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
n_leaves_ : int
Number of leaves in the hierarchical tree.

n_components_ : int
n_connected_components_ : int
The estimated number of connected components in the graph.

children_ : array-like, shape (n_nodes-1, 2)
Expand Down
14 changes: 14 additions & 0 deletions sklearn/cluster/tests/test_hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,3 +598,17 @@ def increment(self, *args, **kwargs):
linkage_tree(X, connectivity=connectivity, affinity=fa.increment)

assert_equal(fa.counter, 3)


def test_n_components_deprecation():
# Check that reading the deprecated ``n_components_`` attribute emits a
# DeprecationWarning with the expected message, and that it still returns
# the same value as the new ``n_connected_components_`` attribute.

# Four 2-D samples, fitted with the estimator's default parameters.
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2]])
agc = AgglomerativeClustering().fit(X)

# Leading portion of the message passed to ``@deprecated`` on the property;
# the version numbers at the end of that message are not part of the match.
match = ("``n_components_`` attribute was deprecated "
"in favor of ``n_connected_components_``")
with pytest.warns(DeprecationWarning, match=match):
n = agc.n_components_
# The deprecated alias must forward to the renamed attribute.
assert n == agc.n_connected_components_