From f89e8cbada76dc7fcf0e3e3c2531d768944de91f Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Wed, 12 Jul 2017 15:18:19 +0530 Subject: [PATCH 1/5] AffinityPropagation damping factor not explained --- doc/modules/clustering.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index b27496944a616..de402bcd9ac1a 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -301,7 +301,9 @@ is given. Affinity Propagation can be interesting as it chooses the number of clusters based on the data provided. For this purpose, the two important parameters are the *preference*, which controls how many exemplars are -used, and the *damping factor*. +used, and the *damping factor* which damps the responsibility and +availability messages to avoid numerical oscillations when updating these +messages. The main drawback of Affinity Propagation is its complexity. The algorithm has a time complexity of the order :math:`O(N^2 T)`, where :math:`N` From 604cde4e65193a60d059b90231aab6a48ed26696 Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Wed, 12 Jul 2017 15:22:33 +0530 Subject: [PATCH 2/5] Added for API also --- sklearn/cluster/affinity_propagation_.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 398529793880f..2af57db12cba7 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -197,7 +197,10 @@ class AffinityPropagation(BaseEstimator, ClusterMixin): Parameters ---------- damping : float, optional, default: 0.5 - Damping factor between 0.5 and 1. + Damping factor (between 0.5 and 1) damps the + responsibility and availability messages to + avoid numerical oscillations when updating these + messages. max_iter : int, optional, default: 200 Maximum number of iterations. 
From 3958a42e57b2ce952028a095095708cc8bb5a327 Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Wed, 12 Jul 2017 15:50:00 +0530 Subject: [PATCH 3/5] Add equation for damping --- doc/modules/clustering.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index de402bcd9ac1a..35e7cc8e2f580 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -353,6 +353,13 @@ to be the exemplar of sample :math:`i` is given by: To begin with, all values for :math:`r` and :math:`a` are set to zero, and the calculation of each iterates until convergence. +As discussed above, in order to avoid numerical oscillations when updating the messages, the damping factor :math:`\lambda` is introduced to iteration process: + +.. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) +.. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) + +where :math:`t` indicates the iteration number. + .. _mean_shift: Mean Shift From 886db455125f97f46eaf106dba8e0efebaffb646 Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Wed, 12 Jul 2017 16:07:05 +0530 Subject: [PATCH 4/5] formatting text --- doc/modules/clustering.rst | 4 ++-- sklearn/cluster/affinity_propagation_.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 35e7cc8e2f580..6842c1e2bde4f 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -352,8 +352,8 @@ to be the exemplar of sample :math:`i` is given by: To begin with, all values for :math:`r` and :math:`a` are set to zero, and the calculation of each iterates until convergence. 
- -As discussed above, in order to avoid numerical oscillations when updating the messages, the damping factor :math:`\lambda` is introduced to iteration process: +As discussed above, in order to avoid numerical oscillations when updating the +messages, the damping factor :math:`\lambda` is introduced to the iteration process: .. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) .. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 2af57db12cba7..14d309156d50c 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -197,8 +197,8 @@ class AffinityPropagation(BaseEstimator, ClusterMixin): Parameters ---------- damping : float, optional, default: 0.5 - Damping factor (between 0.5 and 1) damps the - responsibility and availability messages to + Damping factor (between 0.5 and 1) damps the + responsibility and availability messages to avoid numerical oscillations when updating these messages. From 3b10a6804658b5e82e5425f86e2429ed38db7f3b Mon Sep 17 00:00:00 2001 From: Sebastin Santy Date: Thu, 13 Jul 2017 16:38:12 +0530 Subject: [PATCH 5/5] Add suggestions --- sklearn/cluster/affinity_propagation_.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 14d309156d50c..8bf94cee95cda 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -197,10 +197,11 @@ class AffinityPropagation(BaseEstimator, ClusterMixin): Parameters ---------- damping : float, optional, default: 0.5 - Damping factor (between 0.5 and 1) damps the - responsibility and availability messages to - avoid numerical oscillations when updating these - messages. 
+ Damping factor (between 0.5 and 1) is the extent to + which the current value is maintained relative to + incoming values (weighted 1 - damping). This is in order + to avoid numerical oscillations when updating these + values (messages). max_iter : int, optional, default: 200 Maximum number of iterations.