[MRG+2] ENH Add new metrics for clustering #6823
@@ -1339,6 +1339,93 @@ mean of homogeneity and completeness**:
<http://www.cs.columbia.edu/~hila/hila-thesis-distributed.pdf>`_, Hila
Becker, PhD Thesis.

.. _fowlkes_mallows_scores:

Fowlkes-Mallows scores
----------------------

The Fowlkes-Mallows index (:func:`sklearn.metrics.fowlkes_mallows_score`) can be
used when the ground truth class assignments of the samples are known. The
Fowlkes-Mallows score FMI is defined as the geometric mean of the
pairwise precision and recall:

.. math:: \text{FMI} = \frac{\text{TP}}{\sqrt{(\text{TP} + \text{FP}) (\text{TP} + \text{FN})}}

Where ``TP`` is the number of **True Positives** (i.e. the number of pairs
of points that belong to the same cluster in both the true labels and the
predicted labels), ``FP`` is the number of **False Positives** (i.e. the number
of pairs of points that belong to the same cluster in the predicted labels but
not in the true labels) and ``FN`` is the number of **False Negatives** (i.e.
the number of pairs of points that belong to the same cluster in the true
labels but not in the predicted labels).

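These counts can be checked by brute force over all pairs of samples. The
snippet below is only an illustrative sketch (it is not meant to mirror how
:func:`sklearn.metrics.fowlkes_mallows_score` is implemented, and the variable
names are made up for the example)::

    from itertools import combinations

    from sklearn import metrics

    labels_true = [0, 0, 0, 1, 1, 1]
    labels_pred = [0, 0, 1, 1, 2, 2]

    tp = fp = fn = 0
    for i, j in combinations(range(len(labels_true)), 2):
        same_true = labels_true[i] == labels_true[j]
        same_pred = labels_pred[i] == labels_pred[j]
        if same_true and same_pred:
            tp += 1   # pair grouped together in both labelings
        elif same_pred:
            fp += 1   # pair grouped together only in the predicted labels
        elif same_true:
            fn += 1   # pair grouped together only in the true labels

    fmi = tp / ((tp + fp) * (tp + fn)) ** 0.5
    print(fmi)                                                      # 0.4714...
    print(metrics.fowlkes_mallows_score(labels_true, labels_pred))  # same value
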
The score ranges from 0 to 1. A high value indicates a good similarity
between two clusterings.

>>> from sklearn import metrics
>>> labels_true = [0, 0, 0, 1, 1, 1]
>>> labels_pred = [0, 0, 1, 1, 2, 2]

>>> metrics.fowlkes_mallows_score(labels_true, labels_pred) # doctest: +ELLIPSIS
0.47140...

One can permute 0 and 1 in the predicted labels, rename 2 to 3 and get
the same score::

>>> labels_pred = [1, 1, 0, 0, 3, 3]

>>> metrics.fowlkes_mallows_score(labels_true, labels_pred) # doctest: +ELLIPSIS
0.47140...

Perfect labeling is scored 1.0::

[review comment] I think portraying these kinds of identities as doctests makes the user guide unnecessarily verbose.

[review comment] Sorry but I've done exactly what it's done before on the same file (lines 1212 to 1240).

[review comment] Yes, I recall that this already happens in the file. I may propose an issue to make it more succinct.

>>> labels_pred = labels_true[:]
>>> metrics.fowlkes_mallows_score(labels_true, labels_pred) # doctest: +ELLIPSIS
1.0

Bad labelings (e.g. independent labelings) have zero scores::

>>> labels_true = [0, 1, 2, 0, 3, 4, 5, 1]
>>> labels_pred = [1, 1, 0, 0, 2, 2, 2, 2]
>>> metrics.fowlkes_mallows_score(labels_true, labels_pred) # doctest: +ELLIPSIS
0.0

Advantages
~~~~~~~~~~

- **Random (uniform) label assignments have an FMI score close to 0.0**
  for any value of ``n_clusters`` and ``n_samples`` (which is not the
  case for raw Mutual Information or the V-measure for instance).

||
- **Bounded range [0, 1]**: Values close to zero indicate two label | ||
assignments that are largely independent, while values close to one | ||
indicate significant agreement. Further, values of exactly 0 indicate | ||
**purely** independent label assignments and a AMI of exactly 1 indicates | ||
that the two label assignments are equal (with or without permutation). | ||
|
||
- **No assumption is made on the cluster structure**: can be used
  to compare clustering algorithms such as k-means, which assumes isotropic
  blob shapes, with results of spectral clustering algorithms, which can
  find clusters with "folded" shapes.

Drawbacks
~~~~~~~~~

- Contrary to inertia, **FMI-based measures require the knowledge
  of the ground truth classes**, which is almost never available in practice
  or requires manual assignment by human annotators (as in the supervised
  learning setting).

.. topic:: References

  * E. B. Fowlkes and C. L. Mallows, 1983. "A method for comparing two
    hierarchical clusterings". Journal of the American Statistical Association.
    http://wildfire.stat.ucla.edu/pdflibrary/fowlkes.pdf

  * `Wikipedia entry for the Fowlkes-Mallows Index
    <https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_

.. _silhouette_coefficient:

Silhouette Coefficient

@@ -1413,3 +1500,73 @@ Drawbacks

* :ref:`example_cluster_plot_kmeans_silhouette_analysis.py` : In this example
  the silhouette analysis is used to choose an optimal value for n_clusters.

.. _calinski_harabaz_index:

Calinski-Harabaz Index
----------------------

If the ground truth labels are not known, the Calinski-Harabaz index
(:func:`sklearn.metrics.calinski_harabaz_score`) can be used to evaluate the
model, where a higher Calinski-Harabaz score relates to a model with better
defined clusters.

For :math:`k` clusters, the Calinski-Harabaz score :math:`s` is given as the
ratio of the between-cluster dispersion to the within-cluster dispersion:

.. math::
    s(k) = \frac{\mathrm{Tr}(B_k)}{\mathrm{Tr}(W_k)} \times \frac{N - k}{k - 1}

where :math:`B_k` is the between-group dispersion matrix and :math:`W_k`
is the within-cluster dispersion matrix, defined by:

.. math:: W_k = \sum_{q=1}^k \sum_{x \in C_q} (x - c_q) (x - c_q)^T

.. math:: B_k = \sum_q n_q (c_q - c) (c_q - c)^T

with :math:`N` the number of points in our data, :math:`C_q` the set of
points in cluster :math:`q`, :math:`c_q` the center of cluster :math:`q`,
:math:`c` the center of the data, and :math:`n_q` the number of points in
cluster :math:`q`.

>>> from sklearn import metrics
>>> from sklearn.metrics import pairwise_distances
>>> from sklearn import datasets
>>> dataset = datasets.load_iris()
>>> X = dataset.data
>>> y = dataset.target

In normal usage, the Calinski-Harabaz index is applied to the results of a
cluster analysis.

>>> import numpy as np
>>> from sklearn.cluster import KMeans
>>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X)
>>> labels = kmeans_model.labels_
>>> metrics.calinski_harabaz_score(X, labels) # doctest: +ELLIPSIS
560.39...

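The same value can be recovered directly from the definitions of :math:`W_k`
and :math:`B_k` above. The helper below is only a sketch for illustration (the
name ``ch_score`` is made up; the library function is the recommended way to
compute the index)::

    import numpy as np

    def ch_score(X, labels):
        # Calinski-Harabaz score from the traces of the dispersion matrices
        X = np.asarray(X, dtype=float)
        labels = np.asarray(labels)
        n_samples = X.shape[0]
        clusters = np.unique(labels)
        k = len(clusters)
        c = X.mean(axis=0)                    # center of the whole data set
        tr_w = 0.0                            # Tr(W_k), within-cluster dispersion
        tr_b = 0.0                            # Tr(B_k), between-cluster dispersion
        for q in clusters:
            X_q = X[labels == q]              # points assigned to cluster q
            c_q = X_q.mean(axis=0)            # center of cluster q
            tr_w += ((X_q - c_q) ** 2).sum()
            tr_b += len(X_q) * ((c_q - c) ** 2).sum()
        return (tr_b / tr_w) * (n_samples - k) / (k - 1)

    print(ch_score(X, labels))                # should match the score above
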
Advantages
~~~~~~~~~~

- The score is higher when clusters are dense and well separated, which relates
  to a standard concept of a cluster.

- The score is fast to compute, which makes it convenient for comparing several
  candidate values of ``n_clusters`` (see the short sweep after this list).

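Since no ground truth is needed, one way to use the index is to compare a few
candidate numbers of clusters on the same data. The following is only a sketch
of such a sweep, not a full model-selection procedure::

    from sklearn import datasets, metrics
    from sklearn.cluster import KMeans

    X = datasets.load_iris().data
    for n_clusters in (2, 3, 4, 5, 6):
        labels = KMeans(n_clusters=n_clusters, random_state=1).fit_predict(X)
        score = metrics.calinski_harabaz_score(X, labels)
        # a higher score suggests denser, better separated clusters
        print(n_clusters, score)
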
Drawbacks
~~~~~~~~~

- The Calinski-Harabaz index is generally higher for convex clusters than for
  other concepts of clusters, such as density-based clusters like those
  obtained through DBSCAN.

.. topic:: References

  * Caliński, T., & Harabasz, J. (1974). "A dendrite method for cluster
    analysis". Communications in Statistics - Theory and Methods 3: 1-27.
    `doi:10.1080/03610926.2011.560741 <http://dx.doi.org/10.1080/03610926.2011.560741>`_.
[review comment] Conventionally, we would put the formula here more often than in the docstring.