[MRG] ChainedImputer -> IterativeImputer, and documentation update #11350


Merged (30 commits, Sep 16, 2018)

Commits:

0e415aa  first commit (sergeyf, Jun 24, 2018)
5445012  complete functional overhaul (sergeyf, Jun 25, 2018)
3a2d510  fixing some documentation (sergeyf, Jun 25, 2018)
bf58d92  fixing broken test (sergeyf, Jun 25, 2018)
45f228c  addressing review comments + stef's suggestions for documentation (sergeyf, Jun 25, 2018)
b231b05  fixing docs (sergeyf, Jun 25, 2018)
ecfac5c  more update to impute.rst (sergeyf, Jun 25, 2018)
5b2b181  name change again, new default ridgeCV (sergeyf, Jun 26, 2018)
cbbc8b0  small bug (sergeyf, Jun 26, 2018)
0c9f64f  output changed in impute.rst because of RidgeCV (sergeyf, Jun 26, 2018)
c0c6a4c  Update impute.rst a smidge. (sergeyf, Jun 26, 2018)
c4ccdfc  Remove pre-clipping (sergeyf, Jun 26, 2018)
3c882e7  addressing comments (sergeyf, Jun 26, 2018)
8afdd2f  forgotten doc change (sergeyf, Jun 26, 2018)
b61fc1f  typo (sergeyf, Jun 26, 2018)
482e9a5  addressing comments (sergeyf, Jun 29, 2018)
f6a7f45  fixing docs from review (sergeyf, Jul 2, 2018)
59d97e1  Merge branch 'master' into chained_imputer_update (sergeyf, Jul 10, 2018)
5bc04d9  Merge remote-tracking branch 'origin/master' into chained_imputer_update (glemaitre, Jul 16, 2018)
8bd5e2c  TST update tests with renaming (glemaitre, Jul 16, 2018)
65f43a7  iter (glemaitre, Jul 16, 2018)
7604677  Merge remote-tracking branch 'origin/master' into chained_imputer_update (glemaitre, Jul 16, 2018)
fac9839  Missing rename (sergeyf, Aug 22, 2018)
b772a5a  Merge branch 'iterativeimputer' into chained_imputer_update (sergeyf, Sep 3, 2018)
0ba454c  Undoing changes to 0.20 rst (sergeyf, Sep 3, 2018)
3744445  Updating 0.21 changes to IterativeImputer (sergeyf, Sep 3, 2018)
5d6865d  Update v0.20 rst to not have any changes (sergeyf, Sep 3, 2018)
24bb77b  Fix plot_missing_values.py (sergeyf, Sep 4, 2018)
12a3456  Addressing review comments (sergeyf, Sep 14, 2018)
222b269  Fixed :class: syntax (sergeyf, Sep 15, 2018)
Files changed
doc/modules/classes.rst (2 changes: 1 addition & 1 deletion)

@@ -656,7 +656,7 @@ Kernels:
:template: class.rst

impute.SimpleImputer
-impute.ChainedImputer
+impute.IterativeImputer
impute.MissingIndicator

.. _kernel_approximation_ref:
doc/modules/impute.rst (83 changes: 51 additions & 32 deletions)

@@ -24,7 +24,7 @@ One type of imputation algorithm is univariate, which imputes values in the i-th
feature dimension using only non-missing values in that feature dimension
(e.g. :class:`impute.SimpleImputer`). By contrast, multivariate imputation
algorithms use the entire set of available feature dimensions to estimate the
-missing values (e.g. :class:`impute.ChainedImputer`).
+missing values (e.g. :class:`impute.IterativeImputer`).
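
As a tiny editorial illustration of this distinction (the data here is made
up)::

    import numpy as np
    from sklearn.impute import SimpleImputer

    X = [[1, 10], [np.nan, 20], [3, 30]]
    # Univariate: the fill for column 0 uses only column 0
    # (mean of 1 and 3 -> 2.0), ignoring the predictive column 1.
    print(SimpleImputer(strategy='mean').fit_transform(X))
    # A multivariate imputer such as IterativeImputer would instead
    # regress column 0 on column 1 and exploit that relationship.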


.. _single_imputer:
@@ -87,37 +87,37 @@ string values or pandas categoricals when using the ``'most_frequent'`` or
['a' 'y']
['b' 'y']]

-.. _chained_imputer:
+.. _iterative_imputer:


Multivariate feature imputation
===============================

-A more sophisticated approach is to use the :class:`ChainedImputer` class, which
-implements the imputation technique from MICE (Multivariate Imputation by
-Chained Equations). MICE models each feature with missing values as a function of
-other features, and uses that estimate for imputation. It does so in a round-robin
-fashion: at each step, a feature column is designated as output `y` and the other
-feature columns are treated as inputs `X`. A regressor is fit on `(X, y)` for known `y`.
-Then, the regressor is used to predict the unknown values of `y`. This is repeated
-for each feature in a chained fashion, and then is done for a number of imputation
-rounds. Here is an example snippet::
+A more sophisticated approach is to use the :class:`IterativeImputer` class,
+which models each feature with missing values as a function of other features,
+and uses that estimate for imputation. It does so in an iterated round-robin
+fashion: at each step, a feature column is designated as output ``y`` and the
+other feature columns are treated as inputs ``X``. A regressor is fit on ``(X,
+y)`` for known ``y``. Then, the regressor is used to predict the missing values
+of ``y``. This is done for each feature in an iterative fashion, and then is
+repeated for ``n_iter`` imputation rounds. The results of the final imputation
+round are returned.

>>> import numpy as np
->>> from sklearn.impute import ChainedImputer
->>> imp = ChainedImputer(n_imputations=10, random_state=0)
+>>> from sklearn.impute import IterativeImputer
+>>> imp = IterativeImputer(n_iter=10, random_state=0)
>>> imp.fit([[1, 2], [np.nan, 3], [7, np.nan]])
-ChainedImputer(imputation_order='ascending', initial_strategy='mean',
-    max_value=None, min_value=None, missing_values=nan, n_burn_in=10,
-    n_imputations=10, n_nearest_features=None, predictor=None,
-    random_state=0, verbose=False)
+IterativeImputer(imputation_order='ascending', initial_strategy='mean',
+    max_value=None, min_value=None, missing_values=nan, n_iter=10,
+    n_nearest_features=None, predictor=None, random_state=0,
+    sample_posterior=False, verbose=False)

Review comment (Member): I would add a small description of the important parameters; ``n_iter`` is probably enough.

Reply (Contributor, author): I added it in the paragraph above.
>>> X_test = [[np.nan, 2], [6, np.nan], [np.nan, 6]]
>>> print(np.round(imp.transform(X_test)))
[[ 1. 2.]
-[ 6. 4.]
-[13. 6.]]
+[ 6. 3.]
+[24. 6.]]
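
To make the round-robin procedure above concrete, here is a small editorial
sketch of the idea. It is not the class's actual implementation; it assumes
``RidgeCV`` as the regressor (the commit log above mentions a new RidgeCV
default) and mean initialization::

    import numpy as np
    from sklearn.linear_model import RidgeCV

    def round_robin_impute(X, n_iter=10):
        """Editorial sketch of round-robin imputation (not the real class)."""
        X = np.asarray(X, dtype=float)
        missing = np.isnan(X)
        # initial_strategy='mean': start from per-column means
        X_filled = np.where(missing, np.nanmean(X, axis=0), X)
        for _ in range(n_iter):
            for j in range(X.shape[1]):
                if not missing[:, j].any():
                    continue  # nothing to impute in this column
                others = np.delete(X_filled, j, axis=1)  # every column but j
                known = ~missing[:, j]
                reg = RidgeCV().fit(others[known], X_filled[known, j])
                # overwrite only the originally-missing entries of column j
                X_filled[missing[:, j], j] = reg.predict(others[missing[:, j]])
        return X_filled  # results of the final round are returned

The real estimator layers clipping (``min_value``/``max_value``), imputation
ordering, ``n_nearest_features``, and optional posterior sampling on top of
this loop, as the parameter list above shows.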

-Both :class:`SimpleImputer` and :class:`ChainedImputer` can be used in a Pipeline
+Both :class:`SimpleImputer` and :class:`IterativeImputer` can be used in a Pipeline
as a way to build a composite estimator that supports imputation.
See :ref:`sphx_glr_auto_examples_plot_missing_values.py`.

@@ -127,21 +127,40 @@ Multiple vs. Single Imputation
==============================

In the statistics community, it is common practice to perform multiple imputations,
-generating, for example, 10 separate imputations for a single feature matrix.
-Each of these 10 imputations is then put through the subsequent analysis pipeline
-(e.g. feature engineering, clustering, regression, classification). The 10 final
-analysis results (e.g. held-out validation error) allow the data scientist to
-obtain understanding of the uncertainty inherent in the missing values. The above
-practice is called multiple imputation. As implemented, the :class:`ChainedImputer`
-class generates a single (averaged) imputation for each missing value because this
-is the most common use case for machine learning applications. However, it can also be used
-for multiple imputations by applying it repeatedly to the same dataset with different
-random seeds with the ``n_imputations`` parameter set to 1.

-Note that a call to the ``transform`` method of :class:`ChainedImputer` is not
+generating, for example, ``m`` separate imputations for a single feature matrix.
+Each of these ``m`` imputations is then put through the subsequent analysis
+pipeline (e.g. feature engineering, clustering, regression, classification).
+The ``m`` final analysis results (e.g. held-out validation errors) allow the
+data scientist to understand how analytic results may differ as a consequence
+of the inherent uncertainty caused by the missing values. The above practice
+is called multiple imputation.

+Our implementation of :class:`IterativeImputer` was inspired by the R MICE
+package (Multivariate Imputation by Chained Equations) [1]_, but differs from
+it by returning a single imputation instead of multiple imputations. However,
+:class:`IterativeImputer` can also be used for multiple imputations by applying
+it repeatedly to the same dataset with different random seeds when
+``sample_posterior=True``. See [2]_, chapter 4, for more discussion of
+multiple vs. single imputations.
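
A rough editorial sketch of that workflow (``m``, the data, and the downstream
analysis are placeholders; ``sample_posterior=True`` assumes a predictor able
to sample from its predictive posterior)::

    import numpy as np
    from sklearn.impute import IterativeImputer

    X_missing = np.array([[1, 2], [np.nan, 3], [7, np.nan]])  # any data with NaNs

    m = 5  # number of imputations (placeholder)
    imputed_datasets = []
    for seed in range(m):
        # each seed yields a different imputation
        imp = IterativeImputer(sample_posterior=True, random_state=seed)
        imputed_datasets.append(imp.fit_transform(X_missing))

    # Feed each of the m imputed datasets through the same downstream
    # pipeline; the spread of the m results reflects the uncertainty
    # due to the missing values.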

+It is still an open problem how useful single vs. multiple imputation is in
+the context of prediction and classification when the user is not interested
+in measuring uncertainty due to missing values.

+Note that a call to the ``transform`` method of :class:`IterativeImputer` is not
allowed to change the number of samples. Therefore multiple imputations cannot be
achieved by a single call to ``transform``.

+References
+==========

+.. [1] Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice: Multivariate
+   Imputation by Chained Equations in R". Journal of Statistical Software 45:
+   1-67.

+.. [2] Roderick J A Little and Donald B Rubin (1986). "Statistical Analysis
+   with Missing Data". John Wiley & Sons, Inc., New York, NY, USA.

.. _missing_indicator:

Marking imputed values
doc/whats_new/v0.21.rst (2 changes: 1 addition & 1 deletion)

@@ -43,7 +43,7 @@ Support for Python 3.4 and below has been officially dropped.
:mod:`sklearn.impute`
.....................

-- |MajorFeature| Added :class:`impute.ChainedImputer`, which is a strategy for
+- |MajorFeature| Added :class:`impute.IterativeImputer`, which is a strategy for
imputing missing values by modeling each feature with missing values as a
function of other features in a round-robin fashion. :issue:`8478` by
:user:`Sergey Feldman <sergeyf>`.
examples/plot_missing_values.py (18 changes: 9 additions & 9 deletions)

@@ -4,11 +4,11 @@
====================================================

Missing values can be replaced by the mean, the median or the most frequent
-value using the basic :func:`sklearn.impute.SimpleImputer`.
+value using the basic :class:`sklearn.impute.SimpleImputer`.
The median is a more robust estimator for data with high magnitude variables
which could dominate results (otherwise known as a 'long tail').

-Another option is the :func:`sklearn.impute.ChainedImputer`. This uses
+Another option is the :class:`sklearn.impute.IterativeImputer`. This uses
round-robin linear regression, treating every variable as an output in
turn. The version implemented assumes Gaussian (output) variables. If your
features are obviously non-Normal, consider transforming them to look more
@@ -26,7 +26,7 @@
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline, make_union
-from sklearn.impute import SimpleImputer, ChainedImputer, MissingIndicator
+from sklearn.impute import SimpleImputer, IterativeImputer, MissingIndicator
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
@@ -73,18 +73,18 @@ def get_results(dataset):
scoring='neg_mean_squared_error',
cv=5)

-# Estimate the score after chained imputation of the missing values
+# Estimate the score after iterative imputation of the missing values
estimator = make_pipeline(
-make_union(ChainedImputer(missing_values=0, random_state=0),
+make_union(IterativeImputer(missing_values=0, random_state=0),
MissingIndicator(missing_values=0)),
RandomForestRegressor(random_state=0, n_estimators=100))
-chained_impute_scores = cross_val_score(estimator, X_missing, y_missing,
-scoring='neg_mean_squared_error')
+iterative_impute_scores = cross_val_score(estimator, X_missing, y_missing,
+scoring='neg_mean_squared_error')

return ((full_scores.mean(), full_scores.std()),
(zero_impute_scores.mean(), zero_impute_scores.std()),
(mean_impute_scores.mean(), mean_impute_scores.std()),
-(chained_impute_scores.mean(), chained_impute_scores.std()))
+(iterative_impute_scores.mean(), iterative_impute_scores.std()))


results_diabetes = np.array(get_results(load_diabetes()))
@@ -101,7 +101,7 @@ def get_results(dataset):
x_labels = ['Full data',
'Zero imputation',
'Mean Imputation',
-'Chained Imputation']
+'Multivariate Imputation']
colors = ['r', 'g', 'b', 'orange']

# plot diabetes results