diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
index 791f9167f3333..8abdb89a38da5 100644
--- a/examples/model_selection/plot_roc_crossval.py
+++ b/examples/model_selection/plot_roc_crossval.py
@@ -3,54 +3,66 @@
 Receiver Operating Characteristic (ROC) with cross validation
 =============================================================
 
-Example of Receiver Operating Characteristic (ROC) metric to evaluate
-classifier output quality using cross-validation.
+This example presents how to estimate and visualize the variance of the Receiver
+Operating Characteristic (ROC) metric using cross-validation.
 
-ROC curves typically feature true positive rate on the Y axis, and false
-positive rate on the X axis. This means that the top left corner of the plot is
-the "ideal" point - a false positive rate of zero, and a true positive rate of
-one. This is not very realistic, but it does mean that a larger area under the
-curve (AUC) is usually better.
-
-The "steepness" of ROC curves is also important, since it is ideal to maximize
-the true positive rate while minimizing the false positive rate.
+ROC curves typically feature true positive rate (TPR) on the Y axis, and false
+positive rate (FPR) on the X axis. This means that the top left corner of the
+plot is the "ideal" point - a FPR of zero, and a TPR of one. This is not very
+realistic, but it does mean that a larger Area Under the Curve (AUC) is usually
+better. The "steepness" of ROC curves is also important, since it is ideal to
+maximize the TPR while minimizing the FPR.
 
 This example shows the ROC response of different datasets, created from K-fold
 cross-validation. Taking all of these curves, it is possible to calculate the
-mean area under curve, and see the variance of the curve when the
+mean AUC, and see the variance of the curve when the
 training set is split into different subsets. This roughly shows how the
-classifier output is affected by changes in the training data, and how
-different the splits generated by K-fold cross-validation are from one another.
+classifier output is affected by changes in the training data, and how different
+the splits generated by K-fold cross-validation are from one another.
 
 .. note::
 
-    See also :func:`sklearn.metrics.roc_auc_score`,
-    :func:`sklearn.model_selection.cross_val_score`,
-    :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py`,
-
+    See :ref:`sphx_glr_auto_examples_model_selection_plot_roc.py` for a
+    complement of the present example explaining the averaging strategies to
+    generalize the metrics for multiclass classifiers.
 """
 
 # %%
-# Data IO and generation
-# ----------------------
-import numpy as np
+# Load and prepare data
+# =====================
+#
+# We import the :ref:`iris_dataset` which contains 3 classes, each one
+# corresponding to a type of iris plant. One class is linearly separable from
+# the other 2; the latter are **not** linearly separable from each other.
+#
+# In the following we binarize the dataset by dropping the "virginica" class
+# (`class_id=2`). This means that the "versicolor" class (`class_id=1`) is
+# regarded as the positive class and "setosa" as the negative class
+# (`class_id=0`).
 
-from sklearn import datasets
+import numpy as np
+from sklearn.datasets import load_iris
 
-# Import some data to play with
-iris = datasets.load_iris()
-X = iris.data
-y = iris.target
+iris = load_iris()
+target_names = iris.target_names
+X, y = iris.data, iris.target
 X, y = X[y != 2], y[y != 2]
 n_samples, n_features = X.shape
 
-# Add noisy features
+# %%
+# We also add noisy features to make the problem harder.
 random_state = np.random.RandomState(0)
-X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]
+X = np.concatenate([X, random_state.randn(n_samples, 200 * n_features)], axis=1)
 
 # %%
 # Classification and ROC analysis
 # -------------------------------
+#
+# Here we run a :class:`~sklearn.svm.SVC` classifier with cross-validation and
+# plot the ROC curves fold-wise. Notice that the baseline to define the chance
+# level (dashed ROC curve) is a classifier that would always predict the most
+# frequent class.
+
 import matplotlib.pyplot as plt
 
 from sklearn import svm
@@ -58,7 +70,6 @@
 from sklearn.metrics import RocCurveDisplay
 from sklearn.model_selection import StratifiedKFold
 
-# Run classifier with cross-validation and plot ROC curves
 cv = StratifiedKFold(n_splits=6)
 classifier = svm.SVC(kernel="linear", probability=True, random_state=random_state)
 
@@ -66,14 +77,14 @@
 aucs = []
 mean_fpr = np.linspace(0, 1, 100)
 
-fig, ax = plt.subplots()
-for i, (train, test) in enumerate(cv.split(X, y)):
+fig, ax = plt.subplots(figsize=(6, 6))
+for fold, (train, test) in enumerate(cv.split(X, y)):
     classifier.fit(X[train], y[train])
     viz = RocCurveDisplay.from_estimator(
         classifier,
         X[test],
         y[test],
-        name="ROC fold {}".format(i),
+        name=f"ROC fold {fold}",
         alpha=0.3,
         lw=1,
         ax=ax,
@@ -82,8 +93,7 @@
     interp_tpr[0] = 0.0
     tprs.append(interp_tpr)
     aucs.append(viz.roc_auc)
-
-ax.plot([0, 1], [0, 1], linestyle="--", lw=2, color="r", label="Chance", alpha=0.8)
+ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
 
 mean_tpr = np.mean(tprs, axis=0)
 mean_tpr[-1] = 1.0
@@ -113,7 +123,10 @@
 ax.set(
     xlim=[-0.05, 1.05],
     ylim=[-0.05, 1.05],
-    title="Receiver operating characteristic example",
+    xlabel="False Positive Rate",
+    ylabel="True Positive Rate",
+    title=f"Mean ROC curve with variability\n(Positive label '{target_names[1]}')",
 )
+ax.axis("square")
 ax.legend(loc="lower right")
 plt.show()
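The new plot title, "Mean ROC curve with variability", refers to the interpolate-then-average pattern in the cross-validation loop: each fold's TPR is resampled onto the common `mean_fpr` grid, and the per-grid-point standard deviation gives the shaded band. Below is a minimal self-contained sketch of that aggregation logic; the per-fold labels and scores are synthetic stand-ins for illustration only, while the variable names (`mean_fpr`, `interp_tpr`, `tprs`, `aucs`) follow the example.

```python
# Sketch of the interpolate-then-average aggregation used by the example.
# The per-fold labels/scores are synthetic stand-ins; only the aggregation
# mirrors the diff (common mean_fpr grid, np.interp, +/- 1 std band).
import numpy as np
from sklearn.metrics import auc, roc_curve

rng = np.random.RandomState(0)
mean_fpr = np.linspace(0, 1, 100)
tprs, aucs = [], []

for fold in range(6):
    y_true = rng.randint(0, 2, size=50)                # fake test labels
    y_score = y_true + rng.normal(scale=0.8, size=50)  # fake decision scores
    fpr, tpr, _ = roc_curve(y_true, y_score)
    interp_tpr = np.interp(mean_fpr, fpr, tpr)  # resample onto common grid
    interp_tpr[0] = 0.0                         # anchor the curve at (0, 0)
    tprs.append(interp_tpr)
    aucs.append(auc(fpr, tpr))

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0                              # anchor the curve at (1, 1)
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)  # clip the band into [0, 1]
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
print(f"Mean AUC = {auc(mean_fpr, mean_tpr):.2f} +/- {np.std(aucs):.2f}")
```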
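The dashed baseline is relabeled from "Chance" to "chance level (AUC = 0.5)", and the comment added in the first hunk describes it as a classifier that would always predict the most frequent class. That claim can be verified with `sklearn.dummy.DummyClassifier` (which the example itself does not use): a constant predictor assigns every sample the same score, so its ROC curve is the diagonal and its AUC is 0.5.

```python
# Check (not part of the example): a constant "most frequent class"
# predictor yields the diagonal ROC curve, i.e. AUC = 0.5.
from sklearn.datasets import load_iris
from sklearn.dummy import DummyClassifier
from sklearn.metrics import roc_auc_score

X, y = load_iris(return_X_y=True)
X, y = X[y != 2], y[y != 2]  # same binarization as in the example

dummy = DummyClassifier(strategy="most_frequent").fit(X, y)
constant_score = dummy.predict_proba(X)[:, 1]  # identical for every sample
print(roc_auc_score(y, constant_score))  # 0.5
```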