From b8b25886c7b55610d096499dd6529612fa4513fb Mon Sep 17 00:00:00 2001
From: Benjamin Blanc
Date: Thu, 4 Oct 2018 16:33:34 +0200
Subject: [PATCH 1/2] DOC Fix bad data visualization

Apply missing PCA transformation to the example that doesn't use scaling.
---
 examples/preprocessing/plot_scaling_importance.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py
index 15a134d0fd22e..031cdec9fe7d0 100644
--- a/examples/preprocessing/plot_scaling_importance.py
+++ b/examples/preprocessing/plot_scaling_importance.py
@@ -93,16 +93,17 @@
 print('\nPC 1 without scaling:\n', pca.components_[0])
 print('\nPC 1 with scaling:\n', pca_std.components_[0])
 
-# Scale and use PCA on X_train data for visualization.
+# Use PCA without and with scale on X_train data for visualization.
+X_train_transformed = pca.transform(X_train)
 scaler = std_clf.named_steps['standardscaler']
-X_train_std = pca_std.transform(scaler.transform(X_train))
+X_train_std_transformed = pca_std.transform(scaler.transform(X_train))
 
 # visualize standardized vs. untouched dataset with PCA performed
 fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=FIG_SIZE)
 
 
 for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
-    ax1.scatter(X_train[y_train == l, 0], X_train[y_train == l, 1],
+    ax1.scatter(X_train_transformed[y_train == l, 0], X_train_transformed[y_train == l, 1],
                 color=c,
                 label='class %s' % l,
                 alpha=0.5,
@@ -110,7 +111,7 @@
                 )
 
 for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
-    ax2.scatter(X_train_std[y_train == l, 0], X_train_std[y_train == l, 1],
+    ax2.scatter(X_train_std_transformed[y_train == l, 0], X_train_std_transformed[y_train == l, 1],
                 color=c,
                 label='class %s' % l,
                 alpha=0.5,

From 826c34eaf672976a79c731607cb61e4e233c9f94 Mon Sep 17 00:00:00 2001
From: Benjamin Blanc
Date: Thu, 4 Oct 2018 18:14:18 +0200
Subject: [PATCH 2/2] Fix flake8 (E501)

---
 examples/preprocessing/plot_scaling_importance.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py
index 031cdec9fe7d0..7866c511614f9 100644
--- a/examples/preprocessing/plot_scaling_importance.py
+++ b/examples/preprocessing/plot_scaling_importance.py
@@ -103,7 +103,8 @@
 
 
 for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
-    ax1.scatter(X_train_transformed[y_train == l, 0], X_train_transformed[y_train == l, 1],
+    ax1.scatter(X_train_transformed[y_train == l, 0],
+                X_train_transformed[y_train == l, 1],
                 color=c,
                 label='class %s' % l,
                 alpha=0.5,
@@ -111,7 +112,8 @@
                 )
 
 for l, c, m in zip(range(0, 3), ('blue', 'red', 'green'), ('^', 's', 'o')):
-    ax2.scatter(X_train_std_transformed[y_train == l, 0], X_train_std_transformed[y_train == l, 1],
+    ax2.scatter(X_train_std_transformed[y_train == l, 0],
+                X_train_std_transformed[y_train == l, 1],
                 color=c,
                 label='class %s' % l,
                 alpha=0.5,