From d7d92882e927db2521bc850e190f0070164032eb Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 7 Feb 2020 21:32:54 -0500
Subject: [PATCH 01/35] Add sklearn to docs requirements

---
 doc/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 51c56a393a1..8b46ed96701 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -17,6 +17,7 @@ requests
 networkx
 squarify
 scikit-image
+scikit-learn
 sphinx
 sphinx_bootstrap_theme
 recommonmark

From 612c0f676cc19ad4c66265296f8a12f08884ca1a Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 7 Feb 2020 21:34:44 -0500
Subject: [PATCH 02/35] Create kNN docs draft

---
 doc/python/ml-knn.md | 119 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 doc/python/ml-knn.md

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
new file mode 100644
index 00000000000..78e04479db1
--- /dev/null
+++ b/doc/python/ml-knn.md
@@ -0,0 +1,119 @@
+## K-Nearest Neighbors (kNN)
+
+How to visualize the K-Nearest Neighbors (kNN) algorithm using scikit-learn.
+
+
+### Binary Probability Estimates with `go.Contour`
+
+```python
+import numpy as np
+from sklearn.datasets import make_moons
+from sklearn.neighbors import KNeighborsClassifier
+import plotly.express as px
+import plotly.graph_objects as go
+
+X, y = make_moons(noise=0.3, random_state=0)
+
+# Create a mesh grid on which we will run our model
+x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
+y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
+xrange = np.arange(x_min, x_max, mesh_size)
+yrange = np.arange(y_min, y_max, mesh_size)
+xx, yy = np.meshgrid(xrange, yrange)
+
+# Create classifier, run predictions on grid
+clf = neighbors.KNeighborsClassifier(15, weights='uniform')
+clf.fit(X, y)
+Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
+Z = Z.reshape(xx.shape)
+
+fig = px.scatter(X, x=0, y=1, color=y.astype(str))
+fig.add_trace(
+    go.Contour(
+        x=xrange, 
+        y=yrange, 
+        z=Z, 
+        showscale=False,
+        colorscale=['Blue', 'Red'],
+        opacity=0.4
+    )
+)
+```
+
+### Multi-class classification with `px.data` and `go.Heatmap`
+
+```python
+import numpy as np
+from sklearn.neighbors import KNeighborsClassifier
+import plotly.express as px
+import plotly.graph_objects as go
+
+mesh_size = .02
+margin = 1
+
+df = px.data.iris()
+X = df[['sepal_length', 'sepal_width']]
+y = df.species_id
+
+# Create a mesh grid on which we will run our model
+l_min, l_max = df.sepal_length.min() - margin, df.sepal_length.max() + margin
+w_min, w_max = df.sepal_width.min() - margin, df.sepal_width.max() + margin
+lrange = np.arange(l_min, l_max, mesh_size)
+wrange = np.arange(w_min, w_max, mesh_size)
+ll, ww = np.meshgrid(lrange, wrange)
+
+# Create classifier, run predictions on grid
+clf = KNeighborsClassifier(15, weights='distance')
+clf.fit(X, y)
+Z = clf.predict(np.c_[ll.ravel(), ww.ravel()])
+Z = Z.reshape(ll.shape)
+
+fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
+fig.update_traces(marker_size=10, marker_line_width=1)
+fig.add_trace(
+    go.Heatmap(
+        x=lrange, 
+        y=wrange, 
+        z=Z, 
+        showscale=False,
+        colorscale=[[0.0, 'blue'], [0.5, 'red'], [1.0, 'green']],
+        opacity=0.25
+    )
+)
+```
+
+### Visualizing kNN Regression
+
+```python
+from sklearn.neighbors import KNeighborsRegressor
+import plotly.express as px
+import plotly.graph_objects as go
+
+df = px.data.tips()
+X = df.total_bill.values.reshape(-1, 1)
+
+knn_dist = KNeighborsRegressor(10, weights='distance')
+knn_uni = KNeighborsRegressor(10, weights='uniform')
+knn_dist.fit(X, df.tip)
+knn_uni.fit(X, df.tip)
+
+x_range = np.linspace(X.min(), X.max(), 100)
+y_dist = knn_dist.predict(x_range.reshape(-1, 1))
+y_uni = knn_uni.predict(x_range.reshape(-1, 1))
+
+fig = px.scatter(df, x='total_bill', y='tip', color='sex', opacity=0.65)
+fig.add_traces(go.Scatter(x=x_range, y=y_uni, name='Weights: Uniform'))
+fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
+```
+
+### Reference
+
+Learn more about `px`, `go.Contour`, and `go.Heatmap` here:
+* https://plot.ly/python/plotly-express/
+* https://plot.ly/python/heatmaps/
+* https://plot.ly/python/contour-plots/
+
+This tutorial was inspired by amazing examples from the official scikit-learn docs:
+* https://scikit-learn.org/stable/auto_examples/neighbors/plot_regression.html
+* https://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
+* https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html

From 6b3bbb1ebb8f7312cffe58a13c3af72f1df26b82 Mon Sep 17 00:00:00 2001
From: Xing Han <xhlperso@gmail.com>
Date: Sat, 22 Feb 2020 15:52:39 -0500
Subject: [PATCH 03/35] Update based on Emma's suggestions

---
 doc/python/ml-knn.md | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 78e04479db1..031097a4404 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -7,10 +7,13 @@ How to visualize the K-Nearest Neighbors (kNN) algorithm using scikit-learn.
 
 ```python
 import numpy as np
-from sklearn.datasets import make_moons
-from sklearn.neighbors import KNeighborsClassifier
 import plotly.express as px
 import plotly.graph_objects as go
+from sklearn.datasets import make_moons
+from sklearn.neighbors import KNeighborsClassifier
+
+mesh_size = .02
+margin = 1
 
 X, y = make_moons(noise=0.3, random_state=0)
 
@@ -22,12 +25,12 @@ yrange = np.arange(y_min, y_max, mesh_size)
 xx, yy = np.meshgrid(xrange, yrange)
 
 # Create classifier, run predictions on grid
-clf = neighbors.KNeighborsClassifier(15, weights='uniform')
+clf = KNeighborsClassifier(15, weights='uniform')
 clf.fit(X, y)
 Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
 Z = Z.reshape(xx.shape)
 
-fig = px.scatter(X, x=0, y=1, color=y.astype(str))
+fig = px.scatter(X, x=0, y=1, color=y.astype(str), labels={'0':'', '1':''})
 fig.add_trace(
     go.Contour(
         x=xrange, 
@@ -38,15 +41,16 @@ fig.add_trace(
         opacity=0.4
     )
 )
+fig.show()
 ```
 
 ### Multi-class classification with `px.data` and `go.Heatmap`
 
 ```python
 import numpy as np
-from sklearn.neighbors import KNeighborsClassifier
 import plotly.express as px
 import plotly.graph_objects as go
+from sklearn.neighbors import KNeighborsClassifier
 
 mesh_size = .02
 margin = 1
@@ -67,6 +71,8 @@ clf = KNeighborsClassifier(15, weights='distance')
 clf.fit(X, y)
 Z = clf.predict(np.c_[ll.ravel(), ww.ravel()])
 Z = Z.reshape(ll.shape)
+proba = clf.predict_proba(np.c_[ll.ravel(), ww.ravel()])
+proba = proba.reshape(ll.shape + (3,))
 
 fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
 fig.update_traces(marker_size=10, marker_line_width=1)
@@ -77,17 +83,27 @@ fig.add_trace(
         z=Z, 
         showscale=False,
         colorscale=[[0.0, 'blue'], [0.5, 'red'], [1.0, 'green']],
-        opacity=0.25
+        opacity=0.25,
+        customdata=proba,
+        hovertemplate=(
+            'sepal length: %{x} <br>'
+            'sepal width: %{y} <br>'
+            'p(setosa): %{customdata[0]:.3f}<br>'
+            'p(versicolor): %{customdata[1]:.3f}<br>'
+            'p(virginica): %{customdata[2]:.3f}<extra></extra>'
+        )
     )
 )
+fig.show()
 ```
 
 ### Visualizing kNN Regression
 
 ```python
-from sklearn.neighbors import KNeighborsRegressor
+import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
+from sklearn.neighbors import KNeighborsRegressor
 
 df = px.data.tips()
 X = df.total_bill.values.reshape(-1, 1)
@@ -104,6 +120,7 @@ y_uni = knn_uni.predict(x_range.reshape(-1, 1))
 fig = px.scatter(df, x='total_bill', y='tip', color='sex', opacity=0.65)
 fig.add_traces(go.Scatter(x=x_range, y=y_uni, name='Weights: Uniform'))
 fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
+fig.show()
 ```
 
 ### Reference

From fbdd889de59efd6ce682906c435986dd1fda29aa Mon Sep 17 00:00:00 2001
From: Xing Han <xhlperso@gmail.com>
Date: Sat, 22 Feb 2020 16:44:08 -0500
Subject: [PATCH 04/35] Add a header

---
 doc/python/ml-knn.md | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 031097a4404..7e265ee8485 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -1,6 +1,43 @@
-## K-Nearest Neighbors (kNN)
-
-How to visualize the K-Nearest Neighbors (kNN) algorithm using scikit-learn.
+---
+jupyter:
+  jupytext:
+    notebook_metadata_filter: all
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.1'
+      jupytext_version: 1.1.1
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  language_info:
+    codemirror_mode:
+      name: ipython
+      version: 3
+    file_extension: .py
+    mimetype: text/x-python
+    name: python
+    nbconvert_exporter: python
+    pygments_lexer: ipython3
+    version: 3.6.10
+  plotly:
+    description: How to visualize k-Nearest Neighbors (kNN) created using scikit-learn
+      in Python with Plotly.
+    display_as: basic
+    language: python
+    layout: base
+    name: k-Nearest Neighbors
+    order: 1
+    page_type: example_index
+    permalink: python/knn/
+    redirect_from: python/machine-learning-tutorials/
+    thumbnail: thumbnail/line-and-scatter.jpg
+---
+
+## K-Nearest Neighbors (kNN) Classification
+
+How to visualize K-Nearest Neighbors (kNN) classification using scikit-learn.
 
 
 ### Binary Probability Estimates with `go.Contour`

From b1d7fefce7ee603a1a4d3b14469633f89b975570 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Sun, 23 Feb 2020 01:26:10 -0500
Subject: [PATCH 05/35] Placeholder Regression Section

---
 doc/python/ml-regression.md | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 doc/python/ml-regression.md

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
new file mode 100644
index 00000000000..e2b0d37724d
--- /dev/null
+++ b/doc/python/ml-regression.md
@@ -0,0 +1,36 @@
+# Regression
+
+
+### Visualizing kNN Regression
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.neighbors import KNeighborsRegressor
+
+df = px.data.tips()
+X = df.total_bill.values.reshape(-1, 1)
+
+knn_dist = KNeighborsRegressor(10, weights='distance')
+knn_uni = KNeighborsRegressor(10, weights='uniform')
+knn_dist.fit(X, df.tip)
+knn_uni.fit(X, df.tip)
+
+x_range = np.linspace(X.min(), X.max(), 100)
+y_dist = knn_dist.predict(x_range.reshape(-1, 1))
+y_uni = knn_uni.predict(x_range.reshape(-1, 1))
+
+fig = px.scatter(df, x='total_bill', y='tip', color='sex', opacity=0.65)
+fig.add_traces(go.Scatter(x=x_range, y=y_uni, name='Weights: Uniform'))
+fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
+fig.show()
+```
+
+### Reference
+
+Learn more about `px` here:
+* https://plot.ly/python/plotly-express/
+
+This tutorial was inspired by amazing examples from the official scikit-learn docs:
+* https://scikit-learn.org/stable/auto_examples/neighbors/plot_regression.html

From eafaf2880f9a0c81762493a4384a1a12a9896b5b Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Sun, 23 Feb 2020 01:26:27 -0500
Subject: [PATCH 06/35] Create 2 basic sections, 2 advanced sections

---
 doc/python/ml-knn.md | 124 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 102 insertions(+), 22 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 7e265ee8485..27ef20c7388 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -1,6 +1,7 @@
 ---
 jupyter:
   jupytext:
+    formats: ipynb,md
     notebook_metadata_filter: all
     text_representation:
       extension: .md
@@ -20,14 +21,14 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.6.10
+    version: 3.7.6
   plotly:
     description: How to visualize k-Nearest Neighbors (kNN) created using scikit-learn
       in Python with Plotly.
     display_as: basic
     language: python
     layout: base
-    name: k-Nearest Neighbors
+    name: K-Nearest Neighbors (kNN) Classification
     order: 1
     page_type: example_index
     permalink: python/knn/
@@ -35,12 +36,49 @@ jupyter:
     thumbnail: thumbnail/line-and-scatter.jpg
 ---
 
-## K-Nearest Neighbors (kNN) Classification
+## Basic Binary Classification with `plotly.express`
 
-How to visualize K-Nearest Neighbors (kNN) classification using scikit-learn.
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.datasets import make_moons
+from sklearn.neighbors import KNeighborsClassifier
+
+X, y = make_moons(noise=0.3, random_state=0)
+X_test, _ = make_moons(noise=0.3, random_state=1)
+
+clf = KNeighborsClassifier(15)
+clf.fit(X, y.astype(str))  # Fit on training set
+y_pred = clf.predict(X_test)  # Predict on new data
+
+fig = px.scatter(x=X_test[:, 0], y=X_test[:, 1], color=y_pred, labels={'color': 'predicted'})
+fig.update_traces(marker_size=10)
+fig.show()
+```
 
+## Visualize Binary Prediction Scores
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.datasets import make_classification
+from sklearn.neighbors import KNeighborsClassifier
+
+X, y = make_classification(n_features=2, n_redundant=0, random_state=0)
+X_test, _ = make_classification(n_features=2, n_redundant=0, random_state=1)
+
+clf = KNeighborsClassifier(15)
+clf.fit(X, y)  # Fit on training set
+y_score = clf.predict_proba(X_test)[:, 1]  # Predict on new data
+
+fig = px.scatter(x=X_test[:, 0], y=X_test[:, 1], color=y_score, labels={'color': 'score'})
+fig.update_traces(marker_size=10)
+fig.show()
+```
 
-### Binary Probability Estimates with `go.Contour`
+## Probability Estimates with `go.Contour`
 
 ```python
 import numpy as np
@@ -68,6 +106,7 @@ Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
 Z = Z.reshape(xx.shape)
 
 fig = px.scatter(X, x=0, y=1, color=y.astype(str), labels={'0':'', '1':''})
+fig.update_traces(marker_size=10, marker_line_width=1)
 fig.add_trace(
     go.Contour(
         x=xrange, 
@@ -75,13 +114,14 @@ fig.add_trace(
         z=Z, 
         showscale=False,
         colorscale=['Blue', 'Red'],
-        opacity=0.4
+        opacity=0.4,
+        name='Confidence'
     )
 )
 fig.show()
 ```
 
-### Multi-class classification with `px.data` and `go.Heatmap`
+## Multi-class prediction confidence with `go.Heatmap`
 
 ```python
 import numpy as np
@@ -92,6 +132,7 @@ from sklearn.neighbors import KNeighborsClassifier
 mesh_size = .02
 margin = 1
 
+# We will use the iris data, which is included in px
 df = px.data.iris()
 X = df[['sepal_length', 'sepal_width']]
 y = df.species_id
@@ -134,29 +175,66 @@ fig.add_trace(
 fig.show()
 ```
 
-### Visualizing kNN Regression
+## 3D Classification with `px.scatter_3d`
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import train_test_split
+
+df = px.data.iris()
+features = ["sepal_width", "sepal_length", "petal_width"]
+
+X = df[features]
+y = df.species
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
+
+# Create classifier, run predictions on the test set
+clf = KNeighborsClassifier(15, weights='distance')
+clf.fit(X_train, y_train)
+y_pred = clf.predict(X_test)
+y_score = clf.predict_proba(X_test)
+y_score = np.around(y_score.max(axis=1), 4)
+
+fig = px.scatter_3d(
+    X_test, 
+    x='sepal_length', 
+    y='sepal_width', 
+    z='petal_width', 
+    symbol=y_pred,
+    color=y_score,
+    labels={'symbol': 'prediction', 'color': 'score'}
+)
+fig.update_layout(legend=dict(x=0, y=0))
+fig.show()
+```
+
+## High Dimension Visualization with `px.scatter_matrix`
+
+If you need to visualize classifications that go beyond 3D, you can use the [scatter plot matrix](https://plot.ly/python/splom/).
 
 ```python
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
-from sklearn.neighbors import KNeighborsRegressor
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import train_test_split
 
-df = px.data.tips()
-X = df.total_bill.values.reshape(-1, 1)
+df = px.data.iris()
+features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]
 
-knn_dist = KNeighborsRegressor(10, weights='distance')
-knn_uni = KNeighborsRegressor(10, weights='uniform')
-knn_dist.fit(X, df.tip)
-knn_uni.fit(X, df.tip)
+X = df[features]
+y = df.species
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
 
-x_range = np.linspace(X.min(), X.max(), 100)
-y_dist = knn_dist.predict(x_range.reshape(-1, 1))
-y_uni = knn_uni.predict(x_range.reshape(-1, 1))
+# Create classifier, run predictions on the test set
+clf = KNeighborsClassifier(15, weights='distance')
+clf.fit(X_train, y_train)
+y_pred = clf.predict(X_test)
 
-fig = px.scatter(df, x='total_bill', y='tip', color='sex', opacity=0.65)
-fig.add_traces(go.Scatter(x=x_range, y=y_uni, name='Weights: Uniform'))
-fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
+fig = px.scatter_matrix(X_test, dimensions=features, color=y_pred, labels={'color': 'prediction'})
 fig.show()
 ```
 
@@ -166,8 +244,10 @@ Learn more about `px`, `go.Contour`, and `go.Heatmap` here:
 * https://plot.ly/python/plotly-express/
 * https://plot.ly/python/heatmaps/
 * https://plot.ly/python/contour-plots/
+* https://plot.ly/python/3d-scatter-plots/
+* https://plot.ly/python/splom/
 
 This tutorial was inspired by amazing examples from the official scikit-learn docs:
-* https://scikit-learn.org/stable/auto_examples/neighbors/plot_regression.html
 * https://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
 * https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
+* https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html

From 08aa89b04ed846741d5914444778bb378ff6db6f Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 28 Feb 2020 12:40:16 -0500
Subject: [PATCH 07/35] KNN ML docs: Update thumbnail, name, permalink,
 description, display_as

---
 doc/python/ml-knn.md | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 27ef20c7388..2bcab469875 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -1,7 +1,6 @@
 ---
 jupyter:
   jupytext:
-    formats: ipynb,md
     notebook_metadata_filter: all
     text_representation:
       extension: .md
@@ -23,17 +22,16 @@ jupyter:
     pygments_lexer: ipython3
     version: 3.7.6
   plotly:
-    description: How to visualize k-Nearest Neighbors (kNN) created using scikit-learn
-      in Python with Plotly.
-    display_as: basic
+    description: Visualize scikit-learn's k-Nearest Neighbors (kNN) classification
+      with Plotly
+    display_as: ai_ml
     language: python
     layout: base
-    name: K-Nearest Neighbors (kNN) Classification
+    name: kNN Classification
     order: 1
     page_type: example_index
-    permalink: python/knn/
-    redirect_from: python/machine-learning-tutorials/
-    thumbnail: thumbnail/line-and-scatter.jpg
+    permalink: python/knn-classification/
+    thumbnail: thumbnail/knn-classification.png
 ---
 
 ## Basic Binary Classification with `plotly.express`
@@ -152,7 +150,7 @@ Z = Z.reshape(ll.shape)
 proba = clf.predict_proba(np.c_[ll.ravel(), ww.ravel()])
 proba = proba.reshape(ll.shape + (3,))
 
-fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
+fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species', width=1000, height=1000)
 fig.update_traces(marker_size=10, marker_line_width=1)
 fig.add_trace(
     go.Heatmap(

From be71cfe5d79c99a3fc99e721d771140231946874 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 28 Feb 2020 16:55:42 -0500
Subject: [PATCH 08/35] Added 3 sections, drafted out 2 sections

---
 doc/python/ml-regression.md | 157 +++++++++++++++++++++++++++++++++++-
 1 file changed, 155 insertions(+), 2 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index e2b0d37724d..3e34f73de3f 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -1,7 +1,91 @@
-# Regression
+---
+jupyter:
+  jupytext:
+    notebook_metadata_filter: all
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.1'
+      jupytext_version: 1.1.1
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  language_info:
+    codemirror_mode:
+      name: ipython
+      version: 3
+    file_extension: .py
+    mimetype: text/x-python
+    name: python
+    nbconvert_exporter: python
+    pygments_lexer: ipython3
+    version: 3.7.6
+  plotly:
+    description: Visualize regression in scikit-learn with Plotly
+    display_as: ai_ml
+    language: python
+    layout: base
+    name: ML Regression
+    order: 2
+    page_type: example_index
+    permalink: python/ml-regression/
+    thumbnail: thumbnail/knn-classification.png
+---
 
+## Basic linear regression
 
-### Visualizing kNN Regression
+This example shows how to train a simple linear regression from `sklearn` to predicts the tips servers will receive based on the value of the total bill (dataset is included in `px.data`).
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
+
+df = px.data.tips()
+X = df.total_bill.values.reshape(-1, 1)
+
+model = LinearRegression()
+model.fit(X, df.tip)
+
+x_range = np.linspace(X.min(), X.max(), 100)
+y_range = model.predict(x_range.reshape(-1, 1))
+
+fig = px.scatter(df, x='total_bill', y='tip', opacity=0.65)
+fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
+fig.show()
+```
+
+## Model generalization on unseen data
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+
+df = px.data.tips()
+X = df.total_bill.values.reshape(-1, 1)
+X_train, X_test, y_train, y_test = train_test_split(X, df.tip, random_state=0)
+
+model = LinearRegression()
+model.fit(X_train, y_train)
+
+x_range = np.linspace(X.min(), X.max(), 100)
+y_range = model.predict(x_range.reshape(-1, 1))
+
+
+fig = go.Figure([
+    go.Scatter(x=X_train.squeeze(), y=y_train, name='train', mode='markers'),
+    go.Scatter(x=X_test.squeeze(), y=y_test, name='test', mode='markers'),
+    go.Scatter(x=x_range, y=y_range, name='prediction')
+])
+fig.show()
+```
+
+## Comparing different kNN models parameters
 
 ```python
 import numpy as np
@@ -27,6 +111,75 @@ fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
 fig.show()
 ```
 
+## 3D regression surface with `px.scatter_3d` and `go.Surface`
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.neighbors import KNeighborsRegressor
+
+mesh_size = .02
+margin = 0
+
+df = px.data.iris()
+features = ["sepal_width", "sepal_length", "petal_width"]
+
+X = df[['sepal_width', 'sepal_length']]
+y = df['petal_width']
+
+# Condition the model on sepal width and length, predict the petal width
+knn = KNeighborsRegressor(10, weights='distance')
+knn.fit(X, y)
+
+# Create a mesh grid on which we will run our model
+x_min, x_max = X.sepal_width.min() - margin, X.sepal_width.max() + margin
+y_min, y_max = X.sepal_length.min() - margin, X.sepal_length.max() + margin
+xrange = np.arange(x_min, x_max, mesh_size)
+yrange = np.arange(y_min, y_max, mesh_size)
+xx, yy = np.meshgrid(xrange, yrange)
+
+# Run kNN
+pred = knn.predict(np.c_[xx.ravel(), yy.ravel()])
+pred = pred.reshape(xx.shape)
+
+# Generate the plot
+fig = px.scatter_3d(df, x='sepal_width', y='sepal_length', z='petal_width')
+fig.update_traces(marker=dict(size=5))
+fig.add_traces(go.Surface(x=xrange, y=yrange, z=pred, name='pred_surface'))
+fig.show()
+```
+
+## Label polynomial fits with latex
+
+```python
+
+```
+
+## Prediction Error Plots
+
+
+### Simple Prediction Error
+
+```python
+
+```
+
+### Augmented Prediction Error plot using `px`
+
+```python
+
+```
+
+### Grid Search Visualization using `px.scatter_matrix`
+
+
+## Residual Plots
+
+```python
+
+```
+
 ### Reference
 
 Learn more about `px` here:

From 61b3ad8e4d73ff6dd4d529c2499e7621719fec7d Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Mon, 2 Mar 2020 15:39:38 -0500
Subject: [PATCH 09/35] ML Docs: Added 3 new sections to regression notebook

---
 doc/python/ml-regression.md | 206 ++++++++++++++++++++++++++++++++++--
 1 file changed, 199 insertions(+), 7 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 3e34f73de3f..2e0087982bd 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -33,9 +33,28 @@ jupyter:
     thumbnail: thumbnail/knn-classification.png
 ---
 
-## Basic linear regression
+## Basic linear regression plots
 
-This example shows how to train a simple linear regression from `sklearn` to predicts the tips servers will receive based on the value of the total bill (dataset is included in `px.data`).
+
+### Ordinary Least Squares (OLS) with `plotly.express`
+
+
+This example shows how to use `plotly.express` to fit a simple Ordinary Least Squares (OLS) regression that can predict the tips servers will receive based on the value of the total bill.
+
+```python
+import plotly.express as px
+
+df = px.data.tips()
+fig = px.scatter(
+    df, x='total_bill', y='tip', opacity=0.65,
+    trendline='ols', trendline_color_override='red'
+)
+fig.show()
+```
+
+### Linear Regression with scikit-learn
+
+You can also perform the same prediction using scikit-learn's `LinearRegression`.
 
 ```python
 import numpy as np
@@ -123,7 +142,6 @@ mesh_size = .02
 margin = 0
 
 df = px.data.iris()
-features = ["sepal_width", "sepal_length", "petal_width"]
 
 X = df[['sepal_width', 'sepal_length']]
 y = df['petal_width']
@@ -150,10 +168,46 @@ fig.add_traces(go.Surface(x=xrange, y=yrange, z=pred, name='pred_surface'))
 fig.show()
 ```
 
-## Label polynomial fits with latex
+## Displaying `PolynomialFeatures` using $\LaTeX$
+
+It's easy to display LaTeX equations in legends and titles by simply adding `$` before and after your equation.
 
 ```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+def format_coefs(coefs):
+    equation_list = [f"{coef}x^{i}" for i, coef in enumerate(coefs)]
+    equation = "$" +  " + ".join(equation_list) + "$"
+    
+    replace_map = {"x^0": "", "x^1": "x", '+ -': '- '}
+    for old, new in replace_map.items():
+        equation = equation.replace(old, new)
+        
+    return equation
 
+df = px.data.tips()
+X = df.total_bill.values.reshape(-1, 1)
+x_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
+
+fig = px.scatter(df, x='total_bill', y='tip', opacity=0.65)
+for n_features in [1, 2, 3, 4]:
+    poly = PolynomialFeatures(n_features)
+    poly.fit(X)
+    X_poly = poly.transform(X)
+    x_range_poly = poly.transform(x_range)
+
+    model = LinearRegression(fit_intercept=False)
+    model.fit(X_poly, df.tip)
+    y_poly = model.predict(x_range_poly)
+    
+    equation = format_coefs(model.coef_.round(2))
+    fig.add_traces(go.Scatter(x=x_range.squeeze(), y=y_poly, name=equation))
+
+fig.show()
 ```
 
 ## Prediction Error Plots
@@ -162,22 +216,160 @@ fig.show()
 ### Simple Prediction Error
 
 ```python
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
 
+df = px.data.iris()
+X = df.loc[train_idx, ['sepal_width', 'sepal_length']]
+y = df.loc[train_idx, 'petal_width']
+
+# Condition the model on sepal width and length, predict the petal width
+model = LinearRegression()
+model.fit(X, y)
+y_pred = model.predict(X)
+
+fig = px.scatter(x=y, y=y_pred, labels={'x': 'y true', 'y': 'y pred'})
+fig.add_shape(
+    type="line", line=dict(dash='dash'),
+    x0=y.min(), y0=y.min(), 
+    x1=y.max(), y1=y.max()
+)
+fig.show()
 ```
 
-### Augmented Prediction Error plot using `px`
+### Augmented Prediction Error analysis using `plotly.express`
 
 ```python
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
 
-```
+df = px.data.iris()
 
-### Grid Search Visualization using `px.scatter_matrix`
+# Split data into training and test splits
+train_idx, test_idx = train_test_split(df.index, test_size=.25, random_state=0)
+df['split'] = 'train'
+df.loc[test_idx, 'split'] = 'test'
 
+X = df[['sepal_width', 'sepal_length']]
+X_train = df.loc[train_idx, ['sepal_width', 'sepal_length']]
+y_train = df.loc[train_idx, 'petal_width']
+
+# Condition the model on sepal width and length, predict the petal width
+model = LinearRegression()
+model.fit(X_train, y_train)
+df['prediction'] = model.predict(X)
+
+fig = px.scatter(
+    df, x='petal_width', y='prediction',
+    marginal_x='histogram', marginal_y='histogram',
+    color='split', trendline='ols'
+)
+fig.add_shape(
+    type="line", line=dict(dash='dash'),
+    x0=y.min(), y0=y.min(), 
+    x1=y.max(), y1=y.max()
+)
+
+fig.show()
+```
 
 ## Residual Plots
 
+Just like prediction error plots, it's easy to visualize your prediction residuals in just a few lines of codes using `plotly.express` built-in capabilities.
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+
+df = px.data.iris()
+
+# Split data into training and test splits
+train_idx, test_idx = train_test_split(df.index, test_size=.25, random_state=0)
+df['split'] = 'train'
+df.loc[test_idx, 'split'] = 'test'
+
+X = df[['sepal_width', 'sepal_length']]
+X_train = df.loc[train_idx, ['sepal_width', 'sepal_length']]
+y_train = df.loc[train_idx, 'petal_width']
+
+# Condition the model on sepal width and length, predict the petal width
+model = LinearRegression()
+model.fit(X_train, y_train)
+df['prediction'] = model.predict(X)
+df['residual'] = df['prediction'] - df['petal_width']
+
+fig = px.scatter(
+    df, x='prediction', y='residual',
+    marginal_y='violin',
+    color='split', trendline='ols'
+)
+fig.show()
+```
+
+## Grid Search Visualization using `px` facets
+
 ```python
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.model_selection import GridSearchCV
+from sklearn.tree import DecisionTreeRegressor
 
+N_FOLD = 5
+
+df = px.data.iris()
+X = df.loc[train_idx, ['sepal_width', 'sepal_length']]
+y = df.loc[train_idx, 'petal_width']
+
+model = DecisionTreeRegressor()
+param_grid = {
+    'criterion': ['mse', 'friedman_mse', 'mae'], 
+    'max_depth': range(2, 5)
+}
+grid = GridSearchCV(model, param_grid, cv=N_FOLD)
+
+grid.fit(X, y)
+grid_df = pd.DataFrame(grid.cv_results_)
+
+# Convert the wide format of the grid into the long format 
+# accepted by plotly.express
+melted = (
+    grid_df
+    .rename(columns=lambda col: col.replace('param_', ''))
+    .melt(
+        value_vars=[f'split{i}_test_score' for i in range(N_FOLD)],
+        id_vars=['rank_test_score', 'mean_test_score', 
+                 'mean_fit_time', 'criterion', 'max_depth']
+    )
+)
+
+# Convert R-Squared measure to %
+melted[['value', 'mean_test_score']] *= 100
+
+# Format the variable names for simplicity
+melted['variable'] = (
+    melted['variable']
+    .str.replace('_test_score', '')
+    .str.replace('split', '')
+)
+
+px.bar(
+    melted, x='variable', y='value', 
+    color='mean_test_score', 
+    facet_row='max_depth', 
+    facet_col='criterion',
+    title='Test Scores of Grid Search',
+    hover_data=['mean_fit_time', 'rank_test_score'],
+    labels={'variable': 'cv_split', 
+            'value': 'r_squared', 
+            'mean_test_score': "mean_r_squared"}
+)
 ```
 
 ### Reference

From 86e987b380a3c8951ee28ed7dd33b26db307366e Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Mon, 2 Mar 2020 16:48:38 -0500
Subject: [PATCH 10/35] ML Docs: Updated last ML regression section for clarity

---
 doc/python/ml-regression.md | 70 +++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 27 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 2e0087982bd..3c9a2326188 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -213,7 +213,7 @@ fig.show()
 ## Prediction Error Plots
 
 
-### Simple Prediction Error
+### Simple actual vs predicted plot
 
 ```python
 import plotly.express as px
@@ -221,8 +221,8 @@ import plotly.graph_objects as go
 from sklearn.linear_model import LinearRegression
 
 df = px.data.iris()
-X = df.loc[train_idx, ['sepal_width', 'sepal_length']]
-y = df.loc[train_idx, 'petal_width']
+X = df[['sepal_width', 'sepal_length']]
+y = df['petal_width']
 
 # Condition the model on sepal width and length, predict the petal width
 model = LinearRegression()
@@ -238,7 +238,7 @@ fig.add_shape(
 fig.show()
 ```
 
-### Augmented Prediction Error analysis using `plotly.express`
+### Augmented prediction error analysis using `plotly.express`
 
 ```python
 import plotly.express as px
@@ -276,7 +276,7 @@ fig.add_shape(
 fig.show()
 ```
 
-## Residual Plots
+## Residual plots
 
 Just like prediction error plots, it's easy to visualize your prediction residuals in just a few lines of codes using `plotly.express` built-in capabilities.
 
@@ -312,28 +312,34 @@ fig = px.scatter(
 fig.show()
 ```
 
-## Grid Search Visualization using `px` facets
+## Grid search visualization using `px.density_heatmap` and `px.box`
+
+In this example, we show how to visualize the results of a grid search on a `DecisionTreeRegressor`. The first plot shows how to visualize the score of each model parameter on individual splits (grouped using facets). The second plot aggregates the results of all splits such that each box represents a single model.
 
 ```python
+import numpy as np
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.model_selection import GridSearchCV
 from sklearn.tree import DecisionTreeRegressor
 
-N_FOLD = 5
+N_FOLD = 6
 
+# Load and shuffle dataframe
 df = px.data.iris()
-X = df.loc[train_idx, ['sepal_width', 'sepal_length']]
-y = df.loc[train_idx, 'petal_width']
+df = df.sample(frac=1, random_state=0)
+
+X = df[['sepal_width', 'sepal_length']]
+y = df['petal_width']
 
+# Define and fit the grid
 model = DecisionTreeRegressor()
 param_grid = {
     'criterion': ['mse', 'friedman_mse', 'mae'], 
     'max_depth': range(2, 5)
 }
 grid = GridSearchCV(model, param_grid, cv=N_FOLD)
-
 grid.fit(X, y)
 grid_df = pd.DataFrame(grid.cv_results_)
 
@@ -344,32 +350,42 @@ melted = (
     .rename(columns=lambda col: col.replace('param_', ''))
     .melt(
         value_vars=[f'split{i}_test_score' for i in range(N_FOLD)],
-        id_vars=['rank_test_score', 'mean_test_score', 
-                 'mean_fit_time', 'criterion', 'max_depth']
+        id_vars=['mean_test_score', 'mean_fit_time', 'criterion', 'max_depth'],
+        var_name="cv_split",
+        value_name="r_squared"
     )
 )
 
-# Convert R-Squared measure to %
-melted[['value', 'mean_test_score']] *= 100
-
 # Format the variable names for simplicity
-melted['variable'] = (
-    melted['variable']
+melted['cv_split'] = (
+    melted['cv_split']
     .str.replace('_test_score', '')
     .str.replace('split', '')
 )
 
-px.bar(
-    melted, x='variable', y='value', 
-    color='mean_test_score', 
-    facet_row='max_depth', 
-    facet_col='criterion',
-    title='Test Scores of Grid Search',
-    hover_data=['mean_fit_time', 'rank_test_score'],
-    labels={'variable': 'cv_split', 
-            'value': 'r_squared', 
-            'mean_test_score': "mean_r_squared"}
+# Single function call to plot each figure
+fig_hmap = px.density_heatmap(
+    melted, x="max_depth", y='criterion', 
+    histfunc="sum", z="r_squared",
+    title='Grid search results on individual fold',
+    hover_data=['mean_fit_time'],
+    facet_col="cv_split", facet_col_wrap=3,
+    labels={'mean_test_score': "mean_r_squared"}
 )
+
+fig_box = px.box(
+    melted, x='max_depth', y='r_squared', 
+    title='Grid search results ',
+    hover_data=['mean_fit_time'],
+    points='all',
+    color="criterion",
+    hover_name='cv_split',
+    labels={'mean_test_score': "mean_r_squared"}
+)
+
+# Display
+fig_hmap.show()
+fig_box.show()
 ```
 
 ### Reference

From 1e4a00805aeaa6b3dfcfa61312a9e8783edb072f Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Mon, 2 Mar 2020 17:14:44 -0500
Subject: [PATCH 11/35] ML Docs: Added annotations after each section of
 regression notebook

---
 doc/python/ml-regression.md | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 3c9a2326188..6414dbf43a9 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -78,6 +78,8 @@ fig.show()
 
 ## Model generalization on unseen data
 
+Easily color your plot based on a predefined data split.
+
 ```python
 import numpy as np
 import plotly.express as px
@@ -106,6 +108,8 @@ fig.show()
 
 ## Comparing different kNN models parameters
 
+Compare the performance of two different models on the same dataset. This can be easily combined with discrete color legends from `px`.
+
 ```python
 import numpy as np
 import plotly.express as px
@@ -114,14 +118,16 @@ from sklearn.neighbors import KNeighborsRegressor
 
 df = px.data.tips()
 X = df.total_bill.values.reshape(-1, 1)
+x_range = np.linspace(X.min(), X.max(), 100)
 
+# Model #1
 knn_dist = KNeighborsRegressor(10, weights='distance')
-knn_uni = KNeighborsRegressor(10, weights='uniform')
 knn_dist.fit(X, df.tip)
-knn_uni.fit(X, df.tip)
-
-x_range = np.linspace(X.min(), X.max(), 100)
 y_dist = knn_dist.predict(x_range.reshape(-1, 1))
+
+# Model #2
+knn_uni = KNeighborsRegressor(10, weights='uniform')
+knn_uni.fit(X, df.tip)
 y_uni = knn_uni.predict(x_range.reshape(-1, 1))
 
 fig = px.scatter(df, x='total_bill', y='tip', color='sex', opacity=0.65)
@@ -132,6 +138,8 @@ fig.show()
 
 ## 3D regression surface with `px.scatter_3d` and `go.Surface`
 
+Visualize the decision plane of your model whenever you have more than one variable in your `X`.
+
 ```python
 import numpy as np
 import plotly.express as px
@@ -229,7 +237,7 @@ model = LinearRegression()
 model.fit(X, y)
 y_pred = model.predict(X)
 
-fig = px.scatter(x=y, y=y_pred, labels={'x': 'y true', 'y': 'y pred'})
+fig = px.scatter(x=y_pred, y=y, labels={'x': 'prediction', 'y': 'actual'})
 fig.add_shape(
     type="line", line=dict(dash='dash'),
     x0=y.min(), y0=y.min(), 
@@ -238,7 +246,9 @@ fig.add_shape(
 fig.show()
 ```
 
-### Augmented prediction error analysis using `plotly.express`
+### Enhanced prediction error analysis using `plotly.express`
+
+Add marginal histograms to quickly diagnose any prediction bias your model might have. The built-in `OLS` functionality lets you visualize how well your model generalizes by comparing it with the theoretical optimal fit (black dotted line).
 
 ```python
 import plotly.express as px
@@ -254,6 +264,7 @@ df['split'] = 'train'
 df.loc[test_idx, 'split'] = 'test'
 
 X = df[['sepal_width', 'sepal_length']]
+y = df['petal_width']
 X_train = df.loc[train_idx, ['sepal_width', 'sepal_length']]
 y_train = df.loc[train_idx, 'petal_width']
 
@@ -263,7 +274,7 @@ model.fit(X_train, y_train)
 df['prediction'] = model.predict(X)
 
 fig = px.scatter(
-    df, x='petal_width', y='prediction',
+    df, x='prediction', y='petal_width',
     marginal_x='histogram', marginal_y='histogram',
     color='split', trendline='ols'
 )

From 1de7a14986e4e53fc29307caf9d577c08ac49ac2 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Mon, 2 Mar 2020 17:31:59 -0500
Subject: [PATCH 12/35] ML Docs: updated ml regression header

---
 doc/python/ml-regression.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 6414dbf43a9..968858ec64b 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -22,7 +22,7 @@ jupyter:
     pygments_lexer: ipython3
     version: 3.7.6
   plotly:
-    description: Visualize regression in scikit-learn with Plotly
+    description: Visualize regression in scikit-learn with Plotly.
     display_as: ai_ml
     language: python
     layout: base
@@ -30,7 +30,7 @@ jupyter:
     order: 2
     page_type: example_index
     permalink: python/ml-regression/
-    thumbnail: thumbnail/knn-classification.png
+    thumbnail: thumbnail/ml-regression.png
 ---
 
 ## Basic linear regression plots

From a28ee1fb6d56550428b6a9fcb6c1247258efc0d2 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Mon, 2 Mar 2020 21:11:47 -0500
Subject: [PATCH 13/35] ML Docs: Added new section to regression, updated
 references

---
 doc/python/ml-regression.md | 76 ++++++++++++++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 5 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 968858ec64b..f345cc22445 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -323,6 +323,58 @@ fig = px.scatter(
 fig.show()
 ```
 
+## Regularization visualization
+
+
+### Plot alphas for individual folds
+
+```python
+import pandas as pd
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LassoCV
+
+# Load and preprocess the data
+df = px.data.gapminder()
+X = df.drop(columns=['lifeExp', 'iso_num'])
+X = pd.get_dummies(X, columns=['country', 'continent', 'iso_alpha'])
+y = df['lifeExp']
+
+# Train model to predict life expectancy
+model = LassoCV(cv=N_FOLD, normalize=True)
+model.fit(X, y)
+mean_alphas = model.mse_path_.mean(axis=-1)
+
+fig = go.Figure([
+    go.Scatter(
+        x=model.alphas_, y=model.mse_path_[:, i], 
+        name=f"Fold: {i+1}", opacity=.5, line=dict(dash='dash'),
+        hovertemplate="alpha: %{x} <br>MSE: %{y}"
+    )
+    for i in range(N_FOLD)
+])
+fig.add_traces(go.Scatter(
+    x=model.alphas_, y=mean_alphas, 
+    name='Mean', line=dict(color='black', width=3),
+    hovertemplate="alpha: %{x} <br>MSE: %{y}",
+))
+
+fig.add_shape(
+    type="line", line=dict(dash='dash'),
+    x0=model.alpha_, y0=0,
+    x1=model.alpha_, y1=1,
+    yref='paper'
+)
+
+fig.update_layout(
+    xaxis_title='alpha', 
+    xaxis_type="log", 
+    yaxis_title="Mean Square Error (MSE)"
+)
+fig.show()
+```
+
 ## Grid search visualization using `px.density_heatmap` and `px.box`
 
 In this example, we show how to visualize the results of a grid search on a `DecisionTreeRegressor`. The first plot shows how to visualize the score of each model parameter on individual splits (grouped using facets). The second plot aggregates the results of all splits such that each box represents a single model.
@@ -401,8 +453,22 @@ fig_box.show()
 
 ### Reference
 
-Learn more about `px` here:
-* https://plot.ly/python/plotly-express/
-
-This tutorial was inspired by amazing examples from the official scikit-learn docs:
-* https://scikit-learn.org/stable/auto_examples/neighbors/plot_regression.html
+Learn more about the `px` figures used in this tutorial:
+* Plotly Express: https://plot.ly/python/plotly-express/
+* Vertical Lines: https://plot.ly/python/shapes/
+* Heatmaps: https://plot.ly/python/heatmaps/
+* Box Plots: https://plot.ly/python/box-plots/
+* 3D Scatter: https://plot.ly/python/3d-scatter-plots/
+* Surface Plots: https://plot.ly/python/3d-surface-plots/
+
+Learn more about the Machine Learning models used in this tutorial:
+* https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
+* https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html
+* https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html
+* https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
+* https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html
+
+Other tutorials that inspired this notebook:
+* https://seaborn.pydata.org/examples/residplot.html
+* https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_model_selection.html
+* http://www.scikit-yb.org/zh/latest/api/regressor/peplot.html

From 0df5bcb3f1cbc61301adbc669f3092496b865002 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 6 Mar 2020 15:05:49 -0500
Subject: [PATCH 14/35] ML Docs: Added coefficient MLR example

---
 doc/python/ml-regression.md | 101 ++++++++++++++++++++++++------------
 1 file changed, 68 insertions(+), 33 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index f345cc22445..e37e1d04e95 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -39,7 +39,7 @@ jupyter:
 ### Ordinary Least Square (OLS) with `plotly.express`
 
 
-This example shows how to use `plotly.express` to train a simply Ordinary Least Square (OLS) that can predict the tips servers will receive based on the value of the total bill.
+This example shows how to use `plotly.express`'s `trendline` parameter to train a simple Ordinary Least Squares (OLS) model for predicting the tips servers will receive based on the value of the total bill.
 
 ```python
 import plotly.express as px
@@ -108,7 +108,7 @@ fig.show()
 
 ## Comparing different kNN models parameters
 
-Compare the performance of two different models on the same dataset. This can be easily combined with discrete color legends from `px`.
+Compare the performance of two different models on the same dataset. This can be easily combined with discrete color legends from `px`, such as coloring by the assigned `sex`.
 
 ```python
 import numpy as np
@@ -136,9 +136,51 @@ fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
 fig.show()
 ```
 
+## Displaying `PolynomialFeatures` using $\LaTeX$
+
+It's easy to display LaTeX equations in legends and titles by simply adding `$` before and after your equation.
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+def format_coefs(coefs):
+    equation_list = [f"{coef}x^{i}" for i, coef in enumerate(coefs)]
+    equation = "$" +  " + ".join(equation_list) + "$"
+    
+    replace_map = {"x^0": "", "x^1": "x", '+ -': '- '}
+    for old, new in replace_map.items():
+        equation = equation.replace(old, new)
+        
+    return equation
+
+df = px.data.tips()
+X = df.total_bill.values.reshape(-1, 1)
+x_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
+
+fig = px.scatter(df, x='total_bill', y='tip', opacity=0.65)
+for n_features in [1, 2, 3, 4]:
+    poly = PolynomialFeatures(n_features)
+    poly.fit(X)
+    X_poly = poly.transform(X)
+    x_range_poly = poly.transform(x_range)
+
+    model = LinearRegression(fit_intercept=False)
+    model.fit(X_poly, df.tip)
+    y_poly = model.predict(x_range_poly)
+    
+    equation = format_coefs(model.coef_.round(2))
+    fig.add_traces(go.Scatter(x=x_range.squeeze(), y=y_poly, name=equation))
+
+fig.show()
+```
+
 ## 3D regression surface with `px.scatter_3d` and `go.Surface`
 
-Visualize the decision plane of your model whenever you have more than one variable in your `X`.
+Visualize the decision plane of your model whenever you have more than one variable in your input data.
 
 ```python
 import numpy as np
@@ -176,53 +218,44 @@ fig.add_traces(go.Surface(x=xrange, y=yrange, z=pred, name='pred_surface'))
 fig.show()
 ```
 
-## Displaying `PolynomialFeatures` using $\LaTeX$
+## Visualizing coefficients for multiple linear regression (MLR)
 
-It's easy to diplay latex equations in legend and titles by simply adding `$` before and after your equation.
+When you are fitting a linear regression, you often want to know which features matter the most in your regression's output.
 
 ```python
-import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.linear_model import LinearRegression
-from sklearn.preprocessing import PolynomialFeatures
 
-def format_coefs(coefs):
-    equation_list = [f"{coef}x^{i}" for i, coef in enumerate(coefs)]
-    equation = "$" +  " + ".join(equation_list) + "$"
-    
-    replace_map = {"x^0": "", "x^1": "x", '+ -': '- '}
-    for old, new in replace_map.items():
-        equation = equation.replace(old, new)
-        
-    return equation
+df = px.data.iris()
 
-df = px.data.tips()
-X = df.total_bill.values.reshape(-1, 1)
-x_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
+X = df.drop(columns=['petal_width', 'species_id'])
+X = pd.get_dummies(X, columns=['species'], prefix_sep='=')
+y = df['petal_width']
 
-fig = px.scatter(df, x='total_bill', y='tip', opacity=0.65)
-for n_features in [1, 2, 3, 4]:
-    poly = PolynomialFeatures(n_features)
-    poly.fit(X)
-    X_poly = poly.transform(X)
-    x_range_poly = poly.transform(x_range)
+model = LinearRegression()
+model.fit(X, y)
 
-    model = LinearRegression(fit_intercept=False)
-    model.fit(X_poly, df.tip)
-    y_poly = model.predict(x_range_poly)
-    
-    equation = format_coefs(model.coef_.round(2))
-    fig.add_traces(go.Scatter(x=x_range.squeeze(), y=y_poly, name=equation))
+colors = ['Positive' if c > 0 else 'Negative' for c in model.coef_]
 
+fig = px.bar(
+    x=X.columns, y=model.coef_, color=colors,
+    color_discrete_sequence=['red', 'blue'],
+    labels=dict(x='Feature', y='Linear coefficient'),
+    title='Weight of each feature for predicting petal width'
+)
 fig.show()
 ```
 
 ## Prediction Error Plots
 
+When you are working with very high-dimensional data, it is inconvenient to plot every dimension with your output `y`. Instead, you can use methods such as prediction error plots, which let you visualize how well your model does compared to the ground truth.
+
 
 ### Simple actual vs predicted plot
 
+This example shows you the simplest way to compare the predicted output vs. the actual output. A good model will have most of the scatter dots near the diagonal black line.
+
 ```python
 import plotly.express as px
 import plotly.graph_objects as go
@@ -323,10 +356,10 @@ fig = px.scatter(
 fig.show()
 ```
 
-## Regularization visualization
+## Visualize regularization across different cross-validation folds
 
 
-### Plot alphas for individual folds
+In this example, we show how to plot the results of various $\alpha$ penalization values from the results of cross-validation using scikit-learn's `LassoCV`. This is useful to see how much the error of the optimal alpha actually varies across CV folds.
 
 ```python
 import pandas as pd
@@ -335,6 +368,8 @@ import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.linear_model import LassoCV
 
+N_FOLD = 6
+
 # Load and preprocess the data
 df = px.data.gapminder()
 X = df.drop(columns=['lifeExp', 'iso_num'])

From 8e4dad233e0b4a03ac031db51f051e1b3a055132 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 6 Mar 2020 16:18:32 -0500
Subject: [PATCH 15/35] ML Docs: Start pca notebook

---
 doc/python/ml-pca.md | 135 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 doc/python/ml-pca.md

diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md
new file mode 100644
index 00000000000..105edd8af66
--- /dev/null
+++ b/doc/python/ml-pca.md
@@ -0,0 +1,135 @@
+---
+jupyter:
+  jupytext:
+    notebook_metadata_filter: all
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.1'
+      jupytext_version: 1.1.1
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  language_info:
+    codemirror_mode:
+      name: ipython
+      version: 3
+    file_extension: .py
+    mimetype: text/x-python
+    name: python
+    nbconvert_exporter: python
+    pygments_lexer: ipython3
+    version: 3.7.6
+  plotly:
+    description: Visualize Principal Component Analysis (PCA) of your high-dimensional
+      data with Plotly on Python.
+    display_as: ai_ml
+    language: python
+    layout: base
+    name: PCA Visualization
+    order: 4
+    page_type: example_index
+    permalink: python/pca-visualization/
+    thumbnail: thumbnail/ml-pca.png
+---
+
+## Basic PCA Scatter Plot
+
+This example shows you how to simply visualize the first two principal components of a PCA, by reducing a dataset of 4 dimensions to 2D. It uses scikit-learn's `PCA`.
+
+```python
+import plotly.express as px
+from sklearn.decomposition import PCA
+
+df = px.data.iris()
+X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
+
+pca = PCA(n_components=2)
+components = pca.fit_transform(X)
+
+fig = px.scatter(x=components[:, 0], y=components[:, 1], color=df['species'])
+fig.show()
+```
+
+## Visualize PCA with `px.scatter_3d`
+
+Just like the basic PCA plot, this lets you visualize the first 3 dimensions. This additionally displays the total variance explained by those components.
+
+```python
+import plotly.express as px
+from sklearn.decomposition import PCA
+
+df = px.data.iris()
+X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
+
+pca = PCA(n_components=3)
+components = pca.fit_transform(X)
+
+total_var = pca.explained_variance_ratio_.sum() * 100
+
+fig = px.scatter_3d(
+    x=components[:, 0], y=components[:, 1], z=components[:, 2],
+    color=df['species'], 
+    title=f'Total Explained Variance: {total_var:.2f}%',
+    labels={'x': 'PC 1', 'y': 'PC 2', 'z': 'PC 3'},
+)
+fig.show()
+```
+
+## Plot high-dimensional components with `px.scatter_matrix`
+
+If you need to visualize more than 3 dimensions, you can use scatter plot matrices.
+
+```python
+import pandas as pd
+from sklearn.decomposition import PCA
+from sklearn.datasets import load_boston
+
+boston = load_boston()
+df = pd.DataFrame(boston.data, columns=boston.feature_names)
+
+pca = PCA(n_components=5)
+components = pca.fit_transform(df)
+
+total_var = pca.explained_variance_ratio_.sum() * 100
+
+labels = {str(i): f"PC {i+1}" for i in range(5)}
+labels['color'] = 'Median Price'
+
+fig = px.scatter_matrix(
+    components, 
+    color=boston.target,
+    dimensions=range(5),
+    labels=labels,
+    title=f'Total Explained Variance: {total_var:.2f}%',
+)
+fig.update_traces(diagonal_visible=False)
+fig.show()
+```
+
+## Plotting explained variance
+
+Often, you might be interested in seeing how much variance the PCA is able to explain as you increase the number of components, in order to decide how many dimensions to ultimately keep or analyze. This example shows you how to quickly plot the cumulative sum of explained variance for a high-dimensional dataset like [Diabetes](https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset).
+
+```python
+import numpy as np
+import pandas as pd
+from sklearn.decomposition import PCA
+from sklearn.datasets import load_diabetes
+
+boston = load_diabetes()
+df = pd.DataFrame(boston.data, columns=boston.feature_names)
+
+pca = PCA()
+pca.fit(df)
+exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
+
+px.area(
+    x=range(1, exp_var_cumul.shape[0] + 1),
+    y=exp_var_cumul, 
+    labels={"x": "# Components", "y": "Explained Variance"}
+)
+```
+
+## Visualize loadings

From 4b7143061f0fe58289f5d796b8fc960f82844638 Mon Sep 17 00:00:00 2001
From: xhlulu <xhlperso@gmail.com>
Date: Fri, 6 Mar 2020 18:06:03 -0500
Subject: [PATCH 16/35] ML Docs: Start ROC/PR section

---
 doc/python/ml-roc-pr.md | 201 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 doc/python/ml-roc-pr.md

diff --git a/doc/python/ml-roc-pr.md b/doc/python/ml-roc-pr.md
new file mode 100644
index 00000000000..8c1bc6becb4
--- /dev/null
+++ b/doc/python/ml-roc-pr.md
@@ -0,0 +1,201 @@
+---
+jupyter:
+  jupytext:
+    notebook_metadata_filter: all
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.1'
+      jupytext_version: 1.1.1
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  language_info:
+    codemirror_mode:
+      name: ipython
+      version: 3
+    file_extension: .py
+    mimetype: text/x-python
+    name: python
+    nbconvert_exporter: python
+    pygments_lexer: ipython3
+    version: 3.7.6
+  plotly:
+    description: Interpret the results of your classification using Receiver Operating
+      Characteristics (ROC) and Precision-Recall (PR) Curves using Plotly on Python.
+    display_as: ai_ml
+    language: python
+    layout: base
+    name: ROC and PR Curves
+    order: 3
+    page_type: example_index
+    permalink: python/roc-and-pr-curves/
+    thumbnail: thumbnail/ml-roc-pr.png
+---
+
+## Basic Binary ROC Curve
+
+```python
+import plotly.express as px
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_curve, auc
+from sklearn.datasets import make_classification
+
+X, y = make_classification(n_samples=500, random_state=0)
+
+model = LogisticRegression()
+model.fit(X, y)
+y_score = model.predict_proba(X)[:, 1]
+
+fpr, tpr, thresholds = roc_curve(y, y_score)
+
+fig = px.area(
+    x=fpr, y=tpr, 
+    title=f'ROC Curve (AUC={auc(fpr, tpr):.4f})',
+    labels=dict(x='False Positive Rate', y='True Positive Rate')
+)
+fig.add_shape(
+    type='line', line=dict(dash='dash'), 
+    x0=0, x1=1, y0=0, y1=1
+)
+fig.show()
+```
+
+## Multiclass ROC Curve
+
+When you have more than 2 classes, you will need to plot the ROC curve for each class separately. Make sure that you use a [one-versus-rest](https://scikit-learn.org/stable/modules/multiclass.html#one-vs-the-rest) model, or make sure that your problem has a [multi-label](https://scikit-learn.org/stable/modules/multiclass.html#multilabel-classification-format) format; otherwise, your ROC curve might not return the expected results.
+
+```python
+import numpy as np
+import pandas as pd
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_curve, roc_auc_score
+import plotly.graph_objects as go
+import plotly.express as px
+
+np.random.seed(0)
+
+# Artificially add noise to make task harder
+df = px.data.iris()
+samples = df.species.sample(n=50, random_state=0)
+np.random.shuffle(samples.values)
+df.loc[samples.index, 'species'] = samples.values
+
+# Define the inputs and outputs
+X = df.drop(columns=['species', 'species_id'])
+y = df['species']
+
+# Fit the model, then one-hot encode the labels (columns sorted like model.classes_)
+model = LogisticRegression(max_iter=200)
+model.fit(X, y)
+y_scores = model.predict_proba(X)
+y_onehot = pd.get_dummies(y)
+
+# Create an empty figure, and iteratively add new lines
+# every time we compute a new class
+fig = go.Figure()
+fig.add_shape(
+    type='line', line=dict(dash='dash'), 
+    x0=0, x1=1, y0=0, y1=1
+)
+
+for i in range(y_scores.shape[1]):
+    y_true = y_onehot.iloc[:, i]
+    y_score = y_scores[:, i]
+    
+    fpr, tpr, _ = roc_curve(y_true, y_score)
+    auc_score = roc_auc_score(y_true, y_score)
+    
+    name = f"{y_onehot.columns[i]} (AUC={auc_score:.2f})"
+    fig.add_trace(go.Scatter(x=fpr, y=tpr, name=name, mode='lines'))
+
+fig.update_layout(
+    xaxis_title='False Positive Rate',
+    yaxis_title='True Positive Rate'
+)
+fig.show()
+```
+
+## Precision-Recall Curves
+
+Plotting the PR curve is very similar to plotting the ROC curve. The following examples are slightly modified from the previous examples:
+
+```python
+import plotly.express as px
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import precision_recall_curve, auc
+from sklearn.datasets import make_classification
+
+X, y = make_classification(n_samples=500, random_state=0)
+
+model = LogisticRegression()
+model.fit(X, y)
+y_score = model.predict_proba(X)[:, 1]
+
+precision, recall, thresholds = precision_recall_curve(y, y_score)
+
+fig = px.area(
+    x=recall, y=precision,
+    title=f'Precision-Recall Curve (AUC={auc(recall, precision):.4f})',
+    labels=dict(x='Recall', y='Precision')
+)
+fig.add_shape(
+    type='line', line=dict(dash='dash'),
+    x0=0, x1=1, y0=1, y1=0
+)
+fig.show()
+```
+
+In this example, we use the [average precision](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html) metric, which is an alternative scoring method to the area under the PR curve.
+
+```python
+import numpy as np
+import pandas as pd
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import precision_recall_curve, average_precision_score
+import plotly.graph_objects as go
+import plotly.express as px
+
+np.random.seed(0)
+
+# Artificially add noise to make task harder
+df = px.data.iris()
+samples = df.species.sample(n=30, random_state=0)
+np.random.shuffle(samples.values)
+df.loc[samples.index, 'species'] = samples.values
+
+# Define the inputs and outputs
+X = df.drop(columns=['species', 'species_id'])
+y = df['species']
+
+# Fit the model, then one-hot encode the labels (columns sorted like model.classes_)
+model = LogisticRegression(max_iter=200)
+model.fit(X, y)
+y_scores = model.predict_proba(X)
+y_onehot = pd.get_dummies(y)
+
+# Create an empty figure, and iteratively add new lines
+# every time we compute a new class
+fig = go.Figure()
+fig.add_shape(
+    type='line', line=dict(dash='dash'), 
+    x0=0, x1=1, y0=1, y1=0
+)
+
+for i in range(y_scores.shape[1]):
+    y_true = y_onehot.iloc[:, i]
+    y_score = y_scores[:, i]
+    
+    precision, recall, _ = precision_recall_curve(y_true, y_score)
+    auc_score = average_precision_score(y_true, y_score)
+    
+    name = f"{y_onehot.columns[i]} (AP={auc_score:.2f})"
+    fig.add_trace(go.Scatter(x=recall, y=precision, name=name, mode='lines'))
+
+fig.update_layout(
+    xaxis_title='Recall',
+    yaxis_title='Precision'
+)
+fig.show()
+```

From ca2494980ccd67e338c5791a6a4e70eb3a4bd42c Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Fri, 13 Mar 2020 15:21:35 -0400
Subject: [PATCH 17/35] ML Docs: Remove 2 sections

Removed:
* 3D scatter
* Splom
---
 doc/python/ml-knn.md | 84 +++++++++-----------------------------------
 1 file changed, 16 insertions(+), 68 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 2bcab469875..adac06295f2 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -34,7 +34,20 @@ jupyter:
     thumbnail: thumbnail/knn-classification.png
 ---
 
-## Basic Binary Classification with `plotly.express`
+## Basic binary classification with kNN
+
+
+### Display training and test splits
+
+```python
+
+```
+
+### Visualize predictions on test split
+
+```python
+
+```
 
 ```python
 import numpy as np
@@ -113,7 +126,7 @@ fig.add_trace(
         showscale=False,
         colorscale=['Blue', 'Red'],
         opacity=0.4,
-        name='Confidence'
+        name='Score'
     )
 )
 fig.show()
@@ -150,7 +163,7 @@ Z = Z.reshape(ll.shape)
 proba = clf.predict_proba(np.c_[ll.ravel(), ww.ravel()])
 proba = proba.reshape(ll.shape + (3,))
 
-fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species', width=1000, height=1000)
+fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
 fig.update_traces(marker_size=10, marker_line_width=1)
 fig.add_trace(
     go.Heatmap(
@@ -173,77 +186,12 @@ fig.add_trace(
 fig.show()
 ```
 
-## 3D Classification with `px.scatter_3d`
-
-```python
-import numpy as np
-import plotly.express as px
-import plotly.graph_objects as go
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.model_selection import train_test_split
-
-df = px.data.iris()
-features = ["sepal_width", "sepal_length", "petal_width"]
-
-X = df[features]
-y = df.species
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
-
-# Create classifier, run predictions on grid
-clf = KNeighborsClassifier(15, weights='distance')
-clf.fit(X_train, y_train)
-y_pred = clf.predict(X_test)
-y_score = clf.predict_proba(X_test)
-y_score = np.around(y_score.max(axis=1), 4)
-
-fig = px.scatter_3d(
-    X_test, 
-    x='sepal_length', 
-    y='sepal_width', 
-    z='petal_width', 
-    symbol=y_pred,
-    color=y_score,
-    labels={'symbol': 'prediction', 'color': 'score'}
-)
-fig.update_layout(legend=dict(x=0, y=0))
-fig.show()
-```
-
-## High Dimension Visualization with `px.scatter_matrix`
-
-If you need to visualize classifications that go beyond 3D, you can use the [scatter plot matrix](https://plot.ly/python/splom/).
-
-```python
-import numpy as np
-import plotly.express as px
-import plotly.graph_objects as go
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.model_selection import train_test_split
-
-df = px.data.iris()
-features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]
-
-X = df[features]
-y = df.species
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
-
-# Create classifier, run predictions on grid
-clf = KNeighborsClassifier(15, weights='distance')
-clf.fit(X_train, y_train)
-y_pred = clf.predict(X_test)
-
-fig = px.scatter_matrix(X_test, dimensions=features, color=y_pred, labels={'color': 'prediction'})
-fig.show()
-```
-
 ### Reference
 
 Learn more about `px`, `go.Contour`, and `go.Heatmap` here:
 * https://plot.ly/python/plotly-express/
 * https://plot.ly/python/heatmaps/
 * https://plot.ly/python/contour-plots/
-* https://plot.ly/python/3d-scatter-plots/
-* https://plot.ly/python/splom/
 
 This tutorial was inspired by amazing examples from the official scikit-learn docs:
 * https://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html

From 99621b08a4c5d2312caed7d39a36ab026f848a8f Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Fri, 13 Mar 2020 15:25:58 -0400
Subject: [PATCH 18/35] ML Docs, Regression: fix import, update titles, colors

---
 doc/python/ml-regression.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index e37e1d04e95..6f7f56c51b6 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -47,7 +47,7 @@ import plotly.express as px
 df = px.data.tips()
 fig = px.scatter(
     df, x='total_bill', y='tip', opacity=0.65,
-    trendline='ols', trendline_color_override='red'
+    trendline='ols', trendline_color_override='darkblue'
 )
 fig.show()
 ```
@@ -223,6 +223,7 @@ fig.show()
 When you are fitting a linear regression, you want to often know what feature matters the most in your regression's output.
 
 ```python
+import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.linear_model import LinearRegression
@@ -356,14 +357,14 @@ fig = px.scatter(
 fig.show()
 ```
 
-## Visualize regularization across different cross-validation folds
+## Visualize regularization across cross-validation folds
 
 
 In this example, we show how to plot the results of various $\alpha$ penalization values from the results of cross-validation using scikit-learn's `LassoCV`. This is useful to see how much the error of the optimal alpha actually varies across CV folds.
 
 ```python
-import pandas as pd
 import numpy as np
+import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.linear_model import LassoCV

From 0cde621d14a5aeeedffed5cb3b07aa9a7f321428 Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Fri, 13 Mar 2020 19:35:06 -0400
Subject: [PATCH 19/35] ML Docs: Update all kNN sections based on discussions

---
 doc/python/ml-knn.md | 141 +++++++++++++++++++++++++++++++------------
 1 file changed, 103 insertions(+), 38 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index adac06295f2..d53b18e9f95 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -36,56 +36,79 @@ jupyter:
 
 ## Basic binary classification with kNN
 
+This section gets us started with displaying basic binary classification using 2D data. We first show how to display training versus testing data using [various marker styles](https://plot.ly/python/marker-style/), then demonstrate how to evaluate a kNN classifier's performance on the **test split** using a continuous color gradient to indicate the model's predicted score.
 
-### Display training and test splits
-
-```python
-
-```
 
-### Visualize predictions on test split
+### Display training and test splits
 
-```python
 
-```
+Here, we display all the negative labels as squares, and positive labels as circles. We differentiate the training and test set by adding a dot to the center of test data.
 
 ```python
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.datasets import make_moons
+from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
 
 X, y = make_moons(noise=0.3, random_state=0)
-X_test, _ = make_moons(noise=0.3, random_state=1)
-
-clf = KNeighborsClassifier(15)
-clf.fit(X, y.astype(str))  # Fit on training set
-y_pred = clf.predict(X_test)  # Predict on new data
-
-fig = px.scatter(x=X_test[:, 0], y=X_test[:, 1], color=y_pred, labels={'color': 'predicted'})
-fig.update_traces(marker_size=10)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y.astype(str), test_size=0.25, random_state=0)
+
+trace_specs = [
+    [X_train, y_train, '0', 'Train', 'square'],
+    [X_train, y_train, '1', 'Train', 'circle'],
+    [X_test, y_test, '0', 'Test', 'square-dot'],
+    [X_test, y_test, '1', 'Test', 'circle-dot']
+]
+
+fig = go.Figure(data=[
+    go.Scatter(
+        x=X[y==label, 0], y=X[y==label, 1],
+        name=f'{split} Split, Label {label}', 
+        mode='markers', marker_symbol=marker
+    )
+    for X, y, label, split, marker in trace_specs
+])
+fig.update_traces(
+    marker_size=12, marker_line_width=1.5, 
+    marker_color="lightyellow"
+)
 fig.show()
 ```
 
-## Visualize Binary Prediction Scores
+### Visualize predictions on test split
+
+
+Now, we evaluate the model only on the test set. Notice that `px.scatter` only requires one function call to plot both negative and positive labels, and can additionally set a continuous color scale based on the `y_score` output by our kNN model.
 
 ```python
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
-from sklearn.datasets import make_classification
+from sklearn.datasets import make_moons
+from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
 
-X, y = make_classification(n_features=2, n_redundant=0, random_state=0)
-X_test, _ = make_classification(n_features=2, n_redundant=0, random_state=1)
+# Load and split data
+X, y = make_moons(noise=0.3, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y.astype(str), test_size=0.25, random_state=0)
 
+# Fit the model on training data, predict on test data
 clf = KNeighborsClassifier(15)
-clf.fit(X, y)  # Fit on training set
-y_score = clf.predict_proba(X_test)[:, 1]  # Predict on new data
-
-fig = px.scatter(x=X_test[:, 0], y=X_test[:, 1], color=y_score, labels={'color': 'score'})
-fig.update_traces(marker_size=10)
+clf.fit(X_train, y_train)
+y_score = clf.predict_proba(X_test)[:, 1]
+
+fig = px.scatter(
+    X_test, x=0, y=1, 
+    color=y_score, color_continuous_scale='RdBu',
+    symbol=y_test, symbol_map={'0': 'square-dot', '1': 'circle-dot'},
+    labels={'symbol': 'Label', 'color': 'Score'}
+)
+fig.update_traces(marker_size=12, marker_line_width=1.5)
+fig.update_layout(legend_orientation='h')
 fig.show()
 ```
 
@@ -96,12 +119,16 @@ import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.datasets import make_moons
+from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
 
 mesh_size = .02
-margin = 1
+margin = 0.25
 
+# Load and split data
 X, y = make_moons(noise=0.3, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y.astype(str), test_size=0.25, random_state=0)
 
 # Create a mesh grid on which we will run our model
 x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
@@ -116,17 +143,36 @@ clf.fit(X, y)
 Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
 Z = Z.reshape(xx.shape)
 
-fig = px.scatter(X, x=0, y=1, color=y.astype(str), labels={'0':'', '1':''})
-fig.update_traces(marker_size=10, marker_line_width=1)
+trace_specs = [
+    [X_train, y_train, '0', 'Train', 'square'],
+    [X_train, y_train, '1', 'Train', 'circle'],
+    [X_test, y_test, '0', 'Test', 'square-dot'],
+    [X_test, y_test, '1', 'Test', 'circle-dot']
+]
+
+fig = go.Figure(data=[
+    go.Scatter(
+        x=X[y==label, 0], y=X[y==label, 1],
+        name=f'{split} Split, Label {label}', 
+        mode='markers', marker_symbol=marker
+    )
+    for X, y, label, split, marker in trace_specs
+])
+fig.update_traces(
+    marker_size=12, marker_line_width=1.5, 
+    marker_color="lightyellow"
+)
+
 fig.add_trace(
     go.Contour(
         x=xrange, 
         y=yrange, 
         z=Z, 
         showscale=False,
-        colorscale=['Blue', 'Red'],
+        colorscale='RdBu',
         opacity=0.4,
-        name='Score'
+        name='Score',
+        hoverinfo='skip'
     )
 )
 fig.show()
@@ -134,6 +180,8 @@ fig.show()
 
 ## Multi-class prediction confidence with `go.Heatmap`
 
+It is also possible to visualize the prediction confidence of the model using `go.Heatmap`. In this example, you can see how to compute how confident the model is about its prediction at every point in the 2D grid. Here, we define the confidence as the difference between the highest score and the score of the other classes summed, at a certain point.
+
 ```python
 import numpy as np
 import plotly.express as px
@@ -145,8 +193,9 @@ margin = 1
 
 # We will use the iris data, which is included in px
 df = px.data.iris()
-X = df[['sepal_length', 'sepal_width']]
-y = df.species_id
+df_train, df_test = train_test_split(df, test_size=0.25, random_state=0)
+X_train = df_train[['sepal_length', 'sepal_width']]
+y_train = df_train.species_id
 
 # Create a mesh grid on which we will run our model
 l_min, l_max = df.sepal_length.min() - margin, df.sepal_length.max() + margin
@@ -157,23 +206,35 @@ ll, ww = np.meshgrid(lrange, wrange)
 
 # Create classifier, run predictions on grid
 clf = KNeighborsClassifier(15, weights='distance')
-clf.fit(X, y)
+clf.fit(X_train, y_train)
 Z = clf.predict(np.c_[ll.ravel(), ww.ravel()])
 Z = Z.reshape(ll.shape)
 proba = clf.predict_proba(np.c_[ll.ravel(), ww.ravel()])
 proba = proba.reshape(ll.shape + (3,))
 
-fig = px.scatter(df, x='sepal_length', y='sepal_width', color='species')
-fig.update_traces(marker_size=10, marker_line_width=1)
+# Compute the confidence, which is the difference
+diff = proba.max(axis=-1) - (proba.sum(axis=-1) - proba.max(axis=-1))
+
+fig = px.scatter(
+    df_test, x='sepal_length', y='sepal_width',
+    symbol='species', 
+    symbol_map={
+        'setosa': 'square-dot', 
+        'versicolor': 'circle-dot', 
+        'virginica': 'diamond-dot'},
+)
+fig.update_traces(
+    marker_size=12, marker_line_width=1.5, 
+    marker_color="lightyellow"
+)
 fig.add_trace(
     go.Heatmap(
         x=lrange, 
         y=wrange, 
-        z=Z, 
-        showscale=False,
-        colorscale=[[0.0, 'blue'], [0.5, 'red'], [1.0, 'green']],
+        z=diff, 
         opacity=0.25,
         customdata=proba,
+        colorscale='RdBu',
         hovertemplate=(
             'sepal length: %{x} <br>'
             'sepal width: %{y} <br>'
@@ -183,6 +244,10 @@ fig.add_trace(
         )
     )
 )
+fig.update_layout(
+    legend_orientation='h',
+    title='Prediction Confidence on Test Split'
+)
 fig.show()
 ```
 

From 7447304d8ba88e44cdc2d05a77c97212410b59c3 Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Fri, 13 Mar 2020 22:52:45 -0400
Subject: [PATCH 20/35] ML Docs: Update Regression notebook

Added a preliminary section that introduces roc curves
---
 doc/python/ml-roc-pr.md | 77 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 3 deletions(-)

diff --git a/doc/python/ml-roc-pr.md b/doc/python/ml-roc-pr.md
index 8c1bc6becb4..44f9e53ff48 100644
--- a/doc/python/ml-roc-pr.md
+++ b/doc/python/ml-roc-pr.md
@@ -34,7 +34,58 @@ jupyter:
     thumbnail: thumbnail/ml-roc-pr.png
 ---
 
-## Basic Binary ROC Curve
+## Preliminary plots
+
+Before diving into the receiver operating characteristic (ROC) curve, we will look at two plots that will give some context to the thresholds mechanism behind the ROC and PR curves.
+
+In the histogram, we observe that the scores are spread such that most of the positive labels are binned near 1, and a lot of the negative labels are close to 0. When we set a threshold on the score, all of the bins to its left will be classified as 0's, and everything to the right will be 1's. There are obviously a few outliers, such as **negative** samples that our model gave a high score, and *positive* samples with a low score. If we set a threshold right in the middle, those outliers will respectively become **false positives** and *false negatives*.
+
+As we adjust thresholds, the number of false positives will increase or decrease, and at the same time the number of true positives will also change; this is shown in the second plot. As you can see, the model seems to perform fairly well, because the true positive rate decreases slowly, whereas the false positive rate decreases sharply as we increase the threshold. Those two lines each represent a dimension of the ROC curve.
+
+```python
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import roc_curve, auc
+from sklearn.datasets import make_classification
+
+X, y = make_classification(n_samples=500, random_state=0)
+
+model = LogisticRegression()
+model.fit(X, y)
+y_score = model.predict_proba(X)[:, 1]
+fpr, tpr, thresholds = roc_curve(y, y_score)
+
+# The histogram of scores compared to true labels
+fig_hist = px.histogram(
+    x=y_score, color=y, nbins=50, 
+    labels=dict(color='True Labels', x='Score')
+)
+
+# Evaluating model performance at various thresholds
+fig_thresh = go.Figure([
+    go.Scatter(x=thresholds, y=fpr, name='False Positive Rate'),
+    go.Scatter(x=thresholds, y=tpr, name='True Positive Rate')
+])
+fig_thresh.update_layout(
+    title='TPR and FPR at every threshold',
+    xaxis_title='Threshold',
+    yaxis_title='Rate',
+    yaxis=dict(scaleanchor="x", scaleratio=1),
+    xaxis=dict(constrain='domain')
+)
+fig_thresh.update_xaxes(range=[0, 1])
+
+# Display plots
+fig_hist.show()
+fig_thresh.show()
+```
+
+## Basic binary ROC curve
+
+Notice how this ROC curve looks similar to the True Positive Rate curve from the previous plot. This is because they are the same curve, except the x-axis consists of increasing values of FPR instead of threshold, which is why the line is flipped and distorted.
+
+We also display the area under the ROC curve (ROC AUC), which is fairly high, thus consistent with our interpretation of the previous plots.
 
 ```python
 import plotly.express as px
@@ -59,6 +110,10 @@ fig.add_shape(
     type='line', line=dict(dash='dash'), 
     x0=0, x1=1, y0=0, y1=1
 )
+fig.update_layout(
+    yaxis=dict(scaleanchor="x", scaleratio=1),
+    xaxis=dict(constrain='domain')
+)
 fig.show()
 ```
 
@@ -112,7 +167,9 @@ for i in range(y_scores.shape[1]):
 
 fig.update_layout(
     xaxis_title='False Positive Rate',
-    yaxis_title='True Positive Rate'
+    yaxis_title='True Positive Rate',
+    yaxis=dict(scaleanchor="x", scaleratio=1),
+    xaxis=dict(constrain='domain')
 )
 fig.show()
 ```
@@ -144,6 +201,11 @@ fig.add_shape(
     type='line', line=dict(dash='dash'), 
     x0=0, x1=1, y0=1, y1=0
 )
+fig.update_layout(
+    yaxis=dict(scaleanchor="x", scaleratio=1),
+    xaxis=dict(constrain='domain')
+)
+
 fig.show()
 ```
 
@@ -195,7 +257,16 @@ for i in range(y_scores.shape[1]):
 
 fig.update_layout(
     xaxis_title='Recall',
-    yaxis_title='Precision'
+    yaxis_title='Precision',
+    yaxis=dict(scaleanchor="x", scaleratio=1),
+    xaxis=dict(constrain='domain')
 )
 fig.show()
 ```
+
+## References
+
+Learn more about `px`, `px.area`, and `px.histogram` here:
+* https://plot.ly/python/histograms/
+* https://plot.ly/python/filled-area-plots/
+* https://plot.ly/python/line-charts/

From 7ab73cb4ff1a87b4e880bae800d5878e1fdc2458 Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Fri, 13 Mar 2020 22:52:59 -0400
Subject: [PATCH 21/35] ML Docs: Updated PCA notebook

Added loadings, moved high-dimensional analysis first
---
 doc/python/ml-pca.md | 165 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 138 insertions(+), 27 deletions(-)

diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md
index 105edd8af66..6fa3db760ef 100644
--- a/doc/python/ml-pca.md
+++ b/doc/python/ml-pca.md
@@ -34,73 +34,88 @@ jupyter:
     thumbnail: thumbnail/ml-pca.png
 ---
 
-## Basic PCA Scatter Plot
+## High-dimensional PCA Analysis with  `px.scatter_matrix`
 
-This example shows you how to simply visualize the first two principal components of a PCA, by reducing a dataset of 4 dimensions to 2D. It uses scikit-learn's `PCA`.
+
+### Visualize all the original dimensions
+
+First, let's plot all the features and see how the `species` in the Iris dataset are grouped. In a [splom](https://plot.ly/python/splom/), each subplot displays a feature against another, so if we have $N$ features we have a $N \times N$ matrix.
+
+In our example, we are plotting all 4 features from the Iris dataset, thus we can see how `sepal_width` is compared against `sepal_length`, then against `petal_width`, and so forth. Keep in mind how some pairs of features can more easily separate different species.
 
 ```python
 import plotly.express as px
-from sklearn.decomposition import PCA
 
 df = px.data.iris()
-X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
+features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]
 
-pca = PCA(n_components=2)
-components = pca.fit_transform(X)
-
-fig = px.scatter(x=components[:, 0], y=components[:, 1], color=df['species'])
+fig = px.scatter_matrix(
+    df,
+    dimensions=features,
+    color="species"
+)
+fig.update_traces(diagonal_visible=False)
 fig.show()
 ```
 
-## Visualize PCA with `px.scatter_3d`
+###  Visualize all the principal components
+
+Now, we apply `PCA` to the same dataset, and retrieve **all** the components. We use the same `px.scatter_matrix` trace to display our results, but this time our features are the resulting *principal components*, ordered by how much variance they are able to explain.
 
-Just like the basic PCA plot, this let you visualize the first 3 dimensions. This additionally displays the total variance explained by those components.
+The importance of explained variance is demonstrated in the example below. The subplot between PC3 and PC4 is clearly unable to separate each class, whereas the subplot between PC1 and PC2 shows a clear separation between each species.
 
 ```python
 import plotly.express as px
 from sklearn.decomposition import PCA
 
 df = px.data.iris()
-X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
-
-pca = PCA(n_components=3)
-components = pca.fit_transform(X)
+features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]
 
-total_var = pca.explained_variance_ratio_.sum() * 100
+pca = PCA()
+components = pca.fit_transform(df[features])
+labels = {
+    str(i): f"PC {i+1} ({var:.1f}%)" 
+    for i, var in enumerate(pca.explained_variance_ratio_ * 100)
+}
 
-fig = px.scatter_3d(
-    x=components[:, 0], y=components[:, 1], z=components[:, 2],
-    color=df['species'], 
-    title=f'Total Explained Variance: {total_var:.2f}%',
-    labels={'x': 'PC 1', 'y': 'PC 2', 'z': 'PC 3'},
+fig = px.scatter_matrix(
+    components,
+    labels=labels,
+    dimensions=range(4),
+    color=df["species"]
 )
+fig.update_traces(diagonal_visible=False)
 fig.show()
 ```
 
-## Plot high-dimensional components with `px.scatter_matrix`
+### Visualize a subset of the principal components
+
+When you have too many features to visualize, you might be interested in only visualizing the most relevant components. Those components often capture a majority of the [explained variance](https://en.wikipedia.org/wiki/Explained_variation), which is a good way to tell if those components are sufficient for modelling this dataset.
 
-If you need to visualize more than 3 dimensions, you can use scatter plot matrices.
+In the example below, our dataset contains 13 features, but we only select the first 4 components, since they explain over 99% of the total variance.
 
 ```python
 import pandas as pd
+import plotly.express as px
 from sklearn.decomposition import PCA
 from sklearn.datasets import load_boston
 
 boston = load_boston()
 df = pd.DataFrame(boston.data, columns=boston.feature_names)
+n_components = 4
 
-pca = PCA(n_components=5)
+pca = PCA(n_components=n_components)
 components = pca.fit_transform(df)
 
 total_var = pca.explained_variance_ratio_.sum() * 100
 
-labels = {str(i): f"PC {i+1}" for i in range(5)}
+labels = {str(i): f"PC {i+1}" for i in range(n_components)}
 labels['color'] = 'Median Price'
 
 fig = px.scatter_matrix(
     components, 
     color=boston.target,
-    dimensions=range(5),
+    dimensions=range(n_components),
     labels=labels,
     title=f'Total Explained Variance: {total_var:.2f}%',
 )
@@ -108,13 +123,56 @@ fig.update_traces(diagonal_visible=False)
 fig.show()
 ```
 
+## 2D PCA Scatter Plot
+
+In the previous examples, you saw how to visualize high-dimensional PCs. In this example, we show you how to simply visualize the first two principal components of a PCA, by reducing a dataset of 4 dimensions to 2D.
+
+```python
+import plotly.express as px
+from sklearn.decomposition import PCA
+
+df = px.data.iris()
+X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
+
+pca = PCA(n_components=2)
+components = pca.fit_transform(X)
+
+fig = px.scatter(components, x=0, y=1, color=df['species'])
+fig.show()
+```
+
+## Visualize PCA with `px.scatter_3d`
+
+With `px.scatter_3d`, you can visualize an additional dimension, which lets you capture even more variance.
+
+```python
+import plotly.express as px
+from sklearn.decomposition import PCA
+
+df = px.data.iris()
+X = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
+
+pca = PCA(n_components=3)
+components = pca.fit_transform(X)
+
+total_var = pca.explained_variance_ratio_.sum() * 100
+
+fig = px.scatter_3d(
+    components, x=0, y=1, z=2, color=df['species'], 
+    title=f'Total Explained Variance: {total_var:.2f}%',
+    labels={'0': 'PC 1', '1': 'PC 2', '2': 'PC 3'}
+)
+fig.show()
+```
+
 ## Plotting explained variance
 
-Often, you might be interested in seeing how much variance the PCA is able to explain as you increase the number of components, in order to decide how many dimensions to ultimately keep or analyze. This example shows you how to quickly plot the cumulative sum of explained variance for a high-dimensional dataset like [Diabetes](https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset).
+Often, you might be interested in seeing how much variance PCA is able to explain as you increase the number of components, in order to decide how many dimensions to ultimately keep or analyze. This example shows you how to quickly plot the cumulative sum of explained variance for a high-dimensional dataset like [Diabetes](https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset).
 
 ```python
 import numpy as np
 import pandas as pd
+import plotly.express as px
 from sklearn.decomposition import PCA
 from sklearn.datasets import load_diabetes
 
@@ -132,4 +190,57 @@ px.area(
 )
 ```
 
-## Visualize loadings
+## Visualize Loadings
+
+It is also possible to visualize loadings using `shapes`, and use `annotations` to indicate which feature a certain loading originally belongs to. Here, we define loadings as:
+
+$$
+loadings = eigenvectors \cdot \sqrt{eigenvalues}
+$$
+
+```python
+import plotly.express as px
+from sklearn.decomposition import PCA
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+
+df = px.data.iris()
+features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
+X = df[features]
+
+pca = PCA(n_components=2)
+components = pca.fit_transform(X)
+
+loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
+
+fig = px.scatter(components, x=0, y=1, color=df['species'])
+
+for i, feature in enumerate(features):
+    fig.add_shape(
+        type='line',
+        x0=0, y0=0,
+        x1=loadings[i, 0],
+        y1=loadings[i, 1]
+    )
+    fig.add_annotation(
+        x=loadings[i, 0],
+        y=loadings[i, 1],
+        ax=0, ay=0,
+        xanchor="center",
+        yanchor="bottom",
+        text=feature,
+    )
+fig.show()
+```
+
+## References
+
+Learn more about `px`, `px.scatter_3d`, and `px.scatter_matrix` here:
+* https://plot.ly/python/plotly-express/
+* https://plot.ly/python/3d-scatter-plots/
+* https://plot.ly/python/splom/
+
+The following resources offer an in-depth overview of PCA and explained variance:
+* https://en.wikipedia.org/wiki/Explained_variation
+* https://scikit-learn.org/stable/modules/decomposition.html#pca
+* https://stats.stackexchange.com/questions/2691/making-sense-of-principal-component-analysis-eigenvectors-eigenvalues/140579#140579

From 0e8b5d64daecff24ee7c82ee6090ac508b495ce7 Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Tue, 17 Mar 2020 12:59:57 -0400
Subject: [PATCH 22/35] ML Docs: Update knn and regression based on Emma's
 reviews

---
 doc/python/ml-knn.md        |  2 +-
 doc/python/ml-regression.md | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index d53b18e9f95..74b830a3b3f 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -105,7 +105,7 @@ fig = px.scatter(
     X_test, x=0, y=1, 
     color=y_score, color_continuous_scale='RdBu',
     symbol=y_test, symbol_map={'0': 'square-dot', '1': 'circle-dot'},
-    labels={'symbol': 'Label', 'color': 'Score'}
+    labels={'symbol': 'label', 'color': 'score of <br>first class'}
 )
 fig.update_traces(marker_size=12, marker_line_width=1.5)
 fig.update_layout(legend_orientation='h')
diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 6f7f56c51b6..d1945f742b3 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -39,7 +39,7 @@ jupyter:
 ### Ordinary Least Square (OLS) with `plotly.express`
 
 
-This example shows how to use `plotly.express`'s `trendline` parameter to train a simply Ordinary Least Square (OLS) for predicting the tips servers will receive based on the value of the total bill.
+This example shows how to use `plotly.express`'s `trendline` parameter to train a simple Ordinary Least Squares (OLS) model for predicting the tips waiters will receive based on the value of the total bill.
 
 ```python
 import plotly.express as px
@@ -88,7 +88,7 @@ from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
 
 df = px.data.tips()
-X = df.total_bill.values.reshape(-1, 1)
+X = df.total_bill[:, None]
 X_train, X_test, y_train, y_test = train_test_split(X, df.tip, random_state=0)
 
 model = LinearRegression()
@@ -162,8 +162,8 @@ X = df.total_bill.values.reshape(-1, 1)
 x_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
 
 fig = px.scatter(df, x='total_bill', y='tip', opacity=0.65)
-for n_features in [1, 2, 3, 4]:
-    poly = PolynomialFeatures(n_features)
+for degree in [1, 2, 3, 4]:
+    poly = PolynomialFeatures(degree)
     poly.fit(X)
     X_poly = poly.transform(X)
     x_range_poly = poly.transform(x_range)
@@ -180,13 +180,13 @@ fig.show()
 
 ## 3D regression surface with `px.scatter_3d` and `go.Surface`
 
-Visualize the decision plane of your model whenever you have more than one variable in your input data.
+Visualize the decision plane of your model whenever you have more than one variable in your input data. Here, we will use [`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html), which is a Support Vector Machine (SVM) model specifically designed for regression.
 
 ```python
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
-from sklearn.neighbors import KNeighborsRegressor
+from sklearn.svm import SVR
 
 mesh_size = .02
 margin = 0
@@ -197,8 +197,8 @@ X = df[['sepal_width', 'sepal_length']]
 y = df['petal_width']
 
 # Condition the model on sepal width and length, predict the petal width
-knn = KNeighborsRegressor(10, weights='distance')
-knn.fit(X, y)
+model = SVR(C=1.)
+model.fit(X, y)
 
 # Create a mesh grid on which we will run our model
 x_min, x_max = X.sepal_width.min() - margin, X.sepal_width.max() + margin
@@ -207,8 +207,8 @@ xrange = np.arange(x_min, x_max, mesh_size)
 yrange = np.arange(y_min, y_max, mesh_size)
 xx, yy = np.meshgrid(xrange, yrange)
 
-# Run kNN
-pred = knn.predict(np.c_[xx.ravel(), yy.ravel()])
+# Run model
+pred = model.predict(np.c_[xx.ravel(), yy.ravel()])
 pred = pred.reshape(xx.shape)
 
 # Generate the plot
@@ -271,7 +271,7 @@ model = LinearRegression()
 model.fit(X, y)
 y_pred = model.predict(X)
 
-fig = px.scatter(x=y_pred, y=y, labels={'x': 'prediction', 'y': 'actual'})
+fig = px.scatter(x=y, y=y_pred, labels={'x': 'ground truth', 'y': 'prediction'})
 fig.add_shape(
     type="line", line=dict(dash='dash'),
     x0=y.min(), y0=y.min(), 
@@ -308,10 +308,11 @@ model.fit(X_train, y_train)
 df['prediction'] = model.predict(X)
 
 fig = px.scatter(
-    df, x='prediction', y='petal_width',
+    df, x='petal_width', y='prediction',
     marginal_x='histogram', marginal_y='histogram',
     color='split', trendline='ols'
 )
+fig.update_traces(histnorm='probability', selector={'type':'histogram'})
 fig.add_shape(
     type="line", line=dict(dash='dash'),
     x0=y.min(), y0=y.min(), 

From 3bb49a3d960c05b964ce9298f23c3b1959b0d163 Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Tue, 17 Mar 2020 15:11:36 -0400
Subject: [PATCH 23/35] ML Docs: Update header description

---
 doc/python/ml-knn.md    | 2 +-
 doc/python/ml-pca.md    | 2 +-
 doc/python/ml-roc-pr.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 74b830a3b3f..fd5e87668e9 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -23,7 +23,7 @@ jupyter:
     version: 3.7.6
   plotly:
     description: Visualize scikit-learn's k-Nearest Neighbors (kNN) classification
-      with Plotly
+      in Python with Plotly.
     display_as: ai_ml
     language: python
     layout: base
diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md
index 6fa3db760ef..41cab7a64ff 100644
--- a/doc/python/ml-pca.md
+++ b/doc/python/ml-pca.md
@@ -23,7 +23,7 @@ jupyter:
     version: 3.7.6
   plotly:
     description: Visualize Principle Component Analysis (PCA) of your high-dimensional
-      data with Plotly on Python.
+      data in Python with Plotly.
     display_as: ai_ml
     language: python
     layout: base
diff --git a/doc/python/ml-roc-pr.md b/doc/python/ml-roc-pr.md
index 44f9e53ff48..ad30cd951b3 100644
--- a/doc/python/ml-roc-pr.md
+++ b/doc/python/ml-roc-pr.md
@@ -23,7 +23,7 @@ jupyter:
     version: 3.7.6
   plotly:
     description: Interpret the results of your classification using Receiver Operating
-      Characteristics (ROC) and Precision-Recall (PR) Curves using Plotly on Python.
+      Characteristics (ROC) and Precision-Recall (PR) Curves in Python with Plotly.
     display_as: ai_ml
     language: python
     layout: base

From 895231f9bf56d95495339c634ebaae7a66099af1 Mon Sep 17 00:00:00 2001
From: xhlu <xhlperso@gmail.com>
Date: Tue, 17 Mar 2020 16:30:58 -0400
Subject: [PATCH 24/35] ML Docs: Add t-SNE/UMAP notebook (read todo)

TODO: Add thumbnail, references, description of sections
---
 doc/python/tsne-umap-projections.md | 149 ++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 doc/python/tsne-umap-projections.md

diff --git a/doc/python/tsne-umap-projections.md b/doc/python/tsne-umap-projections.md
new file mode 100644
index 00000000000..a2a2aa78604
--- /dev/null
+++ b/doc/python/tsne-umap-projections.md
@@ -0,0 +1,149 @@
+---
+jupyter:
+  jupytext:
+    notebook_metadata_filter: all
+    text_representation:
+      extension: .md
+      format_name: markdown
+      format_version: '1.1'
+      jupytext_version: 1.1.1
+  kernelspec:
+    display_name: Python 3
+    language: python
+    name: python3
+  language_info:
+    codemirror_mode:
+      name: ipython
+      version: 3
+    file_extension: .py
+    mimetype: text/x-python
+    name: python
+    nbconvert_exporter: python
+    pygments_lexer: ipython3
+    version: 3.7.6
+  plotly:
+    description: Visualize scikit-learn's k-Nearest Neighbors (kNN) classification
+      in Python with Plotly.
+    display_as: ai_ml
+    language: python
+    layout: base
+    name: t-SNE and UMAP projections
+    order: 1
+    page_type: example_index
+    permalink: python/t-sne-and-umap-projections/
+    thumbnail: thumbnail/tsne-umap-projections.png
+---
+
+## Basic t-SNE projections
+
+
+### Visualizing high-dimensional data with `px.scatter_matrix`
+
+```python
+import plotly.express as px
+
+df = px.data.iris()
+features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]
+fig = px.scatter_matrix(df, dimensions=features, color="species")
+fig.show()
+```
+
+### Project data into 2D with t-SNE and `px.scatter`
+
+```python
+from sklearn.manifold import TSNE
+import plotly.express as px
+
+df = px.data.iris()
+
+features = df.loc[:, :'petal_width']
+
+tsne = TSNE(n_components=2, random_state=0)
+projections = tsne.fit_transform(features)
+
+fig = px.scatter(
+    projections, x=0, y=1, 
+    color=df.species, labels={'color': 'species'}
+)
+fig.show()
+```
+
+### Project data into 3D with t-SNE and `px.scatter_3d`
+
+```python
+from sklearn.manifold import TSNE
+import plotly.express as px
+
+df = px.data.iris()
+
+features = df.loc[:, :'petal_width']
+
+tsne = TSNE(n_components=3, random_state=0)
+projections = tsne.fit_transform(features, )
+
+fig = px.scatter_3d(
+    projections, x=0, y=1, z=2, 
+    color=df.species, labels={'color': 'species'}
+)
+fig.update_traces(marker_size=8)
+fig.show()
+```
+
+## Projections with UMAP
+
+Just like t-SNE, [UMAP](https://umap-learn.readthedocs.io/en/latest/index.html) is a dimensionality reduction technique specifically designed for visualizing complex data in low dimensions (2D or 3D). As the number of data points increases, [UMAP becomes more time efficient](https://umap-learn.readthedocs.io/en/latest/benchmarking.html) compared to t-SNE.
+
+In the example below, we see how easy it is to use UMAP as a drop-in replacement for scikit-learn's `manifold.TSNE`.
+
+```python
+from umap import UMAP
+import plotly.express as px
+
+df = px.data.iris()
+
+features = df.loc[:, :'petal_width']
+
+umap_2d = UMAP(n_components=2, init='random', random_state=0)
+umap_3d = UMAP(n_components=3, init='random', random_state=0)
+
+proj_2d = umap_2d.fit_transform(features)
+proj_3d = umap_3d.fit_transform(features)
+
+fig_2d = px.scatter(
+    proj_2d, x=0, y=1,
+    color=df.species, labels={'color': 'species'}
+)
+fig_3d = px.scatter_3d(
+    proj_3d, x=0, y=1, z=2, 
+    color=df.species, labels={'color': 'species'}
+)
+fig_3d.update_traces(marker_size=5)
+
+fig_2d.show()
+fig_3d.show()
+```
+
+## Visualizing image datasets
+
+In the following example, we show how to visualize large image datasets using UMAP. Here, we use [`load_digits`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html), a subset of the famous MNIST dataset that was downsized to 8x8 and flattened to 64 dimensions.
+
+```python
+import plotly.express as px
+from sklearn.datasets import load_digits
+from umap import UMAP
+
+digits = load_digits()
+
+umap_2d = UMAP(random_state=0)
+umap_2d.fit(digits.data)
+
+projections = umap_2d.transform(digits.data)
+
+fig = px.scatter(
+    projections, x=0, y=1, 
+    color=digits.target.astype(str), labels={'color': 'digit'}
+)
+fig.show()
+```
+
+### Reference

From 802d1ef15a2cd0f4c3f3636a97a6659c52a39bf4 Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 13:38:36 -0400
Subject: [PATCH 25/35] ML Docs: More explanations for the KNN section

---
 doc/python/ml-knn.md | 73 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 66 insertions(+), 7 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index fd5e87668e9..e22be45f43a 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -20,7 +20,7 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.7.6
+    version: 3.7.7
   plotly:
     description: Visualize scikit-learn's k-Nearest Neighbors (kNN) classification
       in Python with Plotly.
@@ -36,13 +36,19 @@ jupyter:
 
 ## Basic binary classification with kNN
 
-This section gets us started with displaying basic binary classification using 2D data. We first show how to display training versus testing data using [various marker styles](https://plot.ly/python/marker-style/), then demonstrate how to evaluate a kNN classifier's performance on the **test split** using a continuous color gradient to indicate the model's predicted score.
+This section gets us started with displaying basic binary classification using 2D data. We first show how to display training versus testing data using [various marker styles](https://plot.ly/python/marker-style/), then demonstrate how to evaluate our classifier's performance on the **test split** using a continuous color gradient to indicate the model's predicted score.
+
+We will use [Scikit-learn](https://scikit-learn.org/) for training our model and for loading and splitting data. Scikit-learn is a popular Machine Learning (ML) library that offers various tools for creating and training ML algorithms, feature engineering, data cleaning, and evaluating and testing models. It was designed to be accessible, and to work seamlessly with popular libraries like NumPy and Pandas.
+
+We will train a [k-Nearest Neighbors (kNN)](https://scikit-learn.org/stable/modules/neighbors.html) classifier. First, the model records the label of each training sample. Then, whenever we give it a new sample, it will look at the `k` closest samples from the training set to find the most common label, and assign it to our new sample.
 
 
 ### Display training and test splits
 
 
-Here, we display all the negative labels as squares, and positive labels as circles. We differentiate the training and test set by adding a dot to the center of test data.
+Using Scikit-learn, we first generate synthetic data that form the shape of a moon. We then split it into a training and testing set. Finally, we display the ground truth labels using [a scatter plot](https://plotly.com/python/line-and-scatter/).
+
+In the graph, we display all the negative labels as squares, and positive labels as circles. We differentiate the training and test set by adding a dot to the center of test data.
 
 ```python
 import numpy as np
@@ -52,6 +58,7 @@ from sklearn.datasets import make_moons
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
 
+# Load and split data
 X, y = make_moons(noise=0.3, random_state=0)
 X_train, X_test, y_train, y_test = train_test_split(
     X, y.astype(str), test_size=0.25, random_state=0)
@@ -78,10 +85,12 @@ fig.update_traces(
 fig.show()
 ```
 
-### Visualize predictions on test split
+### Visualize predictions on test split with [`plotly.express`](https://plotly.com/python/plotly-express/)
+
 
+Now, we train the kNN model on the same training data displayed in the previous graph. Then, we predict the confidence score of the model for each of the data points in the test set. We will use shapes to denote the true labels, and the color will indicate the confidence score assigned by the model.
 
-Now, we evaluate the model only on the test set. Notice that `px.scatter` only require 1 function call to plot both negative and positive labels, and can additionally set a continuous color scale based on the `y_score` output by our kNN model.
+Notice that `px.scatter` only requires 1 function call to plot both negative and positive labels, and can additionally set a continuous color scale based on the `y_score` output by our kNN model.
 
 ```python
 import numpy as np
@@ -114,6 +123,56 @@ fig.show()
 
 ## Probability Estimates with `go.Contour`
 
+Just like the previous example, we will first train our kNN model on the training set.
+
+Instead of predicting the confidence for the test set, we can predict the confidence map for the entire area that wraps around the dimensions of our dataset. To do this, we use [`np.meshgrid`](https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html) to create a grid, where the distance between each point is denoted by the `mesh_size` variable.
+
+Then, for each of those points, we will use our model to give a confidence score, and plot it with a [contour plot](https://plotly.com/python/contour-plots/).
+
+```python
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.datasets import make_moons
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+
+mesh_size = .02
+margin = 0.25
+
+# Load and split data
+X, y = make_moons(noise=0.3, random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y.astype(str), test_size=0.25, random_state=0)
+
+# Create a mesh grid on which we will run our model
+x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
+y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
+xrange = np.arange(x_min, x_max, mesh_size)
+yrange = np.arange(y_min, y_max, mesh_size)
+xx, yy = np.meshgrid(xrange, yrange)
+
+# Create classifier, run predictions on grid
+clf = KNeighborsClassifier(15, weights='uniform')
+clf.fit(X, y)
+Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
+Z = Z.reshape(xx.shape)
+
+
+# Plot the figure
+fig = go.Figure(data=[
+    go.Contour(
+        x=xrange, 
+        y=yrange, 
+        z=Z, 
+        colorscale='RdBu'
+    )    
+])
+fig.show()
+```
+
+Now, let's try to combine our `go.Contour` plot with the first scatter plot of our data points, so that we can visually compare the confidence of our model with the true labels.
+
 ```python
 import numpy as np
 import plotly.express as px
@@ -178,9 +237,9 @@ fig.add_trace(
 fig.show()
 ```
 
-## Multi-class prediction confidence with `go.Heatmap`
+## Multi-class prediction confidence with [`go.Heatmap`](https://plotly.com/python/heatmaps/)
 
-It is also possible to visualize the prediction confidence of the model using `go.Heatmap`. In this example, you can see how to compute how confident the model is about its prediction at every point in the 2D grid. Here, we define the confidence as the difference between the highest score and the score of the other classes summed, at a certain point.
+It is also possible to visualize the prediction confidence of the model using [heatmaps](https://plotly.com/python/heatmaps/). In this example, you can see how to compute how confident the model is about its prediction at every point in the 2D grid. Here, we define the confidence as the difference between the highest score and the score of the other classes summed, at a certain point.
 
 ```python
 import numpy as np

From 5cda611bfe6ca3757a0864594c1f0f6afb533591 Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 16:04:41 -0400
Subject: [PATCH 26/35] Rename Tsne tutorial

---
 .../{tsne-umap-projections.md => ml-tsne-umap-projections.md}   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename doc/python/{tsne-umap-projections.md => ml-tsne-umap-projections.md} (99%)

diff --git a/doc/python/tsne-umap-projections.md b/doc/python/ml-tsne-umap-projections.md
similarity index 99%
rename from doc/python/tsne-umap-projections.md
rename to doc/python/ml-tsne-umap-projections.md
index a2a2aa78604..f43f2e18d14 100644
--- a/doc/python/tsne-umap-projections.md
+++ b/doc/python/ml-tsne-umap-projections.md
@@ -20,7 +20,7 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.7.6
+    version: 3.7.7
   plotly:
     description: Visualize scikit-learn's k-Nearest Neighbors (kNN) classification
       in Python with Plotly.

From 46d93de46bf8163dc4c4e5d201c2787909338a3f Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 16:04:59 -0400
Subject: [PATCH 27/35] Update kNN page

---
 doc/python/ml-knn.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index e22be45f43a..6c86de79240 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -45,7 +45,6 @@ We will train a [k-Nearest Neighbors (kNN)](https://scikit-learn.org/stable/modu
 
 ### Display training and test splits
 
-
 Using Scikit-learn, we first generate synthetic data that form the shape of a moon. We then split it into a training and testing set. Finally, we display the ground truth labels using [a scatter plot](https://plotly.com/python/line-and-scatter/).
 
 In the graph, we display all the negative labels as squares, and positive labels as circles. We differentiate the training and test set by adding a dot to the center of test data.

From 38ef59d28d9300b4545eef7931c8363e926423fa Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 16:05:07 -0400
Subject: [PATCH 28/35] ML Docs: Update PCA page

---
 doc/python/ml-pca.md | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md
index 41cab7a64ff..5395fd3dc04 100644
--- a/doc/python/ml-pca.md
+++ b/doc/python/ml-pca.md
@@ -20,7 +20,7 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.7.6
+    version: 3.7.7
   plotly:
     description: Visualize Principle Component Analysis (PCA) of your high-dimensional
       data in Python with Plotly.
@@ -34,12 +34,21 @@ jupyter:
     thumbnail: thumbnail/ml-pca.png
 ---
 
+This page first shows how to visualize higher dimension data using various Plotly figures combined with dimensionality reduction (aka projection). Then, we dive into the specific details of our projection algorithm.
+
+We will use [Scikit-learn](https://scikit-learn.org/) to load one of the datasets, and apply dimensionality reduction. Scikit-learn is a popular Machine Learning (ML) library that offers various tools for creating and training ML algorithms, feature engineering, data cleaning, and evaluating and testing models. It was designed to be accessible, and to work seamlessly with popular libraries like NumPy and Pandas.
+
+
 ## High-dimensional PCA Analysis with  `px.scatter_matrix`
 
+The dimensionality reduction technique we will be using is called the [Principal Component Analysis (PCA)](https://scikit-learn.org/stable/modules/decomposition.html#pca). It is a powerful technique that arises from linear algebra and probability theory. In essence, it computes a matrix that represents the variation of your data ([covariance matrix/eigenvectors][covmatrix]), and ranks them by their relevance (explained variance/eigenvalues). For a video tutorial, see [this segment on PCA](https://youtu.be/rng04VJxUt4?t=98) from the Coursera ML course.
+
+[covmatrix]: https://stats.stackexchange.com/questions/2691/making-sense-of-principal-component-analysis-eigenvectors-eigenvalues#:~:text=As%20it%20is%20a%20square%20symmetric%20matrix%2C%20it%20can%20be%20diagonalized%20by%20choosing%20a%20new%20orthogonal%20coordinate%20system%2C%20given%20by%20its%20eigenvectors%20(incidentally%2C%20this%20is%20called%20spectral%20theorem)%3B%20corresponding%20eigenvalues%20will%20then%20be%20located%20on%20the%20diagonal.%20In%20this%20new%20coordinate%20system%2C%20the%20covariance%20matrix%20is%20diagonal%20and%20looks%20like%20that%3A
+
 
 ### Visualize all the original dimensions
 
-First, let's plot all the features and see how the `species` in the Iris dataset are grouped. In a [splom](https://plot.ly/python/splom/), each subplot displays a feature against another, so if we have $N$ features we have a $N \times N$ matrix.
+First, let's plot all the features and see how the `species` in the Iris dataset are grouped. In a [Scatter Plot Matrix (splom)](https://plot.ly/python/splom/), each subplot displays a feature against another, so if we have $N$ features we have a $N \times N$ matrix.
 
 In our example, we are plotting all 4 features from the Iris dataset, thus we can see how `sepal_width` is compared against `sepal_length`, then against `petal_width`, and so forth. Keep in mind how some pairs of features can more easily separate different species.
 
@@ -169,6 +178,8 @@ fig.show()
 
 Often, you might be interested in seeing how much variance PCA is able to explain as you increase the number of components, in order to decide how many dimensions to ultimately keep or analyze. This example shows you how to quickly plot the cumulative sum of explained variance for a high-dimensional dataset like [Diabetes](https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset).
 
+With a higher explained variance, you are able to capture more variability in your dataset, which could potentially lead to better performance when training your model. For a more mathematical explanation, see this [Q&A thread](https://stats.stackexchange.com/questions/22569/pca-and-proportion-of-variance-explained).
+
 ```python
 import numpy as np
 import pandas as pd
@@ -198,6 +209,8 @@ $$
 loadings = eigenvectors \cdot \sqrt{eigenvalues}
 $$
 
+For more details about the linear algebra behind eigenvectors and loadings, see this [Q&A thread](https://stats.stackexchange.com/questions/143905/loadings-vs-eigenvectors-in-pca-when-to-use-one-or-another).
+
 ```python
 import plotly.express as px
 from sklearn.decomposition import PCA
@@ -244,3 +257,5 @@ The following resources offer an in-depth overview of PCA and explained variance
 * https://en.wikipedia.org/wiki/Explained_variation
 * https://scikit-learn.org/stable/modules/decomposition.html#pca
 * https://stats.stackexchange.com/questions/2691/making-sense-of-principal-component-analysis-eigenvectors-eigenvalues/140579#140579
+* https://stats.stackexchange.com/questions/143905/loadings-vs-eigenvectors-in-pca-when-to-use-one-or-another
+* https://stats.stackexchange.com/questions/22569/pca-and-proportion-of-variance-explained

From a954d0d3631374b1c1ad520c577221af0a526111 Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 16:05:13 -0400
Subject: [PATCH 29/35] ML Docs: Update regression page

---
 doc/python/ml-regression.md | 38 +++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index d1945f742b3..2dd99e0b067 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -20,7 +20,7 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.7.6
+    version: 3.7.7
   plotly:
     description: Visualize regression in scikit-learn with Plotly.
     display_as: ai_ml
@@ -33,14 +33,29 @@ jupyter:
     thumbnail: thumbnail/ml-regression.png
 ---
 
+<!-- #region -->
+This page shows how to use Plotly charts for displaying various types of regression models, starting from simple models like [Linear Regression](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html), and progressively moving towards models like [Decision Tree][tree] and [Polynomial Features][poly]. We highlight various capabilities of Plotly, such as comparative analysis of the same model with different parameters, displaying LaTeX, [surface plots](https://plotly.com/python/3d-surface-plots/) for 3D data, and enhanced prediction error analysis with [Plotly Express](https://plotly.com/python/plotly-express/).
+
+We will use [Scikit-learn](https://scikit-learn.org/) to split and preprocess our data and train various regression models. Scikit-learn is a popular Machine Learning (ML) library that offers various tools for creating and training ML algorithms, feature engineering, data cleaning, and evaluating and testing models. It was designed to be accessible, and to work seamlessly with popular libraries like NumPy and Pandas.
+
+
+[lasso]: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html
+[tree]: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
+[poly]: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html
+<!-- #endregion -->
+
 ## Basic linear regression plots
 
+In this section, we show you how to apply a simple regression model for predicting tips a server will receive based on various client attributes (such as sex, time of the week, and whether they are a smoker).
 
-### Ordinary Least Square (OLS) with `plotly.express`
+We will be using [Linear Regression][lr], which is a simple model that fits an intercept (the mean tip received by a server), and adds a slope for each feature we use, such as the value of the total bill. We show you how to do that with both Plotly Express and Scikit-learn.
 
+### Ordinary Least Square (OLS) with `plotly.express`
 
 This example shows how to use `plotly.express`'s `trendline` parameter to train a simply Ordinary Least Square (OLS) for predicting the tips waiters will receive based on the value of the total bill.
 
+[lr]: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
+
 ```python
 import plotly.express as px
 
@@ -78,7 +93,7 @@ fig.show()
 
 ## Model generalization on unseen data
 
-Easily color your plot based on a predefined data split.
+With `go.Scatter`, you can easily color your plot based on a predefined data split. By coloring the training and the testing data points with different colors, you can easily see whether the model generalizes well to the test data.
 
 ```python
 import numpy as np
@@ -108,7 +123,11 @@ fig.show()
 
 ## Comparing different kNN models parameters
 
-Compare the performance of two different models on the same dataset. This can be easily combined with discrete color legends from `px`, such as coloring by the assigned `sex`.
+In addition to linear regression, it's possible to fit the same data using [k-Nearest Neighbors][knn]. When you perform a prediction on a new sample, this model either takes the weighted or un-weighted average of the neighbors. In order to see the difference between those two averaging options, we train a kNN model with both of those parameters, and we plot them in the same way as the previous graph.
+
+Notice how we can combine scatter points with lines using Plotly.py. You can learn more about [multiple chart types](https://plotly.com/python/graphing-multiple-chart-types/).
+
+[knn]: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html
 
 ```python
 import numpy as np
@@ -136,9 +155,14 @@ fig.add_traces(go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'))
 fig.show()
 ```
 
+<!-- #region -->
 ## Displaying `PolynomialFeatures` using $\LaTeX$
 
-It's easy to diplay latex equations in legend and titles by simply adding `$` before and after your equation.
+Notice how linear regression fits a straight line, but kNN can take non-linear shapes. Moreover, it is possible to extend linear regression to polynomial regression by using scikit-learn's `PolynomialFeatures`, which lets you fit a slope for your features raised to the power of `n`, where `n=1,2,3,4` in our example.
+
+
+With Plotly, it's easy to display LaTeX equations in legends and titles by simply adding `$` before and after your equation. This way, you can see the coefficients that our polynomial regression fitted.
+<!-- #endregion -->
 
 ```python
 import numpy as np
@@ -220,7 +244,9 @@ fig.show()
 
 ## Visualizing coefficients for multiple linear regression (MLR)
 
-When you are fitting a linear regression, you want to often know what feature matters the most in your regression's output.
+Visualizing regression with one or two variables is straightforward, since we can respectively plot them with scatter plots and 3D scatter plots. However, if you have more than 2 features, you will need to find alternative ways to visualize your data.
+
+One way is to use [bar charts](https://plotly.com/python/bar-charts/). In our example, each bar indicates the coefficients of our linear regression model for each input feature. Our model was trained on the [Iris dataset](https://archive.ics.uci.edu/ml/datasets/iris).
 
 ```python
 import pandas as pd

From 2152601d7cb79dedd39417b0e7c62c590aa0d794 Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 16:05:21 -0400
Subject: [PATCH 30/35] ML Docs: Update ROC/PR Page

---
 doc/python/ml-roc-pr.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/python/ml-roc-pr.md b/doc/python/ml-roc-pr.md
index ad30cd951b3..21abf071880 100644
--- a/doc/python/ml-roc-pr.md
+++ b/doc/python/ml-roc-pr.md
@@ -20,7 +20,7 @@ jupyter:
     name: python
     nbconvert_exporter: python
     pygments_lexer: ipython3
-    version: 3.7.6
+    version: 3.7.7
   plotly:
     description: Interpret the results of your classification using Receiver Operating
       Characteristics (ROC) and Precision-Recall (PR) Curves in Python with Plotly.

From 209dfea292d35cb09a9bcb89d7756426c181733e Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Wed, 12 Aug 2020 19:38:37 -0400
Subject: [PATCH 31/35] ML Docs: Update T-sne and UMAP section

---
 doc/python/ml-tsne-umap-projections.md | 35 ++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/doc/python/ml-tsne-umap-projections.md b/doc/python/ml-tsne-umap-projections.md
index f43f2e18d14..db0ef55b5d0 100644
--- a/doc/python/ml-tsne-umap-projections.md
+++ b/doc/python/ml-tsne-umap-projections.md
@@ -34,11 +34,22 @@ jupyter:
     thumbnail: thumbnail/tsne-umap-projections.png
 ---
 
+This page presents various ways to visualize two popular dimensionality reduction techniques, namely the [t-distributed stochastic neighbor embedding](https://lvdmaaten.github.io/tsne/) (t-SNE) and [Uniform Manifold Approximation and Projection](https://umap-learn.readthedocs.io/en/latest/index.html) (UMAP). They are needed whenever you want to visualize data with more than two or three features (i.e. dimensions). 
+
+We first show how to visualize data with more than three features using the [scatter plot matrix](https://medium.com/plotly/what-is-a-splom-chart-make-scatterplot-matrices-in-python-8dc4998921c3), then we apply dimensionality reduction techniques to get 2D/3D representation of our data, and visualize the results with [scatter plots](https://plotly.com/python/line-and-scatter/) and [3D scatter plots](https://plotly.com/python/3d-scatter-plots/). 
+
+
 ## Basic t-SNE projections
 
+t-SNE is a popular dimensionality reduction algorithm that arises from probability theory. Simply put, it projects the high-dimensional data points (sometimes with hundreds of features) into 2D/3D by inducing the projected data to have a similar distribution as the original data points by minimizing something called the [KL divergence](https://towardsdatascience.com/light-on-math-machine-learning-intuitive-guide-to-understanding-kl-divergence-2b382ca2b2a8). 
+
+Compared to a method like Principal Component Analysis (PCA), it takes significantly more time to converge, but presents significantly better insights when visualized. For example, by projecting features of a flower, it will be able to distinctly group the different species.
+
 
 ### Visualizing high-dimensional data with `px.scatter_matrix`
 
+First, let's try to visualize every feature of the [Iris dataset](https://archive.ics.uci.edu/ml/datasets/iris), and color everything by the species. We will use the Scatter Plot Matrix ([splom](https://plotly.com/python/splom/)), which lets us plot each feature against everything else, which is convenient when your dataset has more than 3 dimensions.
+
 ```python
 import plotly.express as px
 
@@ -50,6 +61,8 @@ fig.show()
 
 ### Project data into 2D with t-SNE and `px.scatter`
 
+Now, let's use the t-SNE algorithm to project the data shown above into two dimensions. Notice how each of the species is physically separate from each other.
+
 ```python
 from sklearn.manifold import TSNE
 import plotly.express as px
@@ -70,6 +83,8 @@ fig.show()
 
 ### Project data into 3D with t-SNE and `px.scatter_3d`
 
+t-SNE can reduce your data to any number of dimensions you want! Here, we show you how to project it to 3D and visualize with a 3D scatter plot.
+
 ```python
 from sklearn.manifold import TSNE
 import plotly.express as px
@@ -125,7 +140,9 @@ fig_3d.show()
 
 ## Visualizing image datasets
 
-In the following example, we show how to visualize large image datasets using UMAP. Here, we use [`load_digits`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html), a subset of the famous MNIST dataset that was downsized to 8x8 and flattened to 64 dimensions.
+In the following example, we show how to visualize large image datasets using UMAP. Here, we use [`load_digits`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html), a subset of the famous [MNIST dataset](http://yann.lecun.com/exdb/mnist/) that was downsized to 8x8 and flattened to 64 dimensions.
+
+Although there are over 1000 data points, and many more dimensions than in the previous example, the projection is still extremely fast. This is because UMAP is optimized for speed, both from a theoretical perspective, and in the way it is implemented. Learn more in [this comparison post](https://umap-learn.readthedocs.io/en/latest/benchmarking.html).
 
 ```python
 import plotly.express as px
@@ -146,4 +163,18 @@ fig = px.scatter(
 fig.show()
 ```
 
-### Reference
+<!-- #region -->
+## Reference
+
+Plotly figures:
+* https://plotly.com/python/line-and-scatter/
+* https://plotly.com/python/3d-scatter-plots/
+* https://plotly.com/python/splom/
+
+
+Details about algorithms:
+* UMAP library: https://umap-learn.readthedocs.io/en/latest/
+* t-SNE User guide: https://scikit-learn.org/stable/modules/manifold.html#t-sne
+* t-SNE paper: https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf
+* MNIST: http://yann.lecun.com/exdb/mnist/
+<!-- #endregion -->

From c419b09284b6f4aa6bbbc982ded08e955c59d742 Mon Sep 17 00:00:00 2001
From: xhlulu <xinghan@plot.ly>
Date: Thu, 13 Aug 2020 11:23:35 -0400
Subject: [PATCH 32/35] Add umap to requirements

---
 doc/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/requirements.txt b/doc/requirements.txt
index 8b46ed96701..63860785d93 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -26,3 +26,4 @@ python-frontmatter
 datashader
 pyarrow 
 cufflinks==0.17.3
+umap-learn
\ No newline at end of file

From caebf4951b900006a7499e0272ec1b32317d6028 Mon Sep 17 00:00:00 2001
From: Nicolas Kruchten <nicolas@plot.ly>
Date: Tue, 18 Aug 2020 13:35:17 -0400
Subject: [PATCH 33/35] fixups

---
 doc/python/ml-knn.md                   | 70 +++++++++----------
 doc/python/ml-pca.md                   | 18 +++--
 doc/python/ml-regression.md            | 36 +++++-----
 doc/python/ml-roc-pr.md                | 93 +++++++++++++-------------
 doc/python/ml-tsne-umap-projections.md | 23 +++----
 5 files changed, 124 insertions(+), 116 deletions(-)

diff --git a/doc/python/ml-knn.md b/doc/python/ml-knn.md
index 6c86de79240..6d823cf49b1 100644
--- a/doc/python/ml-knn.md
+++ b/doc/python/ml-knn.md
@@ -5,8 +5,8 @@ jupyter:
     text_representation:
       extension: .md
       format_name: markdown
-      format_version: '1.1'
-      jupytext_version: 1.1.1
+      format_version: '1.2'
+      jupytext_version: 1.4.2
   kernelspec:
     display_name: Python 3
     language: python
@@ -28,8 +28,8 @@ jupyter:
     language: python
     layout: base
     name: kNN Classification
-    order: 1
-    page_type: example_index
+    order: 2
+    page_type: u-guide
     permalink: python/knn-classification/
     thumbnail: thumbnail/knn-classification.png
 ---
@@ -49,10 +49,11 @@ Using Scikit-learn, we first generate synthetic data that form the shape of a mo
 
 In the graph, we display all the negative labels as squares, and positive labels as circles. We differentiate the training and test set by adding a dot to the center of test data.
 
+In this example, we will use [graph objects](/python/graph-objects/), Plotly's low-level API for building figures.
+
 ```python
-import numpy as np
-import plotly.express as px
 import plotly.graph_objects as go
+import numpy as np
 from sklearn.datasets import make_moons
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
@@ -72,13 +73,13 @@ trace_specs = [
 fig = go.Figure(data=[
     go.Scatter(
         x=X[y==label, 0], y=X[y==label, 1],
-        name=f'{split} Split, Label {label}', 
+        name=f'{split} Split, Label {label}',
         mode='markers', marker_symbol=marker
     )
     for X, y, label, split, marker in trace_specs
 ])
 fig.update_traces(
-    marker_size=12, marker_line_width=1.5, 
+    marker_size=12, marker_line_width=1.5,
     marker_color="lightyellow"
 )
 fig.show()
@@ -89,12 +90,11 @@ fig.show()
 
 Now, we train the kNN model on the same training data displayed in the previous graph. Then, we predict the confidence score of the model for each of the data points in the test set. We will use shapes to denote the true labels, and the color will indicate the confidence of the model for assign that score.
 
-Notice that `px.scatter` only require 1 function call to plot both negative and positive labels, and can additionally set a continuous color scale based on the `y_score` output by our kNN model.
+In this example, we will use [Plotly Express](/python/plotly-express/), Plotly's high-level API for building figures. Notice that `px.scatter` only requires 1 function call to plot both negative and positive labels, and can additionally set a continuous color scale based on the `y_score` output by our kNN model.
 
 ```python
-import numpy as np
 import plotly.express as px
-import plotly.graph_objects as go
+import numpy as np
 from sklearn.datasets import make_moons
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
@@ -110,7 +110,7 @@ clf.fit(X_train, y_train)
 y_score = clf.predict_proba(X_test)[:, 1]
 
 fig = px.scatter(
-    X_test, x=0, y=1, 
+    X_test, x=0, y=1,
     color=y_score, color_continuous_scale='RdBu',
     symbol=y_test, symbol_map={'0': 'square-dot', '1': 'circle-dot'},
     labels={'symbol': 'label', 'color': 'score of <br>first class'}
@@ -124,14 +124,15 @@ fig.show()
 
 Just like the previous example, we will first train our kNN model on the training set.
 
-Instead of predicting the conference for the test set, we can predict the confidence map for the entire area that wraps around the dimensions of our dataset. To do this, we use [`np.meshgrid`](https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html) to create a grid, where the distance between each point is denoted by the `mesh_size` variable. 
+Instead of predicting the confidence for the test set, we can predict the confidence map for the entire area that wraps around the dimensions of our dataset. To do this, we use [`np.meshgrid`](https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html) to create a grid, where the distance between each point is denoted by the `mesh_size` variable.
 
 Then, for each of those points, we will use our model to give a confidence score, and plot it with a [contour plot](https://plotly.com/python/contour-plots/).
 
+In this example, we will use [graph objects](/python/graph-objects/), Plotly's low-level API for building figures.
+
 ```python
-import numpy as np
-import plotly.express as px
 import plotly.graph_objects as go
+import numpy as np
 from sklearn.datasets import make_moons
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
@@ -161,11 +162,11 @@ Z = Z.reshape(xx.shape)
 # Plot the figure
 fig = go.Figure(data=[
     go.Contour(
-        x=xrange, 
-        y=yrange, 
-        z=Z, 
+        x=xrange,
+        y=yrange,
+        z=Z,
         colorscale='RdBu'
-    )    
+    )
 ])
 fig.show()
 ```
@@ -173,9 +174,8 @@ fig.show()
 Now, let's try to combine our `go.Contour` plot with the first scatter plot of our data points, so that we can visually compare the confidence of our model with the true labels.
 
 ```python
-import numpy as np
-import plotly.express as px
 import plotly.graph_objects as go
+import numpy as np
 from sklearn.datasets import make_moons
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
@@ -211,21 +211,21 @@ trace_specs = [
 fig = go.Figure(data=[
     go.Scatter(
         x=X[y==label, 0], y=X[y==label, 1],
-        name=f'{split} Split, Label {label}', 
+        name=f'{split} Split, Label {label}',
         mode='markers', marker_symbol=marker
     )
     for X, y, label, split, marker in trace_specs
 ])
 fig.update_traces(
-    marker_size=12, marker_line_width=1.5, 
+    marker_size=12, marker_line_width=1.5,
     marker_color="lightyellow"
 )
 
 fig.add_trace(
     go.Contour(
-        x=xrange, 
-        y=yrange, 
-        z=Z, 
+        x=xrange,
+        y=yrange,
+        z=Z,
         showscale=False,
         colorscale='RdBu',
         opacity=0.4,
@@ -240,10 +240,12 @@ fig.show()
 
 It is also possible to visualize the prediction confidence of the model using [heatmaps](https://plotly.com/python/heatmaps/). In this example, you can see how to compute how confident the model is about its prediction at every point in the 2D grid. Here, we define the confidence as the difference between the highest score and the score of the other classes summed, at a certain point.
 
+In this example, we will use [Plotly Express](/python/plotly-express/), Plotly's high-level API for building figures.
+
 ```python
-import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
+import numpy as np
 from sklearn.neighbors import KNeighborsClassifier
 
 mesh_size = .02
@@ -275,21 +277,21 @@ diff = proba.max(axis=-1) - (proba.sum(axis=-1) - proba.max(axis=-1))
 
 fig = px.scatter(
     df_test, x='sepal_length', y='sepal_width',
-    symbol='species', 
+    symbol='species',
     symbol_map={
-        'setosa': 'square-dot', 
-        'versicolor': 'circle-dot', 
+        'setosa': 'square-dot',
+        'versicolor': 'circle-dot',
         'virginica': 'diamond-dot'},
 )
 fig.update_traces(
-    marker_size=12, marker_line_width=1.5, 
+    marker_size=12, marker_line_width=1.5,
     marker_color="lightyellow"
 )
 fig.add_trace(
     go.Heatmap(
-        x=lrange, 
-        y=wrange, 
-        z=diff, 
+        x=lrange,
+        y=wrange,
+        z=diff,
         opacity=0.25,
         customdata=proba,
         colorscale='RdBu',
diff --git a/doc/python/ml-pca.md b/doc/python/ml-pca.md
index 5395fd3dc04..3737a2c5e3a 100644
--- a/doc/python/ml-pca.md
+++ b/doc/python/ml-pca.md
@@ -29,7 +29,7 @@ jupyter:
     layout: base
     name: PCA Visualization
     order: 4
-    page_type: example_index
+    page_type: u-guide
     permalink: python/pca-visualization/
     thumbnail: thumbnail/ml-pca.png
 ---
@@ -52,6 +52,8 @@ First, let's plot all the features and see how the `species` in the Iris dataset
 
 In our example, we are plotting all 4 features from the Iris dataset, thus we can see how `sepal_width` is compared against `sepal_length`, then against `petal_width`, and so forth. Keep in mind how some pairs of features can more easily separate different species.
 
+In this example, we will use [Plotly Express](/python/plotly-express/), Plotly's high-level API for building figures.
+
 ```python
 import plotly.express as px
 
@@ -69,10 +71,12 @@ fig.show()
 
 ###  Visualize all the principal components
 
-Now, we apply `PCA` the same dataset, and retrieve **all** the components. We use the same `px.scatter_matrix` trace to display our results, but this time our features are the resulting *principal components*, ordered by how much variance they are able to explain. 
+Now, we apply `PCA` to the same dataset, and retrieve **all** the components. We use the same `px.scatter_matrix` trace to display our results, but this time our features are the resulting *principal components*, ordered by how much variance they are able to explain.
 
 The importance of explained variance is demonstrated in the example below. The subplot between PC3 and PC4 is clearly unable to separate each class, whereas the subplot between PC1 and PC2 shows a clear separation between each species.
 
+In this example, we will use [Plotly Express](/python/plotly-express/), Plotly's high-level API for building figures.
+
 ```python
 import plotly.express as px
 from sklearn.decomposition import PCA
@@ -83,7 +87,7 @@ features = ["sepal_width", "sepal_length", "petal_width", "petal_length"]
 pca = PCA()
 components = pca.fit_transform(df[features])
 labels = {
-    str(i): f"PC {i+1} ({var:.1f}%)" 
+    str(i): f"PC {i+1} ({var:.1f}%)"
     for i, var in enumerate(pca.explained_variance_ratio_ * 100)
 }
 
@@ -122,7 +126,7 @@ labels = {str(i): f"PC {i+1}" for i in range(n_components)}
 labels['color'] = 'Median Price'
 
 fig = px.scatter_matrix(
-    components, 
+    components,
     color=boston.target,
     dimensions=range(n_components),
     labels=labels,
@@ -167,7 +171,7 @@ components = pca.fit_transform(X)
 total_var = pca.explained_variance_ratio_.sum() * 100
 
 fig = px.scatter_3d(
-    components, x=0, y=1, z=2, color=df['species'], 
+    components, x=0, y=1, z=2, color=df['species'],
     title=f'Total Explained Variance: {total_var:.2f}%',
     labels={'0': 'PC 1', '1': 'PC 2', '2': 'PC 3'}
 )
@@ -181,9 +185,9 @@ Often, you might be interested in seeing how much variance PCA is able to explai
 With a higher explained variance, you are able to capture more variability in your dataset, which could potentially lead to better performance when training your model. For a more mathematical explanation, see this [Q&A thread](https://stats.stackexchange.com/questions/22569/pca-and-proportion-of-variance-explained).
 
 ```python
+import plotly.express as px
 import numpy as np
 import pandas as pd
-import plotly.express as px
 from sklearn.decomposition import PCA
 from sklearn.datasets import load_diabetes
 
@@ -196,7 +200,7 @@ exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
 
 px.area(
     x=range(1, exp_var_cumul.shape[0] + 1),
-    y=exp_var_cumul, 
+    y=exp_var_cumul,
     labels={"x": "# Components", "y": "Explained Variance"}
 )
 ```
diff --git a/doc/python/ml-regression.md b/doc/python/ml-regression.md
index 2dd99e0b067..42215c7ad9d 100644
--- a/doc/python/ml-regression.md
+++ b/doc/python/ml-regression.md
@@ -5,8 +5,8 @@ jupyter:
     text_representation:
       extension: .md
       format_name: markdown
-      format_version: '1.1'
-      jupytext_version: 1.1.1
+      format_version: '1.2'
+      jupytext_version: 1.4.2
   kernelspec:
     display_name: Python 3
     language: python
@@ -27,8 +27,8 @@ jupyter:
     language: python
     layout: base
     name: ML Regression
-    order: 2
-    page_type: example_index
+    order: 1
+    page_type: u-guide
     permalink: python/ml-regression/
     thumbnail: thumbnail/ml-regression.png
 ---
@@ -52,7 +52,7 @@ We will be using the [Linear Regression][lr], which is a simple model that fit a
 
 ### Ordinary Least Square (OLS) with `plotly.express`
 
-This example shows how to use `plotly.express`'s `trendline` parameter to train a simply Ordinary Least Square (OLS) for predicting the tips waiters will receive based on the value of the total bill.
+This example shows [how to use `plotly.express`'s `trendline` parameter to train a simple Ordinary Least Squares (OLS) model](/python/linear-fits/) for predicting the tips waiters will receive based on the value of the total bill.
 
 [lr]: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
 
@@ -174,11 +174,11 @@ from sklearn.preprocessing import PolynomialFeatures
 def format_coefs(coefs):
     equation_list = [f"{coef}x^{i}" for i, coef in enumerate(coefs)]
     equation = "$" +  " + ".join(equation_list) + "$"
-    
+
     replace_map = {"x^0": "", "x^1": "x", '+ -': '- '}
     for old, new in replace_map.items():
         equation = equation.replace(old, new)
-        
+
     return equation
 
 df = px.data.tips()
@@ -195,7 +195,7 @@ for degree in [1, 2, 3, 4]:
     model = LinearRegression(fit_intercept=False)
     model.fit(X_poly, df.tip)
     y_poly = model.predict(x_range_poly)
-    
+
     equation = format_coefs(model.coef_.round(2))
     fig.add_traces(go.Scatter(x=x_range.squeeze(), y=y_poly, name=equation))
 
@@ -300,7 +300,7 @@ y_pred = model.predict(X)
 fig = px.scatter(x=y, y=y_pred, labels={'x': 'ground truth', 'y': 'prediction'})
 fig.add_shape(
     type="line", line=dict(dash='dash'),
-    x0=y.min(), y0=y.min(), 
+    x0=y.min(), y0=y.min(),
     x1=y.max(), y1=y.max()
 )
 fig.show()
@@ -341,7 +341,7 @@ fig = px.scatter(
 fig.update_traces(histnorm='probability', selector={'type':'histogram'})
 fig.add_shape(
     type="line", line=dict(dash='dash'),
-    x0=y.min(), y0=y.min(), 
+    x0=y.min(), y0=y.min(),
     x1=y.max(), y1=y.max()
 )
 
@@ -411,14 +411,14 @@ mean_alphas = model.mse_path_.mean(axis=-1)
 
 fig = go.Figure([
     go.Scatter(
-        x=model.alphas_, y=model.mse_path_[:, i], 
+        x=model.alphas_, y=model.mse_path_[:, i],
         name=f"Fold: {i+1}", opacity=.5, line=dict(dash='dash'),
         hovertemplate="alpha: %{x} <br>MSE: %{y}"
     )
     for i in range(N_FOLD)
 ])
 fig.add_traces(go.Scatter(
-    x=model.alphas_, y=mean_alphas, 
+    x=model.alphas_, y=mean_alphas,
     name='Mean', line=dict(color='black', width=3),
     hovertemplate="alpha: %{x} <br>MSE: %{y}",
 ))
@@ -431,8 +431,8 @@ fig.add_shape(
 )
 
 fig.update_layout(
-    xaxis_title='alpha', 
-    xaxis_type="log", 
+    xaxis_title='alpha',
+    xaxis_type="log",
     yaxis_title="Mean Square Error (MSE)"
 )
 fig.show()
@@ -462,14 +462,14 @@ y = df['petal_width']
 # Define and fit the grid
 model = DecisionTreeRegressor()
 param_grid = {
-    'criterion': ['mse', 'friedman_mse', 'mae'], 
+    'criterion': ['mse', 'friedman_mse', 'mae'],
     'max_depth': range(2, 5)
 }
 grid = GridSearchCV(model, param_grid, cv=N_FOLD)
 grid.fit(X, y)
 grid_df = pd.DataFrame(grid.cv_results_)
 
-# Convert the wide format of the grid into the long format 
+# Convert the wide format of the grid into the long format
 # accepted by plotly.express
 melted = (
     grid_df
@@ -491,7 +491,7 @@ melted['cv_split'] = (
 
 # Single function call to plot each figure
 fig_hmap = px.density_heatmap(
-    melted, x="max_depth", y='criterion', 
+    melted, x="max_depth", y='criterion',
     histfunc="sum", z="r_squared",
     title='Grid search results on individual fold',
     hover_data=['mean_fit_time'],
@@ -500,7 +500,7 @@ fig_hmap = px.density_heatmap(
 )
 
 fig_box = px.box(
-    melted, x='max_depth', y='r_squared', 
+    melted, x='max_depth', y='r_squared',
     title='Grid search results ',
     hover_data=['mean_fit_time'],
     points='all',
diff --git a/doc/python/ml-roc-pr.md b/doc/python/ml-roc-pr.md
index 21abf071880..eced1074109 100644
--- a/doc/python/ml-roc-pr.md
+++ b/doc/python/ml-roc-pr.md
@@ -5,8 +5,8 @@ jupyter:
     text_representation:
       extension: .md
       format_name: markdown
-      format_version: '1.1'
-      jupytext_version: 1.1.1
+      format_version: '1.2'
+      jupytext_version: 1.4.2
   kernelspec:
     display_name: Python 3
     language: python
@@ -29,7 +29,7 @@ jupyter:
     layout: base
     name: ROC and PR Curves
     order: 3
-    page_type: example_index
+    page_type: u-guide
     permalink: python/roc-and-pr-curves/
     thumbnail: thumbnail/ml-roc-pr.png
 ---
@@ -38,13 +38,13 @@ jupyter:
 
 Before diving into the receiver operating characteristic (ROC) curve, we will look at two plots that will give some context to the thresholds mechanism behind the ROC and PR curves.
 
-In the histogram, we observe that the score spread such that most of the positive labels are binned near 1, and a lot of the negative labels are close to 0. When we set a threshold on the score, all of the bins to its left will be classified as 0's, and everything to the right will be 1's. There are obviously a few outliers, such as **negative** samples that our model gave a high score, and *positive* samples with a low score. If we set a threshold right in the middle, those outliers will respectively become **false positives** and *false negatives*. 
+In the histogram, we observe that the scores are spread such that most of the positive labels are binned near 1, and a lot of the negative labels are close to 0. When we set a threshold on the score, all of the bins to its left will be classified as 0's, and everything to the right will be 1's. There are obviously a few outliers, such as **negative** samples that our model gave a high score, and *positive* samples with a low score. If we set a threshold right in the middle, those outliers will respectively become **false positives** and *false negatives*.
 
 As we adjust thresholds, the number of positive positives will increase or decrease, and at the same time the number of true positives will also change; this is shown in the second plot. As you can see, the model seems to perform fairly well, because the true positive rate decreases slowly, whereas the false positive rate decreases sharply as we increase the threshold. Those two lines each represent a dimension of the ROC curve.
 
 ```python
 import plotly.express as px
-import plotly.graph_objects as go
+import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import roc_curve, auc
 from sklearn.datasets import make_classification
@@ -58,26 +58,28 @@ fpr, tpr, thresholds = roc_curve(y, y_score)
 
 # The histogram of scores compared to true labels
 fig_hist = px.histogram(
-    x=y_score, color=y, nbins=50, 
+    x=y_score, color=y, nbins=50,
     labels=dict(color='True Labels', x='Score')
 )
 
+fig_hist.show()
+
+
 # Evaluating model performance at various thresholds
-fig_thresh = go.Figure([
-    go.Scatter(x=thresholds, y=fpr, name='False Positive Rate'),
-    go.Scatter(x=thresholds, y=tpr, name='True Positive Rate')
-])
-fig_thresh.update_layout(
-    title='TPR and FPR at every threshold',
-    xaxis_title='Threshold',
-    yaxis_title='Rate',
-    yaxis=dict(scaleanchor="x", scaleratio=1),
-    xaxis=dict(constrain='domain')
+df = pd.DataFrame({
+    'False Positive Rate': fpr,
+    'True Positive Rate': tpr
+}, index=thresholds)
+df.index.name = "Thresholds"
+df.columns.name = "Rate"
+
+fig_thresh = px.line(
+    df, title='TPR and FPR at every threshold',
+    width=700, height=500
 )
-fig_thresh.update_xaxes(range=[0, 1])
 
-# Display plots
-fig_hist.show()
+fig_thresh.update_yaxes(scaleanchor="x", scaleratio=1)
+fig_thresh.update_xaxes(range=[0, 1], constrain='domain')
 fig_thresh.show()
 ```
 
@@ -102,18 +104,18 @@ y_score = model.predict_proba(X)[:, 1]
 fpr, tpr, thresholds = roc_curve(y, y_score)
 
 fig = px.area(
-    x=fpr, y=tpr, 
+    x=fpr, y=tpr,
     title=f'ROC Curve (AUC={auc(fpr, tpr):.4f})',
-    labels=dict(x='False Positive Rate', y='True Positive Rate')
+    labels=dict(x='False Positive Rate', y='True Positive Rate'),
+    width=700, height=500
 )
 fig.add_shape(
-    type='line', line=dict(dash='dash'), 
+    type='line', line=dict(dash='dash'),
     x0=0, x1=1, y0=0, y1=1
 )
-fig.update_layout(
-    yaxis=dict(scaleanchor="x", scaleratio=1),
-    xaxis=dict(constrain='domain')
-)
+
+fig.update_yaxes(scaleanchor="x", scaleratio=1)
+fig.update_xaxes(constrain='domain')
 fig.show()
 ```
 
@@ -122,12 +124,12 @@ fig.show()
 When you have more than 2 classes, you will need to plot the ROC curve for each class separately. Make sure that you use a [one-versus-rest](https://scikit-learn.org/stable/modules/multiclass.html#one-vs-the-rest) model, or make sure that your problem has a [multi-label](https://scikit-learn.org/stable/modules/multiclass.html#multilabel-classification-format) format; otherwise, your ROC curve might not return the expected results.
 
 ```python
+import plotly.graph_objects as go
+import plotly.express as px
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import roc_curve, roc_auc_score
-import plotly.graph_objects as go
-import plotly.express as px
 
 np.random.seed(0)
 
@@ -151,17 +153,17 @@ y_scores = model.predict_proba(X)
 # every time we compute a new class
 fig = go.Figure()
 fig.add_shape(
-    type='line', line=dict(dash='dash'), 
+    type='line', line=dict(dash='dash'),
     x0=0, x1=1, y0=0, y1=1
 )
 
 for i in range(y_scores.shape[1]):
     y_true = y_onehot.iloc[:, i]
     y_score = y_scores[:, i]
-    
+
     fpr, tpr, _ = roc_curve(y_true, y_score)
     auc_score = roc_auc_score(y_true, y_score)
-    
+
     name = f"{y_onehot.columns[i]} (AUC={auc_score:.2f})"
     fig.add_trace(go.Scatter(x=fpr, y=tpr, name=name, mode='lines'))
 
@@ -169,7 +171,8 @@ fig.update_layout(
     xaxis_title='False Positive Rate',
     yaxis_title='True Positive Rate',
     yaxis=dict(scaleanchor="x", scaleratio=1),
-    xaxis=dict(constrain='domain')
+    xaxis=dict(constrain='domain'),
+    width=700, height=500
 )
 fig.show()
 ```
@@ -193,18 +196,17 @@ y_score = model.predict_proba(X)[:, 1]
 precision, recall, thresholds = precision_recall_curve(y, y_score)
 
 fig = px.area(
-    x=recall, y=precision, 
+    x=recall, y=precision,
     title=f'Precision-Recall Curve (AUC={auc(fpr, tpr):.4f})',
-    labels=dict(x='Recall', y='Precision')
+    labels=dict(x='Recall', y='Precision'),
+    width=700, height=500
 )
 fig.add_shape(
-    type='line', line=dict(dash='dash'), 
+    type='line', line=dict(dash='dash'),
     x0=0, x1=1, y0=1, y1=0
 )
-fig.update_layout(
-    yaxis=dict(scaleanchor="x", scaleratio=1),
-    xaxis=dict(constrain='domain')
-)
+fig.update_yaxes(scaleanchor="x", scaleratio=1)
+fig.update_xaxes(constrain='domain')
 
 fig.show()
 ```
@@ -212,12 +214,12 @@ fig.show()
 In this example, we use the [average precision](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html) metric, which is an alternative scoring method to the area under the PR curve.
 
 ```python
+import plotly.graph_objects as go
+import plotly.express as px
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import precision_recall_curve, average_precision_score
-import plotly.graph_objects as go
-import plotly.express as px
 
 np.random.seed(0)
 
@@ -241,17 +243,17 @@ y_scores = model.predict_proba(X)
 # every time we compute a new class
 fig = go.Figure()
 fig.add_shape(
-    type='line', line=dict(dash='dash'), 
+    type='line', line=dict(dash='dash'),
     x0=0, x1=1, y0=1, y1=0
 )
 
 for i in range(y_scores.shape[1]):
     y_true = y_onehot.iloc[:, i]
     y_score = y_scores[:, i]
-    
+
     precision, recall, _ = precision_recall_curve(y_true, y_score)
     auc_score = average_precision_score(y_true, y_score)
-    
+
     name = f"{y_onehot.columns[i]} (AP={auc_score:.2f})"
     fig.add_trace(go.Scatter(x=recall, y=precision, name=name, mode='lines'))
 
@@ -259,7 +261,8 @@ fig.update_layout(
     xaxis_title='Recall',
     yaxis_title='Precision',
     yaxis=dict(scaleanchor="x", scaleratio=1),
-    xaxis=dict(constrain='domain')
+    xaxis=dict(constrain='domain'),
+    width=700, height=500
 )
 fig.show()
 ```
diff --git a/doc/python/ml-tsne-umap-projections.md b/doc/python/ml-tsne-umap-projections.md
index db0ef55b5d0..26ca99d51b4 100644
--- a/doc/python/ml-tsne-umap-projections.md
+++ b/doc/python/ml-tsne-umap-projections.md
@@ -22,28 +22,27 @@ jupyter:
     pygments_lexer: ipython3
     version: 3.7.7
   plotly:
-    description: Visualize scikit-learn's k-Nearest Neighbors (kNN) classification
-      in Python with Plotly.
+    description: Visualize scikit-learn's t-SNE and UMAP in Python with Plotly.
     display_as: ai_ml
     language: python
     layout: base
     name: t-SNE and UMAP projections
-    order: 1
-    page_type: example_index
+    order: 5
+    page_type: u-guide
     permalink: python/t-sne-and-umap-projections/
     thumbnail: thumbnail/tsne-umap-projections.png
 ---
 
-This page presents various ways to visualize two popular dimensionality reduction techniques, namely the [t-distributed stochastic neighbor embedding](https://lvdmaaten.github.io/tsne/) (t-SNE) and [Uniform Manifold Approximation and Projection](https://umap-learn.readthedocs.io/en/latest/index.html) (UMAP). They are needed whenever you want to visualize data with more than two or three features (i.e. dimensions). 
+This page presents various ways to visualize two popular dimensionality reduction techniques, namely the [t-distributed stochastic neighbor embedding](https://lvdmaaten.github.io/tsne/) (t-SNE) and [Uniform Manifold Approximation and Projection](https://umap-learn.readthedocs.io/en/latest/index.html) (UMAP). They are needed whenever you want to visualize data with more than two or three features (i.e. dimensions).
 
-We first show how to visualize data with more than three features using the [scatter plot matrix](https://medium.com/plotly/what-is-a-splom-chart-make-scatterplot-matrices-in-python-8dc4998921c3), then we apply dimensionality reduction techniques to get 2D/3D representation of our data, and visualize the results with [scatter plots](https://plotly.com/python/line-and-scatter/) and [3D scatter plots](https://plotly.com/python/3d-scatter-plots/). 
+We first show how to visualize data with more than three features using the [scatter plot matrix](https://medium.com/plotly/what-is-a-splom-chart-make-scatterplot-matrices-in-python-8dc4998921c3), then we apply dimensionality reduction techniques to get 2D/3D representation of our data, and visualize the results with [scatter plots](https://plotly.com/python/line-and-scatter/) and [3D scatter plots](https://plotly.com/python/3d-scatter-plots/).
 
 
 ## Basic t-SNE projections
 
-t-SNE is a popular dimensionality reduction algorithm that arises from probability theory. Simply put, it projects the high-dimensional data points (sometimes with hundreds of features) into 2D/3D by inducing the projected data to have a similar distribution as the original data points by minimizing something called the [KL divergence](https://towardsdatascience.com/light-on-math-machine-learning-intuitive-guide-to-understanding-kl-divergence-2b382ca2b2a8). 
+t-SNE is a popular dimensionality reduction algorithm that arises from probability theory. Simply put, it projects the high-dimensional data points (sometimes with hundreds of features) into 2D/3D by inducing the projected data to have a similar distribution as the original data points by minimizing something called the [KL divergence](https://towardsdatascience.com/light-on-math-machine-learning-intuitive-guide-to-understanding-kl-divergence-2b382ca2b2a8).
 
-Compared to a method like Principal Component Analysis (PCA), it takes signficantly more time to converge, but present signficiantly better insights when visualized. For example, by projecting features of a flowers, it will be able to distinctly group 
+Compared to a method like Principal Component Analysis (PCA), it takes significantly more time to converge, but presents significantly better insights when visualized. For example, by projecting the features of flowers, it is able to distinctly group the different species.
 
 
 ### Visualizing high-dimensional data with `px.scatter_matrix`
@@ -75,7 +74,7 @@ tsne = TSNE(n_components=2, random_state=0)
 projections = tsne.fit_transform(features)
 
 fig = px.scatter(
-    projections, x=0, y=1, 
+    projections, x=0, y=1,
     color=df.species, labels={'color': 'species'}
 )
 fig.show()
@@ -97,7 +96,7 @@ tsne = TSNE(n_components=3, random_state=0)
 projections = tsne.fit_transform(features, )
 
 fig = px.scatter_3d(
-    projections, x=0, y=1, z=2, 
+    projections, x=0, y=1, z=2,
     color=df.species, labels={'color': 'species'}
 )
 fig.update_traces(marker_size=8)
@@ -129,7 +128,7 @@ fig_2d = px.scatter(
     color=df.species, labels={'color': 'species'}
 )
 fig_3d = px.scatter_3d(
-    proj_3d, x=0, y=1, z=2, 
+    proj_3d, x=0, y=1, z=2,
     color=df.species, labels={'color': 'species'}
 )
 fig_3d.update_traces(marker_size=5)
@@ -157,7 +156,7 @@ umap_2d.fit(digits.data)
 projections = umap_2d.transform(digits.data)
 
 fig = px.scatter(
-    projections, x=0, y=1, 
+    projections, x=0, y=1,
     color=digits.target.astype(str), labels={'color': 'digit'}
 )
 fig.show()

From f3507e4326c4bf0feb2a78c037e8a1f7fbe8add8 Mon Sep 17 00:00:00 2001
From: Nicolas Kruchten <nicolas@plot.ly>
Date: Tue, 18 Aug 2020 13:55:30 -0400
Subject: [PATCH 34/35] longer timeout for umap

---
 doc/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/Makefile b/doc/Makefile
index 5e9861159a1..996f18ebdaa 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -39,7 +39,8 @@ $(HTML_DIR)/2019-07-03-%.html: $(IPYNB_DIR)/%.ipynb
 	@echo "[nbconvert]  $<"
 	@jupyter nbconvert $< --to html --template nb.tpl \
 	  	--output-dir $(HTML_DIR) --output 2019-07-03-$*.html \
-	  	--execute > $(FAIL_DIR)/$* 2>&1  && rm -f $(FAIL_DIR)/$*
+	  	--execute > $(FAIL_DIR)/$* 2>&1  && rm -f $(FAIL_DIR)/$* \
+			--ExecutePreprocessor.timeout=600
 
 
 $(REDIR_DIR)/2019-07-03-redirect-next-%.html: $(IPYNB_DIR)/%.ipynb

From 53de99c7632dd42e8384ab5d9458a837e92abdcd Mon Sep 17 00:00:00 2001
From: Nicolas Kruchten <nicolas@plot.ly>
Date: Tue, 18 Aug 2020 14:29:27 -0400
Subject: [PATCH 35/35] longer timeout for umap

---
 doc/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/Makefile b/doc/Makefile
index 996f18ebdaa..4390fed7e0b 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -38,9 +38,9 @@ $(HTML_DIR)/2019-07-03-%.html: $(IPYNB_DIR)/%.ipynb
 	@mkdir -p $(FAIL_DIR)
 	@echo "[nbconvert]  $<"
 	@jupyter nbconvert $< --to html --template nb.tpl \
+			--ExecutePreprocessor.timeout=600\
 	  	--output-dir $(HTML_DIR) --output 2019-07-03-$*.html \
-	  	--execute > $(FAIL_DIR)/$* 2>&1  && rm -f $(FAIL_DIR)/$* \
-			--ExecutePreprocessor.timeout=600
+	  	--execute > $(FAIL_DIR)/$* 2>&1  && rm -f $(FAIL_DIR)/$*
 
 
 $(REDIR_DIR)/2019-07-03-redirect-next-%.html: $(IPYNB_DIR)/%.ipynb