From 9e710ed8437908c3eaaf6e68f5523842a846c6ea Mon Sep 17 00:00:00 2001
From: CJ Carey <perimosocordiae@gmail.com>
Date: Thu, 23 Apr 2015 12:58:17 -0400
Subject: [PATCH 1/4] WIP: adding 'max' normalizer to normalize()

This still needs tests and doc updates.
---
 sklearn/preprocessing/data.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index c97d0ba3e5aed..88a0452f2dc4a 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -570,7 +570,7 @@ def normalize(X, norm='l2', axis=1, copy=True):
         scipy.sparse matrices should be in CSR format to avoid an
         un-necessary copy.
 
-    norm : 'l1' or 'l2', optional ('l2' by default)
+    norm : 'l1', 'l2', or 'max', optional ('l2' by default)
         The norm to use to normalize each non zero sample (or each non-zero
         feature if axis is 0).
 
@@ -589,7 +589,7 @@ def normalize(X, norm='l2', axis=1, copy=True):
     using the ``Transformer`` API (e.g. as part of a preprocessing
     :class:`sklearn.pipeline.Pipeline`)
     """
-    if norm not in ('l1', 'l2'):
+    if norm not in ('l1', 'l2', 'max'):
         raise ValueError("'%s' is not a supported norm" % norm)
 
     if axis == 0:
@@ -609,13 +609,19 @@ def normalize(X, norm='l2', axis=1, copy=True):
             inplace_csr_row_normalize_l1(X)
         elif norm == 'l2':
             inplace_csr_row_normalize_l2(X)
+        elif norm == 'max':
+            norms = X.max(axis=1).toarray()
+            norms = norms.repeat(np.diff(X.indptr))
+            mask = norms != 0
+            X.data[mask] /= norms[mask]
     else:
         if norm == 'l1':
             norms = np.abs(X).sum(axis=1)
-            norms[norms == 0.0] = 1.0
         elif norm == 'l2':
             norms = row_norms(X)
-            norms[norms == 0.0] = 1.0
+        elif norm == 'max':
+            norms = np.max(X, axis=1)
+        norms[norms == 0.0] = 1.0
         X /= norms[:, np.newaxis]
 
     if axis == 0:

From 007ae76cb358fe7836b8a37618062bdfa5629c16 Mon Sep 17 00:00:00 2001
From: CJ Carey <perimosocordiae@gmail.com>
Date: Fri, 8 May 2015 14:20:54 -0400
Subject: [PATCH 2/4] TST: covering norm='max' branches of normalize()

---
 sklearn/preprocessing/tests/test_data.py | 49 ++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 6e565010d2ada..fe536517837d3 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -604,6 +604,55 @@ def test_normalizer_l2():
         assert_almost_equal(la.norm(X_norm[3]), 0.0)
 
 
+def test_normalizer_max():
+    # Normalizer(norm='max') must scale each non-zero row to a max of 1
+    rng = np.random.RandomState(0)
+    X_dense = rng.randn(4, 5)
+    X_sparse_unpruned = sparse.csr_matrix(X_dense)
+
+    # set the row number 3 to zero
+    X_dense[3, :] = 0.0
+
+    # set the row number 3 to zero without pruning (can happen in real life)
+    indptr_3 = X_sparse_unpruned.indptr[3]
+    indptr_4 = X_sparse_unpruned.indptr[4]
+    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0
+
+    # build the pruned variant using the regular constructor
+    X_sparse_pruned = sparse.csr_matrix(X_dense)
+
+    # check inputs that support the no-copy optim
+    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):
+        normalizer = Normalizer(norm='max', copy=True)
+        X_norm1 = normalizer.transform(X)
+        assert_true(X_norm1 is not X)
+        X_norm1 = toarray(X_norm1)
+
+        normalizer = Normalizer(norm='max', copy=False)
+        X_norm2 = normalizer.transform(X)
+        assert_true(X_norm2 is X)
+        X_norm2 = toarray(X_norm2)
+
+        for X_norm in (X_norm1, X_norm2):
+            row_maxs = X_norm.max(axis=1)
+            for i in range(3):
+                assert_almost_equal(row_maxs[i], 1.0)
+            assert_almost_equal(row_maxs[3], 0.0)
+
+    # check input for which copy=False won't prevent a copy
+    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
+        X = init(X_dense)
+        X_norm = Normalizer(norm='max', copy=False).transform(X)
+        assert_true(X_norm is not X)
+        assert_true(isinstance(X_norm, sparse.csr_matrix))
+
+        # recompute the maxima for this output; do not reuse stale row_maxs
+        row_maxs = toarray(X_norm).max(axis=1)
+        for i in range(3):
+            assert_almost_equal(row_maxs[i], 1.0)
+        assert_almost_equal(row_maxs[3], 0.0)
+
+
 def test_normalize():
     # Test normalize function
     # Only tests functionality not used by the tests for Normalizer.

From 84ee88db791c962d1cb4d3b99cf41b84a35856b6 Mon Sep 17 00:00:00 2001
From: CJ Carey <perimosocordiae@gmail.com>
Date: Fri, 8 May 2015 14:22:55 -0400
Subject: [PATCH 3/4] DOC: updating Normalizer docstring for norm='max'

---
 sklearn/preprocessing/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 88a0452f2dc4a..d619c26ca85d4 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -649,7 +649,7 @@ class Normalizer(BaseEstimator, TransformerMixin):
 
     Parameters
     ----------
-    norm : 'l1' or 'l2', optional ('l2' by default)
+    norm : 'l1', 'l2', or 'max', optional ('l2' by default)
         The norm to use to normalize each non zero sample.
 
     copy : boolean, optional, default True

From 5fcad7cc673b6dfbfbacfffe7bf8dbb2ac6e672f Mon Sep 17 00:00:00 2001
From: CJ Carey <perimosocordiae@gmail.com>
Date: Fri, 8 May 2015 15:13:27 -0400
Subject: [PATCH 4/4] Fixing sparse max for older scipy

---
 sklearn/preprocessing/data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index d619c26ca85d4..fa268280e1241 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -22,7 +22,8 @@
 from ..utils.fixes import isclose
 from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
                                       inplace_csr_row_normalize_l2)
-from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis)
+from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis,
+                                 min_max_axis)
 from ..utils.validation import check_is_fitted
 
 zip = six.moves.zip
@@ -610,7 +611,7 @@ def normalize(X, norm='l2', axis=1, copy=True):
         elif norm == 'l2':
             inplace_csr_row_normalize_l2(X)
         elif norm == 'max':
-            norms = X.max(axis=1).toarray()
+            _, norms = min_max_axis(X, 1)
             norms = norms.repeat(np.diff(X.indptr))
             mask = norms != 0
             X.data[mask] /= norms[mask]