     'Binarizer',
     'KernelCenterer',
     'MinMaxScaler',
+    'MaxAbsScaler',
     'Normalizer',
     'OneHotEncoder',
     'RobustScaler',
     'normalize',
     'scale',
     'robust_scale',
+    'maxabs_scale',
 ]


@@ -59,16 +61,28 @@ def _mean_and_std(X, axis=0, with_mean=True, with_std=True):

     if with_std:
         std_ = Xr.std(axis=0)
-        if isinstance(std_, np.ndarray):
-            std_[std_ == 0.] = 1.0
-        elif std_ == 0.:
-            std_ = 1.
+        std_ = _handle_zeros_in_scale(std_)
     else:
         std_ = None

     return mean_, std_


+def _handle_zeros_in_scale(scale):
+    '''Makes sure that whenever scale is zero, we handle it correctly.
+
+    This happens in most scalers when we have constant features.'''
+
+    # if we are fitting on 1D arrays, scale might be a scalar
+    if np.isscalar(scale):
+        if scale == 0:
+            scale = 1.
+    elif isinstance(scale, np.ndarray):
+        scale[scale == 0.0] = 1.0
+        scale[~np.isfinite(scale)] = 1.0
+    return scale
+
+
 def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
     """Standardize a dataset along any axis

@@ -132,7 +146,7 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
         if copy:
             X = X.copy()
         _, var = mean_variance_axis(X, axis=0)
-        var[var == 0.0] = 1.0
+        var = _handle_zeros_in_scale(var)
         inplace_column_scale(X, 1 / np.sqrt(var))
     else:
         X = np.asarray(X)
@@ -233,11 +247,7 @@ def fit(self, X, y=None):
                              " than maximum. Got %s." % str(feature_range))
         data_min = np.min(X, axis=0)
         data_range = np.max(X, axis=0) - data_min
-        # Do not scale constant features
-        if isinstance(data_range, np.ndarray):
-            data_range[data_range == 0.0] = 1.0
-        elif data_range == 0.:
-            data_range = 1.
+        data_range = _handle_zeros_in_scale(data_range)
         self.scale_ = (feature_range[1] - feature_range[0]) / data_range
         self.min_ = feature_range[0] - data_min * self.scale_
         self.data_range = data_range
@@ -359,7 +369,7 @@ def fit(self, X, y=None):
             if self.with_std:
                 var = mean_variance_axis(X, axis=0)[1]
                 self.std_ = np.sqrt(var)
-                self.std_[var == 0.0] = 1.0
+                self.std_ = _handle_zeros_in_scale(self.std_)
             else:
                 self.std_ = None
             return self
@@ -430,6 +440,119 @@ def inverse_transform(self, X, copy=None):
         return X


+class MaxAbsScaler(BaseEstimator, TransformerMixin):
+    """Scale each feature by its maximum absolute value.
+
+    This estimator scales and translates each feature individually such
+    that the maximal absolute value of each feature in the
+    training set will be 1.0. It does not shift/center the data, and
+    thus does not destroy any sparsity.
+
+    This scaler can also be applied to sparse CSR or CSC matrices.
+
+    Parameters
+    ----------
+    copy : boolean, optional, default is True
+        Set to False to perform inplace scaling and avoid a copy (if the input
+        is already a numpy array).
+
+    Attributes
+    ----------
+    scale_ : ndarray, shape (n_features,)
+        Per feature relative scaling of the data.
+    """
+
+    def __init__(self, copy=True):
+        self.copy = copy
+
+    def fit(self, X, y=None):
+ """Compute the minimum and maximum to be used for later scaling.
470
+
471
+ Parameters
472
+ ----------
473
+ X : array-like, shape [n_samples, n_features]
474
+ The data used to compute the per-feature minimum and maximum
475
+ used for later scaling along the features axis.
476
+ """
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
+        if sparse.issparse(X):
+            mins, maxs = min_max_axis(X, axis=0)
+            scales = np.maximum(np.abs(mins), np.abs(maxs))
+        else:
+            scales = np.abs(X).max(axis=0)
+            scales = np.array(scales)
+            scales = scales.reshape(-1)
+        self.scale_ = _handle_zeros_in_scale(scales)
+        return self
+
+    def transform(self, X, y=None):
+        """Scale the data
+
+        Parameters
+        ----------
+        X : array-like or CSR matrix.
+            The data that should be scaled.
+        """
+        check_is_fitted(self, 'scale_')
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
+        if sparse.issparse(X):
+            if X.shape[0] == 1:
+                inplace_row_scale(X, 1.0 / self.scale_)
+            else:
+                inplace_column_scale(X, 1.0 / self.scale_)
+        else:
+            X /= self.scale_
+        return X
+
+    def inverse_transform(self, X):
+        """Scale back the data to the original representation
+
+        Parameters
+        ----------
+        X : array-like or CSR matrix.
+            The data that should be transformed back.
+        """
+        check_is_fitted(self, 'scale_')
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
+        if sparse.issparse(X):
+            if X.shape[0] == 1:
+                inplace_row_scale(X, self.scale_)
+            else:
+                inplace_column_scale(X, self.scale_)
+        else:
+            X *= self.scale_
+        return X
+
+
+def maxabs_scale(X, axis=0, copy=True):
+    """Scale each feature to the [-1, 1] range without breaking the sparsity.
+
+    This estimator scales each feature individually such
+    that the maximal absolute value of each feature in the
+    training set will be 1.0.
+
+    This scaler can also be applied to sparse CSR or CSC matrices.
+
+    Parameters
+    ----------
+    axis : int (0 by default)
+        axis used to scale along. If 0, independently scale each feature,
+        otherwise (if 1) scale each sample.
+
+    copy : boolean, optional, default is True
+        Set to False to perform inplace scaling and avoid a copy (if the input
+        is already a numpy array).
+    """
+    s = MaxAbsScaler(copy=copy)
+    if axis == 0:
+        return s.fit_transform(X)
+    else:
+        return s.fit_transform(X.T).T
+
+
 class RobustScaler(BaseEstimator, TransformerMixin):
     """Scale features using statistics that are robust to outliers.

@@ -498,28 +621,15 @@ def __init__(self, with_centering=True, with_scaling=True, copy=True):

     def _check_array(self, X, copy):
         """Makes sure centering is not enabled for sparse matrices."""
-        X = check_array(X, accept_sparse=('csr', 'csc'), dtype=np.float,
-                        copy=copy, ensure_2d=False)
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, estimator=self, dtype=FLOAT_DTYPES)
         if sparse.issparse(X):
             if self.with_centering:
                 raise ValueError(
                     "Cannot center sparse matrices: use `with_centering=False`"
                     " instead. See docstring for motivation and alternatives.")
         return X

-    def _handle_zeros_in_scale(self, scale):
-        '''Makes sure that whenever scale is zero, we handle it correctly.
-
-        This happens in most scalers when we have constant features.'''
-        # if we are fitting on 1D arrays, scale might be a scalar
-        if np.isscalar(scale):
-            if scale == 0:
-                scale = 1.
-        elif isinstance(scale, np.ndarray):
-            scale[scale == 0.0] = 1.0
-            scale[~np.isfinite(scale)] = 1.0
-        return scale
-
     def fit(self, X, y=None):
         """Compute the median and quantiles to be used for scaling.

@@ -539,12 +649,7 @@ def fit(self, X, y=None):
         if self.with_scaling:
             q = np.percentile(X, (25, 75), axis=0)
             self.scale_ = (q[1] - q[0])
-            if np.isscalar(self.scale_):
-                if self.scale_ == 0:
-                    self.scale_ = 1.
-            else:
-                self.scale_[self.scale_ == 0.0] = 1.0
-                self.scale_[~np.isfinite(self.scale_)] = 1.0
+            self.scale_ = _handle_zeros_in_scale(self.scale_)
         return self

     def transform(self, X, y=None):
@@ -847,7 +952,7 @@ def normalize(X, norm='l2', axis=1, copy=True):
             norms = row_norms(X)
         elif norm == 'max':
             norms = np.max(X, axis=1)
-        norms[norms == 0.0] = 1.0
+        norms = _handle_zeros_in_scale(norms)
         X /= norms[:, np.newaxis]

     if axis == 0:
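
Below is a short usage sketch, added for illustration and not part of the patch itself: it exercises the MaxAbsScaler class and maxabs_scale helper introduced above. The sklearn.preprocessing import path and the values in the comments are assumptions that follow from the code in this diff (each column is divided by its maximum absolute value).

import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import MaxAbsScaler, maxabs_scale

X = np.array([[1., -1.,  2.],
              [2.,  0.,  0.],
              [0.,  1., -1.]])

# fit learns one positive divisor per column: that column's max absolute value.
scaler = MaxAbsScaler()
X_scaled = scaler.fit_transform(X)
# scaler.scale_ should be [2., 1., 2.], so every entry of X_scaled lies in [-1, 1].

# inverse_transform multiplies the per-feature divisors back in.
X_back = scaler.inverse_transform(X_scaled)   # approximately equal to X

# Sparse input stays sparse because the scaler never shifts or centers the data.
X_sparse_scaled = MaxAbsScaler().fit_transform(sp.csr_matrix(X))

# One-shot functional form: axis=0 (default) scales features, axis=1 scales samples.
X_func = maxabs_scale(X)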
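
A second illustrative sketch (again not part of the diff) shows what the shared _handle_zeros_in_scale helper guards against: a constant feature has zero spread, so dividing by its scale would produce inf or nan, and the helper instead maps zero and non-finite scales to 1.0. MinMaxScaler is used here only as one example of a scaler that now routes its range through the helper.

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.array([[3., 0.],
              [3., 5.],
              [3., 10.]])    # first column is constant

X_mm = MinMaxScaler().fit_transform(X)
# The constant column has data_range == 0; the helper treats that range as 1.0,
# so the column is simply shifted to the lower bound of the feature range
# instead of triggering a division by zero.
# Expected result under the code above:
#   X_mm[:, 0] == [0., 0., 0.]   and   X_mm[:, 1] == [0., 0.5, 1.]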