    'Binarizer',
    'KernelCenterer',
    'MinMaxScaler',
+    'MaxAbsScaler',
    'Normalizer',
    'OneHotEncoder',
    'RobustScaler',
    'normalize',
    'scale',
    'robust_scale',
+    'maxabs_scale',
]
@@ -59,16 +61,28 @@ def _mean_and_std(X, axis=0, with_mean=True, with_std=True):
    if with_std:
        std_ = Xr.std(axis=0)
-        if isinstance(std_, np.ndarray):
-            std_[std_ == 0.] = 1.0
-        elif std_ == 0.:
-            std_ = 1.
+        std_ = _handle_zeros_in_scale(std_)
    else:
        std_ = None

    return mean_, std_


+def _handle_zeros_in_scale(scale):
+    '''Makes sure that whenever scale is zero, we handle it correctly.
+
+    This happens in most scalers when we have constant features.'''
+
+    # if we are fitting on 1D arrays, scale might be a scalar
+    if np.isscalar(scale):
+        if scale == 0:
+            scale = 1.
+    elif isinstance(scale, np.ndarray):
+        scale[scale == 0.0] = 1.0
+        scale[~np.isfinite(scale)] = 1.0
+    return scale
+
+
def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
    """Standardize a dataset along any axis
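A quick, purely illustrative sketch (not part of the diff) of what the new `_handle_zeros_in_scale` helper does: zero and non-finite scale entries are replaced by 1.0, so constant features pass through unchanged instead of producing NaN or inf on division. The example duplicates the helper's body exactly as added above.

    import numpy as np

    def _handle_zeros_in_scale(scale):
        # same logic as the helper introduced in the hunk above
        if np.isscalar(scale):
            if scale == 0:
                scale = 1.
        elif isinstance(scale, np.ndarray):
            scale[scale == 0.0] = 1.0
            scale[~np.isfinite(scale)] = 1.0
        return scale

    std_ = np.array([2.0, 0.0, np.inf])
    print(_handle_zeros_in_scale(std_))   # -> [2., 1., 1.]: zero/non-finite scales become 1
    print(_handle_zeros_in_scale(0.0))    # -> 1.0: scalar case for 1D input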
@@ -132,7 +146,7 @@ def scale(X, axis=0, with_mean=True, with_std=True, copy=True):
        if copy:
            X = X.copy()
        _, var = mean_variance_axis(X, axis=0)
-        var[var == 0.0] = 1.0
+        var = _handle_zeros_in_scale(var)
        inplace_column_scale(X, 1 / np.sqrt(var))
    else:
        X = np.asarray(X)
@@ -233,11 +247,7 @@ def fit(self, X, y=None):
                             " than maximum. Got %s." % str(feature_range))
        data_min = np.min(X, axis=0)
        data_range = np.max(X, axis=0) - data_min
-        # Do not scale constant features
-        if isinstance(data_range, np.ndarray):
-            data_range[data_range == 0.0] = 1.0
-        elif data_range == 0.:
-            data_range = 1.
+        data_range = _handle_zeros_in_scale(data_range)
        self.scale_ = (feature_range[1] - feature_range[0]) / data_range
        self.min_ = feature_range[0] - data_min * self.scale_
        self.data_range = data_range
@@ -359,7 +369,7 @@ def fit(self, X, y=None):
            if self.with_std:
                var = mean_variance_axis(X, axis=0)[1]
                self.std_ = np.sqrt(var)
-                self.std_[var == 0.0] = 1.0
+                self.std_ = _handle_zeros_in_scale(self.std_)
            else:
                self.std_ = None
            return self
@@ -430,6 +440,119 @@ def inverse_transform(self, X, copy=None):
        return X


+class MaxAbsScaler(BaseEstimator, TransformerMixin):
+    """Scale each feature to the [-1, 1] range without breaking the sparsity.
+
+    This estimator scales and translates each feature individually such
+    that the maximal absolute value of each feature in the
+    training set will be 1.0.
+
+    This scaler can also be applied to sparse CSR or CSC matrices.
+
+    Parameters
+    ----------
+    copy : boolean, optional, default is True
+        Set to False to perform inplace scaling and avoid a copy (if the input
+        is already a numpy array).
+
+    Attributes
+    ----------
+    `scale_` : ndarray, shape (n_features,)
+        Per feature relative scaling of the data.
+    """
+
+    def __init__(self, copy=True):
+        self.copy = copy
+
+    def fit(self, X, y=None):
+        """Compute the maximum absolute value to be used for later scaling.
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+            The data used to compute the per-feature maximum absolute value
+            used for later scaling along the features axis.
+        """
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, warn_on_dtype=True,
+                        estimator=self, dtype=FLOAT_DTYPES)
+        if sparse.issparse(X):
+            mins, maxs = min_max_axis(X, axis=0)
+            scales = np.maximum(np.abs(mins), np.abs(maxs))
+        else:
+            scales = np.abs(X).max(axis=0)
+        scales = np.array(scales)
+        scales = scales.reshape(-1)
+        self.scale_ = _handle_zeros_in_scale(scales)
+        return self
+
+    def transform(self, X, y=None):
+        """Scale the data
+
+        Parameters
+        ----------
+        X : array-like or CSR matrix.
+            The data that should be scaled.
+        """
+        check_is_fitted(self, 'scale_')
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, warn_on_dtype=True,
+                        estimator=self, dtype=FLOAT_DTYPES)
+        if sparse.issparse(X):
+            if X.shape[0] == 1:
+                inplace_row_scale(X, 1.0 / self.scale_)
+            else:
+                # MaxAbsScaler has no `axis` attribute, so always scale columns here
+                inplace_column_scale(X, 1.0 / self.scale_)
+        else:
+            X /= self.scale_
+        return X
+
+    def inverse_transform(self, X):
+        """Scale back the data to the original representation
+
+        Parameters
+        ----------
+        X : array-like or CSR matrix.
+            The data that should be transformed back.
+        """
+        check_is_fitted(self, 'scale_')
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, warn_on_dtype=True,
+                        estimator=self, dtype=FLOAT_DTYPES)
+        if sparse.issparse(X):
+            if X.shape[0] == 1:
+                inplace_row_scale(X, self.scale_)
+            else:
+                inplace_column_scale(X, self.scale_)
+        else:
+            X *= self.scale_
+        return X
+
+
+def maxabs_scale(X, copy=True):
+    """Scale each feature to the [-1, 1] range without breaking the sparsity.
+
+    This estimator scales and translates each feature individually such
+    that the maximal absolute value of each feature in the
+    training set will be 1.0.
+
+    This scaler can also be applied to sparse CSR or CSC matrices.
+
+    Parameters
+    ----------
+    X : array-like or CSR/CSC matrix.
+        The data that should be scaled.
+
+    copy : boolean, optional, default is True
+        Set to False to perform inplace scaling and avoid a copy (if the input
+        is already a numpy array).
+
+    Attributes
+    ----------
+    `scale_` : ndarray, shape (n_features,)
+        Per feature relative scaling of the data.
+    """
+    s = MaxAbsScaler(copy=copy)
+    return s.fit_transform(X)
+
+
class RobustScaler(BaseEstimator, TransformerMixin):
    """Scale features using statistics that are robust to outliers.
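As a rough usage sketch (not part of the diff, and assuming a build of scikit-learn that contains this branch), the new scaler and its functional shortcut behave as follows; the all-zero column exercises the `_handle_zeros_in_scale` path:

    import numpy as np
    from scipy import sparse
    from sklearn.preprocessing import MaxAbsScaler, maxabs_scale

    X = np.array([[1., -2., 0.],
                  [2.,  0., 0.],
                  [4.,  1., 0.]])

    scaler = MaxAbsScaler()
    X_scaled = scaler.fit_transform(X)
    print(scaler.scale_)                  # [4., 2., 1.]: the zero-scale column was reset to 1
    print(np.abs(X_scaled).max(axis=0))   # each feature now lies within [-1, 1]

    # the functional shortcut gives the same result
    assert np.allclose(maxabs_scale(X), X_scaled)

    # sparse input keeps its sparsity pattern (columns are scaled in place)
    X_csr = sparse.csr_matrix(X)
    print(MaxAbsScaler().fit_transform(X_csr).toarray())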
@@ -498,28 +621,16 @@ def __init__(self, with_centering=True, with_scaling=True, copy=True):
    def _check_array(self, X, copy):
        """Makes sure centering is not enabled for sparse matrices."""
-        X = check_array(X, accept_sparse=('csr', 'csc'), dtype=np.float,
-                        copy=copy, ensure_2d=False)
+        X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
+                        ensure_2d=False, warn_on_dtype=True,
+                        estimator=self, dtype=FLOAT_DTYPES)
        if sparse.issparse(X):
            if self.with_centering:
                raise ValueError(
                    "Cannot center sparse matrices: use `with_centering=False`"
                    " instead. See docstring for motivation and alternatives.")
        return X

-    def _handle_zeros_in_scale(self, scale):
-        ''' Makes sure that whenever scale is zero, we handle it correctly.
-
-        This happens in most scalers when we have constant features.'''
-        # if we are fitting on 1D arrays, scale might be a scalar
-        if np.isscalar(scale):
-            if scale == 0:
-                scale = 1.
-        elif isinstance(scale, np.ndarray):
-            scale[scale == 0.0] = 1.0
-            scale[~np.isfinite(scale)] = 1.0
-        return scale
-
    def fit(self, X, y=None):
        """Compute the median and quantiles to be used for scaling.
@@ -539,12 +650,7 @@ def fit(self, X, y=None):
        if self.with_scaling:
            q = np.percentile(X, (25, 75), axis=0)
            self.scale_ = (q[1] - q[0])
-            if np.isscalar(self.scale_):
-                if self.scale_ == 0:
-                    self.scale_ = 1.
-            else:
-                self.scale_[self.scale_ == 0.0] = 1.0
-                self.scale_[~np.isfinite(self.scale_)] = 1.0
+            self.scale_ = _handle_zeros_in_scale(self.scale_)
        return self

    def transform(self, X, y=None):
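For completeness, a small sketch (not part of the diff) of why routing `RobustScaler` through the shared helper matters: a constant feature has zero interquartile range, and without the reset to 1.0 the transform would divide by zero.

    import numpy as np
    from sklearn.preprocessing import RobustScaler

    X = np.array([[1., 5.],
                  [2., 5.],
                  [3., 5.],
                  [4., 5.]])            # second feature is constant

    scaler = RobustScaler().fit(X)
    print(scaler.scale_)                # [1.5, 1.]: the zero IQR was reset to 1
    print(scaler.transform(X)[:, 1])    # constant column becomes zeros, not NaN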