4
4
# Andreas Mueller <[email protected] >
5
5
# License: BSD 3 clause
6
6
7
+ from itertools import chain , combinations
7
8
import numbers
8
- import warnings
9
- import itertools
10
9
11
10
import numpy as np
12
11
from scipy import sparse
20
19
from ..utils import safe_asarray
21
20
from ..utils import warn_if_not_float
22
21
from ..utils .extmath import row_norms
22
+ from ..utils .fixes import combinations_with_replacement as combinations_w_r
23
23
from ..utils .sparsefuncs_fast import inplace_csr_row_normalize_l1
24
24
from ..utils .sparsefuncs_fast import inplace_csr_row_normalize_l2
25
25
from ..utils .sparsefuncs import inplace_column_scale
26
26
from ..utils .sparsefuncs import mean_variance_axis0
27
27
28
28
zip = six .moves .zip
29
29
map = six .moves .map
30
+ range = six .moves .range
30
31
31
32
__all__ = [
32
33
'Binarizer' ,
@@ -389,7 +390,7 @@ def inverse_transform(self, X, copy=None):
389
390
390
391
391
392
class PolynomialFeatures (BaseEstimator , TransformerMixin ):
392
- """Generate polynomial ( interaction) features.
393
+ """Generate polynomial and interaction features.
393
394
394
395
Generate a new feature matrix consisting of all polynomial combinations
395
396
of the features with degree less than or equal to the specified degree.
@@ -400,7 +401,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
400
401
----------
401
402
degree : integer
402
403
The degree of the polynomial features. Default = 2.
403
- include_bias : integer
404
+ interaction_only : boolean, default = False
405
+ If true, only interaction features are produced: features that are
406
+ products of at most ``degree`` *distinct* input features (so not
407
+ ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
408
+ include_bias : boolean
404
409
If True (default), then include a bias column, the feature in which
405
410
all polynomial powers are zero (i.e. a column of ones - acts as an
406
411
intercept term in a linear model).
@@ -417,6 +422,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
417
422
array([[ 1, 0, 1, 0, 0, 1],
418
423
[ 1, 2, 3, 4, 6, 9],
419
424
[ 1, 4, 5, 16, 20, 25]])
425
+ >>> poly = PolynomialFeatures(interaction_only=True)
426
+ >>> poly.fit_transform(X)
427
+ array([[ 1, 0, 1, 0],
428
+ [ 1, 2, 3, 6],
429
+ [ 1, 4, 5, 20]])
420
430
421
431
Attributes
422
432
----------
@@ -427,36 +437,34 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
427
437
Notes
428
438
-----
429
439
Be aware that the number of features in the output array scales
430
- exponentially in the number of features of the input array, so this
431
- is not suitable for higher-dimensional data .
440
+ polynomially in the number of features of the input array, and
441
+ exponentially in the degree. High degrees can cause overfitting .
432
442
433
443
See :ref:`examples/plot_polynomial_regression.py
434
444
<example_plot_polynomial_regression.py>`
435
445
"""
436
- def __init__ (self , degree = 2 , include_bias = True ):
446
+ def __init__ (self , degree = 2 , interaction_only = False , include_bias = True ):
437
447
self .degree = degree
448
+ self .interaction_only = interaction_only
438
449
self .include_bias = include_bias
439
450
440
451
@staticmethod
441
- def _power_matrix (n_features , degree , include_bias ):
452
+ def _power_matrix (n_features , degree , interaction_only , include_bias ):
442
453
"""Compute the matrix of polynomial powers"""
443
- # Find permutations/combinations which add to degree or less
444
- deg_min = 0 if include_bias else 1
445
- powers = itertools .product (* (range (degree + 1 )
446
- for i in range (n_features )))
447
- powers = np .array ([c for c in powers if deg_min <= sum (c ) <= degree ])
448
-
449
- # sort so that the order of the powers makes sense
450
- i = np .lexsort (np .vstack ([powers .T , powers .sum (1 )]))
451
- return powers [i ]
454
+ comb = (combinations if interaction_only else combinations_w_r )
455
+ start = int (not include_bias )
456
+ combn = chain .from_iterable (comb (range (n_features ), i )
457
+ for i in range (start , degree + 1 ))
458
+ powers = np .vstack (np .bincount (c , minlength = n_features ) for c in combn )
459
+ return powers
452
460
453
461
def fit(self, X, y=None):
    """
    Compute the polynomial feature combinations

    The feature count is read from the second dimension of
    ``array2d(X)``; the resulting power matrix is stored on the
    estimator as ``powers_``. ``y`` is accepted but not used.
    Returns the estimator itself.
    """
    # Unpacking two values keeps the implicit requirement that the
    # converted input has exactly two dimensions.
    _, n_features = array2d(X).shape
    self.powers_ = self._power_matrix(
        n_features,
        self.degree,
        self.interaction_only,
        self.include_bias,
    )
    return self
462
470
0 commit comments