4
4
# Andreas Mueller <[email protected] >
5
5
# License: BSD 3 clause
6
6
7
+ from itertools import chain
7
8
import numbers
8
- import warnings
9
- import itertools
10
9
11
10
import numpy as np
12
11
from scipy import sparse
20
19
from ..utils import safe_asarray
21
20
from ..utils import warn_if_not_float
22
21
from ..utils .extmath import row_norms
22
+ from ..utils .fixes import combinations_with_replacement as comb_w_r
23
23
from ..utils .sparsefuncs_fast import inplace_csr_row_normalize_l1
24
24
from ..utils .sparsefuncs_fast import inplace_csr_row_normalize_l2
25
25
from ..utils .sparsefuncs import inplace_column_scale
26
26
from ..utils .sparsefuncs import mean_variance_axis0
27
27
28
28
zip = six .moves .zip
29
29
map = six .moves .map
30
+ range = six .moves .range
30
31
31
32
__all__ = [
32
33
'Binarizer' ,
@@ -427,8 +428,8 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
427
428
Notes
428
429
-----
429
430
Be aware that the number of features in the output array scales
430
- exponentially in the number of features of the input array, so this
431
- is not suitable for higher-dimensional data .
431
+ polynomially in the number of features of the input array, and
432
+ exponentially in the degree. High degrees can cause overfitting .
432
433
433
434
See :ref:`examples/plot_polynomial_regression.py
434
435
<example_plot_polynomial_regression.py>`
@@ -440,15 +441,11 @@ def __init__(self, degree=2, include_bias=True):
440
441
@staticmethod
def _power_matrix(n_features, degree, include_bias):
    """Compute the matrix of polynomial powers.

    Parameters
    ----------
    n_features : int
        Number of input features.

    degree : int
        Maximum total degree of the generated terms.

    include_bias : bool
        If True, the all-zero row (total degree 0) is emitted first.

    Returns
    -------
    powers : ndarray, shape (n_terms, n_features)
        ``powers[t, j]`` is the exponent of input feature ``j`` in term
        ``t``. Rows are grouped by increasing total degree, from 0 (or 1
        when ``include_bias`` is False) up to ``degree``.
    """
    # Total degree 0 is the bias term; skip it when no bias is requested.
    start = int(not include_bias)
    # Each combination with replacement of feature indices of size i
    # picks the factors of one term of total degree i.
    combn = chain.from_iterable(comb_w_r(range(n_features), i)
                                for i in range(start, degree + 1))
    # Counting how often each index occurs yields the per-feature exponent.
    # Build a list first: np.vstack needs a sequence, not a generator.
    powers = np.vstack([np.bincount(c, minlength=n_features)
                        for c in combn])
    return powers
452
449
453
450
def fit (self , X , y = None ):
454
451
"""
0 commit comments