Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f425134

Browse files
committed
ENH/DOC fix poly features complexity
Fixes scikit-learn#3191, scikit-learn#3194.
1 parent 96436df commit f425134

File tree

2 files changed

+34
-13
lines changed

2 files changed

+34
-13
lines changed

sklearn/preprocessing/data.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
# Andreas Mueller <[email protected]>
55
# License: BSD 3 clause
66

7+
from itertools import chain
78
import numbers
8-
import warnings
9-
import itertools
109

1110
import numpy as np
1211
from scipy import sparse
@@ -20,13 +19,15 @@
2019
from ..utils import safe_asarray
2120
from ..utils import warn_if_not_float
2221
from ..utils.extmath import row_norms
22+
from ..utils.fixes import combinations_with_replacement as comb_w_r
2323
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
2424
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
2525
from ..utils.sparsefuncs import inplace_column_scale
2626
from ..utils.sparsefuncs import mean_variance_axis0
2727

2828
zip = six.moves.zip
2929
map = six.moves.map
30+
range = six.moves.range
3031

3132
__all__ = [
3233
'Binarizer',
@@ -427,8 +428,8 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
427428
Notes
428429
-----
429430
Be aware that the number of features in the output array scales
430-
exponentially in the number of features of the input array, so this
431-
is not suitable for higher-dimensional data.
431+
polynomially in the number of features of the input array, and
432+
exponentially in the degree. High degrees can cause overfitting.
432433
433434
See :ref:`examples/plot_polynomial_regression.py
434435
<example_plot_polynomial_regression.py>`
@@ -440,15 +441,11 @@ def __init__(self, degree=2, include_bias=True):
440441
@staticmethod
def _power_matrix(n_features, degree, include_bias):
    """Compute the matrix of polynomial powers.

    Each row of the result is the exponent vector of one monomial: entry
    ``j`` is the power to which input feature ``j`` is raised.  Rows are
    grouped by increasing total degree; within one degree they follow the
    order in which ``comb_w_r`` yields its combinations.

    Parameters
    ----------
    n_features : int
        Number of input features (columns of the result).
    degree : int
        Maximum total degree of the generated monomials.
    include_bias : bool
        If true, generation starts at total degree 0, so the all-zero row
        (the constant term) is included; otherwise it starts at degree 1.

    Returns
    -------
    powers : ndarray
        One row per monomial, ``n_features`` columns.
    """
    start = int(not include_bias)
    # A combination with replacement of feature indices, e.g. (0, 0, 2),
    # names the monomial x0 * x0 * x2; counting how often each index occurs
    # turns it into the exponent vector [2, 0, 1].
    combn = chain.from_iterable(comb_w_r(range(n_features), i)
                                for i in range(start, degree + 1))
    # np.vstack needs a real sequence: handing it a generator is deprecated
    # since NumPy 1.16 and rejected by later releases.
    powers = np.vstack([np.bincount(c, minlength=n_features) for c in combn])
    return powers
452449

453450
def fit(self, X, y=None):
454451
"""

sklearn/utils/fixes.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,27 @@ def sparse_min_max(X, axis):
172172
# numpy.argpartition was introduced in v 1.8.0
173173
def argpartition(a, kth, axis=-1, kind='introselect', order=None):
    """Stand-in for ``numpy.argpartition`` based on a full argsort.

    ``kth`` and ``kind`` are accepted for signature compatibility but are
    not used: the indices of a complete sort along ``axis`` are returned,
    which is a valid (if more expensive) partition for every ``kth``.

    Parameters
    ----------
    a : array_like
        Array to sort.
    kth : int or sequence of ints
        Ignored.
    axis : int, optional
        Axis along which to sort, forwarded to ``np.argsort``.
    kind : str, optional
        Ignored.
    order : str or list of str, optional
        Field order for structured arrays, forwarded to ``np.argsort``.

    Returns
    -------
    index_array : ndarray
        Result of ``np.argsort(a, axis=axis, order=order)``.
    """
    return np.argsort(a, axis=axis, order=order)
175+
176+
177+
try:
    from itertools import combinations_with_replacement
except ImportError:
    # Backport of itertools.combinations_with_replacement for Python 2.6,
    # from Python 3.4 documentation (http://tinyurl.com/comb-w-r), copyright
    # Python Software Foundation (https://docs.python.org/3/license.html)
    def combinations_with_replacement(iterable, r):
        """Yield ``r``-length tuples of elements, allowing repeats.

        Tuples are emitted in lexicographic order of the positions of
        their elements in ``iterable``, e.g.
        combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC

        Parameters
        ----------
        iterable : iterable
            Source of elements; it is consumed once into a tuple.
        r : int
            Length of each emitted tuple.

        Raises
        ------
        ValueError
            If ``r`` is negative, as ``itertools`` does.  Because this
            fallback is a generator, the error surfaces on the first
            iteration rather than at call time.
        """
        if r < 0:
            raise ValueError("r must be non-negative")
        pool = tuple(iterable)
        n = len(pool)
        # Nothing can be drawn from an empty pool unless r == 0.
        if not n and r:
            return
        indices = [0] * r
        yield tuple(pool[i] for i in indices)
        while True:
            # Find the rightmost position that can still be advanced.
            for i in reversed(range(r)):
                if indices[i] != n - 1:
                    break
            else:
                # Every position is at the last element: enumeration done.
                return
            # Advance it and reset everything to its right to the same
            # value, which keeps the indices non-decreasing.
            indices[i:] = [indices[i] + 1] * (r - i)
            yield tuple(pool[i] for i in indices)

0 commit comments

Comments
 (0)