Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit de42d69

Browse files
committed
ENH interaction_only in PolynomialFeatures
1 parent f425134 commit de42d69

File tree

2 files changed

+48
-12
lines changed

2 files changed

+48
-12
lines changed

doc/modules/linear_model.rst

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,7 @@ performance.
795795

796796
.. _polynomial_regression:
797797

798-
Polynomial Regression: Extending Linear Models with Basis Functions
798+
Polynomial regression: extending linear models with basis functions
799799
===================================================================
800800

801801
.. currentmodule:: sklearn.preprocessing
@@ -842,7 +842,7 @@ polynomial features of varying degrees:
842842

843843
This figure is created using the :class:`PolynomialFeatures` preprocessor.
844844
This preprocessor transforms an input data matrix into a new data matrix
845-
of a given degree. It can be used as follows:
845+
of a given degree. It can be used as follows::
846846

847847
>>> from sklearn.preprocessing import PolynomialFeatures
848848
>>> import numpy as np
@@ -863,7 +863,7 @@ any linear model.
863863

864864
This sort of preprocessing can be streamlined with the
865865
:ref:`Pipeline <pipeline>` tools. A single object representing a simple
866-
polynomial regression can be created and used as follows:
866+
polynomial regression can be created and used as follows::
867867

868868
>>> from sklearn.preprocessing import PolynomialFeatures
869869
>>> from sklearn.linear_model import LinearRegression
@@ -879,3 +879,28 @@ polynomial regression can be created and used as follows:
879879

880880
The linear model trained on polynomial features is able to exactly recover
881881
the input polynomial coefficients.
882+
883+
In some cases it's not necessary to include higher powers of any single feature,
884+
but only the so-called *interaction features*
885+
that multiply together at most :math:`d` distinct features.
886+
These can be obtained from :class:`PolynomialFeatures` with the setting
887+
``interaction_only=True``.
888+
889+
For example, when dealing with boolean features,
890+
:math:`x_i^n = x_i` for all :math:`n \geq 1`, so higher powers add no new information;
891+
but :math:`x_i x_j` represents the conjunction of two booleans.
892+
This way, we can solve the XOR problem with a linear classifier::
893+
894+
>>> from sklearn.linear_model import Perceptron
895+
>>> from sklearn.preprocessing import PolynomialFeatures
896+
>>> X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
897+
>>> y = X[:, 0] ^ X[:, 1]
898+
>>> X = PolynomialFeatures(interaction_only=True).fit_transform(X)
899+
>>> X
900+
array([[1, 0, 0, 0],
901+
[1, 0, 1, 0],
902+
[1, 1, 0, 0],
903+
[1, 1, 1, 1]])
904+
>>> clf = Perceptron(fit_intercept=False, n_iter=10).fit(X, y)
905+
>>> clf.score(X, y)
906+
1.0

sklearn/preprocessing/data.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Andreas Mueller <[email protected]>
55
# License: BSD 3 clause
66

7-
from itertools import chain
7+
from itertools import chain, combinations
88
import numbers
99

1010
import numpy as np
@@ -19,7 +19,7 @@
1919
from ..utils import safe_asarray
2020
from ..utils import warn_if_not_float
2121
from ..utils.extmath import row_norms
22-
from ..utils.fixes import combinations_with_replacement as comb_w_r
22+
from ..utils.fixes import combinations_with_replacement as combinations_w_r
2323
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
2424
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
2525
from ..utils.sparsefuncs import inplace_column_scale
@@ -390,7 +390,7 @@ def inverse_transform(self, X, copy=None):
390390

391391

392392
class PolynomialFeatures(BaseEstimator, TransformerMixin):
393-
"""Generate polynomial (interaction) features.
393+
"""Generate polynomial and interaction features.
394394
395395
Generate a new feature matrix consisting of all polynomial combinations
396396
of the features with degree less than or equal to the specified degree.
@@ -401,7 +401,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
401401
----------
402402
degree : integer
403403
The degree of the polynomial features. Default = 2.
404-
include_bias : integer
404+
interaction_only : boolean, default = False
405+
If true, only interaction features are produced: features that are
406+
products of at most ``degree`` *distinct* input features (so not
407+
``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
408+
include_bias : boolean
405409
If True (default), then include a bias column, the feature in which
406410
all polynomial powers are zero (i.e. a column of ones - acts as an
407411
intercept term in a linear model).
@@ -418,6 +422,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
418422
array([[ 1, 0, 1, 0, 0, 1],
419423
[ 1, 2, 3, 4, 6, 9],
420424
[ 1, 4, 5, 16, 20, 25]])
425+
>>> poly = PolynomialFeatures(interaction_only=True)
426+
>>> poly.fit_transform(X)
427+
array([[ 1, 0, 1, 0],
428+
[ 1, 2, 3, 6],
429+
[ 1, 4, 5, 20]])
421430
422431
Attributes
423432
----------
@@ -434,15 +443,17 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
434443
See :ref:`examples/plot_polynomial_regression.py
435444
<example_plot_polynomial_regression.py>`
436445
"""
437-
def __init__(self, degree=2, include_bias=True):
446+
def __init__(self, degree=2, interaction_only=False, include_bias=True):
438447
self.degree = degree
448+
self.interaction_only = interaction_only
439449
self.include_bias = include_bias
440450

441451
@staticmethod
442-
def _power_matrix(n_features, degree, include_bias):
452+
def _power_matrix(n_features, degree, interaction_only, include_bias):
443453
"""Compute the matrix of polynomial powers"""
454+
comb = (combinations if interaction_only else combinations_w_r)
444455
start = int(not include_bias)
445-
combn = chain.from_iterable(comb_w_r(range(n_features), i)
456+
combn = chain.from_iterable(comb(range(n_features), i)
446457
for i in range(start, degree + 1))
447458
powers = np.vstack(np.bincount(c, minlength=n_features) for c in combn)
448459
return powers
@@ -452,8 +463,8 @@ def fit(self, X, y=None):
452463
Compute the polynomial feature combinations
453464
"""
454465
n_samples, n_features = array2d(X).shape
455-
self.powers_ = self._power_matrix(n_features,
456-
self.degree,
466+
self.powers_ = self._power_matrix(n_features, self.degree,
467+
self.interaction_only,
457468
self.include_bias)
458469
return self
459470

0 commit comments

Comments
 (0)