diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
index f54860c7d03fc..11aed220f5175 100644
--- a/doc/modules/linear_model.rst
+++ b/doc/modules/linear_model.rst
@@ -795,7 +795,7 @@ performance.
 
 .. _polynomial_regression:
 
-Polynomial Regression: Extending Linear Models with Basis Functions
+Polynomial regression: extending linear models with basis functions
 ===================================================================
 
 .. currentmodule:: sklearn.preprocessing
@@ -842,7 +842,7 @@ polynomial features of varying degrees:
 
 This figure is created using the :class:`PolynomialFeatures` preprocessor.
 This preprocessor transforms an input data matrix into a new data matrix
-of a given degree. It can be used as follows:
+of a given degree. It can be used as follows::
 
     >>> from sklearn.preprocessing import PolynomialFeatures
     >>> import numpy as np
@@ -863,7 +863,7 @@ any linear model.
 
 This sort of preprocessing can be streamlined with the
 :ref:`Pipeline <pipeline>` tools. A single object representing a simple
-polynomial regression can be created and used as follows:
+polynomial regression can be created and used as follows::
 
     >>> from sklearn.preprocessing import PolynomialFeatures
     >>> from sklearn.linear_model import LinearRegression
@@ -879,3 +879,28 @@ polynomial regression can be created and used as follows:
 
 The linear model trained on polynomial features is able to exactly recover
 the input polynomial coefficients.
+
+In some cases it's not necessary to include higher powers of any single feature,
+but only the so-called *interaction features*,
+which multiply together at most :math:`d` distinct features.
+These can be obtained from :class:`PolynomialFeatures` with the setting
+``interaction_only=True``.
+
+For example, when dealing with boolean features,
+:math:`x_i^n = x_i` for all :math:`n > 0`, so higher powers are useless;
+but :math:`x_i x_j` represents the conjunction of two booleans.
+This way, we can solve the XOR problem with a linear classifier::
+
+    >>> from sklearn.linear_model import Perceptron
+    >>> from sklearn.preprocessing import PolynomialFeatures
+    >>> X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
+    >>> y = X[:, 0] ^ X[:, 1]
+    >>> X = PolynomialFeatures(interaction_only=True).fit_transform(X)
+    >>> X
+    array([[1, 0, 0, 0],
+           [1, 0, 1, 0],
+           [1, 1, 0, 0],
+           [1, 1, 1, 1]])
+    >>> clf = Perceptron(fit_intercept=False, n_iter=10).fit(X, y)
+    >>> clf.score(X, y)
+    1.0
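
A note for reviewers, not part of the patch: the column counts in the doctests
above have a simple closed form, so they are easy to sanity-check by counting
monomials directly. A minimal sketch, assuming Python 3.8+ for ``math.comb``
(the helper name ``n_output_features`` is hypothetical, for illustration only):

    from math import comb  # binomial coefficient, Python 3.8+

    def n_output_features(n_features, degree, interaction_only=False):
        # With include_bias=True, PolynomialFeatures emits one column per
        # generated monomial.
        if interaction_only:
            # one column per subset of at most `degree` distinct features
            return sum(comb(n_features, i) for i in range(degree + 1))
        # stars and bars: monomials of total degree <= `degree` in
        # `n_features` variables
        return comb(n_features + degree, degree)

    print(n_output_features(2, 2))        # 6 columns for the full expansion
    print(n_output_features(2, 2, True))  # 4 columns, as in the XOR doctest
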
diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 3ce2c74e7d77b..85646b4a12ae4 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -4,9 +4,8 @@
 #          Andreas Mueller
 # License: BSD 3 clause
 
+from itertools import chain, combinations
 import numbers
-import warnings
-import itertools
 
 import numpy as np
 from scipy import sparse
@@ -20,6 +19,7 @@
 from ..utils import safe_asarray
 from ..utils import warn_if_not_float
 from ..utils.extmath import row_norms
+from ..utils.fixes import combinations_with_replacement as combinations_w_r
 from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
 from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
 from ..utils.sparsefuncs import inplace_column_scale
@@ -27,6 +27,7 @@
 
 zip = six.moves.zip
 map = six.moves.map
+range = six.moves.range
 
 __all__ = [
     'Binarizer',
@@ -389,7 +390,7 @@ def inverse_transform(self, X, copy=None):
 
 
 class PolynomialFeatures(BaseEstimator, TransformerMixin):
-    """Generate polynomial (interaction) features.
+    """Generate polynomial and interaction features.
 
     Generate a new feature matrix consisting of all polynomial combinations
     of the features with degree less than or equal to the specified degree.
@@ -400,7 +401,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
     ----------
     degree : integer
         The degree of the polynomial features. Default = 2.
-    include_bias : integer
+    interaction_only : boolean, default = False
+        If True, only interaction features are produced: features that are
+        products of at most ``degree`` *distinct* input features (so not
+        ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
+    include_bias : boolean
        If True (default), then include a bias column, the feature in which
        all polynomial powers are zero (i.e. a column of ones - acts as an
        intercept term in a linear model).
@@ -417,6 +422,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
     array([[ 1,  0,  1,  0,  0,  1],
            [ 1,  2,  3,  4,  6,  9],
            [ 1,  4,  5, 16, 20, 25]])
+    >>> poly = PolynomialFeatures(interaction_only=True)
+    >>> poly.fit_transform(X)
+    array([[ 1,  0,  1,  0],
+           [ 1,  2,  3,  6],
+           [ 1,  4,  5, 20]])
 
     Attributes
     ----------
@@ -427,36 +437,34 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
     Notes
     -----
     Be aware that the number of features in the output array scales
-    exponentially in the number of features of the input array, so this
-    is not suitable for higher-dimensional data.
+    polynomially in the number of features of the input array, and
+    exponentially in the degree. High degrees can cause overfitting.
 
     See :ref:`examples/plot_polynomial_regression.py
     <example_plot_polynomial_regression.py>`
     """
-    def __init__(self, degree=2, include_bias=True):
+    def __init__(self, degree=2, interaction_only=False, include_bias=True):
         self.degree = degree
+        self.interaction_only = interaction_only
         self.include_bias = include_bias
 
     @staticmethod
-    def _power_matrix(n_features, degree, include_bias):
+    def _power_matrix(n_features, degree, interaction_only, include_bias):
         """Compute the matrix of polynomial powers"""
-        # Find permutations/combinations which add to degree or less
-        deg_min = 0 if include_bias else 1
-        powers = itertools.product(*(range(degree + 1)
-                                     for i in range(n_features)))
-        powers = np.array([c for c in powers if deg_min <= sum(c) <= degree])
-
-        # sort so that the order of the powers makes sense
-        i = np.lexsort(np.vstack([powers.T, powers.sum(1)]))
-        return powers[i]
+        comb = (combinations if interaction_only else combinations_w_r)
+        start = int(not include_bias)
+        combn = chain.from_iterable(comb(range(n_features), i)
+                                    for i in range(start, degree + 1))
+        powers = np.vstack([np.bincount(c, minlength=n_features) for c in combn])
+        return powers
 
     def fit(self, X, y=None):
         """
         Compute the polynomial feature combinations
         """
         n_samples, n_features = array2d(X).shape
-        self.powers_ = self._power_matrix(n_features,
-                                          self.degree,
+        self.powers_ = self._power_matrix(n_features, self.degree,
+                                          self.interaction_only,
                                           self.include_bias)
         return self
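
Another note for reviewers, not part of the patch: the new ``_power_matrix``
is much easier to follow than the old product/filter/lexsort version once you
see its output. A standalone sketch of the same combinations-plus-bincount
idea (``power_matrix`` is a hypothetical free function mirroring the method,
using stdlib ``combinations_with_replacement`` instead of the ``fixes`` alias):

    from itertools import chain, combinations
    from itertools import combinations_with_replacement as cwr

    import numpy as np

    def power_matrix(n_features, degree, interaction_only=False,
                     include_bias=True):
        # Each tuple of feature indices denotes one output monomial;
        # np.bincount turns an index tuple such as (0, 0) into the exponent
        # vector [2, 0], i.e. x0 ** 2; the empty tuple becomes the bias row.
        comb = combinations if interaction_only else cwr
        start = int(not include_bias)
        combn = chain.from_iterable(comb(range(n_features), i)
                                    for i in range(start, degree + 1))
        return np.vstack([np.bincount(c, minlength=n_features) for c in combn])

    print(power_matrix(2, 2))
    # [[0 0]   -> 1 (bias column)
    #  [1 0]   -> x0
    #  [0 1]   -> x1
    #  [2 0]   -> x0 ** 2
    #  [1 1]   -> x0 * x1
    #  [0 2]]  -> x1 ** 2

Iterating combinations in order of increasing size also reproduces the
degree-sorted column order of the old lexsort, so the doctests above keep
their column layout ``[1, x0, x1, x0**2, x0*x1, x1**2]``.
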
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index 4ee3dcfb148fb..0c7b2628b1708 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -172,3 +172,27 @@ def sparse_min_max(X, axis):
 # numpy.argpartition was introduced in v 1.8.0
 def argpartition(a, kth, axis=-1, kind='introselect', order=None):
     return np.argsort(a, axis=axis, order=order)
+
+
+try:
+    from itertools import combinations_with_replacement
+except ImportError:
+    # Backport of itertools.combinations_with_replacement for Python 2.6,
+    # from Python 3.4 documentation (http://tinyurl.com/comb-w-r), copyright
+    # Python Software Foundation (https://docs.python.org/3/license.html).
+    def combinations_with_replacement(iterable, r):
+        # combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
+        pool = tuple(iterable)
+        n = len(pool)
+        if not n and r:
+            return
+        indices = [0] * r
+        yield tuple(pool[i] for i in indices)
+        while True:
+            for i in reversed(range(r)):
+                if indices[i] != n - 1:
+                    break
+            else:
+                return
+            indices[i:] = [indices[i] + 1] * (r - i)
+            yield tuple(pool[i] for i in indices)
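
Final reviewer note, not part of the patch: on Python 2.7+ the ``try`` block
above simply re-exports the stdlib function, so the pure-Python generator only
runs on 2.6; the backport can therefore be sanity-checked against the stdlib
wherever the latter exists. A quick check sketch, importing the name this
patch adds to ``sklearn.utils.fixes``:

    from itertools import combinations_with_replacement as cwr_stdlib

    from sklearn.utils.fixes import combinations_with_replacement as cwr_fixes

    # Edge cases: an empty pool with r > 0 yields nothing; r == 0 yields the
    # single empty tuple; otherwise output is in lexicographic index order.
    for pool, r in [('ABC', 2), ('', 1), ('AB', 0), (range(4), 3)]:
        assert list(cwr_fixes(pool, r)) == list(cwr_stdlib(pool, r))

    print(list(cwr_fixes('ABC', 2)))
    # [('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]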