[MRG+1] compute poly features directly #3239

Merged: 2 commits, Jun 5, 2014
31 changes: 28 additions & 3 deletions doc/modules/linear_model.rst
@@ -795,7 +795,7 @@ performance.

.. _polynomial_regression:

Polynomial Regression: Extending Linear Models with Basis Functions
Polynomial regression: extending linear models with basis functions
===================================================================

.. currentmodule:: sklearn.preprocessing
@@ -842,7 +842,7 @@ polynomial features of varying degrees:

This figure is created using the :class:`PolynomialFeatures` preprocessor.
This preprocessor transforms an input data matrix into a new data matrix
of a given degree. It can be used as follows:
of a given degree. It can be used as follows::

>>> from sklearn.preprocessing import PolynomialFeatures
>>> import numpy as np
@@ -863,7 +863,7 @@ any linear model.

This sort of preprocessing can be streamlined with the
:ref:`Pipeline <pipeline>` tools. A single object representing a simple
polynomial regression can be created and used as follows:
polynomial regression can be created and used as follows::

>>> from sklearn.preprocessing import PolynomialFeatures
>>> from sklearn.linear_model import LinearRegression
@@ -879,3 +879,28 @@ polynomial regression can be created and used as follows:

The linear model trained on polynomial features is able to exactly recover
the input polynomial coefficients.

In some cases it is unnecessary to include higher powers of any single
feature; only the so-called *interaction features*, which multiply together
at most :math:`d` distinct features, are needed.
These can be obtained from :class:`PolynomialFeatures` with the setting
``interaction_only=True``.

For example, when dealing with boolean features,
:math:`x_i^n = x_i` for all :math:`n \geq 1`, so higher powers are useless;
but the product :math:`x_i x_j` represents the conjunction of two booleans.
This way, we can solve the XOR problem with a linear classifier::

>>> from sklearn.linear_model import Perceptron
>>> from sklearn.preprocessing import PolynomialFeatures
>>> X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
>>> y = X[:, 0] ^ X[:, 1]
>>> X = PolynomialFeatures(interaction_only=True).fit_transform(X)
>>> X
array([[1, 0, 0, 0],
       [1, 0, 1, 0],
       [1, 1, 0, 0],
       [1, 1, 1, 1]])
>>> clf = Perceptron(fit_intercept=False, n_iter=10).fit(X, y)
>>> clf.score(X, y)
1.0
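
As a rough check on how many columns these settings generate, here is a
small counting sketch. It is our addition, not scikit-learn code;
``num_poly_features`` and ``n_choose_k`` are hypothetical helpers. The
counts follow directly from the construction: the default mode emits one
column per multiset of at most ``degree`` features, and
``interaction_only=True`` one column per subset of distinct features.

    from math import factorial

    def n_choose_k(n, k):
        # binomial coefficient; zero outside the valid range
        if k < 0 or k > n:
            return 0
        return factorial(n) // (factorial(k) * factorial(n - k))

    def num_poly_features(n_features, degree, interaction_only=False,
                          include_bias=True):
        # count the columns PolynomialFeatures would produce
        start = 0 if include_bias else 1
        if interaction_only:
            # one column per subset of at most `degree` distinct features
            return sum(n_choose_k(n_features, k)
                       for k in range(start, degree + 1))
        # one column per multiset of at most `degree` features
        # (combinations with replacement)
        return sum(n_choose_k(n_features + k - 1, k)
                   for k in range(start, degree + 1))

    # matches the two-feature, degree-2 examples: 4 columns in the XOR
    # doctest above, 6 columns in the default-mode docstring below
    assert num_poly_features(2, 2, interaction_only=True) == 4
    assert num_poly_features(2, 2) == 6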
46 changes: 27 additions & 19 deletions sklearn/preprocessing/data.py
@@ -4,9 +4,8 @@
# Andreas Mueller <[email protected]>
# License: BSD 3 clause

from itertools import chain, combinations
import numbers
import warnings
import itertools

import numpy as np
from scipy import sparse
@@ -20,13 +19,15 @@
from ..utils import safe_asarray
from ..utils import warn_if_not_float
from ..utils.extmath import row_norms
from ..utils.fixes import combinations_with_replacement as combinations_w_r
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
from ..utils.sparsefuncs_fast import inplace_csr_row_normalize_l2
from ..utils.sparsefuncs import inplace_column_scale
from ..utils.sparsefuncs import mean_variance_axis0

zip = six.moves.zip
map = six.moves.map
range = six.moves.range

__all__ = [
'Binarizer',
@@ -389,7 +390,7 @@ def inverse_transform(self, X, copy=None):


class PolynomialFeatures(BaseEstimator, TransformerMixin):
"""Generate polynomial (interaction) features.
"""Generate polynomial and interaction features.

Generate a new feature matrix consisting of all polynomial combinations
of the features with degree less than or equal to the specified degree.
@@ -400,7 +401,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
----------
degree : integer
The degree of the polynomial features. Default = 2.
include_bias : integer
interaction_only : boolean, default = False
If true, only interaction features are produced: features that are
products of at most ``degree`` *distinct* input features (so not
``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
include_bias : boolean
If True (default), then include a bias column, the feature in which
all polynomial powers are zero (i.e. a column of ones - acts as an
intercept term in a linear model).
@@ -417,6 +422,11 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
array([[ 1,  0,  1,  0,  0,  1],
       [ 1,  2,  3,  4,  6,  9],
       [ 1,  4,  5, 16, 20, 25]])
>>> poly = PolynomialFeatures(interaction_only=True)
>>> poly.fit_transform(X)
array([[ 1,  0,  1,  0],
       [ 1,  2,  3,  6],
       [ 1,  4,  5, 20]])

Attributes
----------
@@ -427,36 +437,34 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
Notes
-----
Be aware that the number of features in the output array scales
exponentially in the number of features of the input array, so this
is not suitable for higher-dimensional data.
polynomially in the number of features of the input array, and
exponentially in the degree. High degrees can cause overfitting.

See :ref:`examples/plot_polynomial_regression.py
<example_plot_polynomial_regression.py>`
"""
def __init__(self, degree=2, include_bias=True):
def __init__(self, degree=2, interaction_only=False, include_bias=True):
self.degree = degree
self.interaction_only = interaction_only
self.include_bias = include_bias

@staticmethod
def _power_matrix(n_features, degree, include_bias):
def _power_matrix(n_features, degree, interaction_only, include_bias):
"""Compute the matrix of polynomial powers"""
# Find permutations/combinations which add to degree or less
deg_min = 0 if include_bias else 1
powers = itertools.product(*(range(degree + 1)
                             for i in range(n_features)))
powers = np.array([c for c in powers if deg_min <= sum(c) <= degree])

# sort so that the order of the powers makes sense
i = np.lexsort(np.vstack([powers.T, powers.sum(1)]))
return powers[i]
comb = (combinations if interaction_only else combinations_w_r)
start = int(not include_bias)
combn = chain.from_iterable(comb(range(n_features), i)
                            for i in range(start, degree + 1))
powers = np.vstack(np.bincount(c, minlength=n_features) for c in combn)
return powers

def fit(self, X, y=None):
"""
Compute the polynomial feature combinations
"""
n_samples, n_features = array2d(X).shape
self.powers_ = self._power_matrix(n_features,
                                  self.degree,
self.powers_ = self._power_matrix(n_features, self.degree,
                                  self.interaction_only,
                                  self.include_bias)
return self

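
The heart of the change is easier to see outside the class. Below is a
minimal standalone sketch of the new ``_power_matrix`` logic; it is our
simplification, not the merged code verbatim (the name ``power_matrix`` and
the direct stdlib import are assumptions: it skips the ``six`` aliases and
the ``fixes`` fallback, so it needs ``itertools.combinations_with_replacement``,
available on Python 2.7+). Each output feature is encoded as a tuple of
input-feature indices, and ``np.bincount`` turns that tuple into a row of
per-feature exponents, replacing the old generate-and-filter pass over
``itertools.product``.

    from itertools import chain, combinations, combinations_with_replacement

    import numpy as np

    def power_matrix(n_features, degree, interaction_only=False,
                     include_bias=True):
        # index tuples without repetition for interaction_only,
        # with repetition otherwise
        comb = combinations if interaction_only else combinations_with_replacement
        start = 0 if include_bias else 1
        # every tuple of at most `degree` feature indices, shortest first
        combn = chain.from_iterable(comb(range(n_features), i)
                                    for i in range(start, degree + 1))
        # bincount turns an index tuple such as (0, 0, 2) into the
        # exponent row [2, 0, 1] (here with n_features = 3)
        return np.vstack([np.bincount(c, minlength=n_features)
                          for c in combn])

    # two features, degree 2: rows are the exponents of (x0, x1) for the
    # output columns 1, x0, x1, x0**2, x0*x1, x1**2
    print(power_matrix(2, 2))
    # [[0 0]
    #  [1 0]
    #  [0 1]
    #  [2 0]
    #  [1 1]
    #  [0 2]]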
24 changes: 24 additions & 0 deletions sklearn/utils/fixes.py
@@ -172,3 +172,27 @@ def sparse_min_max(X, axis):
# numpy.argpartition was introduced in v 1.8.0
def argpartition(a, kth, axis=-1, kind='introselect', order=None):
    return np.argsort(a, axis=axis, order=order)


try:
    from itertools import combinations_with_replacement
except ImportError:
    # Backport of itertools.combinations_with_replacement for Python 2.6,
    # from Python 3.4 documentation (http://tinyurl.com/comb-w-r), copyright
    # Python Software Foundation (https://docs.python.org/3/license.html)
    def combinations_with_replacement(iterable, r):
        # combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
        pool = tuple(iterable)
        n = len(pool)
        if not n and r:
            return
        indices = [0] * r
        yield tuple(pool[i] for i in indices)
        while True:
            for i in reversed(range(r)):
                if indices[i] != n - 1:
                    break
            else:
                return
            indices[i:] = [indices[i] + 1] * (r - i)
            yield tuple(pool[i] for i in indices)
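
As a quick sanity check, the backport reproduces the documented output of
the stdlib version. The snippet below is ours and assumes
``sklearn.utils.fixes`` is importable; on Python 2.7 and later the ``try``
branch above simply re-exports the stdlib implementation.

    from sklearn.utils.fixes import combinations_with_replacement

    print(list(combinations_with_replacement('ABC', 2)))
    # [('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]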