Add tests for get_feature_names

maniteja123 · maniteja123 · commit d8fed2bc360b · 2016-02-25T01:04:03.000+05:30
diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
@@ -235,9 +235,21 @@ def get_feature_names(self, input_features=None, full_output=False):
         -------
         output_feature_names : list of string, shape (n_components)
             When full_output is "True", it is represented by the contribution
-            of input features like "a1 * x0 + a2 * x1", "b1 * x0 + b2 * x1".
-            If full_output is "False", it just represents the component names
-            like "pc0", "pc1"
+            of input features. If full_output is "False", it just represents
+            the component names.
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> from sklearn.decomposition import PCA
+        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+        >>> pca = PCA(n_components=2)
+        >>> pca.fit(X)
+        PCA(copy=True, n_components=2, whiten=False)
+        >>> pca.get_feature_names(full_output=True)
+        ['0.838 * x0 + 0.545 * x1', '0.545 * x0 - 0.838 * x1']
+        >>> pca.get_feature_names()
+        ['pc0', 'pc1']
         """
         check_is_fitted(self, 'components_')
 
@@ -251,16 +263,22 @@ def get_feature_names(self, input_features=None, full_output=False):
                     "equal number of features when fitted: {1}.".format
                     (len(input_features), self.n_features))
 
-        feature_names = []
+        def name_generator(coefficients, names):
+            """Yield the signed "coef * name" terms of one component."""
+            yield "{0:.3f} * {1}".format(coefficients[0], names[0])
+            for c, n in zip(coefficients[1:], names[1:]):
+                sign = '-' if c < 0 else '+'
+                yield "{0:s} {1:.3f} * {2}".format(sign, abs(c), n)
+
         if full_output:
-            for component in range(self.n_components):
-                pc = ""
-                for feature in range(n_features):
-                    pc += " {0:+1.3f}".format(components[component][feature])+ " * " + input_features[feature]
-                feature_names.append(pc)
+            # Iterate over the fitted component rows rather than
+            # range(self.n_components): n_components may be None (the
+            # default) or a float, neither of which range() accepts.
+            feature_names = [' '.join(name_generator(row, input_features))
+                             for row in components]
         else:
-            for component in range(self.n_components):
-                feature_names.append("pc" + str(component))
+            n_fitted = self.components_.shape[0]
+            feature_names = ['pc{0}'.format(i) for i in range(n_fitted)]
         return feature_names
         
 
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
@@ -349,3 +349,14 @@ def test_pca_score3():
         ll[k] = pca.score(Xt)
 
     assert_true(ll.argmax() == 1)
+
+
+def test_get_feature_names():
+    # Component names default to "pc<i>"; with full_output=True each name
+    # spells out the linear combination of the input features.
+    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    pca = PCA(n_components=2)
+    pca.fit(X)
+    assert_equal(pca.get_feature_names(), ['pc0', 'pc1'])
+    assert_equal(pca.get_feature_names(full_output=True),
+                 ['0.838 * x0 + 0.545 * x1', '0.545 * x0 - 0.838 * x1'])