Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d8fed2b

Browse files
committed
Add tests for get_feature_names
1 parent ce8815d commit d8fed2b

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

sklearn/decomposition/pca.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -235,9 +235,21 @@ def get_feature_names(self, input_features=None, full_output=False):
235235
-------
236236
output_feature_names : list of strings, shape (n_components)
237237
When full_output is "True", it is represented by the contribution
238-
of input features like "a1 * x0 + a2 * x1", "b1 * x0 + b2 * x1".
239-
If full_output is "False", it just represents the component names
240-
like "pc0", "pc1"
238+
of input features; when full_output is "False", it just represents
239+
the component names
240+
241+
Examples
242+
--------
243+
>>> import numpy as np
244+
>>> from sklearn.decomposition import PCA
245+
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
246+
>>> pca = PCA(n_components=2)
247+
>>> pca.fit(X)
248+
PCA(copy=True, n_components=2, whiten=False)
249+
>>> pca.get_feature_names(full_output=True)
250+
['0.838 * x0 + 0.545 * x1', '0.545 * x0 - 0.838 * x1']
251+
>>> pca.get_feature_names()
252+
['pc0', 'pc1']
241253
"""
242254
check_is_fitted(self, 'components_')
243255

@@ -251,16 +263,18 @@ def get_feature_names(self, input_features=None, full_output=False):
251263
"equal number of features when fitted: {1}.".format
252264
(len(input_features), self.n_features))
253265

254-
feature_names = []
266+
def name_generator(coefficients, names):
267+
yield "{0:.3f} * {1}".format(coefficients[0], names[0])
268+
for c, n in zip(coefficients[1:], names[1:]):
269+
yield "{0:s} {1:.3f} * {2}".format('-' if c < 0 else '+', abs(c), n)
270+
255271
if full_output:
272+
feature_names = []
256273
for component in range(self.n_components):
257-
pc = ""
258-
for feature in range(n_features):
259-
pc += " {0:+1.3f}".format(components[component][feature])+ " * " + input_features[feature]
260-
feature_names.append(pc)
274+
coefficients = components[component]
275+
feature_names.append(' '.join(name_generator(coefficients,input_features)))
261276
else:
262-
for component in range(self.n_components):
263-
feature_names.append("pc" + str(component))
277+
feature_names = ['pc{0}'.format(i) for i in range(self.n_components)]
264278
return feature_names
265279

266280

sklearn/decomposition/tests/test_pca.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,11 @@ def test_pca_score3():
349349
ll[k] = pca.score(Xt)
350350

351351
assert_true(ll.argmax() == 1)
352+
353+
def test_get_feature_names():
354+
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
355+
pca = PCA(n_components=2)
356+
pca.fit(X)
357+
assert_equal(pca.get_feature_names(), ['pc0', 'pc1'])
358+
assert_equal(pca.get_feature_names(full_output=True),
359+
['0.838 * x0 + 0.545 * x1', '0.545 * x0 - 0.838 * x1'])

0 commit comments

Comments
 (0)