15
15
from scipy import linalg
16
16
from scipy .special import gammaln
17
17
18
+ from ..externals import six
18
19
from ..base import BaseEstimator , TransformerMixin
19
20
from ..utils import check_random_state , as_float_array
20
21
from ..utils import check_array
@@ -226,10 +227,12 @@ def get_feature_names(self, input_features=None, full_output=False):
226
227
String names for input features if available. By default,
227
228
"x0", "x1", ... "xn_features" is used.
228
229
229
- full_output : boolean, default False
230
+ full_output : boolean or integer, default False
230
231
When it is "True", return the principal components as the
231
232
combination of the input features. If "False", will be just
232
- the component names
233
+ the component names. If it is an integer n (n < n_features), return
234
+ only the n largest-magnitude contributions to each component, largest first.
235
+
233
236
234
237
Returns
235
238
-------
@@ -243,18 +246,20 @@ def get_feature_names(self, input_features=None, full_output=False):
243
246
>>> import numpy as np
244
247
>>> from sklearn.decomposition import PCA
245
248
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
246
- >>> pca = PCA(n_components=2)
247
- >>> pca.fit(X)
248
- PCA(copy=True, n_components=2, whiten=False)
249
+ >>> pca = PCA(n_components=2).fit(X)
249
250
>>> pca.get_feature_names(full_output=True)
250
- ['0.838 * x0 + 0.545 * x1', '0.545 * x0 - 0.838 * x1']
251
+ ['0.84*x0 + 0.54*x1', '0.54*x0 - 0.84*x1']
252
+ >>> pca.get_feature_names(full_output=1)
253
+ ['0.84*x0', '-0.84*x1']
251
254
>>> pca.get_feature_names()
252
255
['pc0', 'pc1']
253
256
"""
254
257
check_is_fitted (self , 'components_' )
255
258
256
259
n_features = self .components_ .shape [1 ]
257
260
components = self .components_
261
+ contribution = np .argsort (- np .abs (components ), axis = 1 )
262
+
258
263
if input_features is None :
259
264
input_features = ['x%d' % i for i in range (n_features )]
260
265
else :
@@ -264,17 +269,22 @@ def get_feature_names(self, input_features=None, full_output=False):
264
269
(len (input_features ), self .n_features ))
265
270
266
271
def name_generator (coefficients , names ):
267
- yield "{0:.3f} * {1}" .format (coefficients [0 ], names [0 ])
272
+ yield "{0:.2g}* {1}" .format (coefficients [0 ], names [0 ])
268
273
for c , n in zip (coefficients [1 :], names [1 :]):
269
- yield "{0:s} {1:.3f} * {2}" .format ('-' if c < 0 else '+' , abs (c ), n )
274
+ yield "{0:s} {1:.2g}* {2}" .format ('-' if c < 0 else '+' , abs (c ), n )
270
275
271
- if full_output :
272
- feature_names = []
273
- for component in range (self .n_components ):
274
- coefficients = components [component ]
275
- feature_names .append (' ' .join (name_generator (coefficients ,input_features )))
276
- else :
276
+ if full_output is True :
277
+ feature_names = [' ' .join (name_generator (components [i ],input_features ))
278
+ for i in range (self .n_components )]
279
+ elif full_output is False :
277
280
feature_names = ['pc{0}' .format (i ) for i in range (self .n_components )]
281
+ elif isinstance (full_output , six .integer_types ) and full_output < n_features :
282
+ required = contribution [:,:full_output ]
283
+ input_features = np .asarray (input_features )
284
+ feature_names = [' ' .join (name_generator (components [i ][required [i ]], input_features [required [i ]]))
285
+ for i in range (self .n_components )]
286
+ else :
287
+ raise ValueError ("full_output must be integer or boolean" )
278
288
return feature_names
279
289
280
290
0 commit comments