@@ -759,6 +759,9 @@ def longest_ones(x):
759759
760760def prepca (P , frac = 0 ):
761761 """
762+
763+ WARNING: this function is deprecated -- please see class PCA instead
764+
762765 Compute the principal components of *P*. *P* is a (*numVars*,
763766 *numObs*) array. *frac* is the minimum fraction of variance that a
764767 component must contain to be included.
@@ -778,6 +781,7 @@ def prepca(P, frac=0):
778781 R13 Neural Network Toolbox but is not found in later versions;
779782 its successor seems to be called "processpcs".
780783 """
784+ warnings .warn ('This function is deprecated -- see class PCA instead' )
781785 U ,s ,v = np .linalg .svd (P )
782786 varEach = s ** 2 / P .shape [1 ]
783787 totVar = varEach .sum ()
@@ -789,6 +793,83 @@ def prepca(P, frac=0):
789793 Pcomponents = np .dot (Trans ,P )
790794 return Pcomponents , Trans , fracVar [ind ]
791795
796+
class PCA:
    """
    Principal component analysis of a data set via the SVD.

    The training data are standardized column by column (mean removed,
    divided by the standard deviation) before the decomposition, and the
    same standardization is applied to anything passed to :meth:`project`.
    """

    def __init__(self, a):
        """
        Compute the SVD of *a* and store the data needed for PCA.  Use
        :meth:`project` to project data onto a reduced set of dimensions.

        Inputs:

          *a*: a numobservations x numdims array (or nested sequence)

        Attrs:

          *a* : a centered, unit-sigma version of the input *a*

          *numrows*, *numcols* : the dimensions of *a*

          *mu* : a numdims array of the column means of *a*

          *sigma* : a numdims array of the column standard deviations of *a*

          *fracs* : the proportion of variance of each of the principal
            components

          *Wt* : the weight matrix for projecting a numdims point or array
            into PCA space

          *Y* : *a* projected into PCA space

        Raises RuntimeError if *a* has fewer rows than columns, and
        ValueError (from the shape unpacking) if *a* is not 2-D.
        """
        # accept nested lists/tuples as well as arrays, like project() does
        a = np.asarray(a)
        n, m = a.shape
        if n < m:
            raise RuntimeError('we assume data in a is organized with numrows>numcols')

        self.numrows, self.numcols = n, m
        self.mu = a.mean(axis=0)
        self.sigma = a.std(axis=0)

        a = self.center(a)
        self.a = a

        # only the singular values and the right singular vectors are used
        _, s, Vh = np.linalg.svd(a, full_matrices=False)

        self.Wt = Vh
        # rows of Vh are the principal axes; same expression as project()
        self.Y = np.dot(a, Vh.T)

        # the common divisor cancels in the ratio; only relative size matters
        variances = s ** 2 / float(len(s))
        self.fracs = variances / variances.sum()

    def project(self, x, minfrac=0.):
        """
        Project *x* onto the principal axes, dropping any axes where the
        fraction of variance is less than *minfrac*.

        *x* is a single numdims point or an array whose last dimension has
        length numdims; leading dimensions are preserved in the result.

        Raises ValueError if the last dimension of *x* is not numdims.
        """
        x = np.asarray(x)

        if x.shape[-1] != self.numcols:
            raise ValueError('Expected an array with dims[-1]==%d' % self.numcols)

        # contracting over the last axis of x equals dot(Wt, x.T).T for 1-D
        # and 2-D input, and stays well defined for stacked input
        Y = np.dot(self.center(x), self.Wt.T)
        mask = self.fracs >= minfrac
        # Ellipsis selects along the last (component) axis for any rank
        return Y[..., mask]

    def center(self, x):
        """
        Center *x* using the mean and sigma from the training set *a*.

        Dimensions whose training sigma is exactly zero are only shifted by
        the mean, not scaled, so constant columns map to zero instead of
        NaN/inf.
        """
        scale = np.where(self.sigma == 0, 1.0, self.sigma)
        return (x - self.mu) / scale
872+
792873def prctile (x , p = (0.0 , 25.0 , 50.0 , 75.0 , 100.0 )):
793874 """
794875 Return the percentiles of *x*. *p* can either be a sequence of
0 commit comments