Home > SurfStat > SurfStatPCA.m

SurfStatPCA

PURPOSE ^

Principal Components Analysis (PCA).

SYNOPSIS ^

function [ pcntvar, U, V ] = SurfStatPCA( Y, mask, X, k );

DESCRIPTION ^

Principal Components Analysis (PCA).

 Usage: [ pcntvar, U, V ] = SurfStatPCA( Y [,mask [,X [,k] ] ] );

 Y    = n x v matrix or n x v x k array of data, v=#vertices,
        or memory map of same. 
 mask = 1 x v vector, 1=inside, 0=outside, default is ones(1,v),  
        i.e. the whole surface.
 X    = model formula of type term, or scalar, or n x p design matrix of 
        p covariates for the linear model. The PCA is done on the v x v 
        correlations of the residuals and the components are standardized 
        to have unit standard deviation about zero. If X=0, nothing is       
        removed. If X=1, the mean (over rows) is removed (default).
 c    = number of components in PCA, default 4.

 pcntvar = 1 x c vector of percent variance explained by the components.
 U       = n x c matrix of components for the rows (observations).
 V       = c x v x k array of components for the columns (vertices).

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [ pcntvar, U, V ] = SurfStatPCA( Y, mask, X, k );
0002 
0003 %Principal Components Analysis (PCA).
0004 %
0005 % Usage: [ pcntvar, U, V ] = SurfStatPCA( Y [,mask [,X [,k] ] ] );
0006 %
0007 % Y    = n x v matrix or n x v x k array of data, v=#vertices,
0008 %        or memory map of same.
0009 % mask = 1 x v vector, 1=inside, 0=outside, default is ones(1,v),
0010 %        i.e. the whole surface.
0011 % X    = model formula of type term, or scalar, or n x p design matrix of
0012 %        p covariates for the linear model. The PCA is done on the v x v
0013 %        correlations of the residuals and the components are standardized
0014 %        to have unit standard deviation about zero. If X=0, nothing is
0015 %        removed. If X=1, the mean (over rows) is removed (default).
0016 % c    = number of components in PCA, default 4.
0017 %
0018 % pcntvar = 1 x c vector of percent variance explained by the components.
0019 % U       = n x c matrix of components for the rows (observations).
0020 % V       = c x v x k array of components for the columns (vertices).
0021 
0022 maxchunk=2^20;
0023 if isnumeric(Y)
0024     [n,v,k]=size(Y);
0025     isnum=true;
0026 else
0027     Ym=Y;
0028     sz=Ym.Format{2};
0029     if length(sz)==2
0030         sz=sz([2 1]);
0031         k=1;
0032     else
0033         sz=sz([3 1 2]);
0034         k=sz(3);
0035     end
0036     n=sz(1);
0037     v=sz(2);
0038     isnum=false;
0039 end    
0040 
0041 if nargin<2 | isempty(mask)
0042     mask=ones(1,v);
0043 end
0044 if nargin<3 | isempty(X)
0045     X=1;
0046 end
0047 if nargin<4
0048     c=4;
0049 end
0050 
0051 if isa(X,'term')
0052     X=double(X);
0053 elseif size(X,1)>1 & size(X,2)==1
0054     warning('If you don''t convert vectors to terms it can give unexpected results :-(') 
0055 end
0056 if size(X,1)==1
0057     X=repmat(X,n,1);
0058 end
0059 df=n-rank(X);
0060 
0061 if isnum
0062     nc=1;
0063     chunk=v;
0064 else
0065     nc=ceil(v*n*k/maxchunk);
0066     chunk=ceil(v/nc);
0067 end
0068 if ~isnum
0069     fprintf(1,'%s',[num2str(round(v*n*k*4/2^20)) ' Mb to PCA, % remaining: 100 ']);
0070     n10=floor(n/10);
0071 end
0072 
0073 A=zeros(n);
0074 for ic=1:nc
0075     if ~isnum & rem(ic,n10)==0
0076         fprintf(1,'%s',[num2str(round(100*(1-ic/nc))) ' ']);
0077     end
0078     v1=1+(ic-1)*chunk;
0079     v2=min(v1+chunk-1,v);
0080     vc=v2-v1+1;
0081     if ~isnum
0082         if length(sz)==2
0083             Y=double(Ym.Data(1).Data(v1:v2,:)');
0084         else
0085             Y=double(permute(Ym.Data(1).Data(v1:v2,:,:),[3 1 2]));
0086         end
0087     end
0088     maskc=mask(v1:v2);
0089     if k==1
0090         Y=double(Y(:,maskc));
0091     else
0092         Y=double(reshape(Y(:,maskc,:),n,sum(maskc)*k));
0093     end
0094     if any(X(:)~=0)
0095         Y=Y-X*(pinv(X)*Y);
0096     end
0097     S=sum(Y.^2,1);
0098     Smhalf=(S>0)./sqrt(S+(S<=0));
0099     for i=1:n
0100         Y(i,:)=Y(i,:).*Smhalf;
0101     end
0102     A=A+Y*Y';
0103 end
0104 if ~isnum
0105     fprintf(1,'%s\n','Done');
0106     fprintf(1,'%s',[num2str(round(v*n*k*4/2^20)) ' Mb to PCA, % remaining: 100 ']);
0107 end
0108 
0109 [U, D]=eig(A);
0110 [ds,is]=sort(-diag(D));
0111 ds=-ds;
0112 pcntvar=ds(1:c)'/sum(ds)*100;
0113 
0114 U=U(:,is(1:c));
0115 V=zeros(c,v*k);
0116 
0117 if isnum
0118     V(:,repmat(mask,1,k))=U'*Y;
0119 else
0120     for ic=1:nc
0121         if rem(ic,n10)==0
0122             fprintf(1,'%s',[num2str(round(100*(1-ic/nc))) ' ']);
0123         end
0124         v1=1+(ic-1)*chunk;
0125         v2=min(v1+chunk-1,v);
0126         vc=v2-v1+1;
0127         if length(sz)==2
0128             Y=double(Ym.Data(1).Data(v1:v2,:)');
0129         else
0130             Y=double(permute(Ym.Data(1).Data(v1:v2,:,:),[3 1 2]));
0131         end
0132         maskc=mask(v1:v2);
0133         if k==1
0134             Y=double(Y(:,maskc));
0135         else
0136             Y=double(reshape(Y(:,maskc,:),n,sum(maskc)*k));
0137         end
0138         if any(X(:)~=0)
0139             Y=Y-X*(pinv(X)*Y);
0140         end
0141         S=sum(Y.^2,1);
0142         Smhalf=(S>0)./sqrt(S+(S<=0));
0143         for i=1:n
0144             Y(i,:)=Y(i,:).*Smhalf;
0145         end
0146         maskcc=logical(zeros(1,v));
0147         maskcc(v1:v2)=maskc;
0148         if k>1
0149             maskcc=repmat(maskcc,1,k);
0150         end
0151         V(:,maskcc)=U'*Y;
0152     end
0153     fprintf(1,'%s\n','Done');
0154 end
0155 
0156 s=sign(abs(max(V,[],2))-abs(min(V,[],2)));
0157 sv=sqrt(mean(V.^2,2));
0158 V=diag(s./(sv+(sv<=0)).*(sv>0))*V;
0159 U=U*diag(s*sqrt(df));
0160 if k>1
0161     V=reshape(V,c,v,k);
0162 end
0163 
0164 return
0165 end
0166

Generated on Fri 26-Sep-2008 14:05:29 by m2html © 2003