% Here is the example for the PCA stuff - Example 2.2
% Use the yeast data.

% Load the yeast data set. The rest of this script expects the MAT-file to
% supply a matrix named 'data' (rows = observations, columns = variables).
load yeast
[n,p] = size(data);

% Center the data by subtracting the column means. The dimension argument
% is given explicitly so a single-row 'data' is not summed across columns.
datac = data - repmat(sum(data,1)/n,n,1);

% Find the covariance matrix and its eigen-decomposition.
covm = cov(datac);
[eigvec,eigval] = eig(covm);
eigval = diag(eigval);  % extract the diagonal elements

% Put the eigenvalues in descending order and reorder the eigenvectors
% (columns) to match. eig does not promise any particular ordering, so
% sort explicitly instead of assuming ascending output and flipping it.
[eigval,order] = sort(eigval,'descend');
eigvec = eigvec(:,order);

% Do a scree plot: eigenvalue size against its index k.
figure, plot(1:length(eigval),eigval,'ko-')
title('Scree Plot')
xlabel('Eigenvalue Index - k')
ylabel('Eigenvalue')

% From this plot, dimensionality of 4 seems reasonable.

% Cumulative percentage of variance explained: entry k is the share of the
% total variance captured by the first k eigenvalues.
pervar = 100*cumsum(eigval)./sum(eigval);
% Displaying pervar' gives:
%   Columns 1 through 10 
%    73.5923   85.0875   91.9656   94.3217   95.5616   96.4946   97.3680   98.0259   98.4699   98.8743
%   Columns 11 through 17 
%    99.1731   99.4232   99.5917   99.7438   99.8406   99.9322  100.0000
% Based on these values, 4 to 5 dimensions would be kept, depending on the
% cutoff value that is chosen.

% Now for the broken stick test.
% First get the expected lengths/sizes of the eigenvalues:
%   g(k) = (1/p) * sum_{i=k}^{p} 1/i
g = zeros(1,p);
for k = 1:p
    for i = k:p
        g(k) = g(k) + 1/i;
    end     % closes the inner loop over i
end         % closes the outer loop over k (this 'end' was missing)
g = g/p;
% What is the proportion of variance explained? This is for the covariance
% method.
propvar = eigval/sum(eigval);
% Now find those that explain more than the expected amount. propvar is
% transposed so that it has the same row orientation as g.
ind = find(propvar' > g);
% According to this, only the first one qualifies as accounting for more variance than
% would be expected by chance. It explains around 74% of the variance.

% Size-of-variance rule: retain the components whose eigenvalue is larger
% than the average eigenvalue.
avgeig = mean(eigval);
ind = find(avgeig < eigval);
% Show how many components pass (left unsuppressed so the count prints).
numel(ind)
% According to this test, the first 3 would be retained.

% Reduce the dimensionality to 3 by projecting the centered data onto the
% first three eigenvectors.
P = eigvec(:,1:3);
Xp = datac*P;

% 3-D scatterplot of the projected observations.
figure
plot3(Xp(:,1),Xp(:,2),Xp(:,3),'k*')
xlabel('PC 1')
ylabel('PC 2')
zlabel('PC 3')
grid on
axis tight

% The same projection viewed as a scatterplot matrix:
figure
plotmatrix(Xp)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Alternative way of computing the broken-stick expected proportions (the
% nested loop used earlier in this script), offered by Tom Lane, The
% MathWorks, Inc.
% Note: this section overwrites p and eigval with sample values.

% Sample data
p = 20;
eigval = sort(rand(1,p));

% Method used in text: accumulate 1/i for i = k..p, then scale by 1/p.
g = zeros(1,p);
for k = 1:p
    for i = k:p
        g(k) = g(k) + 1/i;
    end
    g(k) = g(k)/p;
end
g(1:4)

% No loop: column k of the lower-triangular matrix of ones selects the
% terms 1/i with i >= k, so the product forms every partial sum at once.
g = (1 ./ (1:p)) * tril(ones(p)) / p;
g(1:4)



