function SpeedTest
% SpeedTest
% 
% Time the speed of Matlab operations.
% 
% SpeedTest's results, especially the summary it prints at the end, will
% help you understand what Matlab does quickly, and will teach you to
% avoid a few things that Matlab does very slowly.
% 
% To optimize your programs for speed, use Matlab's invaluable PROFILE
% command. PROFILE will tell you where you're spending most of your time.
% Try to change that code to make it run faster. Knowing SpeedTest's
% results will help you avoid Matlab's slow operations.
% 
% See also PROFILE.
% 
% HOW THE TIMING IS DONE
% 
% All times are measured with CPUTIME. All we care about here is the time
% per iteration. So we ignore the time taken by the (slow) first iteration
% of the loop, and report only the incremental time taken by subsequent
% iterations, i.e. the slope of the graph of time vs. number of
% iterations. The rationale is that long run times are usually due to code
% that's executed many times, so we can neglect any code that's executed
% only once.
% 
% If you have any disk drives with removable media (e.g. floppy, zip,
% jaz), don't leave them empty (insert a disk) when you want to eliminate
% interruptions. Macintosh drivers for removable disks periodically
% (perhaps once a second) check empty drives to see if a disk has been
% inserted, and this operation may tie up the Mac for several ms at a
% time, which can have a big impact on the timing of a 13 ms loop, e.g.
% showing a real time movie. These interruptions can simply increase the
% duration of your task, or they can postpone the occurrence of a clock
% tick (if the clock is interrupt based, unlike the excellent UpTime
% counter in the PPC chip itself). The delayed clock tick can increase or
% reduce the apparent duration of an interval, depending on whether it's
% at the beginning or end of your interval.
% 
% SpeedTest tries to edit out such interruptions by repeating every
% measurement five times, and keeping only the median time. This relies on
% the timing intervals being sufficiently brief that they are only rarely
% interrupted. However, the timing interval needs to be long enough to
% attain a reasonable precision after dividing the clock tick duration by
% the number of iterations. With slow-ticking clocks we have no choice but
% to use long intervals.
% 
% Note that the very first execution of the code in a loop is likely to be
% slow because any functions that are used for the first time must be
% loaded from disk into memory, and any variables that are used for the
% first time need to be allocated. Usually one eliminates these delays
% from critical-time loops by using the functions and variables in advance
% of the loop. Here we don't need to because the median trick that we
% introduced to eliminate the effect of interrupts also discards the
% extra-long first measurement.
% 
% SpeedTest.m may be distributed freely. It is part of the Psychtoolbox set of
% Matlab extensions for vision research.
% web http://psychtoolbox.org/

% 4/13/97  dgp  Wrote it.
% 4/13/97  dhb  Check for existence of bitand.mex, new in V5.
% 4/15/97  dgp  Cosmetic.
% 5/1/97   dgp  Report FOR loop time.
% 6/3/97   dgp  Warn that patching GetMenuBar breaks the Matlab debugger.
% 6/4/97   dgp  Add conv2.
% 8/3/97   dgp  Add ">". Check before complaining.
% 8/17/97  dgp  Systematically check all ops, applied to scalars, arrays, and combinations.
% 8/17/97  dgp  Expanded the summary.
% 8/17/97  dgp  Use DoNothing.mex if available.
% 1/17/98  dgp  Rewrote the timing code to measure the incremental time per iteration, 
%               after the first. SpeedTest now runs faster, and the results are
%               nearly independent of n.
% 6/16/00  dgp  Made compatible with new version of Screen('Computer').
% 9/14/00  dgp  Changed "less than" to "fewer than".
% 6/8/02   dgp  Add test and report about ~= taking twice as long as ==. 
% 6/10/02  dgp  Trying to test under Mac OS X. Turn off warnings and increase n from 10 to 1000.
%               Detabbed the print out, so that it can be sent by email.
%               More stringent test: if exist('DoNothing.mex') & strcmp(computer,'MAC2')
% 6/11/02  dgp  Measure TickTime and select number of iterations appropriately.
%               More stringent test: if exist('Screen.mex') & strcmp(computer,'MAC2')
%               Now compatible with Mac (OS 9), Win, and (hopefully) Mac OS X.
% 6/24/02  dgp  Upper bound of 1000 iterations.
% 6/27/02  dgp  Call gestalt('pclk') only if MATLAB 5.2.1.

% Pick the base iteration count n from the measured CPUTIME tick: enough
% iterations that tick/n is about 1 us, at least 2, and at most 1000.
tick=TickTime;
n=max(2,ceil(tick/1e-6));
if n>1000
	fprintf('NOTE: full precision would require %d iterations, which would take too long.\n',n);
	n=1000;
	fprintf('So we''ll run only %d iterations.\n\n',n);
end

% nn holds every iteration count to be timed: five identical rows of
% [n 2n]. Each column is measured five times and only the median of the
% five is kept, which minimizes the effect of occasional long interrupts
% (e.g. 2 ms once a second) that may delay a tick. Taking the median also
% effectively discards a slow first measurement, when functions have to be
% loaded or variables allocated.
nn=repmat(n*[1 2],5,1);

% Create every variable used by the tests below, and call CPUTIME once,
% before any timing starts.
tt=nn;
j=0;
k=1;
t1=0;
t2=0;
x=0;
xx=magic(32);
yy=xx;
zz=fliplr(xx);
cputime;
bestTime=Inf;
bestMexTime=Inf;

% Tell the user what is about to be measured and how precisely.
usPerTick=tick*1e6;
fprintf('SpeedTest will now time Matlab''s basic operations. Using %.0f iterations\n',nn(1,2));
fprintf('per test will attain %.0f us accuracy, given the %1g us per tick of CPUTIME.\n',usPerTick/n,usPerTick);
fprintf('All reported times are per iteration, omitting the loop overhead (in\n');
fprintf('first row of table). We ignore the first iteration, which is usually\n');
fprintf('slower. x,j,k are scalars. xx,yy,zz are %.0f-element vectors. m is a \n',length(xx(:)));
fprintf('32x32 matrix.\n');

% Measure loop overhead with 30x better precision.
% (Every iteration count in nn is multiplied by 30 for this section.)
nn=30*nn;

fprintf('\nLoop overhead.\n');
% Simple assignment, once
% k runs over all elements of nn in column order; tt(k) receives the
% CPUTIME elapsed while running nn(k) iterations.
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j;
	end
	tt(k)=cputime-t0;
end
% median(tt) is the column-wise median of the five repeats. The slope of
% the line through the two (iteration count, median time) points is the
% time per iteration: loop overhead plus one assignment.
p=polyfit(nn(1,:),median(tt),1);
loopTime1=p(1);

% Simple assignment, twice
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j;
		y=j;
	end
	tt(k)=cputime-t0;
end
% Slope here is loop overhead plus two assignments.
p=polyfit(nn(1,:),median(tt),1);
loopTime2=p(1);

% 2*(overhead+a) - (overhead+2a) = overhead, taking both assignments to
% cost the same time a.
loopTime=2*loopTime1-loopTime2;
fprintf('%10.1f us\n',1e6*loopTime);

% Measure scalar times with 10x better precision.
% (nn is divided by the 30 applied for the loop-overhead section and
% multiplied by 10.)
nn=10*nn/30;

fprintf('Operate on scalar.\n');

% Every test in this section follows the same pattern: run the statement
% once outside the timed region, time nn(k) iterations of it for each k,
% take the slope of median time vs. iteration count as the time per
% iteration, and report that slope minus the loop overhead. bestTime
% tracks the smallest such time seen so far.

x=j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j                %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% xk is compared against the x=j+0 time (xk0) in the summary.
xk=p(1)-loopTime;
statementTime=p(1)-loopTime;

x=+j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=+j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=+j               %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=-j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=-j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=-j               %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Kept for the summary, which compares it with notTime.
minusTime=p(1)-loopTime;

x=~j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=~j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=~j               %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Kept for the summary, which compares it with minusTime.
notTime=p(1)-loopTime;

x=fix(j);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=fix(j);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=fix(j)           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=round(j);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=round(j);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=round(j)         %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=sign(j);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=sign(j);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=sign(j)          %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=sin(j);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=sin(j);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=sin(j)           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% Restore original number of iterations
nn=nn/10;

fprintf('Operate on scalar and scalar.\n');

% Each test times nn(k) iterations of a two-operand scalar statement and
% reports the slope of median time vs. iteration count, minus the loop
% overhead. Where k appears as an operand it is the index of the enclosing
% timing loop, so its value differs from one timing run to the next.

% x=j+0 is timed but not printed; its time, xk0, is compared with the
% x=j time in the summary.
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j+0;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
%fprintf('%10.1f us  x=j+0               %% built-in function\n',1e6*(p(1)-loopTime));
%bestTime=min(bestTime,p(1)-loopTime);
xk0=p(1)-loopTime;

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j+k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j+k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j-k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j-k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j*k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j*k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j/k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j/k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% From here on each statement is also executed once before it is timed.
x=j>k;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j>k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j>k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=j==k;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j==k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j==k             %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=j~=k;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j~=k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j~=k             %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=j&k;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j&k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j&k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=j|k;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j|k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j|k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=j^k;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=j^k;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=j^k              %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

x=atan2(j,k);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=atan2(j,k);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=atan2(j,k)       %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% Disabled test ("if 0" never executes): time of calling CPUTIME itself.
if 0
	x=cputime;
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			x=cputime;
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%10.1f us  x=cputime            %% built-in function\n',1e6*(p(1)-loopTime));
	bestTime=min(bestTime,p(1)-loopTime);
end

% The remaining scalar tests are reported as "mex or dll" and update
% bestMexTime instead of bestTime. Functions that may be absent are timed
% only if EXIST finds them.
if exist('latcfilt')    % in signal toolbox
	x=latcfilt(j,k);
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			x=latcfilt(j,k);
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%10.1f us  x=latcfilt(j,k)    %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);
end

x=conv2(j,k);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		x=conv2(j,k);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%10.1f us  x=conv2(j,k)       %% mex or dll\n',1e6*(p(1)-loopTime));
bestMexTime=min(bestMexTime,p(1)-loopTime);

if exist('bitand')      % comes with Matlab 5, but not with Matlab 4
	x=bitand(j,k);
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			x=bitand(j,k);
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%10.1f us  x=bitand(j,k)      %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);
end

% DoNothing ignores inputs, and returns empty matrix for each explicit output arg.
% These tests run only when DoNothing.mex exists and COMPUTER reports
% 'MAC2'. The four variants time the call with and without input
% arguments and with and without an output argument; each updates
% bestMexTime.
if exist('DoNothing.mex') & strcmp(computer,'MAC2')
	fprintf('Call dummy MEX function.\n');
	% No inputs, no output.
	DoNothing;
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			DoNothing;
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%8.0f   us  DoNothing          %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);

	% Two inputs, no output.
	DoNothing(j,k);
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			DoNothing(j,k);
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%8.0f   us  DoNothing(j,k)     %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);

	% No inputs, one output.
	x=DoNothing;
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			x=DoNothing;
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%8.0f   us  x=DoNothing        %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);

	% Two inputs, one output.
	x=DoNothing(j,k);
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			x=DoNothing(j,k);
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%8.0f   us  x=DoNothing(j,k)   %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);
end

fprintf('Operate on array.\n');

% One-operand tests on the array yy. Each test times nn(k) iterations of
% the statement and reports the slope of median time vs. iteration count,
% minus the loop overhead. These times are not folded into bestTime.

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy              %% built-in function\n',1e6*(p(1)-loopTime));
% Kept for the summary, which compares it with yyPlusTime.
yyTime=p(1)-loopTime;

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=+yy;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=+yy             %% built-in function\n',1e6*(p(1)-loopTime));
% Kept for the summary, which compares it with yyTime.
yyPlusTime=p(1)-loopTime;

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=-yy;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=-yy             %% built-in function\n',1e6*(p(1)-loopTime));

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=~yy;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=~yy             %% built-in function\n',1e6*(p(1)-loopTime));

xx=fix(yy);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=fix(yy);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=fix(yy)         %% built-in function\n',1e6*(p(1)-loopTime));

xx=round(yy);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=round(yy);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=round(yy)       %% built-in function\n',1e6*(p(1)-loopTime));
% Kept for the summary, which compares it with yySignTime.
yyRoundTime=p(1)-loopTime;

xx=sign(yy);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=sign(yy);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=sign(yy)        %% built-in function\n',1e6*(p(1)-loopTime));
% Kept for the summary, which compares it with yyRoundTime.
yySignTime=p(1)-loopTime;

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=sin(yy);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=sin(yy)         %% built-in function\n',1e6*(p(1)-loopTime));

fprintf('Operate on array and scalar.\n');

% Two-operand tests combining the array yy with the scalar loop counter j.
% Each test times nn(k) iterations of the statement and reports the slope
% of median time vs. iteration count, minus the loop overhead.

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy+j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy+j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy-j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy-j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Stored under the name the summary uses in its "yy>j vs. yy-j" comparison.
yyMinusjTime=p(1)-loopTime;

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy*j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy*j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy/j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy/j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% From here on each statement is also executed once before it is timed.
xx=yy>j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy>j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy>j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Stored under the name the summary uses in its "yy>j vs. yy-j" comparison.
yyGreaterThanjTime=p(1)-loopTime;

xx=yy==j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy==j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy==j           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

xx=yy~=j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy~=j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy~=j           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

xx=yy&j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy&j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy&j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

xx=yy|j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy|j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy|j            %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

xx=yy.^j;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy.^j;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy.^j           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% atan2 is reported but not folded into bestTime.
xx=atan2(yy,j);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=atan2(yy,j);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=atan2(yy,j)     %% built-in function\n',1e6*(p(1)-loopTime));

% bitand is timed only if EXIST finds it; it counts toward bestMexTime.
if exist('bitand')      % Matlab 5
	xx=bitand(yy,j);
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			xx=bitand(yy,j);
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%8.0f   us  xx=bitand(yy,j)    %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);
end

fprintf('Operate on array and array.\n');

% Two-operand tests combining the arrays yy and zz. Each test times nn(k)
% iterations of the statement and reports the slope of median time vs.
% iteration count, minus the loop overhead.

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy+zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy+zz           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Time per array element; the summary reports it as the cost of one
% elementary operation.
operationTime=(p(1)-loopTime)/length(xx(:));

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy-zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy-zz           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Deliberately NOT stored in yyMinusjTime: the summary labels that value
% as the time of xx=yy-j, which is measured in the array-and-scalar
% section. Overwriting it here made the summary report yy-zz as "yy-j".

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy.*zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy.*zz          %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy./zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy./zz          %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% From here on each statement is also executed once before it is timed.
xx=yy>zz;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy>zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy>zz           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Deliberately NOT stored in yyGreaterThanjTime: the summary labels that
% value as the time of xx=yy>j, which is measured in the array-and-scalar
% section. Overwriting it here made the summary report yy>zz as "yy>j".

xx=yy==zz;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy==zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy==zz          %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Kept for the summary, which compares it with yyNeqzzTime.
yyEqzzTime=p(1)-loopTime;

xx=yy~=zz;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy~=zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy~=zz          %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Kept for the summary, which compares it with yyEqzzTime.
yyNeqzzTime=p(1)-loopTime;

xx=yy&zz;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy&zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy&zz           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);
% Kept for the summary, which compares it with yyBitandzzTime.
yyAndzzTime=p(1)-loopTime;

xx=yy|zz;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy|zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy|zz           %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

xx=yy.^zz;
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=yy.^zz;
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=yy.^zz          %% built-in function\n',1e6*(p(1)-loopTime));
bestTime=min(bestTime,p(1)-loopTime);

% atan2 is reported but not folded into bestTime.
xx=atan2(yy,zz);
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		xx=atan2(yy,zz);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  xx=atan2(yy,zz)    %% built-in function\n',1e6*(p(1)-loopTime));

% bitand is timed only if EXIST finds it. When it is absent,
% yyBitandzzTime is NaN, so the summary's comparison against it is false.
if exist('bitand')      % Matlab 5 or better
	xx=bitand(yy,zz);
	for k=1:length(nn(:))
		t0=cputime;
		for j=1:nn(k)
			xx=bitand(yy,zz);
		end
		tt(k)=cputime-t0;
	end
	p=polyfit(nn(1,:),median(tt),1);
	fprintf('%8.0f   us  xx=bitand(yy,zz)   %% mex or dll\n',1e6*(p(1)-loopTime));
	bestMexTime=min(bestMexTime,p(1)-loopTime);
	yyBitandzzTime=p(1)-loopTime;
else
	yyBitandzzTime=nan;
end

fprintf('Operate on matrix.\n');

% Time construction of a 32x32 magic square. The slope of median time vs.
% iteration count, minus the loop overhead, is reported.
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		m=magic(32);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  m=magic(32)        %% built-in function\n',1e6*(p(1)-loopTime));

% Time a 2-D FFT of m. Note that the result is assigned to n, reusing the
% name that earlier held the base iteration count; the loops here are
% controlled by nn, not n.
for k=1:length(nn(:))
	t0=cputime;
	for j=1:nn(k)
		n=fft2(m);
	end
	tt(k)=cputime-t0;
end
p=polyfit(nn(1,:),median(tt),1);
fprintf('%8.0f   us  n=fft2(m)          %% built-in function\n',1e6*(p(1)-loopTime));

% Identify the computer and Matlab version that produced these timings.
% If Screen.mex exists and COMPUTER reports 'MAC2', use the description
% returned by Screen('Computer'). Otherwise print COMPUTER and VERSION
% and, if a GESTALT function exists, add the processor family and (for
% Matlab 5.2.1 only) the clock rate.
if exist('Screen.mex') & strcmp(computer,'MAC2')
	cmptr=Screen('Computer');
	fprintf('\n%s''s %s, %s\n',cmptr.owner,cmptr.model,cmptr.system);
else
	fprintf('\n%s Matlab %s\n',computer,version);
	if exist('gestalt')
		bits = gestalt('sysa');
		% Start from a defined value: if neither bit below is set, the name
		% would otherwise be unassigned when printed. It is called cpuType
		% rather than type so it cannot be confused with Matlab's TYPE command.
		cpuType='Unknown';
		if bits(32)
			cpuType='68K';
		end
		if bits(31)
			cpuType='PowerPC';
		end
		fprintf('%s Macintosh',cpuType);
		bits=[];
		if strmatch('5.2.1',version)
			% Two-argument EVAL: yields [] if the gestalt call fails.
			bits=eval('gestalt(''pclk'')','[]');
		end
		if ~isempty(bits)
			% Assemble the 32 bits, most significant first, into a number (Hz).
			hz=0;
			for i=1:32
				hz=hz*2+bits(i);
			end
			fprintf(' running at %.0f MHz',hz/1e6);
		end
		fprintf('.\n');
	end
end
	

% Print the summary: the loop overhead relative to the per-element cost of
% an elementary operation, followed by a numbered list of any anomalies
% found among the measured times.
overheadRatio=loopTime/operationTime;
fprintf('\nSUMMARY: Matlab %s is quick. The loop\n',version);
fprintf('overhead is %.1f us per iteration (after the first). The large %.0f:1 ratio\n',loopTime*1e6,overheadRatio);
fprintf('of the %.1f us loop overhead to the roughly %.0f ns per elementary\n',1e6*loopTime,1e9*operationTime);
fprintf('operation (~,+,-,*,/,==,&,|,sin,sign) is a defining characteristic of\n');
fprintf('the language. The run time of loops that operate on fewer than %.0f\n',overheadRatio);
fprintf('elements is mostly spent processing the loop, not the elements. However,\n');
fprintf('there are some anomalies:\n');

% count numbers the anomalies as they are reported.
count=0;

% Scalar NOT vs. scalar negation.
if notTime>1.5*minusTime
	count=count+1;
	fprintf('%d. With scalars, the NOT function is slow.\n',count);
	fprintf('   x=~j takes %.1f times as long as x=-j.\n',notTime/minusTime);
end

% Array ~= vs. array ==.
neqOverEq=yyNeqzzTime/yyEqzzTime;
if neqOverEq>1.3
	count=count+1;
	fprintf('%d. With arrays, it is surprising that ~= takes longer than ==.\n',count);
	fprintf('   xx~=yy takes %.1f times as long as xx==yy.\n',neqOverEq);
end

% Array unary plus vs. plain array assignment.
plusOverCopy=yyPlusTime/yyTime;
if plusOverCopy>1.1
	count=count+1;
	fprintf('%d. With arrays, it is surprising that unary plus takes any time at all.\n',count);
	fprintf('   xx=+yy takes %.1f times as long as xx=yy.\n',plusOverCopy);
end

% bitand vs. logical AND on arrays (the ratio is NaN, hence never
% reported, when yyBitandzzTime is NaN).
bitandOverAnd=yyBitandzzTime/yyAndzzTime;
if bitandOverAnd>1.5
	count=count+1;
	fprintf('%d. With arrays, the bit-wise logical functions are terribly slow.\n',count);
	fprintf('   bitand(yy,zz) takes %.0f times as long as yy&zz.\n',bitandOverAnd);
end

% Array comparison vs. array subtraction.
if yyGreaterThanjTime>1.5*yyMinusjTime
	count=count+1;
	fprintf('%d. With arrays, the comparison operators are slow.\n',count);
	fprintf('   yy>j takes %.1f times as long as yy-j.\n',yyGreaterThanjTime/yyMinusjTime);
end

% Array ROUND vs. array SIGN.
roundOverSign=yyRoundTime/yySignTime;
if roundOverSign>1.5
	count=count+1;
	fprintf('%d. With arrays, FIX and ROUND are slow.\n',count);
	fprintf('   round(yy) takes %.1f times as long as sign(yy).\n',roundOverSign);
end

% Plain assignment vs. assignment of j+0; reported only when both times
% are positive.
if xk>xk0 & xk>0 & xk0>0
	% Fixed in Matlab 5.1. Yay!
	count=count+1;
	fprintf('%d. It makes no sense, but executing ''x=j'' takes %.1f times longer \n',count,xk/xk0);
	fprintf('   than ''x=j+0''.\n');
end

% Fastest MEX/DLL call vs. fastest built-in, reported when the difference
% exceeds 1 us.
mexPenalty=bestMexTime-bestTime;
if mexPenalty>1e-6
	% In Matlab 5.2 this has been reduced to a mere 100 us on a PowerMac 6100. Yay!
	count=count+1;
	fprintf('%d. Matlab takes %.1f us longer to call a MEX or DLL than a built-in function.\n',count,mexPenalty*1e6);
end

function tick=TickTime
% tick=TickTime
%
% Estimate the duration, in seconds, of one tick of the CPUTIME clock.
% CPUTIME is polled repeatedly until it is seen to change. The value
% returned is the second-smallest distinct value among zero, the time
% elapsed since entry, and the differences between back-to-back CPUTIME
% readings -- i.e. the smallest nonzero increment observed, assuming
% CPUTIME never decreases.
startTime=cputime;
tick=[];
while isempty(tick)
	elapsed=cputime-startTime;
	steps=diff([cputime cputime cputime cputime cputime cputime]);
	% UNIQUE returns the distinct values in ascending order; the explicit
	% 0 ensures a change in CPUTIME shows up as a second element.
	observed=unique([elapsed 0 steps]);
	if length(observed)>1
		tick=observed(2);
	end
end
