function O = lyngby_pde_binmix(NN, varargin)

% lyngby_pde_binmix     - Estimation of binomial mixture model
%
%       O = lyngby_pde_binmix(N)
%       O = lyngby_pde_binmix(N, 'PropertyName', PropertyValue, ...)
%
%       Input:  N  Observed frequencies
%
%       Property:
%         OptimType   [ {gradient1} | grid | gradient | conjgrad |
%                     gridconjgrad ] Optimization method. With
%                     LambdaType 'common' only 'gradient1' is
%                     implemented; any other value gives a warning
%                     and returns the initial parameter values.
%         LambdaType  [ {common} | separate ] Whether the rows of N
%                     share one mixing coefficient ('common') or
%                     each row is fitted on its own ('separate').
%         Info        [ {0} | Integer ] Verbosity level. It is passed
%                     on to the optimizers, and the parameters are
%                     displayed during 'gradient1' iterations when
%                     it is 5 or above.
%
%       Output: O  Parameters [ P1 P2 lambda ], one row per row of N
%
%       Estimation of mixture of two binomials (binomial mixture)
%
%       The first element in N should contain the observed frequency
%       of zero positive outcomes, the second the observed frequency
%       of one positive outcome, the third the observed frequency of
%       two positive outcomes, ...
%
%       The input argument 'N' can also be a matrix. With the default
%       LambdaType 'common' the rows are then modeled with a common
%       lambda.
%
%       The parameters in the output argument are ordered as: P1, P2,
%       lambda, where lambda is the mixing coefficient. The return
%       argument is sorted so that P1<=P2.
%
%
%       Ref: John Uebersax, MIXBIN.
%            Gelfand AE, Solomon A, JASA 69:32
%
%       Example:
%         N = [ 5 0 2 2 3 ];
%         lyngby_pde_binmix(N)
%
%         % Yerushalmy radiograph data from MIXBIN
%         %   P1=0.0100   P2=0.0003, lambda= 0.9808
%         N = [ 13560 877 168 66 42 28 23 39 64];
%         lyngby_pde_binmix(N)
%
%       See also LYNGBY, LYNGBY_PDE_BINMIX_LL, LYNGBY_PDE_BINMIX_DER,
%                LYNGBY_OPT_CG, LYNGBY_OPT_GRADIENT.
%
% $Id: lyngby_pde_binmix.m,v 1.1 2004/03/08 21:41:56 fnielsen Exp $


    % Check input arguments
    if nargin < 1
      error('Too few input arguments')
    end


    % Default properties
    info       = 0;
    theta      = [ 0.6 0.03 0.0195 ];      % Initial [ P1 P2 lambda ]
    optimtype  = 'gradient1';
    lambdaType = 'common';

    validOptimTypes  = { 'grid' 'conjgrad' 'gradient' 'gradient1' ...
          'gridconjgrad' };
    validLambdaTypes = { 'common' 'separate' };


    % Read properties given as name/value pairs. As in the original
    % loop, a trailing property name without a value is not read.
    n = 1;
    while n < nargin-1
      name = varargin{n};
      if ~ischar(name)
        error('Property names should be strings');
      end
      name  = lower(name);
      value = varargin{n+1};
      if ischar(value)
        value = lower(value);
      end

      if strcmp(name, 'optimtype')
        if ischar(value) && any(strcmp(value, validOptimTypes))
          optimtype = value;
        else
          error(['Argument with ''OptimType'' should be a ' ...
                'string and, eg, ''grid'', ...']);
        end

      elseif strcmp(name, 'lambdatype')
        % This branch used to validate against the optimizer names and
        % assign to 'optimtype', so 'separate' could never be selected.
        if ischar(value) && any(strcmp(value, validLambdaTypes))
          lambdaType = value;
        else
          error(['Argument with ''LambdaType'' should be a ' ...
                'string, either ''common'' or ''separate''']);
        end

      elseif strcmp(name, 'info')
        if isnumeric(value) && numel(value) == 1
          info = value;
        else
          error('Argument with ''Info'' should be a numeric scalar');
        end

      else
        error('Invalid property: %s', name);
      end
      n = n + 2;
    end



    if strcmp(lambdaType, 'common')

      % One parameter row per data row; the third column (lambda) is
      % kept identical across rows.
      theta = repmat(theta, size(NN,1), 1);

      if strcmp(optimtype, 'gradient1')

        % Coordinate-wise optimization: P1 and P2 for each row, then
        % the shared lambda across all rows.
        maxIterations = 30;
        for n = 1:20

          % Bounds on P1 and P2 start narrow and widen with the
          % iteration number until the full [0,1] interval is allowed.
          if n < 10
            pLow  = 0.1^n;
            pHigh = 1-(1-0.9)^n;
          else
            pLow  = 0;
            pHigh = 1;
          end

          for k = 1:size(NN,1)
            N = NN(k,:);
            theta(k,1) = lyngby_opt_gradient( ...
                @lyngby_pde_binmix_costder1, theta(k,1), ...
                'parameters', { theta(k,2) theta(k,3) N }, ...
                'info', info, ...
                'maxiterations', maxIterations);
            theta(k,2) = lyngby_opt_gradient( ...
                @lyngby_pde_binmix_costder2, theta(k,2), ...
                'parameters', { theta(k,1) theta(k,3) N }, ...
                'info', info, ...
                'maxiterations', maxIterations);

            theta(k,1:2) = min(max(theta(k,1:2), pLow), pHigh);

            % Keep P1 <= P2.
            % NOTE(review): lambda is shared between the rows and is
            % therefore not flipped to 1-lambda here, unlike in the
            % 'separate' case - confirm that this is intended.
            if theta(k,1) > theta(k,2)
              theta(k,[2 1]) = theta(k,1:2);
            end

            if info >= 5
              disp(theta);
            end
          end

          % Shared mixing coefficient; all rows hold the same value so
          % the first row is used as the starting point.
          lambda = lyngby_opt_gradient(@lyngby_pde_binmix_cd33, ...
              theta(1,3), ...
              'parameters', { theta(:,1) theta(:,2) NN }, ...
              'info', info-10, ...
              'maxiterations', maxIterations);
          if n < 3
            theta(:,3) = min(max(lambda,0.1),0.9);
          else
            theta(:,3) = min(max(lambda,0),1);
          end

        end

      else
        % Previously this case silently returned the initial guess.
        % The return value is kept, but the caller is now told.
        warning(['lyngby_pde_binmix: OptimType ''%s'' is not ' ...
              'implemented for LambdaType ''common''; returning ' ...
              'the initial parameter values'], optimtype);
      end


      O = theta;

    elseif strcmp(lambdaType, 'separate')

      theta0 = theta;
      O = zeros(size(NN,1), 3);

      for k = 1:size(NN,1)

        % Optimize each row individually, always starting from the
        % same initial guess so the result for a row does not depend
        % on the rows before it.
        N = NN(k,:);
        theta = theta0;


        if strcmp(optimtype, 'grid')
          P1 = 0:0.1:1;
          P2 = 0:0.1:1;
          lambda = 0:0.01:0.5;
          [Theta1,Theta2,Theta3] = ndgrid(P1,P2,lambda);
          Theta1 = Theta1(:);
          Theta2 = Theta2(:);
          Theta3 = Theta3(:);
          maxll = -Inf;
          for idx = 1:length(Theta1)
            ll = lyngby_pde_binmix_ll(N, Theta1(idx), Theta2(idx), ...
                Theta3(idx));
            if maxll < ll
              maxll = ll;
              theta(1) = Theta1(idx);
              theta(2) = Theta2(idx);
              theta(3) = Theta3(idx);
            end
          end

        elseif strcmp(optimtype, 'gradient')

          theta = lyngby_opt_gradient(@lyngby_pde_binmix_costder, ...
              theta(:), 'parameters', { N }, 'info', info);


        elseif strcmp(optimtype, 'gradient1')

          % Coordinate-wise optimization of P1, P2 and lambda
          maxIterations = 30;
          for iter = 1:20
            theta(1) = lyngby_opt_gradient( ...
                @lyngby_pde_binmix_costder1, theta(1), ...
                'parameters', { theta(2) theta(3) N }, ...
                'info', info, ...
                'maxiterations', maxIterations);
            theta(2) = lyngby_opt_gradient( ...
                @lyngby_pde_binmix_costder2, theta(2), ...
                'parameters', { theta(1) theta(3) N }, ...
                'info', info, ...
                'maxiterations', maxIterations);
            theta(3) = lyngby_opt_gradient( ...
                @lyngby_pde_binmix_costder3, theta(3), ...
                'parameters', { theta(1) theta(2) N }, ...
                'info', info, ...
                'maxiterations', maxIterations);

            % Narrow bounds in the first sweep, [0,1] afterwards
            if iter < 2
              theta(1:3) = min(max(theta(1:3),0.1),0.9);
            else
              theta(1:3) = min(max(theta(1:3),0),1);
            end

            if info >= 5
              disp(theta);
            end
          end

        elseif strcmp(optimtype, 'conjgrad')

          % This is somewhat unstable
          theta = lyngby_opt_cg(@lyngby_pde_binmix_costder, theta(:), ...
              'parameters', { N }, 'optimtype', 'hestenesstiefel', ...
              'info', info);

        elseif strcmp(optimtype, 'gridconjgrad')

          % Initial grid search. 'lambdatype' must be given explicitly,
          % otherwise the recursive call takes the 'common' branch,
          % where the grid search is not implemented.
          theta = lyngby_pde_binmix(N, 'optimtype', 'grid', ...
              'lambdatype', 'separate', 'info', info);

          % Conjugate Gradient
          theta = lyngby_opt_cg(@lyngby_pde_binmix_costder, theta(:), ...
              'parameters', { N }, 'optimtype', 'hestenesstiefel', ...
              'info', info);

        else
          error('Internal error');
        end


        theta(1:3) = min(max(theta(1:3),0),1);


        % Permute so that theta(1) (P1) is always smaller than or
        % equal to theta(2) (P2); swapping the two components means
        % the mixing coefficient becomes 1-lambda.
        if theta(1) > theta(2)
          theta([2 1]) = theta(1:2);
          theta(3)     = 1 - theta(3);
        end

        O(k,:) = theta(:).';
      end

    else
      error('Internal error');
    end





function [f, df] = lyngby_pde_binmix_costder(theta, N)
% Cost and gradient for joint optimization of all three parameters.
%
%   theta  Parameter vector; its first three elements are passed on
%          as P1, P2 and lambda
%   N      Observed frequencies
%
%   f      Negative log-likelihood, as a column
%   df     Negated output of lyngby_pde_binmix_der, as a column

    P1     = theta(1);
    P2     = theta(2);
    lambda = theta(3);

    % A and B are auxiliary outputs of the log-likelihood routine that
    % the derivative routine takes as input.
    [loglik, A, B] = lyngby_pde_binmix_ll(N, P1, P2, lambda);
    grad = lyngby_pde_binmix_der(N, A, B, P1, P2, lambda);

    % The optimizers minimize, hence the sign change
    f  = -loglik(:);
    df = -grad(:);



function [f, df] = lyngby_pde_binmix_costder1(P1, P2, lambda, N)
% Cost and derivative with P1 as the free variable; P2, lambda and the
% observed frequencies N are held fixed.
%
%   f   Negative log-likelihood, as a column
%   df  Negated first element of the lyngby_pde_binmix_der output
%       (presumably the derivative with respect to P1 - confirm
%       against lyngby_pde_binmix_der)

    [loglik, A, B] = lyngby_pde_binmix_ll(N, P1, P2, lambda);
    grad = lyngby_pde_binmix_der(N, A, B, P1, P2, lambda);

    f  = -loglik(:);
    df = -grad(1);

function [f, df] = lyngby_pde_binmix_costder2(P2, P1, lambda, N)
% Cost and derivative with P2 as the free variable; P1, lambda and the
% observed frequencies N are held fixed. Note the argument order: the
% free variable P2 comes first.
%
%   f   Negative log-likelihood, as a column
%   df  Negated second element of the lyngby_pde_binmix_der output
%       (presumably the derivative with respect to P2 - confirm
%       against lyngby_pde_binmix_der)

    [loglik, A, B] = lyngby_pde_binmix_ll(N, P1, P2, lambda);
    grad = lyngby_pde_binmix_der(N, A, B, P1, P2, lambda);

    f  = -loglik(:);
    df = -grad(2);

function [f, df] = lyngby_pde_binmix_costder3(lambda, P1, P2, N)
% Cost and derivative with lambda as the free variable; P1, P2 and the
% observed frequencies N are held fixed.
%
%   f   Negative log-likelihood, as a column
%   df  Negated third element of the lyngby_pde_binmix_der output
%       (presumably the derivative with respect to lambda - confirm
%       against lyngby_pde_binmix_der)

    [loglik, A, B] = lyngby_pde_binmix_ll(N, P1, P2, lambda);
    grad = lyngby_pde_binmix_der(N, A, B, P1, P2, lambda);

    f  = -loglik(:);
    df = -grad(3);

function [f, df] = lyngby_pde_binmix_cd33(lambda, P1, P2, N)
% Cost and derivative for a lambda shared by all rows of N.
%
%   lambda  Free variable, used for every row
%   P1, P2  Per-row parameters, indexed by row number
%   N       Observed frequencies, one data set per row
%
%   f       Sum over rows of the negative log-likelihood
%   df      Sum over rows of the negated third element of the
%           lyngby_pde_binmix_der output (presumably the derivative
%           with respect to lambda - confirm against
%           lyngby_pde_binmix_der)

    nRows = size(N,1);
    f  = 0;
    df = 0;
    for row = 1:nRows
      counts = N(row,:);
      [loglik, A, B] = lyngby_pde_binmix_ll(counts, P1(row), ...
          P2(row), lambda);
      grad = lyngby_pde_binmix_der(counts, A, B, P1(row), P2(row), ...
          lambda);
      f  = f  - loglik(:);
      df = df - grad(3);
    end
    

    