function [E, Y, VOut, WOut] = lyngby_nn_qtrain(X, T, VOld, WOld, Reg, ...
    arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, ...
    arg11, arg12)

% lyngby_nn_qtrain     - Quadratic neural network, training 
%
%	function [E, Y, VOut, WOut] = lyngby_nn_qtrain(X, T, VOld, ...
%	          WOld, Reg, 'PropertyName', 'PropertyValue')
%
%	Input:	X      Neural network input, one row per example
%		T      Target output
%		VOld   Old input (hidden) weights. Entries that are
%		       exactly zero are treated as pruned and are kept
%		       at zero through the weight masks.
%		WOld   Old output weights. Zero entries are treated as
%		       pruned in the same way.
%		Reg    Regularization (weight decay), indexed as a
%		       vector with the input weights first and the
%		       output weights after them
%		Property:
%		   'MaxIteration'  Iteration stop criterion, a value
%		                   larger than 0. Default: 200
%		   'MinCost'       Iteration stop criterion, a single
%		                   value. Default: 0
%		   'MinGradient'   Iteration stop criterion, a single
%		                   value. Default: std(T) * 10^(-6)
%		   'Method'        Optimization type, a single numeric
%		                   code (see below). Default: 97
%		   'WeightAcc'     [ {off} | on ] Accumulate weights
%		   'Info'          [ {0} | ~0 | 'on' | 'off' ] Reporting
%		                   of cost function and gradient for
%		                   every iteration
%
%	Output:	E      Entropic error without regularization term
%		Y      Computed outputs
%		VOut   New trained hidden weights, or (with 'WeightAcc'
%		       on) a matrix with one column of vectorized
%		       hidden weights per performed iteration
%		WOut   New trained output weights, or (with 'WeightAcc'
%		       on) a matrix with one column of vectorized
%		       output weights per performed iteration
%
%       This function trains a neural network, either pruned or fully
%       connected. It continues until one of the stop criteria is
%       met: MaxIteration is the number of epochs (optimization
%       steps); training stops when the cost function drops below
%       MinCost or when the norm of the gradient drops below
%       MinGradient.
%
%       If VOld has more rows than X has columns, X is extended with
%       columns of ones (bias units).
%
%       Method codes:
%          15    Gradient descent with soft linesearch
%          16    Gradient descent with soft linesearch and adaptive
%                stepsize
%          34    Momentum gradient descent, soft linesearch every
%                fifth iteration
%          35    Momentum gradient descent with soft linesearch
%          36    Interpolation between a momentum step and a small
%                gradient step, with soft linesearch
%          37    Momentum gradient descent with soft linesearch,
%                fallback to gradient descent on the output layer
%          65    Diagonal second order, alternating between hidden
%                and output weights, gradient descent fallback
%          75    Diagonal hessian for input weights and full hessian
%                for output weights, gradient descent fallback
%          83    Block hessian second order, gradient descent fallback
%          93    Levenberg-Marquardt with block hessian, no
%                interpolation for the output weights
%          94    Levenberg-Marquardt with block hessian
%          97    Levenberg-Marquardt with full hessian
%          1583, 1593, 1594
%                Method 15 for the first 50 iterations, then 83, 93
%                or 94 respectively
%          3765, 6575, 6585
%                Method 65 alternating with 37, 75 or 85 in blocks
%                of 50 iterations
%
%       Notes: The derivatives for method 85 are computed, but this
%       function contains no update step for it. For methods below 90
%       the returned E and Y are evaluated at the start of the last
%       iteration, that is, for the weights before the last update.
%   
%       See also: lyngby_nn_qmain, lyngby_nn_qforward, lyngby_nn_qerror

% cvs : $Id: lyngby_nn_qtrain.m,v 1.10 1998/12/22 15:39:33 fnielsen Exp $
%       $Revision: 1.10 $

    % Default stop criteria and options
    maxIteration = 200;
    minCost      = 0; 
    minGradient  = std(T) * 10^(-6);
    method       = 97;
    bWeightAcc   = 0;
    bInfo        = 0;
    
    % Parse the 'PropertyName', PropertyValue pairs. The optional
    % arguments are named arg1..arg12, so they are fetched by name
    % through eval. 
    n = 1;
    while n < nargin-5
      eval(sprintf('arg = lower(arg%d);', n)); 
      if strcmp(arg, 'maxiteration')
        n = n + 1;
        eval(sprintf('arg = arg%d;', n));
        if ~ischar(arg)
          if arg > 0 
            maxIteration = arg;
          else
            error([ 'Argument to ''MaxIteration'' should larger ' ...
                'than 0.']);
          end
        else	  
          error('''MaxIteration'' PropertyName should be followed by a value');
        end

      elseif strcmp(arg, 'mincost')
        n = n + 1;
        eval(sprintf('arg = arg%d;', n));
        if ~ischar(arg)
          % The value itself has to be tested here (the test used to
          % reference an undefined variable)
          if length(arg) == 1
            minCost = arg;
          else
            error(['''MinCost'' PropertyName should be followed by a ' ...
                'a single value']);
          end
        else
          error('''MinCost'' PropertyName should be followed by a value');
        end

      elseif strcmp(arg, 'mingradient')
        n = n + 1;
        eval(sprintf('arg = arg%d;', n));
        if ~ischar(arg)
          if length(arg) == 1
            minGradient = arg;
          else
            error(['''MinGradient'' PropertyName should be followed by a ' ...
                'a single value']);
          end
        else 
          error('''MinGradient'' PropertyName should be followed by a value');
        end

      elseif strcmp(arg, 'method')
        n = n + 1;
        eval(sprintf('arg = arg%d;', n));
        if ~ischar(arg)
          if length(arg) == 1
            method = arg;
          else
            error(['''Method'' PropertyName should be followed by a ' ...
                'a single value']);
          end
        else 
          error('''Method'' PropertyName should be followed by a value');
        end
      
      elseif strcmp(arg, 'weightacc')
        n = n + 1;
        eval(sprintf('arg = arg%d;', n));
        if ischar(arg)
          if strcmp(lower(arg), 'on')
            bWeightAcc = 1;
          elseif strcmp(lower(arg), 'off')
            bWeightAcc = 0;
          else
            error(['''WeightAcc'' PropertyName should be followed by a ' ...
                'a string, - either ''on'' or ''off''']);
          end
        else 
          error('''WeightAcc'' PropertyName should be followed by a string');
        end

      elseif strcmp(arg, 'info')
        n = n + 1;
        eval(sprintf('arg = arg%d;', n));
        if ischar(arg)
          if strcmp(lower(arg), 'on')
            bInfo = 1;
          elseif strcmp(lower(arg), 'off')
            bInfo = 0;
          else
            error(['''Info'' PropertyName should be followed by a ' ...
                'a string, - either ''on'' or ''off''']);
          end
        else 
          % A non-string value is used directly as the flag
          bInfo = arg;
        end

      else
        error(sprintf('Invalid property: %s', arg));
      end
      n = n + 1;
    end

    % Optimization parameters
    stepsize = 0.1;
    stepsize2 = stepsize;          % Separate stepsize for method 37
    stepsizeIncrease = 1.25;
    stepsizeDecrease = 0.5;
    momentum = 0.9;
    interpolate = 1;               % Damping term for methods 36, 93, 94, 97
    
    % Sizes
    [Np,  Ni] = size(X);
    [Nii, Nh]  = size(VOld);
    [Nhh, No]  = size(WOld);
    Nw = No*Nhh;
    Nv = Nh*Nii;
    N = [ Nii Nhh No ; Ni Nh No];
    
    % Bias unit
    if Nii > Ni
      X = [ X ones(Np, Nii-Ni) ];
    end
    HB = ones(Np, Nhh-Nh);
    
    % Pruning Mask: zero where the old weight is exactly zero
    VMask = ones(size(VOld));
    WMask = ones(size(WOld));
    VMaskI = find(VOld==0);
    WMaskI = find(WOld==0);
    VMask(VMaskI) = zeros(size(VMaskI));
    WMask(WMaskI) = zeros(size(WMaskI));

    % Apply Mask to Input
    if any(size(VMask) == [1 1])
      X = X .* (ones(Np,1) * VMask'); 
    else
      X = X .* (ones(Np,1) * any(VMask')); 
    end

    % Accumulation of weights
    if bWeightAcc
      VOut = zeros(prod(size(VOld)), maxIteration);
      WOut = zeros(prod(size(WOld)), maxIteration);
    end
    
    % Training
    V = VOld;
    W = WOld;
    DV = zeros(size(V));
    DW = zeros(size(W));

    [Y,H]   = lyngby_nn_qforward(X, V, W);
    [E, EY] = lyngby_nn_qerror(T, Y);
    C       = lyngby_nn_cost(E, V, W, Reg);

    % True while Y, H, E, EY and C belong to the current V and W. The
    % methods >= 90 keep them up to date themselves; the methods < 90
    % do not, so the forward pass has to be redone after such a step.
    bInSync = 1;
    
    iteration = 0;
    cost      = Inf;
    gradient  = Inf;
    while (iteration < maxIteration) & (cost >= minCost) & (gradient >= minGradient) 
      logString = '';

      % Hybrid methods: select the actual method for this iteration
      if method > 1000
        if method == 1583
          if iteration < 50
            met = 15;
          else
            met = 83;
          end
        elseif method == 1593
          if iteration < 50
            met = 15;
          else
            met = 93;
          end
        elseif method == 1594
          if iteration < 50
            met = 15;
          else
            met = 94;
          end
        elseif method == 3765
          if lyngby_mod(iteration / 50, 2) < 1
            met = 65;
          else
            met = 37;
          end
        elseif method == 6575
          if lyngby_mod(iteration / 50, 2) < 1
            met = 65;
          else
            met = 75;
          end
        elseif method == 6585
          if lyngby_mod(iteration / 50, 2) < 1
            met = 65;
          else
            met = 85;
          end
        else
          % Without this branch 'met' would be undefined below
          error(sprintf('Unknown hybrid method: %d', method));
        end

      else
        met = method;
      end
      
      % Forward pass. Methods >= 90 reuse the values from their own
      % last step, unless the previous iteration was done by a method
      % < 90 (hybrid methods), which leaves the cached values stale.
      if met < 90 | ~bInSync
        [Y,H]   = lyngby_nn_qforward(X, V, W);
        [E, EY] = lyngby_nn_qerror(T, Y);
        C       = lyngby_nn_cost(E, V, W, Reg);
      end

      %%%%%%%%%%%%%%%%%%%%%%%
      % Compute derivatives %
      %%%%%%%%%%%%%%%%%%%%%%%
      
      if any(met == [ 15 16 17 34 35 36 37 65 75 83 85 93 94 97])
        % First order derivative
        
        % First order derivative of Regularization 
        dRUv  = lyngby_nn_qdru(V, W, Reg);      
        dRV   = reshape(dRUv(1:Nv), Nii, Nh);
        dRW   = reshape(dRUv(Nv+(1:Nw)), Nhh, No);
        
        % Output weights, First order derivative
        dEW       = lyngby_nn_qdew([H HB], W, EY);
        dCW       = dEW + dRW;

        % Hidden weights, First order derivative
        [dEV,H12] = lyngby_nn_qdev(X, H, V, W, EY);
        dCV       = dEV + dRV;

      end
      
      if any(met == [ 65 ])

        % Regularization, Diagonal Second order derivative
        ddRUv = Reg;
        ddRV  = reshape(ddRUv(1:Nv), Nii, Nh);
        ddRW  = reshape(ddRUv(Nv+(1:Nw)), Nhh, No);

        % Output weights, Diagonal second order derivative
        ddEW = lyngby_nn_qddewds(H, W);
        ddCW = ddEW + ddRW;

        % Input weights, Symmetric diagonal second order derivative
        ddEV = lyngby_nn_qddevds(X, V, W, H12);
        ddCV = ddEV + ddRV;
      
      elseif any(met == [ 66 ])
        % Input weights, Asymmetric diagonal second order derivative
        % (not active)

        % ddEV = lyngby_nn_qddevda(X, Y, H, W, H12);
        % ddCV = ddEV + ddRV;
        
      elseif any(met == [ 75 ])
        
        % Regularization 
        ddRV = reshape(Reg(1:Nv), Nii, Nh);
        ddRWW  = diag(Reg(Nv+(1:Nw)));
        
        % Output weights
        ddEWW = lyngby_nn_qddews(H, W);
        ddCWW = ddEWW + ddRWW;

        % Input weights
        ddEV = lyngby_nn_qddevds(X, V, W, H12);
        ddCV = ddEV + ddRV;
      
      elseif any(met == [ 83 93 94 ])
        % Block symmetric hessian second order derivative 

        % Regularization 
        ddRVV = diag(Reg(1:Nv));
        ddRWW = diag(Reg(Nv+(1:Nw)));
        
        % Output weights
        ddEWW = lyngby_nn_qddews(H, W);
        ddCWW = ddEWW + ddRWW;

        % Input weights
        ddEVV = lyngby_nn_qddevs(X, V, W, H12);
        ddCVV = ddEVV + ddRVV;
      
      elseif any(met == [ 85 97 ])
        % Full Symmetric hessian second order derivative 

        ddRU = Reg;
        ddEUU = lyngby_nn_qddeus(X, V, W, H, H12);
        ddCUU = ddEUU + diag(ddRU);
        
      end
      
      %%%%%%%%%%%%%%%%%%%%%%%
      % Actual optimization %
      %%%%%%%%%%%%%%%%%%%%%%%
      
      if met == 15 
        % First order: softline gradient descent   
        
        DV = - stepsize * dCV .* VMask;
        DW = - stepsize * dCW .* WMask;
        [CNew, VNew, WNew, succes] = lyngby_nn_qsoftline(X, T, V, W, ...
            Reg, DV, DW, C, 100);
        logString = sprintf('Succes: %2d, %s', succes, logString);
        
      elseif met == 16
        % First order: softline gradient descent with adaptive stepsize
        
        DV = - stepsize * dCV .* VMask;
        DW = - stepsize * dCW .* WMask;
        [CNew, VNew, WNew, succes] = lyngby_nn_qsoftline(X, T, V, W, ...
            Reg, DV, DW, C, 10);
        if succes == 1 
          stepsize = stepsize * stepsizeIncrease;
        elseif succes == 0
          stepsize = stepsize * stepsizeDecrease;
        else
          stepsize = stepsize * 2^(-succes+1);
        end
        if bInfo
          logString = sprintf('Succes: %2d, Stepsize: %5g, %s', succes, ...
              stepsize, logString); 
        end

      elseif met >= 30 & met < 40
        % First order with momentum         	
          
        DVOld = DV;
        DWOld = DW;

        if met == 34
          % Straight momentum gradient descent with soft linesearch
          % every five iteration

          DV = (- stepsize * dCV + momentum * DVOld) .* VMask;
          DW = (- stepsize * dCW + momentum * DWOld) .* WMask;
          if lyngby_mod(iteration, 5) 
            % Plain step: CNew and succes keep their values from the
            % latest linesearch
            VNew = V + DV;
            WNew = W + DW;
          else
            [CNew, VNew, WNew, succes] = lyngby_nn_qsoftline(X, T, V, W, ...
                Reg, DV, DW, C, 30);
            if bInfo
              logString = sprintf('Succes: %2d, Stepsize: %5g, %s', ...
                  succes, stepsize, logString); 
            end
          end

        elseif met == 35
          % Straight momentum gradient descent with soft linesearch
        
          DV = (- stepsize * dCV + momentum * DVOld) .* VMask;
          DW = (- stepsize * dCW + momentum * DWOld) .* WMask;
          [CNew, VNew, WNew, succes] = lyngby_nn_qsoftline(X, T, V, W, ...
              Reg, DV, DW, C, 30);
          if bInfo
            logString = sprintf('Succes: %2d, Stepsize: %5g, %s', ...
                succes, stepsize, logString); 
          end
            
        elseif met == 36
          % Poor-mans Levenberg-Marquardt: 
          % Interpolating between momentum calculated step and an
          % infinitely small gradient step, with soft linesearch
          % (realmin guards the divisions against zero denominators)
          
          DV =  -dCV ./ (-dCV ./ (- dCV + momentum * DVOld + ...
              realmin) + interpolate + realmin) .* VMask;
          DW =  -dCW ./ (-dCW ./ (- dCW + momentum * DWOld + ...
              realmin) + interpolate + realmin) .* WMask;
          [CNew, VNew, WNew, succes] = lyngby_nn_qsoftline(X, T, V, W, ...
              Reg, DV, DW, C, 1);
          if succes == 1
            interpolate = interpolate * 1.25;
          elseif succes == 0
            interpolate = interpolate * 0.75;
          end
          if bInfo
            logString = sprintf('Succes: %2d, Interpolate: %5g, %s', ...
                succes, interpolate, logString); 
          end
            
        elseif met == 37
          % Hybrid optimization:
          %   1. Momentum gradient descent with soft linesearch
          %   2. Gradient descent with output layer
          
          DV = (- stepsize2 * dCV + momentum * DVOld) .* VMask;
          DW = (- stepsize2 * dCW + momentum * DWOld) .* WMask;
          [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
              Reg, DV, DW, C, 10);
          if succes == 1 
            stepsize2 = stepsize2 * stepsizeIncrease;
          elseif succes == 0
            stepsize2 = stepsize2 * stepsizeDecrease;
          else
            stepsize2 = stepsize2 * 2^(-succes+1);
          end

          if ~succes
            % Fallback: only the output weights are moved, the step
            % for the hidden weights is set to zero
            DV = - stepsize * dCV .* VMask;
            DV = zeros(size(DV));
            DW = - stepsize * dCW .* WMask;
            % DW = zeros(size(DW));
            [CNew, VNew, WNew, succes] = lyngby_nn_qsoftline(X, T, V, W, ...
                Reg, DV, DW, C, 100);
            if succes == 1 
              stepsize = stepsize * stepsizeIncrease;
            elseif succes == 0
              stepsize = stepsize * stepsizeDecrease;
            else
              stepsize = stepsize * 2^(-succes+1);
            end
            logString = sprintf('first %s', logString);

          end
          if bInfo
            logString = sprintf('Succes: %2d, Stepsize: %5g, %s', ...
                succes, stepsize, logString); 
          end
        end
          
      elseif met == 65
        % Hybrid optimization:
        %   1. Symmetrical diagonal second Order, soft linesearch
        %   2. Gradient descent, soft linesearch

        % Odd iterations update the hidden weights, even iterations
        % the output weights
        if lyngby_mod(iteration, 2)
          DV = - (dCV ./ (ddCV + realmin)) .* VMask;
          DW = zeros(size(DW));
        else
          DV = zeros(size(DV));
          DW = - (dCW ./ (ddCW + realmin)) .* WMask;
        end
        [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
            Reg, DV, DW, C, 10);
        s = 1;
        if ~succes 
          DV = - stepsize * dCV .* VMask;
          DW = - stepsize * dCW .* WMask;
          [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
              Reg, DV, DW, C, 100);
          if succes == 1 
            stepsize = stepsize * stepsizeIncrease;
          elseif succes == 0
            stepsize = stepsize * stepsizeDecrease;
          else
            stepsize = stepsize * 0.5;
          end
          if bInfo
            logString = sprintf('first %s', logString);
            s = stepsize;
          end
        end
        if bInfo
          logString = sprintf(['Succes: %2d, Stepsize: %5g, '...
                '%s'], succes, s, logString); 
        end
      
      elseif met == 75
        % Hybrid optimization:
        %   1. Symmetrical second Order, soft linesearch
        %      diagonal hessian for input weights
        %   2. Gradient descent, soft linesearch

        DV = - dCV ./ (ddCV+realmin);
        DW = reshape(- ddCWW \ dCW(:), Nhh, No);
        [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
            Reg, DV, DW, C, 10);
        s = 1;
        if ~succes 
          DV = - stepsize * dCV .* VMask;
          DW = - stepsize * dCW .* WMask;
          [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
              Reg, DV, DW, C, 100);
          if succes == 1 
            stepsize = stepsize * stepsizeIncrease;
          else 
            stepsize = stepsize * stepsizeDecrease;
          end
          if bInfo
            logString = sprintf('first %s', logString);
            s = stepsize;
          end
        end
        if bInfo
          logString = sprintf(['Succes: %2d, Stepsize: %5g, '...
                '%s'], succes, s, logString); 
        end
      
      elseif met == 83
        % Hybrid optimization:
        %   1. Symmetrical second Order, soft linesearch
        %   2. Gradient descent, soft linesearch

        DV = reshape(- (ddCVV + eye(Nii*Nh)) \ dCV(:), Nii, Nh);
        % DV = zeros(Nii,Nh);
        DW = reshape(- ddCWW \ dCW(:), Nhh, No);
        [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
            Reg, DV, DW, C, 10);
        s = 1;
        if ~succes 
          DV = - stepsize * dCV .* VMask;
          DW = - stepsize * dCW .* WMask;
          [CNew, VNew, WNew, succes, DV, DW] = lyngby_nn_qsoftline(X, T, V, W, ...
              Reg, DV, DW, C, 100);
          if succes == 1 
            stepsize = stepsize * stepsizeIncrease;
          else 
            stepsize = stepsize * stepsizeDecrease;
          end
          if bInfo
            logString = sprintf('first %s', logString);
            s = stepsize;
          end
        end
        if bInfo
          logString = sprintf(['Succes: %2d, Stepsize: %5g, '...
                '%s'], succes, s, logString); 
        end
      
      elseif met == 93
        % Levenberg Marquardt with block hessian
        % No Levenberg-Marquardt interpolation with output weight 

        DV = reshape(- (ddCVV + interpolate * eye(Nii*Nh)) \ ...
            dCV(:), Nii, Nh);
        % DW = reshape(- ddCWW \ dCW(:), Nhh, No);
        DW = reshape(- pinv(ddCWW) * dCW(:), Nhh, No);

        VNew = (V + DV) .* VMask;
        WNew = (W + DW) .* WMask;

        [YNew,HNew]   = lyngby_nn_qforward(X, VNew, WNew);
        [ENew, EYNew] = lyngby_nn_qerror(T, YNew);
        CNew          = lyngby_nn_cost(ENew, VNew, WNew, Reg);

        if (CNew < C)
          % Accept the step and relax the damping
          interpolate = interpolate * 0.9;
          Y = YNew;
          H = HNew;
          E = ENew;
          EY = EYNew;
          succes = 1;
        else 
          % Reject the step and increase the damping
          interpolate = interpolate * 2;
          VNew = V;
          WNew = W;
          CNew = C;
          succes = 2;
        end
        if bInfo
          logString = sprintf(['Succes %d , Interpolate: %5g, '...
                '%s'],  succes, interpolate, logString); 
        end
        
      elseif met == 94
        % Levenberg Marquardt with block hessian

        DV = reshape(- (ddCVV + interpolate * eye(Nii*Nh)) \ ...
            dCV(:), Nii, Nh);
        DW = reshape(- (ddCWW + interpolate * eye(Nhh*No)) \ ...
            dCW(:), Nhh, No);
        
        VNew = (V + DV) .* VMask;
        WNew = (W + DW) .* WMask;

        [YNew,HNew]   = lyngby_nn_qforward(X, VNew, WNew);
        [ENew, EYNew] = lyngby_nn_qerror(T, YNew);
        CNew          = lyngby_nn_cost(ENew, VNew, WNew, Reg);
   
        if (CNew < C)
          % Accept the step and relax the damping
          interpolate = interpolate * 0.9;
          Y = YNew;
          H = HNew;
          E = ENew;
          EY = EYNew;
          succes = 1;
        else 
          % Reject the step and increase the damping
          interpolate = interpolate * 2;
          VNew = V;
          WNew = W;
          CNew = C;
          succes = 2;
        end
        if bInfo
          logString = sprintf(['Succes %d , Interpolate: %5g, '...
                '%s'],  succes, interpolate, logString); 
        end

      elseif met == 97
        % Levenberg Marquardt with full Levenberg-Marquardt hessian
        
        dCU = [ dCV(:) ; dCW(:) ];
        DU = - (ddCUU + interpolate * eye(Nii*Nh+Nhh*No)) \ dCU;
        [DV, DW] = lyngby_nn_u2vw(DU, N);
        
        VNew = (V + DV) .* VMask;
        WNew = (W + DW) .* WMask;

        [YNew,HNew]   = lyngby_nn_qforward(X, VNew, WNew);
        [ENew, EYNew] = lyngby_nn_qerror(T, YNew);
        CNew          = lyngby_nn_cost(ENew, VNew, WNew, Reg);
   
        if (CNew < C)
          % Accept the step and relax the damping
          interpolate = interpolate * 0.9;
          Y = YNew;
          H = HNew;
          E = ENew;
          EY = EYNew;
          succes = 1;
        else 
          % Reject the step and increase the damping
          interpolate = interpolate * 2;
          VNew = V;
          WNew = W;
          CNew = C;
          succes = 2;
        end
        if bInfo
          logString = sprintf(['Succes %d , Interpolate: %5g, '...
                '%s'],  succes, interpolate, logString); 
        end

      end
      
      % Accept the result of this iteration
      W = WNew;
      V = VNew;
      C = CNew;

      % Only the methods >= 90 leave Y, H, E and EY matching the new
      % weights
      bInSync = (met >= 90);

      if bWeightAcc
        VOut(:,iteration+1) = VNew(:);
        WOut(:,iteration+1) = WNew(:);
      end
      
      % Update stop criteria
      cost = C;
      dCUv = [dCV(:) ; dCW(:) ];
      gradient = norm(dCUv);
      if bInfo
        disp(sprintf('Iteration: %5d, Cost: %6f, Gradient: %8f, %s', ...
            iteration+1, cost, gradient, logString));
      end
      iteration = iteration + 1;
    end

    if bWeightAcc
      % If the stop criterion is not maxIteration, drop the unused
      % columns
      VOut = VOut(:,1:iteration);
      WOut = WOut(:,1:iteration);
    else
      VOut = VNew;
      WOut = WNew;
    end



























