function varargout = rtresgev(varargin);
% RTRESGEV   Find a few eigenpairs of a generalized Hermitian definite eigenvalue problem using the Riemannian Trust-Region method.
%
%   Find a few smallest generalized eigenvalues and eigenvectors of the matrices A and B.
%   Uses an implentation of the Riemannian Trust-Region algorithm.
%
%   D = RTRESGEV(A) returns a vector of A's 6 smallest signed eigenvalues.
%   A must be Hermitian and should be large and sparse.
%
%   [V,D] = RTRESGEV(A) returns a diagonal matrix D of A's 6 smallest signed
%   eigenvalues and a matrix V whose columns are the corresponding eigenvectors.
%
%   [V,D,OPS] = RTRESGEV(A) also returns a structure containing information
%   on the performance of the algorithm.
%
%   RTRESGEV(A,B) solves the generalized eigenvalue problem A*V == B*V*D.  
%   B must be Hermitian positive definite and the same size as A.
%   The positive definiteness of B is taken for granted, and will not
%   be tested.
%   RTRESGEV(A,[],...) indicates the standard eigenvalue problem A*V == V*D.
%
%   RTRESGEV(A,K) and RTRESGEV(A,B,K) return the K smallest signed eigenvalues.
%
%   RTRESGEV(A,K,OPTS) and RTRESGEV(A,B,K,OPTS) specify options:
%     OPTS.verbosity: verbose printing at each outer step [0 | {1} | 2]
%     OPTS.x0: basis for the initial iterate [{randomly generated}]
%     OPTS.tol: outer convergence tolerance [scalar | {1e-6}]
%     OPTS.outerstop: mechanism for outer convergence 
%          'gradabs' absolute tolerance: quit when |grad_i| < tol
%          'gradrel' relative tolerance: quit when |grad_i|/|grad_0| < tol  
%          'resrel'  relative tolerance: quit when |res_i|/|lambda_i| < tol (default)
%
%   RTRESGEV(AFUN,N,K,OPTS) and RTRESGEV(AFUN,N,BFUN,K,OPTS) accepts the
%   functions AFUN and BFUN instead of the matrix A, where N is the size of A.
%   The functions are called like:
%      y = feval(AFUN,x);
%   and
%      y = feval(BFUN,x);
%
%     ---------------------- ADVANCED OPTIONS -----------------------
%
%     Note, any field in OPTS that is not listed below will be passed  
%     out of RTRESGEV in OPS. For example,                             
%       >> options.desc = 'my favorite test';                            
%       >> [V,L,O] = RTRESGEV(...,options);                              
%       >> O.desc
%       ans = my favorite test
%
%     OPTS.fout: file identifier for printing {1}
%          All print statements are directed to fout.
%          Typical choices are:
%             1, standard output,
%             2, standard error,
%             or a file handle returned by fopen.
%          RTRESGEV will not close the file handle.
%     OPTS.debug: debugging checks and output [{0} | 1]
%     OPTS.useSA: use 2-D subspace acceleration [false]
%                 This is only in effect early in the algorithm.
%     OPTS.p: block size for problem [integer | {k}]
%          Must satisfy k <= OPTS.p <= n
%     OPTS.paramtest: parameter test          [{0} | 1]
%          Do not actually run the algorithm, but instead
%          test the given parameters, set the defaults, and return them
%          in a struct.
%          Example: 
%             defaultopts = rtresgev(A,B,k,opts);
%     OPTS.Vtest: space against which to test at each step
%          Canonical sines are computed w.r.t. the euclidean inner product and 
%          are stored in OPS.sines. (See note in code header for methodology.)
%     OPTS.Ltest: values against which to test at each step 
%          Errors between the values in Ltest and the current eigenvalue 
%          estimates are stored in OPS.verrors. OPTS.Ltest must contain
%          p values, stored in a vector or a diagonal matrix.
%     OPTS.maxinner: max number of inner iterations
%          Default is the dimension of the tangent space:
%             d = n*p - (p^2+p)/2
%     OPTS.innerstop: available stopping criteria for the model minimization
%          A bitmask of three bits, the default is 3=1+2=maxinner or RTR model grad-based
%             bitand(innerstop,1): maxinner iterations reached
%             bitand(innerstop,2): RTR model gradient-based stop is enabled (kappa/theta)
%             bitand(innerstop,4): stop inner when outer stopping criteria is satisfied
%          At least one of these bits must be set.
%     OPTS.maxouter: max number of outer iterations  [integer | {100}]
%     OPTS.minouter: min number of outer iterations  [integer]
%          When OPTS.randomize == 1, default is 2
%          Otherwise, default is 0.
%          Useful when OPTS.randomize == 1, to allow the algorithm
%          opportunity to escape a non-optimal critical point.
%          Note, the algorithm always performs one outer step before
%          checking the outer stopping criteria, as long as the gradient
%          is non-zero.
%     OPTS.randomize: randomize model minimization   [{0} | 1]
%          This randomizes the initial solution to the model minimization.
%          Otherwise, initial solution is the origin. See OPTS.min_outer.
%          Randomization must be disabled if a preconditioner is used.
%     OPTS.kappa: convergence: see paper   [scalar | {0.1}]
%          kappa effects a linear reduction in the 
%          residual before truncating the inner solver.
%          Must satisfy 0 < kappa <= 1
%     OPTS.theta: convergence: see paper   [scalar | {1.0}]
%          theta effects a superlinear convergence rate
%          by introducing a more strict stopping criteria on 
%          the inner problem, as the algorithm nears solution.
%          The algorithm seeks order theta+1 convergence.
%          Must satisfy 0 <= theta <= 2 for a maximum cubic 
%          rate of convergence
%     OPTS.rho_prime: TR: acceptance ratio [scalar | {0.1}]
%          The ratio between the reduction in the Rayleigh
%          quotient and reduction in the quadratic model
%          is used as an acceptance criteria for a new proposal.
%          Should satisfy 0 < rho_prime < .25
%     OPTS.Delta_bar: TR: max trust region size [scalar | {inf}]
%     OPTS.Delta0: TR: initial trust region size [scalar | {p*sqrt(3)}]
%          Must satisfy 0 <= Delta0 <= Delta_bar
%     OPTS.Prec: preconditioner for the inner iteration.
%
%     Regarding Preconditioning: A preconditioner for the inner iteration may be 
%        specified via OPTS.Prec. The preconditioner will be invoked as follows:
%           Peta = feval(OPTS.Prec,X,BX,eta);
%        The input to the preconditioner will always be B-orthogonal.
%        The preconditioner must return a matrix which is B-orthogonal to X.
%        See, for example, 'precDLP'
%
%    Examples:
%       A = delsq(numgrid('C',15));
%       B = speye(15);
%       [V,D] = rtresgev(A,B,5);
%
% See also IRTRESGEV

% Copyright 2008: CG Baker, P-A Absil, KA Gallivan
% Argument processing ripped-off from MATLAB's eigs.m

% Subspaces are represented by B-orthonormal bases. Cosines/sines of angles 
% between two such subspaces, relative to the B-inner product, can be computed 
% as follows:
% Given Qf,Qg   Qf'*B*Qf = I = Qg'*B*Qg
% cosines = svd(Qf'*B*Qg)
% sines   = svd(Qg-Qf*(Qf'*B*Qg))
% See Knyazev and Argentati, "Principal Angles Between Subspaces In An A-Based 
%   Scalar Product", SIAM J. Sci. Comput., Vol. 23, No. 6, pp. 2009-2041
%
% However, in this code, we compute the canonical angles using the Euclidean
% inner product (B=I):
% cosines = svd(Qf'*Qg)
% sines   = svd(Qg-Qf*(Qf'*Qg))
% where Qf and Qg are I-orthonormal bases.
% See Golub and Van Loan, "Matrix Computations", 1996
% This is done by taking the B-orthonormal bases and finding I-orthonormal
% bases for the same subspaces.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% we accept at most three output args: V, L and Ops
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (nargout > 3)
    error('Too many output arguments. See ''help rtresgev''')
end
if length(varargin) < 1,
    error('Not enough input arguments. See ''help rtresgev'' for more.');
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% grab input arguments: A,B,K,OPTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[A,Aptr,B,Bptr,n,k,opts] = get_args(varargin{:});

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% initialize parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% strings
outerstop   = 'resrel';
% ints
p           = k;
paramtest   = 0;
maxouter    = 100;
minouter    = [];
maxinner    = inf;
innerstop   = 3;    % STOP_MAX + STOP_RTR
verbosity   = 1;
randomize   = 0;
debug       = 0;
% floats
tol         = 1e-6;
rho_prime   = 0.1;
Delta0      = p*sqrt(3);
Delta_bar   = inf;
kappa       = 0.1;
theta       = 1.0;
% arrays
x0          = [];
Prec        = [];
Vtest       = [];
Ltest       = [];


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% get parameters from opts
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% strings
[outerstop,opts] = get_string(opts,'outerstop','Outer convergence test',...
                    outerstop,{'gradabs','gradrel','resrel'});

% ints
[p,opts] = get_int(opts,'p','Block size',p,k,n);
[maxinner,opts] = ...
    get_int(opts,'maxinner','Maximum number of inner iterations', maxinner,0);
[maxouter,opts] = ...
    get_int(opts,'maxouter','Maximum number of outer iterations', maxouter,0);
[minouter,opts] = ...
    get_int(opts,'minouter','Minimum number of outer iterations', minouter,0);
[innerstop,opts] = ...
    get_int(opts,'innerstop','Inner stopping criteria',innerstop,1,7);
[verbosity,opts] = get_int(opts,'verbosity','Diagnostic level',verbosity);
[randomize,opts] = get_int(opts,'randomize','Randomize flag',randomize);
[paramtest,opts] = get_int(opts,'paramtest','Parameter test flag',paramtest);
[debug,opts]     = get_int(opts,'debug','Debugging flag',debug);
[fout,opts]      = get_int(opts,'fout','File number for output',1);
[useSA,opts]     = get_int(opts,'useSA','Use subspace acceleration',0);

% scalar floats
[tol,opts] = get_float(opts,'tol','Convergence tolerance',tol,0,inf,[1 1]);
[kappa,opts] = get_float(opts,'kappa','Convergence parameter',kappa,0,1,[1 1]);
[theta,opts] = ...
    get_float(opts,'theta','Convergence parameter',theta,0,2,[1 1]);
[rho_prime,opts] = ...
    get_float(opts,'rho_prime','Acceptance parameter', rho_prime,0,.25,[1 1]);
[Delta_bar,opts] = ...
    get_float(opts,'Delta_bar','Trust region parameter', Delta_bar,0,inf,[1 1]);
[Delta0,opts] = ...
    get_float(opts,'Delta0','Trust region parameter',Delta0,0,Delta_bar,[1 1]);

% arrays
[x0,opts] = get_float(opts,'x0','Initial subspace',x0);
if ~isempty(x0) && ~isequal(size(x0),[n p]),
    errstr = 'x0 must be n by p.';
    warning('RTRESGEV:InconsistentX0', ...
            '%s\n         Selecting random initial point.',errstr)
end
% Vtest requires extra testing and work
[Vtest,opts] = get_float(opts,'Vtest','Test basis',Vtest);
if ~isempty(Vtest),
   if ~isequal(size(Vtest,1),[n])
       error(['Test basis opts.Vtest must be n-by-*.'])
   end
   % convert to orthonormal basis
   Vtest = qf(Vtest);
end
[Ltest,opts] = get_float(opts,'Ltest','Test values',Ltest);
if ~isempty(Ltest),
    if (size(Ltest,1) == size(Ltest,2)),
        Ltest = diag(Ltest);
    elseif size(Ltest,1) == 1,
        Ltest = Ltest';
    end
    if size(Ltest,2) ~= 1,
        error(['Test values opts.Ltest must be a ',...
               'vector or a diagonal matrix.'])
    end
    Ltest = sort(Ltest,1,'ascend');
end

% get precon
[Prec,opts] = get_handle(opts,'Prec','Preconditioner',[]);
if ~isempty(Prec),
    if randomize,
        warning('Randomization not compatible with preconditioning. Disabling randomization.');
        randomize = 0;
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set defaults
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% if randomize is on, we should default minouter a little higher
% to give a chance to escape stable, non-optimal critical point
if isempty(minouter),
    if randomize,
        minouter = 2;
    else 
        minouter = 0;
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Check valid parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (minouter > maxouter)
    errstr = 'maxouter must be larger than minouter.';
    warning('RTRESGEV:InconsistentMinMaxIter', ...
            '%s\n         Resetting to defaults (0,inf).',errstr)
    minouter = 0;
    maxouter = inf;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% If performing a parameter test, save params and exit now
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if paramtest,
    theparams.k        = k;
    theparams.p        = p;
    theparams.verbosity = verbosity;
    theparams.debug    = debug;
    theparams.tol      = tol;
    theparams.outerstop = outerstop;
    theparams.innerstop = outerstop;
    theparams.randomize = randomize;
    theparams.maxouter = maxouter;
    theparams.minouter = minouter;
    theparams.maxinner = maxinner;
    theparams.rho_prime = rho_prime;
    theparams.Delta0   = Delta0;
    theparams.Delta_bar = Delta_bar;
    theparams.kappa    = kappa;
    theparams.theta    = theta;
    theparams.fout     = fout;
    theparams.x0       = x0;
    theparams.useSA    = useSA;
    varargout{1} = theparams;
    return;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Any fields in OPTS that were not consumed should be passed
% on to OPS. Init OPS now.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
OPS = opts;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize counters/sentinals
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% counters: 
%  inner iterations
CNT_INNER = 1;
%  number of matrix-vector products with A
CNT_A     = 2;
%  number of matrix-vector products with B
CNT_B     = 3;
%  number of preconditioner applications
CNT_PREC  = 4;
%  significant flops, excluding applications of A, B and Prec
CNT_FLOPS = 5;
% reset all counters to zero, in backwards order
inccntrs(CNT_FLOPS,-inccntrs(CNT_FLOPS,0));
inccntrs(CNT_PREC ,-inccntrs(CNT_PREC ,0));
inccntrs(CNT_B    ,-inccntrs(CNT_B    ,0));
inccntrs(CNT_A    ,-inccntrs(CNT_A    ,0));
inccntrs(CNT_INNER,-inccntrs(CNT_INNER,0));


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Stop reasons for tCG
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
stop_reason = {'negative curvature',...
               'exceeded TR',...
               'inner conv-kappa',...
               'inner conv-theta',...
               'outer convergence',...
               'dimension exceeded'};


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set initial iterate
% this generates a real x0, but the Rayleigh-Ritz process below will 
% return a complex multivector if A or B is complex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if isempty(x0)
    x0 = randn(n,p);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% debug hello printing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (debug > 0)
    fprintf(fout,'DEBUG MODE\n');
    fprintf(fout,'Operation counts may not be accurate.\n');
    fprintf(fout,'n == %d\n',n);
    fprintf(fout,'k == %d\n',k);
    fprintf(fout,'p == %d\n',p);
    if ~isempty(A),
        fprintf(fout,'A is a matrix\n');
    elseif ~isempty(Aptr),
        fprintf(fout,'A is a function\n');
    else
        error('Logic error. A and Aptr are null.');
    end
    if ~isempty(B),
        fprintf(fout,'B is a matrix\n');
    elseif ~isempty(Bptr),
        fprintf(fout,'B is a function\n');
    else
        fprintf(fout,'B was empty or not specified; treating as B=I\n');
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generate first iterate 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oiter = 0;         % outer number
X = x0;
if ~isempty(A),
    AX = A*X;           inccntrs(CNT_A,p);
else
    AX = Aptr(X);       inccntrs(CNT_A,p);
end

if ~isempty(B),
    BX = B*X;           inccntrs(CNT_B,p);
elseif ~isempty(Bptr),
    BX = Bptr(X);       inccntrs(CNT_B,p);
else
    BX = X;
end
VAV=X'*AX; VBV=X'*BX;   inccntrs(CNT_FLOPS,4*n*p^2);

[RV,xritz] = ritz(VAV,VBV);
X  =  X*RV;             %^
AX = AX*RV;             inccntrs(CNT_FLOPS,3*2*n*p^2);
BX = BX*RV;             %v



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compute properties of initial iterate 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% f(X): X is ritz and X'*B*X = I
fx = mytrace2(X,AX);
% grad f(X)
gradfx = tangentialize(2*AX,BX);
% norm grad f(X) for first k columns of X
normgradfx = sqrt(mytrace2(gradfx(:,1:k),gradfx(:,1:k)));
normgrad0  = normgradfx;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set effective tolerance (tolerance used for outer convergence testing)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if isequal(outerstop,'gradabs'),
    efftol = tol;
elseif isequal(outerstop,'gradrel'),
    efftol = tol*normgrad0;
elseif isequal(outerstop,'resrel'),
    efftol = tol;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% initialize loop variables
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
total_time = 0;     % total time spent in outer loop
stop_outer = 0;     % sentinal for outer loop
num_accepted = 0;   % number of accepted proposals
Delta = Delta0;     % set trust region radius


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Record data  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
curstat.EtaNorm  = 0;
curstat.EtaNormP = 0;
% save counter data for this outer step
curstat.i = oiter;
curstat.inner = inccntrs(CNT_INNER,0);
curstat.MulA  = inccntrs(CNT_A,0);
curstat.MulB  = inccntrs(CNT_B,0);
curstat.MulP  = inccntrs(CNT_PREC,0);
curstat.flops = inccntrs(CNT_FLOPS,0);
% function information
d = mydiag2(gradfx,gradfx);
curstat.fx       = fx;
curstat.Nres     = sqrt(d/4) ./ abs(xritz);
curstat.Ngrad    = sqrt(sum(d));
curstat.Delta    = Delta;
curstat.rho      = inf;
curstat.rhonum   = inf;
curstat.rhoden   = inf;
curstat.time     = 0;
curstat.accepted = true;
% accuracy of the current iterate
if ~isempty(Vtest),
    % canonical sines
    qfx = qf(X);
    curstat.sines = svd(qfx-Vtest*(Vtest'*qfx));
    curstat.sdist = norm(asin(curstat.sines));
    % canonical cosines
    curstat.cosines = svd(Vtest'*qfx);
    curstat.cdist = norm(acos(curstat.cosines));
end
%   vector of errors of eigenvalues
if ~isempty(Ltest),
    curstat.verrors = xritz - Ltest;
end
stats(1) = curstat;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initial printing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if verbosity > 1,
    fprintf(fout,['**********************************',...
                  '** Init ***',...
                  '*******************************************\n']);
end
if verbosity > 0,
    fprintf(fout,['          i: _____                      ',...
             '|grad|: %16.10e     \n'],  normgradfx);
end
if verbosity > 1,
    fprintf(fout,'     f(X): % 16.9e\n',fx);
    fprintf(fout,'   Delta0: %16.10e\n',Delta0);
    if isfield(stats(end),'cdist'),
        fprintf(fout,'    cdist: %16.10e     sdist: %16.10e\n',stats(end).cdist,stats(end).sdist);
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% begin outer loop
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% we cannot perform tCG if gradient is zero without randomization,
% so don't bother starting.
if normgradfx == 0.0 && randomize == 0,
    stop_outer = 1;
end
while ~stop_outer,

    % start clock for this outer iteration
    t0 = clock;

    % update counter
    oiter = oiter + 1;

    % print header
    if (verbosity > 1) || (debug > 0)
        fprintf(fout,['**********************************',...
                      '* RTRESGEV ',...
                      '*******************************************\n']);
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Prepare and solve TR subproblem
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % select eta, the initial solution
    % either zero or random (in tangent plane) 
    if randomize,
        % generate a random starting vector that is:
        % * in the tangent plane
        % * in the trust region
        % * as small as possible
        eta = randn(size(X))/(2*n);    % dividing by n is enough for TR
        eta = tangentialize(eta,BX);
    else
        eta = zeros(size(X));
    end
    % simply solve the TR sub-minimization
    [eta,Aeta,Beta,Heta,EtaNormP,num_inner,stop_inner] =        ...
                                    tCG(A,Aptr,B,Bptr,Prec,k,   ...
                                        X,AX,BX,fx,gradfx,xritz,...
                                        eta,Delta,              ...
                                        kappa,theta,            ...
                                        outerstop,efftol,       ...
                                        normgrad0,maxinner,     ...
                                        innerstop,debug,fout);
    % increment inner iteration counter
    inccntrs(CNT_INNER,num_inner);

    % save length of current iterate
    curstat.EtaNorm  = sqrt(mytrace2(eta,eta));
    curstat.EtaNormP = EtaNormP;

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Debugging output
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if (debug > 0),
        dbgprint_1(A,Aptr,B,Bptr,X,AX,BX,xritz,eta,Heta,Aeta,Beta,maxinner,fout);
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Compute the retraction of the next iterate:
    %    Xprop = R(X,eta)
    % 
    % If desired, compute subspace accelerated point instead.
    % In that case, we still need f(Xprop) for computing rho, for 
    % adjusting the trust-region radius.
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    usedSA = 0;
    if (stop_inner == 1 || stop_inner == 2) && useSA,
        % use SA
        usedSA = 1;
        AA = [     X'*AX,X'*Aeta;...    %^
              zeros(p,p),eta'*Aeta];    %^
        BB = [     X'*BX,X'*Beta;...    inccntrs(CNT_FLOPS,2*3*2*n*p^2);
              zeros(p,p),eta'*Beta];    %v

        [RV,xpritz] = ritz(AA,BB,p);

        Xprop  = [ X, eta]*RV;          %^
        AXprop = [AX,Aeta]*RV;          inccntrs(CNT_FLOPS,3*2*n*p*2*p);
        BXprop = [BX,Beta]*RV;          %v

        % also need to compute fhat(eta), using (X+eta)'*(A,B)*(X+eta)
        XpAXp = AA(1:p,1:p) + AA(p+1:end,p+1:end) + AA(1:p,p+1:end) + AA(1:p,p+1:end)';
        XpBXp = BB(1:p,1:p) + BB(p+1:end,p+1:end) + BB(1:p,p+1:end) + BB(1:p,p+1:end)';
        [dummy,fRXe] = ritz(XpAXp,XpBXp);        
        fRXe = sum(fRXe);
    else
        Xprop = X+eta;            %^
        AXprop = AX+Aeta;         inccntrs(CNT_FLOPS,3*n*p);
        BXprop = BX+Beta;         %v

        XpAXp = Xprop'*AXprop;    %^
        XpBXp = Xprop'*BXprop;    inccntrs(CNT_FLOPS,2*2*n*p^2);

        [RVprop,xpritz] = ritz(XpAXp,XpBXp);
        Xprop  =  Xprop*RVprop;            %^
        AXprop = AXprop*RVprop;            inccntrs(CNT_FLOPS,3*2*n*p^2);
        BXprop = BXprop*RVprop;            %v

        fRXe = sum(xpritz);
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % check the performance of the quadratic model relative to f
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % note: this uses the Hessian of X at eta, which was returned from tCG
    clear small_rhonum neg_rhonum;
    rhonum = fx-fRXe;
    rhoden = -mytrace2(gradfx,eta) - 0.5*mytrace2(Heta,eta);
    % small negatives rho counts as small rho
    % relatively small rhonum (positive or negative) should happen
    % late in the game, after the trust-region has become effectively 
    % inactive and there are no more directions of negative curvature
    if abs(rhonum/fx) < 1e-10 && (stop_inner == 3 || stop_inner == 4 || stop_inner == 5 || stop_inner == 6),
        small_rhonum = rhonum;
        small_rho    = rhonum/rhoden;
        rho = 1;
    elseif rhonum < 0,
        neg_rhonum = rhonum;
        rho = 0;
    else 
        rho = rhonum / rhoden;
    end

    % choose new TR radius based on performance
    TRadjust = '   ';
    if rho < 1/4,
        % rho is bad. shrink trust region radius.
        Delta = 1/4*Delta;
        TRadjust = 'TR-';
    elseif rho > 3/4 & (stop_inner == 2 | stop_inner == 1)
        % rho is good and we went to trust region radius. expand it.
        Delta = min(2*Delta,Delta_bar);
        TRadjust = 'TR+';
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Decide whether to accept or reject
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % accept/reject proposal based on rho
    % always accept subspace-accelerated proposal
    if rho > rho_prime || usedSA,
        accepted = 1;
    else
        accepted = 0;
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % If accepted, 
    % * set new iterate
    % * compute properties 
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if accepted,
        num_accepted = num_accepted + 1;

        % update X with the proposal
        X = Xprop;
        AX = AXprop;             inccntrs(CNT_FLOPS,3*n*p); 
        BX = BXprop;
        xritz = xpritz;

        % compute properties of next iterate
        % f(X)
        fx = sum(xritz);
        % grad f(X)
        gradfx = tangentialize(2*AX,BX);
        % norm grad f(X)
        normgradfx = sqrt(mytrace2(gradfx(:,1:k),gradfx(:,1:k)));
    end % if accepted

    % stop clock for this outer step
    this_time = etime(clock,t0);
    % update total time
    total_time = total_time + this_time;

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Record data  
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % save counter data for this outer step
    curstat.i = oiter;
    curstat.inner = inccntrs(CNT_INNER,0);
    curstat.MulA  = inccntrs(CNT_A,0);
    curstat.MulB  = inccntrs(CNT_B,0);
    curstat.MulP  = inccntrs(CNT_PREC,0);
    curstat.flops = inccntrs(CNT_FLOPS,0);
    % function information
    d = mydiag2(gradfx,gradfx);
    curstat.fx       = fx;
    curstat.Nres     = sqrt(d/4) ./ abs(xritz);
    curstat.Ngrad    = sqrt(sum(d));
    curstat.Delta    = Delta;
    curstat.rho      = rho;
    curstat.rhonum   = rhonum;
    curstat.rhoden   = rhoden;
    curstat.time     = this_time;
    curstat.accepted = accepted;
    % accuracy of the current iterate
    if ~isempty(Vtest),
        % canonical sines
        qfx = qf(X);
        curstat.sines = svd(qfx-Vtest*(Vtest'*qfx));
        curstat.sdist = norm(asin(curstat.sines));
        % canonical cosines
        curstat.cosines = svd(Vtest'*qfx);
        curstat.cdist = norm(acos(curstat.cosines));
    end
    %   vector of errors of eigenvalues
    if ~isempty(Ltest),
        curstat.verrors = xritz - Ltest;
    end
    stats(end+1) = curstat;


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % verbosity 
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if verbosity > 0,
        if accepted,
            accstr = 'acc';
        else
            accstr = 'REJ';
        end
        fprintf(fout,['%3s %3s   i: %5.5d   num_inner: %5.5d   ',...
                 '|grad|: %16.10e     %s\n'],...
                accstr,TRadjust,oiter,num_inner,normgradfx,stop_reason{stop_inner});
    end
    if verbosity > 1,
        fprintf(fout,'     f(X): % 16.9e     |eta|: %16.10e     Delta: %16.10e\n',stats(end).fx,stats(end).EtaNormP,Delta);
        fprintf(fout,'fhat(eta): % 16.9e\n',fRXe);
            if exist('neg_rhonum'),
                fprintf(fout,'  neg rho: % 16.9e\n',neg_rhonum);
            elseif exist('small_rhonum'),
                fprintf(fout,'      rho: % 16.9e      tiny: % 16.9e\n', ...
                        small_rho,small_rhonum);
            else
                fprintf(fout,'      rho: % 16.9e\n',stats(end).rho);
            end
        if isfield(stats(end),'cdist'),
            fprintf(fout,'    cdist: %16.10e     sdist: %16.10e\n',stats(end).cdist,stats(end).sdist);
        end
        fprintf(fout,'Elapsed time is %f seconds.\n',this_time);
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Check stopping criteria
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if isequal(outerstop,'resrel'),
        outererr = 0;
        for j=1:k,
            r = AX(:,j) - BX(:,j)*xritz(j);
            if xritz(j) > 1,
                err = norm(r)/abs(xritz(j));
            else
                err = norm(r);
            end
            outererr = max(outererr, err);
        end
    else
        outererr = normgradfx;
    end
    if outererr < efftol && oiter >= minouter,
        stop_outer = 1; 
    elseif oiter >= maxouter,
        fprintf(fout,'\n*** timed out -- i == %d***\n\n',oiter);
        stop_outer = 1;
    end 

end  % of outer/TR loop (counter: k)

% grab the current leftmost space
Q = X(:,1:k);
vals = xritz(1:k);

% print footer
if (verbosity > 1) || (debug > 0)
    fprintf(fout,['*********************************************', ...
                  '*******************************************\n']);
end
if (verbosity > 0) || (debug > 0)
    fprintf(fout,'Total time is %f\n',total_time);
end

OPS.total_time = total_time;
OPS.stats = stats;

% save the data
if (nargout <= 1)
    varargout{1} = vals;
else
    varargout{1} = Q;
    varargout{2} = diag(vals);
    if (nargout >= 3)
        varargout{3} = OPS;
    end
end

return




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%% trunctated cg solver for inner problem %%%%%%%%%%%%%%%%%%%%%%%%
% solve Hess_x(eta) = -grad
% X contains the ritz vectors of A and has B-orthonormal columns
function [eta,Aeta,Beta,Heta,EtaNormP,num_inner,stop_inner] = ...
                tCG( A,Aptr,B,Bptr,Prec,k,    ...     % sys data
                     X,AX,BX,fx,gradfx,xritz, ...     % current iterate
                     eta,Delta,               ...     % initial and TR radius
                     kappa,theta,             ...     % stopping params
                     outerstop,tol,           ...     % outer stopping params
                     normgrad0,maxinner,      ...     % stopping params
                     innerstop,debug,fout);           % inner stop choices

    % all terms involving the trust-region radius will utilize an inner product
    % w.r.t. the preconditioner; this is because the iterates grow in
    % length w.r.t. the preconditioner, guaranteeing that we will not 
    % re-enter the trust-region
    % 
    % the following recurrences for Prec-based norms and inner 
    % products come from CGT2000, pg. 205, first edition
    % below, P is the preconditioner
    % 
    % <eta_k,P*delta_k> = beta_k-1 * ( <eta_k-1,P*delta_k-1> + alpha_k-1 |delta_k-1|^2_P )
    % |delta_k|^2_P = <r_k,z_k> + beta_k-1^2 |delta_k-1|^2_P
    % 
    % therefore, we need to keep track of 
    % 1)   |delta_k|^2_P 
    % 2)   <eta_k,P*delta_k> = <eta_k,delta_k>_P
    % 3)   |eta_k  |^2_P
    % 
    % initial values are given by:
    %    |delta_0|_P = <r,z>
    %    |eta_0|_P   = 0
    %    <eta_0,delta_0>_P = 0
    % because we take eta_0 = 0

    % inner stopping criteria
    STOP_MAX = 1;
    STOP_RTR = 2;
    STOP_OUTER = 4;

    CNT_INNER = 1;
    CNT_A     = 2;
    CNT_B     = 3;
    CNT_PREC  = 4;
    CNT_FLOPS = 5;

    % compute dimension of tangent space
    [n,p] = size(X);

    % dimension of the tangent space
    d = n*p - (p^2+p)/2;

    % in exact arithmetic, linear CG must converge after d steps
    % in practice, we may need more
    maxinner = min(maxinner,d);

    % determine current inner residual: r = H[eta] + gradfx
    if isequal(eta,zeros(size(eta)))
        nonzerostart = 0;
        Heta = eta; % = 0;
        Aeta = eta; % = 0;
        Beta = eta; % = 0;
        e_Pe = 0;
        r = gradfx;
    else
        nonzerostart = 1;
        % eta != 0, ergo preconditioning must be off.
        if ~isempty(Prec),
            error('RTRESGEV: Internal logic error. Randomization not compatible with preconditioning.');
        end
        [Heta,Aeta,Beta] = H(A,Aptr,B,Bptr,xritz,BX,eta);
        r = gradfx+Heta;                inccntrs(CNT_FLOPS,n*p); 
        e_Pe = mytrace2(eta,eta);
    end

    % precondition the search direction
    if ~isempty(Prec),
        z = Prec(X,BX,r);
    else
        z = r;
    end
    % compute z'*r
    z_r = mytrace2(z,r);
    d_Pd = z_r;

    % compute norm of residuals
    rnorm = sqrt(mytrace2(r,r));

    % save the original residual norms
    r0norm = rnorm;
    kconv = r0norm*kappa;
    % tconv = r0norm*(r0norm/normgrad0)^theta;
    tconv = r0norm^(theta+1);
    if debug > 2,
        fprintf(fout,'+                       |r0|: %16.10e\n',r0norm);
        fprintf(fout,'+     kconv: %16.10e   tconv: %16.10e\n',kconv,tconv);
    end

    % initial search direction = -negative (precon) residual 
    delta = -z;
    if nonzerostart,
        % eta != 0, ergo, preconditioning must be off.
        e_Pd = mytrace2(eta,delta);
    else
        e_Pd = 0;
    end

    % assume termination b/c  j == end
    stop_inner = 6;

    if maxinner == 0 && bitand(innerstop,STOP_MAX),
        eta = delta;
        e_Pe = d_Pd;
        if ~isempty(A),
            Aeta = A*eta;               inccntrs(CNT_A,p);
        else
            Aeta = Aptr(eta);           inccntrs(CNT_A,p);
        end
    
        if ~isempty(B),
            Beta = B*eta;               inccntrs(CNT_B,p);
        elseif ~isempty(Bptr),
            Beta = Bptr(eta);           inccntrs(CNT_B,p);
        else
            Beta = eta;
        end
    end

    if debug > 1,
        oldm = fx + mytrace2(eta,gradfx) + 0.5*mytrace2(eta,Heta);
        fprintf(fout,'+                     m(eta): % 16.9e\n',oldm);
        fprintf(fout,'+                  |X^T B R|: %16.10e\n',norm(BX'*r));
    end

    % begin inner/tCG loop
    iiter = 0;
    while iiter < maxinner || ~bitand(innerstop,STOP_MAX),
        iiter=iiter+1;

        if debug > 2,
            % check that delta are descent directions
            % d'*r < 0
            d_r = mytrace2(delta,r);
            fprintf(fout,'+                       d''*r: % 16.9e\n',d_r);
            if d_r > 0,
                fprintf(fout,'*** direction of ASCENT ***\n');
                keyboard;
            end
        end

        % apply Hessian
        [Hd,Ad,Bd] = H(A,Aptr,B,Bptr,xritz,BX,delta);

        d_Hd = mytrace2(delta,Hd);
        alpha = z_r/d_Hd;

        % <neweta,neweta>_P = <eta,eta>_P + 2*alpha*<eta,delta>_P + alpha*alpha*<delta,delta>_P
        e_Pe_new = e_Pe + 2.0*alpha*e_Pd + alpha*alpha*d_Pd;

        % check curvature and trust-region radius
        if d_Hd <= 0 || e_Pe_new >= Delta^2,
            % go to the edge of the trust region along this line.
            alpha = (-e_Pd + sqrt(e_Pd*e_Pd + d_Pd*(Delta^2-e_Pe))) / d_Pd;
            e_Pe_new = Delta^2;
            if d_Hd <= 0,
                stop_inner = 1;     % negative curvature
            else
                stop_inner = 2;     % exceeded trust region
            end
        end

        % compute new optimal point
        eta  =  eta+delta*alpha;        %^
        Aeta = Aeta+   Ad*alpha;        inccntrs(CNT_FLOPS,4*2*n*p); 
        Beta = Beta+   Bd*alpha;        %v
        Heta = Heta+   Hd*alpha;        %v
        e_Pe = e_Pe_new;

        if debug > 1,
            newm = fx + mytrace2(eta,gradfx) + 0.5*mytrace2(eta,Heta);
            fprintf(fout,'+                     m(eta): % 16.9e\n',newm);
            if newm > oldm,
                fprintf(fout,'*** model has INCREASED ***\n');
                % keyboard;
            end
            oldm = newm;
        end

        if stop_inner == 1 || stop_inner == 2,
            break;
        end

        % update the residual
        r = r + Hd*alpha;               inccntrs(CNT_FLOPS,2*n*p); 
        % re-tangentialize r
        r = tangentialize(r,BX);

        if debug > 1,
            % target equation is Hess[eta] = -gradfx
            % residual is Hess[eta] + gradfx
            actHeta = H(A,Aptr,B,Bptr,xritz,BX,eta);
            actr    = actHeta + gradfx;
            fprintf(fout,'+   |H[eta] - actual H[eta]|: %16.10e\n',norm(actHeta-Heta)/norm(Heta));
            fprintf(fout,'+             |r - actual r|: %16.10e\n',norm(actr-r)/norm(r));
            fprintf(fout,'+                  |X^T B R|: %16.10e\n',norm(BX'*r));
        end

        % compute norm
        oldrnorm = rnorm;
        rnorm = sqrt(mytrace2(r,r));
        if debug > 2,
            fprintf(fout,'+                        |r|: %16.10e\n',rnorm);
        end

        % check stopping criteria
        if bitand(innerstop,STOP_RTR),
            if rnorm <= min(tconv,kconv),
                if tconv < kconv,
                    stop_inner = 4;
                else
                    stop_inner = 3;
                end
                break;
            end
        end

        % apply preconditioner 
        if ~isempty(Prec),
            % precondition the search direction
            z = Prec(X,BX,r);
        else
            z = r;
        end

        % save the old z'*r
        zold_rold = z_r;
        % compute new z'*r
        z_r = mytrace2(z,r);
        % compute new search direction
        beta = z_r/zold_rold;
        delta = -z + delta*beta;        inccntrs(CNT_FLOPS,2*n*p); 
        % update new P-norms and P-dots
        e_Pd = beta*(e_Pd + alpha*d_Pd);
        d_Pd = z_r + beta*beta*d_Pd;

    end % end of tCG loop
    EtaNormP = sqrt(e_Pe);
    num_inner = min(iiter,maxinner);
    return;



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Hessian of f %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% apply Hess of f at X to eta
function [He,Ae,Be] = H(A,Aptr,B,Bptr,xritz,BX,eta);
    
    CNT_INNER = 1;
    CNT_A     = 2;
    CNT_B     = 3;
    CNT_PREC  = 4;
    CNT_FLOPS = 5;
    [n,p] = size(eta);
    
    % apply Hessian: two choices
    % exact - H_x(eta) = 2 * PBx_Bx * [A*eta - B*eta*(X'*A*X)]
    if ~isempty(A),
        Ae = A*eta;                       inccntrs(CNT_A,p);
    else
        Ae = Aptr(eta);                   inccntrs(CNT_A,p);
    end
    
    if ~isempty(B),
        Be = B*eta;                       inccntrs(CNT_B,p);
    elseif ~isempty(Bptr),
        Be = Bptr(eta);                   inccntrs(CNT_B,p);
    else
        Be = eta;
    end
    
    % intermediate value
    He = Ae - Be*spdiags(xritz,0,p,p);    inccntrs(CNT_FLOPS,2*n*p);
    % apply projector P_Bx,BX
    He = tangentialize(2*He,BX);
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%% Compute the first p ritz vectors of X %%%%%%%%%%%%%%%%%%%%%%%%
function [rv,rl] = ritz(VAV,VBV,p);
% compute the first p ritz values and primitize ritz vectors 
% of A,B with respect to V 
    
    if nargin < 3,
        p = size(VAV,2);
    end
    
    % make VAV and VBV Hermitian in memory, using the upper triangular part
    VAV = mysym(VAV);
    VBV = mysym(VBV);
    
    [rv,rl] = eig(VAV,VBV);
    rl = diag(rl);
    [rl,y] = sort(rl,'ascend');
    rv = rv(:,y(1:p));
    rl = rl(1:p);
return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%  Tangentialize  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function z = tangentialize(y,BX);
% 
% Orthogonalize y against the space generated by X, in the B-inner product
%
%  z = [I - X*inv(X'*B*B*X)*X'*B] * y
    CNT_FLOPS = 5;
    [n,p] = size(y);
    
    den = BX'*BX;                   inccntrs(CNT_FLOPS,n*p^2); % den is Hermitian
    R = chol(den);
    tmp = R\(R'\(BX'*y));           inccntrs(CNT_FLOPS,2*n*p^2);
    z = y - BX*tmp;                 inccntrs(CNT_FLOPS,2*n*p^2+n*p);
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  dbgprint_1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function dbgprint_1(A,Aptr,B,Bptr,X,AX,BX,xritz,eta,Heta,Aeta,Beta,maxinner,fout);
    p = size(X,2);
    % A*eta == Aeta
    if ~isempty(A),
        err = norm(Aeta- A*eta,'fro') / norm(A*eta,'fro');
    else
        err = norm(Aeta-Aptr(eta),'fro') / norm(Aptr(eta),'fro');
    end
    fprintf(fout,'+             |A eta - Aeta|: %16.10e   (A*eta cache accuracy)\n',err);
    if ~isempty(B),
        % B*eta == Beta
        err = norm(Beta-B*eta,'fro') / norm(B*eta,'fro');
    elseif ~isempty(Bptr),
        % B*eta == Beta
        err = norm(Beta-Bptr(eta),'fro') / norm(Bptr(eta),'fro');
    else
        % eta == Beta
        err = norm(Beta-eta,'fro') / norm(eta,'fro');
    end
    fprintf(fout,'+             |B eta - Beta|: %16.10e   (B*eta cache accuracy)\n',err);
    % X on manifold
    err = norm(X'*BX - eye(p),'fro');
    fprintf(fout,'+              |X^T B X - I|: %16.10e   (X on manifold)\n',err);
    % eta \in T_x M
    err = norm(X'*Beta,'fro');
    fprintf(fout,'+              | X^T B eta |: %16.10e   (TR solution tangentiality)\n',err);
    if maxinner > 0,
        % H*eta == Heta
        actHeta = H(A,Aptr,B,Bptr,xritz,BX,eta);
        err = norm(Heta- actHeta,'fro') / norm(actHeta,'fro');
        fprintf(fout,'+             |H eta - Heta|: %16.10e   (H*eta cache accuracy)\n',err);
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   mytrace2   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function t = mytrace2(X,Y)
    [n,p] = size(X);
    t = 0;
    for j=1:p,
        t = t + real(X(:,j)'*Y(:,j));
    end
    CNT_FLOPS = 5; inccntrs(CNT_FLOPS,2*n*p);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   mydiag2    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function d = mydiag2(X,Y)
    [n,p] = size(X);
    d = zeros(p,1);
    for j=1:p,
        d(j) = real(X(:,j)'*Y(:,j));
    end
    CNT_FLOPS = 5; inccntrs(CNT_FLOPS,2*n*p);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%  inccntrs  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function c = inccntrs(cntr,inc)
   persistent cntrs;
   if isempty(cntrs),
      cntrs = zeros(cntr,1);
   end
   cntrs(cntr) = cntrs(cntr) + inc;
   c = cntrs(cntr);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  mysym %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function S = mysym(M)
    S = diag(real(diag(M))) + triu(M,1) + triu(M,1)';


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_args   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [A,Aptr,B,Bptr,n,k,opts] = get_args(varargin);
% Process inputs and do error-checking 
    
    A = []; 
    Aptr = [];
    B = [];
    Bptr = [];

    if isa(varargin{1},'double')
        A = varargin{1};
        Amatrix = 1;
    else
        Aptr = fcnchk(varargin{1});
        Amatrix = 0;
    end

    if Amatrix
        [m,n] = size(A);
        if (m ~= n) || ~isequal(A,A'),
            error(['A must be a Hermitian matrix or'...
                   ' a function which computes A*X.'])
        end
        clear m;
    else
        nstr = 'Size of problem, ''n'', must be a positive integer.';
        if nargin < 2,
            error(nstr);
        end
        n = varargin{2};
        if ~isequal(size(n),[1,1]) | ~isreal(n)
            error(nstr)
        end
        if (round(n) ~= n)
            warning('RTRESGEV:NonIntegerSize',['%s\n         ' ...
                'Rounding input size.'],nstr)
            n = round(n);
        end
        if issparse(n)
            n = full(n);
        end
        clear nstr
    end

    Bnotthere = 0;
    Bmatrix = 0;
    Bstr = sprintf(['Generalized matrix B must be the same size as A and' ...
                    ' a Hermitian positive-definite matrix or a function which computes B*X.']);
    if (nargin < (3-Amatrix-Bnotthere))
        B = [];
        Bnotthere = 1;
    else
        Bk = varargin{3-Amatrix-Bnotthere};
        if isempty(Bk) % allow rtresgev(A,[],k,sigma,opts);
            B = Bk;
        else
            if isa(Bk,'double'),
                % could be B matrix or K
                if isequal(size(Bk),[1,1]) & (n ~= 1)
                    % it's K
                    B = [];
                    k = Bk;
                    Bnotthere = 1;
                else % rtresgev(9,8,...) assumes A=9, B=8, ... NOT A=9, k=8, ...
                    % it's B matrix
                    B = Bk;
                    if ~isequal(size(B),[n,n]) || ~isequal(B,B'),
                        error(Bstr)
                    else
                        Bmatrix = 1;
                    end
                end
            elseif Amatrix,
                % B is a function, which reqires A function
                error(['B must be specified as a matrix when A is specified as a matrix.'])
            else
                % is B function, and everything is okay
                [Bptr,msg] = fcnchk(Bk);
                if isempty(Bptr),
                    error('Error processing input arguments. You probably forgot to specify size of A. See ''help irtresgev''.');
                end
                Bmatrix = 0;
            end
        end
    end
    clear Bstr

    % Amatrix implies args are
    % RTRESGEV(A,K,OPTS)        or
    % RTRESGEV(A,B,K,OPTS)
    if Amatrix & ((nargin - ~Bnotthere)>3)
        error('Too many inputs.')
    end

    if (nargin < (4-Amatrix-Bnotthere))
        % RTRESGEV(A,B)
        % RTRESGEV(AFUN,N,OPTS)
        % RTRESGEV(AFUN,N,BFUN,OPTS)
        k = min(n,6);
    else
        % RTRESGEV(A,K)
        % RTRESGEV(A,B,K)
        % RTRESGEV(AFUN,N,K,OPTS)
        % RTRESGEV(AFUN,N,BFUN,K,OPTS)
        k = varargin{4-Amatrix-Bnotthere};
    end
    
    kstr = ['Number of eigenvalues requested, k, must be a' ...
            ' positive integer <= n.'];
    if ~isa(k,'double') | ~isequal(size(k),[1,1]) | ~isreal(k) | (k>n)
        error(kstr)
    end
    if issparse(k)
        k = full(k);
    end
    if (round(k) ~= k)
        warning('RTRESGEV:NonIntegerEigQty',['%s\n         ' ...
                'Rounding number of eigenvalues.'],kstr)
        k = round(k);
    end

    if (nargin >= (5-Amatrix-Bnotthere))
        opts = varargin{5-Amatrix-Bnotthere};
        if ~isa(opts,'struct')
            error('Options argument must be a structure.')
        end
        if (nargin > (5-Amatrix-Bnotthere))
            error('Too many inputs.')
        end
    else
        % create an empty struct to return
        opts = struct();
    end
    return; 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_string %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_string(opts,argname,argdesc,def,options);
    % Process inputs and do error-checking 
    errstr = sprintf('%s opts.%s must be: \n',argdesc,argname);
    errstr = [errstr, sprintf('%s  ',options{:})];
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        valid = 0;
        if isstr(ret),
            for j = 1:length(options),
                if isequal(ret,options{j}),
                    valid = 1;
                    break;
                end
            end
        end
        if ~valid,
            error(errstr);
        end
        
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_int %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_int(opts,argname,argdesc,def,lb,ub);
    if nargin < 6
        ub = inf;
        if nargin < 5,
            lb = -inf;
        end
    end
    % Process inputs and do error-checking 
    errstr = sprintf('%s opts.%s must be an integer in [%d,%d]',...
                     argdesc,argname,lb,ub);
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        valid = 0;
        % check that it is an int
        if isnumeric(ret),
            ret = floor(ret);
            % check size (1 by 1) and bounds
            if isequal(size(ret),[1 1]) && lb <= ret && ret <= ub,
                valid = 1;
            end
        end
        if ~valid,
            error(errstr);
        end
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_handle %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_handle(opts,argname,argdesc,def);
    if nargin < 4
        def = [];
    end
    % Process inputs and do error-checking 
    errstr = sprintf('%s opts.%s must be a function handle.',...
                     argdesc,argname);
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        if ~isempty(ret),
            % check that it is a function handle
            ret = fcnchk(ret);
        end
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_float %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_float(opts,argname,argdesc,def,lb,ub,sz);
% only test bounds if sz == [1 1]
    if nargin < 7,
        sz = [];
    end
    if nargin < 6
        ub = inf;
    end
    if nargin < 5,
        lb = -inf;
    end
    % Process inputs and do error-checking 
    if isequal(sz,[1 1]),
        errstr = sprintf('%s opts.%s must be a scalar in [%d,%d]',...
                         argdesc,argname,lb,ub);
    elseif ~isempty(sz),
        errstr = sprintf('%s opts.%s must be an array of dimension %d by %d',...
                         argdesc,argname,sz(1),sz(2));
    
    else
        errstr = sprintf('%s opts.%s must be a numeric array.\n',...
                         argdesc,argname);
    end
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        valid = 0;
        % check that it is an int
        if isnumeric(ret),
            ret = double(ret);
            % no size request, no checks at all
            if isempty(sz),
                valid = 1;
            % if scalar requested, perform bounds check
            elseif isequal(sz,[1 1]),
                if isequal(sz,size(ret)) && lb <= ret && ret <= ub,
                    valid = 1;
                end
            % if matrix requested, just check size
            elseif isequal(sz,size(ret)),
                valid = 1;
            end
        end
        if ~valid,
            error(errstr);
        end
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  q factor   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Q = qf(M)
   [Q,ignore] = qr(M,0);
