function varargout = rtresgev(varargin);
% RTRESGEV   Find a few eigenpairs of a generalized symmetric definite eigenvalue problem using the Riemannian Trust-Region method.
%
%   Find a few smallest generalized eigenvalues and eigenvectors of the matrices A and B.
%   Uses an implentation of the Riemannian Trust-Region algorithm.
%
%   D = RTRESGEV(A) returns a vector of A's 6 smallest signed eigenvalues.
%   A must be symmetric and should be large and sparse.
%
%   [V,D] = RTRESGEV(A) returns a diagonal matrix D of A's 6 smallest signed
%   eigenvalues and a matrix V whose columns are the corresponding eigenvectors.
%
%   [V,D,OPS] = RTRESGEV(A) also returns a structure containing information
%   on the performance of the algorithm.
%
%   RTRESGEV(A,B) solves the generalized eigenvalue problem A*V == B*V*D.  
%   B must be symmetric positive definite and the same size as A.
%   The positive definiteness of B is taken for granted, and will not
%   be tested.
%   RTRESGEV(A,[],...) indicates the standard eigenvalue problem A*V == V*D.
%
%   RTRESGEV(A,K) and RTRESGEV(A,B,K) return the K smallest signed eigenvalues.
%
%   RTRESGEV(A,K,OPTS) and RTRESGEV(A,B,K,OPTS) specify options:
%     OPTS.verbosity: verbose printing at each outer step [0 | {1} | 2]
%     OPTS.v0: basis for the initial iterate [{randomly generated}]
%     OPTS.tol: outer convergence tolerance [scalar | {1e-6}]
%     OPTS.outerstop: mechanism for outer convergence 
%          'gradabs' absolute tolerance: quit when |grad_i| < tol
%          'gradrel' relative tolerance: quit when |grad_i|/|grad_0| < tol  
%          'resrel'  relative tolerance: quit when |res_i|/|lambda_i| < tol (default)
%
%   RTRESGEV(AFUN,N,K,OPTS) and RTRESGEV(AFUN,N,BFUN,K,OPTS) accepts the
%   functions AFUN and BFUN instead of the matrix A, where N is the size of A.
%   The functions are called like:
%      y = feval(AFUN,x);
%   and
%      y = feval(BFUN,x);
%
%     ---------------------- ADVANCED OPTIONS -----------------------
%
%     Note, any field in OPTS that is not listed below will be passed  
%     out of RTRESGEV in OPS. For example,                             
%       >> options.desc = 'my favorite test';                            
%       >> [V,L,O] = RTRESGEV(...,options);                              
%       >> O.desc
%       ans = my favorite test
%
%     OPTS.fout: file identifier for printing {1}
%          All print statements are directed to fout.
%          Typical choices are:
%             1, standard output,
%             2, standard error,
%             or a file handle returned by fopen.
%          RTRESGEV will not close the file handle.
%     OPTS.debug: debugging checks and outputs [{0} | 1]
%     OPTS.p: block size for problem [integer | {k}]
%          Must satisfy k <= OPTS.p <= n
%     OPTS.paramtest: parameter test          [{0} | 1]
%          Do not actually run the algorithm, but instead
%          test the given parameters, set the defaults, and return them
%          in a struct.
%          Example: 
%             defaultopts = rtresgev(A,B,k,opts);
%     OPTS.Vtest: space against which to test at each step
%          Canonical sines are computed w.r.t. the euclidean inner product and 
%          are stored in OPS.sines. (See note in code header for methodology.)
%     OPTS.Ltest: values against which to test at each step 
%          Errors between the values in Ltest and the current eigenvalue 
%          estimates are stored in OPS.verrors. OPTS.Ltest must contain
%          p values, stored in a vector or a diagonal matrix.
%     OPTS.maxinner: max number of inner iterations
%          Default is the dimension of the tangent space:
%             d = n*p - (p^2+p)/2
%     OPTS.innerstop: available stopping criteria for the model minimization
%          A bitmask of three bits, the default is 3=1+2=maxinner or RTR model grad-based
%             bitand(innerstop,1): maxinner iterations reached
%             bitand(innerstop,2): RTR model gradient-based stop is enabled (kappa/theta)
%             bitand(innerstop,4): stop inner when outer stopping criteria is satisfied
%          At least one of these bits must be set.
%     OPTS.maxouter: max number of outer iterations  [integer | {100}]
%     OPTS.minouter: min number of outer iterations  [integer]
%          When OPTS.randomize == 1, default is 2
%          Otherwise, default is 0.
%          Useful when OPTS.randomize == 1, to allow the algorithm
%          opportunity to escape a non-optimal critical point.
%          Note, the algorithm always performs one outer step before
%          checking the outer stopping criteria, as long as the gradient
%          is non-zero.
%     OPTS.randomize: randomize model minimization   [{0} | 1]
%          This randomizes the initial solution to the model minimization.
%          Otherwise, initial solution is the origin. See OPTS.min_outer.
%          Randomization must be disabled if a preconditioner is used.
%     OPTS.kappa: convergence: see paper   [scalar | {0.1}]
%          kappa effects a linear reduction in the 
%          residual before truncating the inner solver.
%          Must satisfy 0 < kappa <= 1
%     OPTS.theta: convergence: see paper   [scalar | {1.0}]
%          theta effects a superlinear convergence rate
%          by introducing a more strict stopping criteria on 
%          the inner problem, as the algorithm nears solution.
%          The algorithm seeks order theta+1 convergence.
%          Must satisfy 0 <= theta <= 2 for a maximum cubic 
%          rate of convergence
%     OPTS.rho_prime: TR: acceptance ratio [scalar | {0.1}]
%          The ratio between the reduction in the Rayleigh
%          quotient and reduction in the quadratic model
%          is used as an acceptance criteria for a new proposal.
%          Should satisfy 0 < rho_prime < .25
%     OPTS.Delta_bar: TR: max trust region size [scalar | {inf}]
%     OPTS.Delta0: TR: initial trust region size [scalar | {p*sqrt(3)}]
%          Must satisfy 0 <= Delta0 <= Delta_bar
%     OPTS.Prec: preconditioner for the inner iteration.
%
%     Regarding Preconditioning: A preconditioner for the inner iteration may be 
%        specified via OPTS.Prec. The preconditioner will be invoked as follows:
%           Peta = feval(OPTS.Prec,X,BX,eta);
%        The input to the preconditioner will always be B-orthogonal.
%        The preconditioner must return a matrix which is B-orthogonal to X.
%        See, for example, 'precDLP'
%
%    Examples:
%       A = delsq(numgrid('C',15));
%       B = speye(15);
%       [V,D] = rtresgev(A,B,5);
%
% See also IRTRESGEV

% Copyright 2008: CG Baker, P-A Absil, KA Gallivan
% Argument processing ripped-off from MATLAB's eigs.m

% Subspaces are represented by B-orthonormal bases. Cosines/sines of angles 
% between two such subspaces, relative to the B-inner product, can be computed 
% as follows:
% Given Qf,Qg   Qf'*B*Qf = I = Qg'*B*Qg
% cosines = svd(Qf'*B*Qg)
% sines   = svd(Qg-Qf*(Qf'*B*Qg))
% See Knyazev and Argentati, "Principal Angles Between Subspaces In An A-Based 
%   Scalar Product", SIAM J. Sci. Comput., Vol. 23, No. 6, pp. 2009-2041
%
% However, in this code, we compute the canonical angles using the Euclidean
% inner product (B=I):
% cosines = svd(Qf'*Qg)
% sines   = svd(Qg-Qf*(Qf'*Qg))
% where Qf and Qg are I-orthonormal bases.
% See Golub and Van Loan, "Matrix Computations", 1996
% This is done by taking the B-orthonormal bases and finding I-orthonormal
% bases for the same subspaces.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% we accept at most three output args: V, L and Ops
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (nargout > 3)
    error('Too many output arguments. See ''help rtresgev''')
end
if length(varargin) < 1,
    error('Not enough input arguments. See ''help rtresgev'' for more.');
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% grab input arguments: A,B,K,OPTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[A,B,n,k,opts] = get_args(varargin{:});

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% initialize parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% strings
outerstop   = 'resrel';
% ints
p           = k;
paramtest   = 0;
maxouter    = 100;
minouter    = [];
maxinner    = inf;
innerstop   = 3;    % STOP_MAX + STOP_RTR
verbosity   = 1;
randomize   = 0;
debug       = 0;
% floats
tol         = 1e-6;
Delta0      = p*sqrt(3);
Delta_bar   = inf;
rho_prime   = 0.1;
kappa       = 0.1;
theta       = 1.0;
% arrays
v0          = [];
Prec        = [];
Vtest       = [];
Ltest       = [];


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% get parameters from opts
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% strings
[outerstop,opts] = get_string(opts,'outerstop','Outer convergence test',...
                    outerstop,{'gradabs','gradrel','resrel'});

% ints
[p,opts] = get_int(opts,'p','Block size',p,k,n);
[maxinner,opts] = ...
    get_int(opts,'maxinner','Maximum number of inner iterations', maxinner,0);
[maxouter,opts] = ...
    get_int(opts,'maxouter','Maximum number of outer iterations', maxouter,0);
[minouter,opts] = ...
    get_int(opts,'minouter','Minimum number of outer iterations', minouter,0);
[innerstop,opts] = ...
    get_int(opts,'innerstop','Inner stopping criteria',innerstop,1,7);
[verbosity,opts] = get_int(opts,'verbosity','Diagnostic level',verbosity);
[randomize,opts] = get_int(opts,'randomize','Randomize flag',randomize);
[paramtest,opts] = get_int(opts,'paramtest','Parameter test flag',paramtest);
[debug,opts] = get_int(opts,'debug','Debugging flag',debug);
[fout,opts]    = get_int(opts,'fout','File number for output',1);

% scalar floats
[tol,opts] = get_float(opts,'tol','Convergence tolerance',tol,0,inf,[1 1]);
[kappa,opts] = get_float(opts,'kappa','Convergence parameter',kappa,0,1,[1 1]);
[theta,opts] = ...
    get_float(opts,'theta','Convergence parameter',theta,0,2,[1 1]);
[rho_prime,opts] = ...
    get_float(opts,'rho_prime','Acceptance parameter', rho_prime,0,.25,[1 1]);
[Delta_bar,opts] = ...
    get_float(opts,'Delta_bar','Trust region parameter', Delta_bar,0,inf,[1 1]);
[Delta0,opts] = ...
    get_float(opts,'Delta0','Trust region parameter',Delta0,0,Delta_bar,[1 1]);

% arrays
[v0,opts] = get_float(opts,'v0','Initial subspace',v0);
if ~isempty(v0) && ~isequal(size(v0),[n p]),
    errstr = 'v0 must be n by p.';
    warning('RTRESGEV:InconsistentV0', ...
            '%s\n         Selecting random initial point.',errstr)
end
% Vtest requires extra testing and work
[Vtest,opts] = get_float(opts,'Vtest','Test basis',Vtest);
if ~isempty(Vtest),
   if ~isequal(size(Vtest,1),[n])
       error(['Test basis opts.Vtest must be n-by-*.'])
   end
   % convert to orthonormal basis
   Vtest = qf(Vtest);
end
[Ltest,opts] = get_float(opts,'Ltest','Test values',Ltest);
if ~isempty(Ltest),
    if (size(Ltest,1) == size(Ltest,2)),
        Ltest = diag(Ltest);
    elseif size(Ltest,1) == 1,
        Ltest = Ltest';
    end
    if size(Ltest,2) ~= 1,
        error(['Test values opts.Ltest must be a ',...
               'vector or a diagonal matrix.'])
    end
    Ltest = sort(Ltest,1,'ascend');
end

% get precon
[Prec,opts] = get_handle(opts,'Prec','Preconditioner',[]);
if ~isempty(Prec),
    if randomize,
        warning('Randomization not compatible with preconditioning. Disabling randomization.');
        randomize = 0;
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set defaults
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% if randomize is on, we should default minouter a little higher
% to give a chance to escape stable, non-optimal critical point
if isempty(minouter),
    if randomize,
        minouter = 2;
    else 
        minouter = 0;
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Check valid parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (minouter > maxouter)
    errstr = 'maxouter must be larger than minouter.';
    warning('RTRESGEV:InconsistentMinMaxIter', ...
            '%s\n         Resetting to defaults (0,inf).',errstr)
    minouter = 0;
    maxouter = inf;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% If performing a parameter test, save params and exit now
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if paramtest,
    theparams.k        = k;
    theparams.p        = p;
    theparams.verbosity = verbosity;
    theparams.debug    = debug;
    theparams.tol      = tol;
    theparams.outerstop = outerstop;
    theparams.innerstop = outerstop;
    theparams.randomize = randomize;
    theparams.maxouter = maxouter;
    theparams.minouter = minouter;
    theparams.maxinner = maxinner;
    theparams.rho_prime = rho_prime;
    theparams.Delta0   = Delta0;
    theparams.Delta_bar = Delta_bar;
    theparams.kappa    = kappa;
    theparams.theta    = theta;
    theparams.fout     = fout;
    theparams.v0       = v0;
    varargout{1} = theparams;
    return;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Any fields in OPTS that were not consumed should be passed
% on to OPS. Init OPS now.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
OPS = opts;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize counters/sentinals
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% counters: 
%  inner iterations
CNT_INNER = 1;
%  number of matrix-vector products with A
CNT_A     = 2;
%  number of matrix-vector products with B
CNT_B     = 3;
%  number of preconditioner applications
CNT_PREC  = 4;
%  significant flops, excluding applications of A, B and Prec
CNT_FLOPS = 5;
% reset all counters to zero, in backwards order
inccntrs(CNT_FLOPS,-inccntrs(CNT_FLOPS,0));
inccntrs(CNT_PREC ,-inccntrs(CNT_PREC ,0));
inccntrs(CNT_B    ,-inccntrs(CNT_B    ,0));
inccntrs(CNT_A    ,-inccntrs(CNT_A    ,0));
inccntrs(CNT_INNER,-inccntrs(CNT_INNER,0));


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Stop reasons for tCG
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
stop_reason = {'negative curvature',...
               'exceeded trust region',...
               'reached target residual - kappa',...
               'reached target residual - theta',...
               'outer condition met',...
               'dimension exceeded'};


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set initial iterate
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if isempty(v0)
    v0 = randn(n,p);
end
v0 = qf(v0);            inccntrs(CNT_FLOPS,4*n*p^2); 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% debug hello printing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (debug > 0)
    fprintf(fout,'DEBUG MODE\n');
    fprintf(fout,'Operation counts may not be accurate.\n');
    fprintf(fout,'n == %d\n',n);
    fprintf(fout,'k == %d\n',k);
    fprintf(fout,'p == %d\n',p);
    if isa(A,'double'),
        fprintf(fout,'A is a matrix\n');
    else
        fprintf(fout,'A is a function\n');
    end
    if isempty(B),
        fprintf(fout,'B was empty or not specified; treating as B=I\n');
    elseif isa(B,'double'),
        fprintf(fout,'B is a matrix\n');
    else
        fprintf(fout,'B is a function\n');
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generate first iterate 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
i = 0;         % outer number
X = v0;
if isa(A,'double')
    AX = A*X;           inccntrs(CNT_A,p);
else
    AX = A(X);          inccntrs(CNT_A,p);
end

if isempty(B),
    BX = X;
elseif isa(B,'double'),
    BX = B*X;           inccntrs(CNT_B,p);
else
    BX = B(X);          inccntrs(CNT_B,p);
end
VAV=X'*AX; VBV=X'*BX;   inccntrs(CNT_FLOPS,4*n*p^2);

[RV,xritz,rrank] = ritz_svd(VAV,VBV);
X  =  X*RV;             %^
AX = AX*RV;             inccntrs(CNT_FLOPS,3*2*n*p^2);
BX = BX*RV;             %v



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compute properties of initial iterate 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% f(X): X is ritz and X'*B*X = I
fx = mytrace2(X,AX);
% grad f(X)
gradfx = tangentialize(2*AX,BX);
% norm grad f(X) for first k columns of X
normgradfx = sqrt(mytrace2(gradfx(:,1:k),gradfx(:,1:k)));
normgrad0  = normgradfx;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set effective tolerance (tolerance used for outer convergence testing)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if isequal(outerstop,'gradabs'),
    efftol = tol;
elseif isequal(outerstop,'gradrel'),
    efftol = tol*normgrad0;
elseif isequal(outerstop,'resrel'),
    efftol = tol;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% initialize loop variables
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
total_time = 0;     % total time spent in outer loop
stop_outer = 0;     % sentinal for outer loop
num_accepted = 0;   % number of accepted proposals
Delta = Delta0;     % set trust region radius


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Record data  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
curstat.EtaNorm  = 0;
curstat.EtaNormP = 0;
% save counter data for this outer step
curstat.i = i;
curstat.inner = inccntrs(CNT_INNER,0);
curstat.MulA  = inccntrs(CNT_A,0);
curstat.MulB  = inccntrs(CNT_B,0);
curstat.MulP  = inccntrs(CNT_PREC,0);
curstat.flops = inccntrs(CNT_FLOPS,0);
% function information
d = mydiag2(gradfx,gradfx);
curstat.fx       = fx;
curstat.Nres     = sqrt(d/4) ./ abs(xritz);
curstat.Ngrad    = sqrt(sum(d));
curstat.Delta    = Delta;
curstat.rho      = inf;
curstat.rhonum   = inf;
curstat.rhoden   = inf;
curstat.time     = 0;
curstat.accepted = true;
% accuracy of the current iterate
%   canonical sines
if ~isempty(Vtest),
    qfx = qf(X);
    curstat.sines = svd(qfx-Vtest*(Vtest'*qfx));
end
%   vector of errors of eigenvalues
if ~isempty(Ltest),
    curstat.verrors = xritz - Ltest;
end
stats(1) = curstat;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initial printing
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (verbosity > 1) || (debug > 0)
    fprintf(fout,['********************************* Init ', ...
             '***********************************\n']);
end
if verbosity > 0,
    fprintf(fout,['          i: _____                          ',...
             '|grad|: %12.6e     \n'],  normgradfx);
end
if verbosity > 1,
    fprintf(fout,'       f(X) : %d\n',fx);
    fprintf(fout,'     Delta0 : %f\n',Delta0);
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% begin outer loop
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% we cannot perform tCG if gradient is zero without randomization,
% so don't bother starting.
if normgradfx == 0.0 && randomize == 0,
    stop_outer = 1;
end
while ~stop_outer,

    % start clock for this outer iteration
    t0 = clock;

    % update counter
    i = i + 1;

    % print header
    if (verbosity > 1) || (debug > 0)
        fprintf(fout,['********************************** RTRESGEV ',...
                 '***********************************\n']);
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Prepare and solve TR subproblem
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % select eta, the initial solution
    % either zero or random (in tangent plane) 
    if randomize,
        % generate a random starting vector that is:
        % * in the tangent plane
        % * in the trust region
        % * as small as possible
        eta = randn(size(X))/(2*n);    % dividing by n is enough for TR
        eta = tangentialize(eta,BX);
    else
        eta = zeros(size(X));
    end
    % simply solve the TR sub-minimization
    [eta,Aeta,Beta,Heta,EtaNormP,num_inner,stop_inner] =        ...
                                    tCG(A,B,Prec,k,             ...
                                        X,AX,BX,fx,gradfx,      ...
                                        eta,Delta,              ...
                                        kappa,theta,            ...
                                        outerstop,efftol,       ...
                                        normgrad0,maxinner,     ...
                                        innerstop,debug,fout);
    % increment inner iteration counter
    inccntrs(CNT_INNER,num_inner);

    % save length of current iterate
    curstat.EtaNorm  = sqrt(mytrace2(eta,eta));
    curstat.EtaNormP = EtaNormP;

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Debugging output
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if (debug > 0),
        dbgprint_1(A,B,X,AX,BX,eta,Heta,Aeta,Beta,maxinner,fout);
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Compute the retraction of the proposal:
    %    Xprop = R(X,eta)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    Xprop = X+eta;            %^
    AXprop = AX+Aeta;         inccntrs(CNT_FLOPS,3*n*p);
    BXprop = BX+Beta;         %v

    XpAXp = Xprop'*AXprop;    %^
    XpBXp = Xprop'*BXprop;    inccntrs(CNT_FLOPS,2*2*n*p^2);
    
    [RVprop,xpritz,xprrank] = ritz_svd(XpAXp,XpBXp);
    Xprop  =  Xprop*RVprop;            %^
    AXprop = AXprop*RVprop;            inccntrs(CNT_FLOPS,3*2*n*p^2);
    BXprop = BXprop*RVprop;            %v
    
    fxprop = mytrace2(Xprop,AXprop);

    % check the performance of the quadratic model relative to f
    % note: this uses the Hessian of X at eta, which was returned
    % from the tCG return
    rhonum = fx-fxprop;
    rhoden = -mytrace2(gradfx,eta) - 0.5*mytrace2(Heta,eta);
    % small negatives rho counts as small rho
    neg_rhonum = 0;
    small_rhonum = 0;
    % relatively small rhonum (positive or negative) should happen
    % late in the game, after the trust-region has become effectively 
    % inactive and there are no more directions of negative curvature
    if abs(rhonum/fx) < 1e-10 && (stop_inner == 3 || stop_inner == 4 || stop_inner == 5),
        small_rhonum  = rhonum;
        small_rho     = rho;
        rho = 1;
    elseif rhonum < 0,
        neg_rhonum = rhonum;
        rho = 0;
    else 
        rho = rhonum / rhoden;
    end

    % choose new TR radius based on performance
    TRadjust = '   ';
    if rho < 1/4,
        % rho is bad. shrink trust region radius.
        Delta = 1/4*Delta;
        TRadjust = 'TR-';
    elseif rho > 3/4 & (stop_inner == 2 | stop_inner == 1)
        % rho is good and we went to trust region radius. expand it.
        Delta = min(2*Delta,Delta_bar);
        TRadjust = 'TR+';
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Decide whether to accept or reject
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % accept/reject proposal based on rho
    if rho > rho_prime,
        accepted = 1;
    else
        accepted = 0;
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % If accepted, 
    % * set new iterate
    % * compute properties 
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if accepted,
        num_accepted = num_accepted + 1;

        % update X with the proposal
        X = Xprop;
        AX = AXprop;             inccntrs(CNT_FLOPS,3*n*p); 
        BX = BXprop;
        xritz = xpritz;

        % compute properties of next iterate
        % f(X)
        fx = sum(xritz);
        % grad f(X)
        gradfx = tangentialize(2*AX,BX);
        % norm grad f(X)
        normgradfx = sqrt(mytrace2(gradfx(:,1:k),gradfx(:,1:k)));

    end % if accepted

    % stop clock for this outer step
    this_time = etime(clock,t0);
    % update total time
    total_time = total_time + this_time;

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Record data  
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % save counter data for this outer step
    curstat.i = i;
    curstat.inner = inccntrs(CNT_INNER,0);
    curstat.MulA  = inccntrs(CNT_A,0);
    curstat.MulB  = inccntrs(CNT_B,0);
    curstat.MulP  = inccntrs(CNT_PREC,0);
    curstat.flops = inccntrs(CNT_FLOPS,0);
    % function information
    d = mydiag2(gradfx,gradfx);
    curstat.fx       = fx;
    curstat.Nres     = sqrt(d/4) ./ abs(xritz);
    curstat.Ngrad    = sqrt(sum(d));
    curstat.Delta    = Delta;
    curstat.rho      = rho;
    curstat.rhonum   = rhonum;
    curstat.rhoden   = rhoden;
    curstat.time     = this_time;
    curstat.accepted = accepted;
    % accuracy of the current iterate
    %   canonical sines
    if ~isempty(Vtest),
        qfx = qf(X);
        curstat.sines = svd(qfx-Vtest*(Vtest'*qfx));
    end
    %   vector of errors of eigenvalues
    if ~isempty(Ltest),
        curstat.verrors = xritz - Ltest;
    end
    stats(end+1) = curstat;


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % verbosity 
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if verbosity > 0,
        if accepted,
            accstr = 'acc';
        else
            accstr = 'REJ';
        end
        fprintf(fout,['%3s %3s   i: %5.5d     num_inner: %5.5d     ',...
                 '|grad|: %12.6e   %s\n'],...
                accstr,TRadjust,i,num_inner,normgradfx,stop_reason{stop_inner});
    end
    if verbosity > 1,
        fprintf(fout,'Elapsed time is %f seconds.\n',this_time);
        fprintf(fout,'       f(X) : %d         |eta| : %d\n',stats(end).fx,stats(end).EtaNormP);
        dstr = sprintf('     Delta : %f',Delta);
        if neg_rhonum ~= 0,
            fprintf(fout,' neg rhonum : %16.8e%s\n',neg_rhonum,dstr);
        elseif small_rhonum ~= 0,
            fprintf(fout,'tiny rhonum : %16.8e  rho: %16.8e%s\n', ...
                    small_rhonum,small_rho,dstr);
        else
            fprintf(fout,'        rho : %16.13f%s\n',stats(end).rho,dstr);
        end
    end


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Check stopping criteria
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if isequal(outerstop,'resrel'),
        outererr = 0;
        for j=1:k,
            r = AX(:,j) - BX(:,j)*xritz(j);
            outererr = max(outererr, norm(r)/abs(xritz(j)));
        end
    else
        outererr = normgradfx;
    end
    if outererr < efftol && i >= minouter,
        stop_outer = 1; 
    elseif i >= maxouter,
        fprintf(fout,'\n*** timed out -- i == %d***\n\n',i);
        stop_outer = 1;
    end 

end  % of outer/TR loop (counter: k)

% grab the current leftmost space
Q = X(:,1:k);
vals = xritz(1:k);

% print footer
if (verbosity > 1) || (debug > 0)
    fprintf(fout,['*************************************', ...
             '*************************************\n']);
end
if (verbosity > 0) || (debug > 0)
    fprintf(fout,'Total time is %f\n',total_time);
end

OPS.total_time = total_time;
OPS.stats = stats;

% save the data
if (nargout <= 1)
    varargout{1} = vals;
else
    varargout{1} = Q;
    varargout{2} = diag(vals);
    if (nargout >= 3)
        varargout{3} = OPS;
    end
end

return




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%% trunctated cg solver for inner problem %%%%%%%%%%%%%%%%%%%%%%%%
% solve Hess_x(eta) = -grad
% X contains the ritz vectors of A and has B-orthonormal columns
function [eta,Aeta,Beta,Heta,EtaNormP,num_inner,stop_inner] = ...
                tCG( A,B,Prec,k,        ...     % sys data
                     X,AX,BX,fx,gradfx, ...     % current iterate
                     eta,Delta,         ...     % initial and TR radius
                     kappa,theta,       ...     % stopping params
                     outerstop,tol,     ...     % outer stopping params
                     normgrad0,maxinner,...     % stopping params
                     innerstop,debug,fout);     % inner stop choices

    % all terms involving the trust-region radius will utilize an inner product
    % w.r.t. the preconditioner; this is because the iterates grow in
    % length w.r.t. the preconditioner, guaranteeing that we will not 
    % re-enter the trust-region
    % 
    % the following recurrences for Prec-based norms and inner 
    % products come from CGT2000, pg. 205, first edition
    % below, P is the preconditioner
    % 
    % <eta_k,P*delta_k> = beta_k-1 * ( <eta_k-1,P*delta_k-1> + alpha_k-1 |delta_k-1|^2_P )
    % |delta_k|^2_P = <r_k,z_k> + beta_k-1^2 |delta_k-1|^2_P
    % 
    % therefore, we need to keep track of 
    % 1)   |delta_k|^2_P 
    % 2)   <eta_k,P*delta_k> = <eta_k,delta_k>_P
    % 3)   |eta_k  |^2_P
    % 
    % initial values are given by:
    %    |delta_0|_P = <r,z>
    %    |eta_0|_P   = 0
    %    <eta_0,delta_0>_P = 0
    % because we take eta_0 = 0

    % inner stopping criteria
    STOP_MAX = 1;
    STOP_RTR = 2;
    STOP_OUTER = 4;

    CNT_INNER = 1;
    CNT_A     = 2;
    CNT_B     = 3;
    CNT_PREC  = 4;
    CNT_FLOPS = 5;

    % compute dimension of tangent space
    [n,p] = size(X);

    % dimension of the tangent space
    d = n*p - (p^2+p)/2;

    % in exact arithmetic, linear CG must converge after d steps
    % in practice, we may need more
    maxinner = min(maxinner,d);

    % determine current inner residual: r = H[eta] + gradfx
    if isequal(eta,zeros(size(eta)))
        nonzerostart = 0;
        e_Pe = 0;
        Heta = eta;
        Aeta = eta;
        Beta = eta;
        r = gradfx;
    else
        nonzerostart = 1;
        % eta != 0, ergo preconditioning must be off.
        if ~isempty(Prec),
            error('RTRESGEV: Internal logic error. Randomization not compatible with preconditioning.');
        end
        e_Pe = mytrace2(eta,eta);
        [Heta,Aeta,Beta] = H(A,B,X,AX,BX,eta);
        r = gradfx+Heta;   inccntrs(CNT_FLOPS,n*p); 
    end

    % precondition the search direction
    if ~isempty(Prec),
        z = Prec(X,BX,r);
    else
        z = r;
    end
    % compute z'*r
    z_r = mytrace2(z,r);
    d_Pd = z_r;

    % compute norm of residuals
    rnorm = sqrt(mytrace2(r,r));
    
    % save the original residual norms
    r0norm = rnorm;
    kconv = r0norm*kappa;
    % tconv = r0norm*(r0norm/normgrad0)^theta;
    tconv = r0norm^(theta+1);
    if debug,
        fprintf(fout,' |r0| : %16.8e\n',r0norm);
        fprintf(fout,' kconv : %16.8e\n',kconv);
        fprintf(fout,' tconv : %16.8e\n',tconv);
    end

    % initial search direction = -negative (precon) residual 
    delta = -z;
    if nonzerostart,
        % eta != 0, ergo, preconditioning must be off.
        e_Pd = mytrace2(eta,delta);
    else
        e_Pd = 0;
    end
    inccntrs(CNT_FLOPS,n*p); 

    % assume termination b/c  j == end
    stop_inner = 6;

    if maxinner == 0 && bitand(innerstop,STOP_MAX),
        eta = delta;
        e_Pe = d_Pd;
        if isa(A,'double')
            Aeta = A*eta;       inccntrs(CNT_A,p);
        else
            Aeta = A(eta);      inccntrs(CNT_A,p);
        end
    
        if isempty(B),
            Beta = eta;
        elseif isa(B,'double'),
            Beta = B*eta;       inccntrs(CNT_B,p);
        else
            Beta = B(eta);      inccntrs(CNT_B,p);
        end
    end

    if debug > 0,
        oldm = fx + mytrace2(eta,gradfx) + 0.5*mytrace2(eta,Heta);
    end

    % begin inner/tCG loop
    j = 0;
    while j < maxinner || ~bitand(innerstop,STOP_MAX),
        j=j+1;

        if debug > 0,
            % check that delta are descent directions
            % d'*r < 0
            d_r = mytrace2(delta,r);
            fprintf(fout,' d''*r : %16.8e\n',d_r);
            if d_r > 0,
                fprintf(fout,'*** direction of ASCENT ***\n');
                % keyboard;
            end
        end

        % apply Hessian
        [Hd,Ad,Bd] = H(A,B,X,AX,BX,delta);

        d_Hd = mytrace2(delta,Hd);
        alpha = z_r/d_Hd;

        % <neweta,neweta>_P = <eta,eta>_P + 2*alpha*<eta,delta>_P + alpha*alpha*<delta,delta>_P
        e_Pe_new = e_Pe + 2.0*alpha*e_Pd + alpha*alpha*d_Pd;

        % check curvature and trust-region radius
        if d_Hd <= 0 || e_Pe_new >= Delta^2,
            % go to the edge of the trust region along this line.
            tau = (-e_Pd + sqrt(e_Pd*e_Pd + d_Pd*(Delta^2-e_Pe))) / d_Pd;
            eta  =  eta + tau*delta;
            Aeta = Aeta + tau*Ad;
            Beta = Beta + tau*Bd;
            Heta = Heta + tau*Hd;
            inccntrs(CNT_FLOPS,4*2*n*p); 
            if d_Hd <= 0,
                stop_inner = 1;     % negative curvature
            else
                stop_inner = 2;     % exceeded trust region
            end
            break;
        end

        % no negative curvature and eta_prop inside TR: accept it
        e_Pe = e_Pe_new;
        eta  =  eta+alpha*delta;
        Aeta = Aeta+alpha*Ad;
        Beta = Beta+alpha*Bd;           inccntrs(CNT_FLOPS,4*2*n*p); 
        Heta = Heta+alpha*Hd;

        if debug > 0,
            % check that model has decreased
            newm = fx + mytrace2(eta,gradfx) + 0.5*mytrace2(eta,Heta);
            fprintf(fout,' m_x(eta) : %16.8e\n',newm);
            if newm > oldm,
                fprintf(fout,'*** model has INCREASED ***\n');
                % keyboard;
            end
            oldm = newm;
        end

        if debug > 0,
            fprintf(fout,' m(0) - m(eta): %16.8e\n',-mytrace2(eta,gradfx)-.5*mytrace2(eta,Heta));
        end

        % check norm of gradient of f (outer stopping criteria)
        % if bitand(innerstop,STOP_OUTER),
        %     if isequal(outerstop,'resrel'),
        %         xe =   X(:,1:k)+ eta(:,1:k);
        %         Axe = AX(:,1:k)+Aeta(:,1:k);
        %         Bxe = BX(:,1:k)+Beta(:,1:k);
        %         inccntrs(CNT_FLOPS,3*n*k); 
        %         ritzxe = mydiag2(xe,Axe) ./ mydiag2(xe,Bxe);
        %         inccntrs(CNT_FLOPS,4*n*k); 
        %         Rxe = Axe-Bxe*spdiags(ritzxe,0,k,k);
        %         inccntrs(CNT_FLOPS,2*n*k); 
        %         maxres = 0;
        %         for ii=1:k,
        %             maxres = max(maxres,norm(Rxe(:,ii))/abs(ritzxe(ii)));
        %         end
        %         inccntrs(CNT_FLOPS,2*n*k); 
        %         if maxres < .5*tol,
        %             stop_inner = 5;
        %             break;
        %         end
        %     else
        %         % compute retracted X+eta
        %         % assume norm of gradient under subspace acceleration to be smaller 
        %         % than norm of gradient(X+eta)
        %         Axe = AX(:,1:k)+Aeta(:,1:k);
        %         Bxe = BX(:,1:k)+Beta(:,1:k);
        %         inccntrs(CNT_FLOPS,2*n*k); 
        %         GF  = tangentialize(2*Axe,Bxe);
        %         % grad f(X+eta) = 2 P_{B(X+eta)} A(X+eta)
        %         % norm grad f(X+eta) = sqrt trace( inv((X+eta)'*B*(X+eta)) grad f(X+eta)' grad f(X+eta) )
        %         xeBxe = mysym((X+eta)'*Bxe);
        %         xeR = chol(xeBxe);
        %         ngf = sqrt(trace( xeR \ ( xeR' \ (GF'*GF) ) ));
        %         inccntrs(CNT_FLOPS,4*n*p^2+n*p);
        %         % add a small fudge factor
        %         if (ngf < .5*tol),    
        %             stop_inner = 5;              
        %             break;
        %         end
        %     end
        % end

        % update the residual
        r = r + alpha*Hd;               inccntrs(CNT_FLOPS,2*n*p); 
        % re-tangentialize r
        r = tangentialize(r,BX);

        % compute norm
        oldrnorm = rnorm;
        rnorm = sqrt(mytrace2(r,r));

        % check stopping criteria
        if bitand(innerstop,STOP_RTR),
            if rnorm <= min(tconv,kconv),
                if tconv < kconv,
                    stop_inner = 4;
                else
                    stop_inner = 3;
                end
                break;
            end
        end

        % apply preconditioner 
        if ~isempty(Prec),
            % precondition the search direction
            z = Prec(X,BX,r);
        else
            z = r;
        end

        % save the old z'*r
        zold_rold = z_r;
        % compute new z'*r
        z_r = mytrace2(z,r);
        % compute new search direction
        beta = z_r/zold_rold;
        delta = -z + delta*beta;            inccntrs(CNT_FLOPS,2*n*p); 
        % update new P-norms and P-dots
        e_Pd = beta*(e_Pd + alpha*d_Pd);
        d_Pd = z_r + beta*beta*d_Pd;

    end % end of tCG loop
    EtaNormP = sqrt(e_Pe);
    num_inner = min(j,maxinner);
    return;



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%% Hessian of f %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% apply Hess of f at X to eta
function [He,Ae,Be] = H(A,B,X,AX,BX,eta);
    
    CNT_INNER = 1;
    CNT_A     = 2;
    CNT_B     = 3;
    CNT_PREC  = 4;
    CNT_FLOPS = 5;
    [n,p] = size(eta);
    
    % apply Hessian: two choices
    % exact - H_x(eta) = 2 * PBx_Bx * [A*eta - B*eta*(X'*A*X)]
    %    TM - H_x(eta) =     PBx_Bx * A*eta
    if isa(A,'double')
        Ae = A*eta;                       inccntrs(CNT_A,p);
    else
        Ae = A(eta);                      inccntrs(CNT_A,p);
    end
    
    if isempty(B),
        Be = eta;
    elseif isa(B,'double'),
        Be = B*eta;                       inccntrs(CNT_B,p);
    else
        Be = B(eta);                      inccntrs(CNT_B,p);
    end
    
    % intermediate value
    He = Ae - Be*spdiags(mydiag2(X,AX),0,p,p);    inccntrs(CNT_FLOPS,2*n*p);
    % apply projector P_Bx,BX
    He = tangentialize(2*He,BX);
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%% Compute the first k ritz vectors of X %%%%%%%%%%%%%%%%%%%%%%%%
function [rv,rl,r] = ritz_svd(VAV,VBV,k);
% compute the first k ritz values and primitize ritz vectors 
% of A,B with respect to V 
% use the SVD of VBV to enforce the VBV-orthogonality of the resulting
% primitive ritz vectors

    if nargin < 3,
        k = size(VAV,2);
    end

    % make VAV and VBV symmetric in memory using upper half
    VAV = triu(VAV) + triu(VAV,1)';
    VBV = triu(VBV) + triu(VBV,1)';

    % store original matrix and rank
    r = size(VAV,1);
    oVAV = VAV;
    or = r;
    [U,S,dummy] = svd(VBV);
    S = diag(S);
    % convert to standard symmetric eigenproblem
    sVAV = diag(1./sqrt(S)) * (U' * oVAV * U) * diag(1./sqrt(S));
    sVAV = triu(sVAV) + triu(sVAV,1)';
    % solve standard eigenproblem
    [rv,rl] = eig(sVAV);
    % sort vectors,values and select k smallest
    rl = diag(rl);
    [rl,y] = sort(rl,'ascend');
    rl = rl(1:k);
    rv = rv(:,y(1:k));
    % convert primitive ritz vectors to generalized eigenvectors
    rv = U*diag(1./sqrt(S))*rv;

    % check the VBV-orthonormality of rv
    orthloss = max(max(abs( rv'*VBV*rv - eye(k) )));
    while orthloss > 1e-10,
        % loss of VBV-orthonormality of rv (i.e., B-orthonormality of V*rv)
        % is too high
        r = r-1;
        if r < k,
            error(['In ritz_svd(): not enough primitive ritz vectors '...
                   'to continue algorithm.']);
        end
        % shrink size of basis
        U = U(:,1:r);
        S = S(1:r);
        % convert to standard symmetric eigenproblem
        sVAV = diag(1./sqrt(S)) * (U' * oVAV * U) * diag(1./sqrt(S));
        sVAV = triu(sVAV) + triu(sVAV,1)';
        % solve standard eigenproblem
        [rv,rl] = eig(sVAV);
        % sort vectors,values and select k smallest
        rl = diag(rl);
        [rl,y] = sort(rl,'ascend');
        rl = rl(1:k);
        rv = rv(:,y(1:k));
        % convert primitive ritz vectors to generalized eigenvectors
        rv = U*diag(1./sqrt(S))*rv;
       
        % compute loss of orthogonality
        orthloss = max(max(abs( rv'*VBV*rv - eye(k) )));
    end
return;



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%% Compute the first p ritz vectors of X %%%%%%%%%%%%%%%%%%%%%%%%
function [rv,rl,r] = ritz(VAV,VBV,p);
% compute the first p ritz values and primitize ritz vectors 
% of A,B with respect to V 
    
% NOTE: this code was inspired by the directSolver.m routine
% authored by U. Hetmaniuk and R. Lehoucq, based on research
% described in the report:
% " A comparison of algorithms for modal analysis in the absence 
%   of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
%  Sandia National Laboratories, Technical report SAND2003-1028J.
    
    if nargin < 3,
        p = size(VAV,2);
    end
    
    % make VAV and VBV symmetric in memory
    VAV = triu(VAV) + triu(VAV,1)';
    VBV = triu(VBV) + triu(VBV,1)';
    
    [rv,rl] = eig(VAV,VBV);
    rl = diag(rl);
    [rl,y] = sort(rl,'ascend');
    rv = rv(:,y);
    
    r = size(VAV,1);
    or = r;
    
    % check the B-orthonormality of rv
    orthloss = max(max(abs( rv'*VBV*rv - eye(r) )));
    while orthloss > 1e-12,
        r = r-1;
        if r < p,
            error(['In ritz(): not enough primitive ritz vectors '...
                   'to continue algorithm.']);
        end
        VAV = VAV(1:r,1:r);
        VBV = VBV(1:r,1:r);
        [rv,rl] = eig(VAV,VBV);
        rl = diag(rl);
        [rl,y] = sort(rl,'ascend');
        rv = rv(:,y);
        orthloss = max(max(abs( rv'*VBV*rv - eye(r) )));
    end
    
    % grab the first p primitive ritz vectors
    % pad to original number of rows
    rv = [ rv(:,1:p) ; zeros(or-r,p) ];
    rl = rl(1:p);
return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%  Tangentialize  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function z = tangentialize(y,BX);
% 
% Orthogonalize y against the space generated by X, in the B-inner product
%
%  z = [I - X*inv(X'*B*B*X)*X'*B] * y
    CNT_FLOPS = 5;
    [n,p] = size(y);
    
    den = BX'*BX;                   inccntrs(CNT_FLOPS,n*p^2); % den is symmetric
    R = chol(den);
    tmp = R\(R'\(BX'*y));           inccntrs(CNT_FLOPS,2*n*p^2);
    z = y - BX*tmp;                 inccntrs(CNT_FLOPS,2*n*p^2+n*p);
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  dbgprint_1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function dbgprint_1(A,B,X,AX,BX,eta,Heta,Aeta,Beta,maxinner,fout);
    p = size(X,2);
    % A*eta == Aeta
    if isa(A,'double'),
        err = norm(Aeta- A*eta,'fro') / norm(A*eta,'fro');
    else
        err = norm(Aeta-A(eta),'fro') / norm(A(eta),'fro');
    end
    fprintf(fout,' DBG |A eta - Aeta|: %16.8e (A*eta cache accuracy)\n',err);
    if isempty(B),
        % eta == Beta
        err = norm(Beta-eta,'fro') / norm(eta,'fro');
    elseif isa(B,'double'),
        % B*eta == Beta
        err = norm(Beta-B*eta,'fro') / norm(B*eta,'fro');
    else
        % B*eta == Beta
        err = norm(Beta-B(eta),'fro') / norm(B(eta),'fro');
    end
    fprintf(fout,' DBG |B eta - Beta|: %16.8e (B*eta cache accuracy)\n',err);
    % X on manifold
    err = norm(X'*BX - eye(p),'fro');
    fprintf(fout,' DBG  |X^T B X - I|: %16.8e (X on manifold)\n',err);
    % eta \in T_x M
    err = norm(X'*Beta,'fro');
    fprintf(fout,' DBG  | X^T B eta |: %16.8e (TR solution tangentiality)\n',err);
    if maxinner > 0,
        % H*eta == Heta
        err = norm(Heta- H(A,B,X,AX,BX,eta),'fro') / norm(H(A,B,X,AX,BX,eta),'fro');
        fprintf(fout,' DBG |H eta - Heta|: %16.8e (H*eta cache accuracy)\n',err);
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   mytrace2   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function t = mytrace2(X,Y)
    [n,p] = size(X);
    t = 0;
    for j=1:p,
        t = t + X(:,j)'*Y(:,j);
    end
    CNT_FLOPS = 5; inccntrs(CNT_FLOPS,2*n*p);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   mydiag2    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function d = mydiag2(X,Y)
    [n,p] = size(X);
    d = zeros(p,1);
    for j=1:p,
        d(j) = X(:,j)'*Y(:,j);
    end
    CNT_FLOPS = 5; inccntrs(CNT_FLOPS,2*n*p);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%  inccntrs  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function c = inccntrs(cntr,inc)
   persistent cntrs;
   if isempty(cntrs),
      cntrs = zeros(cntr,1);
   end
   cntrs(cntr) = cntrs(cntr) + inc;
   c = cntrs(cntr);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  mysym %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function S = mysym(B)
    S = triu(B) + triu(B,1)';


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_args   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [A,B,n,k,opts] = get_args(varargin);
% Process inputs and do error-checking 
    
    if isa(varargin{1},'double')
        A = varargin{1};
        Amatrix = 1;
    else
        A = fcnchk(varargin{1});
        Amatrix = 0;
    end

    if Amatrix
        [m,n] = size(A);
        if (m ~= n) || ~isequal(A,A'),
            error(['A must be a symmetric matrix or'...
                   ' a function which computes A*X.'])
        end
        clear m;
    else
        nstr = 'Size of problem, ''n'', must be a positive integer.';
        if nargin < 2,
            error(nstr);
        end
        n = varargin{2};
        if ~isequal(size(n),[1,1]) | ~isreal(n)
            error(nstr)
        end
        if (round(n) ~= n)
            warning('RTRESGEV:NonIntegerSize',['%s\n         ' ...
                'Rounding input size.'],nstr)
            n = round(n);
        end
        if issparse(n)
            n = full(n);
        end
        clear nstr
    end

    Bnotthere = 0;
    Bmatrix = 0;
    Bstr = sprintf(['Generalized matrix B must be the same size as A and' ...
                    ' a symmetric positive (semi-)definite matrix or a function which computes B*X.']);
    if (nargin < (3-Amatrix-Bnotthere))
        B = [];
        Bnotthere = 1;
    else
        Bk = varargin{3-Amatrix-Bnotthere};
        if isempty(Bk) % allow rtresgev(A,[],k,sigma,opts);
            B = Bk;
        else
            if isa(Bk,'double'),
                % could be B matrix or K
                if isequal(size(Bk),[1,1]) & (n ~= 1)
                    % it's K
                    B = [];
                    k = Bk;
                    Bnotthere = 1;
                else % rtresgev(9,8,...) assumes A=9, B=8, ... NOT A=9, k=8, ...
                    % it's B matrix
                    B = Bk;
                    if ~isequal(size(B),[n,n]) || ~isequal(B,B'),
                        error(Bstr)
                    else
                        Bmatrix = 1;
                    end
                end
            elseif Amatrix,
                % is B function, which reqires A function
                error(['B must be specified as a matrix when A is specified as a matrix.'])
            else
                % is B function, and everything is okay
                B = fcnchk(Bk);
                Bmatrix = 0;
            end
        end
    end
    clear Bstr

    % Amatrix implies args are
    % RTRESGEV(A,K,OPTS)        or
    % RTRESGEV(A,B,K,OPTS)
    if Amatrix & ((nargin - ~Bnotthere)>3)
        error('Too many inputs.')
    end

    if (nargin < (4-Amatrix-Bnotthere))
        % RTRESGEV(A,B)
        % RTRESGEV(AFUN,N,OPTS)
        % RTRESGEV(AFUN,N,BFUN,OPTS)
        k = min(n,6);
    else
        % RTRESGEV(A,K)
        % RTRESGEV(A,B,K)
        % RTRESGEV(AFUN,N,K,OPTS)
        % RTRESGEV(AFUN,N,BFUN,K,OPTS)
        k = varargin{4-Amatrix-Bnotthere};
    end
    
    kstr = ['Number of eigenvalues requested, k, must be a' ...
            ' positive integer <= n.'];
    if ~isa(k,'double') | ~isequal(size(k),[1,1]) | ~isreal(k) | (k>n)
        error(kstr)
    end
    if issparse(k)
        k = full(k);
    end
    if (round(k) ~= k)
        warning('RTRESGEV:NonIntegerEigQty',['%s\n         ' ...
                'Rounding number of eigenvalues.'],kstr)
        k = round(k);
    end

    if (nargin >= (5-Amatrix-Bnotthere))
        opts = varargin{5-Amatrix-Bnotthere};
        if ~isa(opts,'struct')
            error('Options argument must be a structure.')
        end
        if (nargin > (5-Amatrix-Bnotthere))
            error('Too many inputs.')
        end
    else
        % create an empty struct to return
        opts = struct();
    end
    return; 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_string %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_string(opts,argname,argdesc,def,options);
    % Process inputs and do error-checking 
    errstr = sprintf('%s opts.%s must be: \n',argdesc,argname);
    errstr = [errstr, sprintf('%s  ',options{:})];
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        valid = 0;
        if isstr(ret),
            for j = 1:length(options),
                if isequal(ret,options{j}),
                    valid = 1;
                    break;
                end
            end
        end
        if ~valid,
            error(errstr);
        end
        
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_int %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_int(opts,argname,argdesc,def,lb,ub);
    if nargin < 6
        ub = inf;
        if nargin < 5,
            lb = -inf;
        end
    end
    % Process inputs and do error-checking 
    errstr = sprintf('%s opts.%s must be an integer in [%d,%d]',...
                     argdesc,argname,lb,ub);
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        valid = 0;
        % check that it is an int
        if isnumeric(ret),
            ret = floor(ret);
            % check size (1 by 1) and bounds
            if isequal(size(ret),[1 1]) && lb <= ret && ret <= ub,
                valid = 1;
            end
        end
        if ~valid,
            error(errstr);
        end
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_handle %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_handle(opts,argname,argdesc,def);
    if nargin < 4
        def = [];
    end
    % Process inputs and do error-checking 
    errstr = sprintf('%s opts.%s must be a function handle.',...
                     argdesc,argname);
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        if ~isempty(ret),
            % check that it is a function handle
            ret = fcnchk(ret);
        end
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  get_float %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [ret,opts] = get_float(opts,argname,argdesc,def,lb,ub,sz);
% only test bounds if sz == [1 1]
    if nargin < 7,
        sz = [];
    end
    if nargin < 6
        ub = inf;
    end
    if nargin < 5,
        lb = -inf;
    end
    % Process inputs and do error-checking 
    if isequal(sz,[1 1]),
        errstr = sprintf('%s opts.%s must be a scalar in [%d,%d]',...
                         argdesc,argname,lb,ub);
    elseif ~isempty(sz),
        errstr = sprintf('%s opts.%s must be an array of dimension %d by %d',...
                         argdesc,argname,sz(1),sz(2));
    
    else
        errstr = sprintf('%s opts.%s must be a numeric array.\n',...
                         argdesc,argname);
    end
    if isfield(opts,argname)
        ret = getfield(opts,argname);
        valid = 0;
        % check that it is an int
        if isnumeric(ret),
            ret = double(ret);
            % no size request, no checks at all
            if isempty(sz),
                valid = 1;
            % if scalar requested, perform bounds check
            elseif isequal(sz,[1 1]),
                if isequal(sz,size(ret)) && lb <= ret && ret <= ub,
                    valid = 1;
                end
            % if matrix requested, just check size
            elseif isequal(sz,size(ret)),
                valid = 1;
            end
        end
        if ~valid,
            error(errstr);
        end
        % remove field from opts
        opts = rmfield(opts,argname);
    else
        ret = def;
    end
    return;
