function [V, B_new, fval, VCandidates] = Func_InAnn_NestedOpt(X, Xt, V_init, B_init, zeta, rho, optsB_DA, optsV, fHandle_GLM_res, ZeroOut)
%==========================================================================
% This function implements a nested algorithm for updating B,
% where the gradient computation is based on the current optimal V.
% Each outer iteration (i) re-optimizes V for the current B, (ii) evaluates
% the objective and its gradient with respect to B at that V, and (iii)
% moves B with an Armijo line search whose objective re-optimizes V.
%------------------------ Input Variables ---------------------------------
% X               - design matrix
% Xt              - transpose of X
% V_init          - starting value of the unknown parameter V
% B_init          - starting value of the unknown parameter B
% zeta            - annealing parameter for the heating process (inverse cooling), which controls
%                   the steepness of the function in approximating the sign function
% rho             - the stepsize (not used in this function: the step length
%                   is chosen by Armijo_linesearch)
% optsB_DA        - Options for optimizing over the fit B using Directional Ascent;
%                   the fields maxiter, tolerror and tolgradnorm are read here
% optsV           - Options for optimizing over V; numOfInitValues is
%                   overridden locally (see below)
% fHandle_GLM_res - function handle of Funct_GLMResidual_Gradient.m, called
%                   here as fHandle_GLM_res(X, B)
% ZeroOut         - (optional) struct, may be omitted or empty.
%                   field mat: a binary matrix of the same size of B. 0 means
%                   the position should be fixed at 0 (mat is multiplied
%                   elementwise into V_init as well).
%                   field BMethod: how the zeros of B are enforced; only
%                   'penalized' is implemented. If missing or empty it
%                   defaults to 'penalized' with lambda_B = 1e+8 (and a warning).
%------------------------ Output Variables --------------------------------
% V               - solution of V
% B_new           - solution of the fit B
% fval            - function value returned by the final V-step at B_new
% VCandidates     - third output of Func_InAnn_VOpt from the last executed
%                   outer iteration; [] when the loop does not run
%==========================================================================
debug = 0; % set to >= 2 to print diagnostic function values in every iteration

p = size(X, 2); m = size(B_init, 2);
if ~exist('ZeroOut', 'var')
    ZeroOut = [];
end
ZeroOutEffective = ~isempty(ZeroOut);

V = V_init;

if ZeroOutEffective
    B_init = B_init .* ZeroOut.mat; % zero some entries of B according to the zeros of ZeroOut.mat
    V = V_init .* ZeroOut.mat;
end

B_old = B_init;
% Defined up front so the code after the loop still works when
% optsB_DA.maxiter < 1 and the loop body never executes.
B_new = B_init;
VCandidates = [];

% Number of starting values used by the three kinds of V-optimization:
% -1: do not do any optimization; calculate the associated function value (using the current V)
% 0:  use the current V as the initial to do optimization
% >0: use multiple initial Vs generated by random sampling (plus V if it's not empty) to do optimization
optsV_withinB = optsV;
optsV_withinB.numOfInitValues = 5;     % Actually 3 seems enough for most cases
optsV_BLinesearch = optsV;
optsV_BLinesearch.numOfInitValues = 4;
optsV_afterB = optsV;
optsV_afterB.numOfInitValues = 1;

% isfield guards against a ZeroOut struct that carries only the field mat.
if ZeroOutEffective && (~isfield(ZeroOut, 'BMethod') || isempty(ZeroOut.BMethod))
    warning('No method specified on how to specify the zeros in B')
    ZeroOut.BMethod = 'penalized';
    ZeroOut.lambda_B = 1e+8;
end

for i = 1:optsB_DA.maxiter
    R = fHandle_GLM_res(X, B_old);
    if i == 1
        % Reference gradient at the starting point, used to normalize relGrad.
        [~, grad_B0] = Funct_ObjGrad_B(X, Xt, V, B_old, R, zeta, fHandle_GLM_res, ZeroOut);
    end

    if debug >= 2
        [tmp_depth_polishedzeta, tmp_rawfval] = Funct_01DepthVal_zeta(X, R, V, zeta);
        tmp_depth01 = Funct_01DepthVal(X, R, V);
            % In calling the two functions, NO penalty considered in zeroout. But since our estimates always obey the prescribed zeros, it still gives the right function value
        fprintf('current iter=%d, B+V- function value is %.2f, polished func val %.2f. 01 depth value: %.2f\n', ...
            i, tmp_rawfval, tmp_depth_polishedzeta, tmp_depth01)
    end

    % V-step: re-optimize V for the current residuals.
    [fval, V, VCandidates] = Func_InAnn_VOpt(X, Xt, V, R, zeta, optsV_withinB, ZeroOut);

    if debug >= 2
        [tmp_depth_polishedzeta, tmp_rawfval] = Funct_01DepthVal_zeta(X, R, V, zeta);
        tmp_depth01 = Funct_01DepthVal(X, R, V);
            % In calling the two functions, NO penalty considered in zeroout. But since our estimates always obey the prescribed zeros, it still gives the right function value
        fprintf('current iter=%d, V+B- function value is %.2f, polished func val %.2f. 01 depth value: %.2f\n', ...
            i, tmp_rawfval, tmp_depth_polishedzeta, tmp_depth01)
        % tmp_rawfval is just fval
    end

    % B-step. The objective is maximized over B, so the line search is run
    % on its negative: -fval is the current value and -(-grad_B) the search
    % direction of that minimization problem.
    if ~ZeroOutEffective
        [fval, grad_B] = Funct_ObjGrad_B(X, Xt, V, B_old, R, zeta, fHandle_GLM_res); % no optimization, function value is the same as fval before
        fHandle_tmp = @(B) Func_negative_ObjGivenB(B, X, Xt, V, R, zeta, optsV_BLinesearch, fHandle_GLM_res);
        % Tuning note kept from the original: 1e-4 for real data, 4*1e-3 for simulation, 10*1e-3
        [~, B_new, ~, delta_B] = Armijo_linesearch(fHandle_tmp, grad_B, B_old, -fval, -norm(grad_B, 'fro')^2, 2*1e-3 + 8*1e-3 * (p > 4 && m >2), 5, 0.1, 1e-6);
    elseif strcmpi(ZeroOut.BMethod, 'penalized')
        % Penalized approach: ZeroOut is forwarded to the objective.
        [fval, grad_B] = Funct_ObjGrad_B(X, Xt, V, B_old, R, zeta, fHandle_GLM_res, ZeroOut); % no optimization, function value is the same as fval before
        fHandle_tmp = @(B) Func_negative_ObjGivenB(B, X, Xt, V, R, zeta, optsV_BLinesearch, fHandle_GLM_res, ZeroOut);
        % Tuning note kept from the original: 1e-6, 1e-4, 2*1e-4
        [~, B_new, ~, ~] = Armijo_linesearch(fHandle_tmp, grad_B, B_old, -fval, -norm(grad_B, 'fro')^2, 3* 1e-6, 5, 0.01, 1e-6);

        % This branch is only reached when ZeroOut is in effect, so the
        % prescribed zeros are always re-imposed and the step recomputed.
        B_new = B_new .* ZeroOut.mat; % zero B according to the zeros of ZeroOut.mat
        delta_B = B_new - B_old;
    else
        % A 'constrained' method (passing ZeroOut to Armijo_linesearch) was
        % sketched in earlier revisions but is not implemented.
        error('To be implemented')
    end

    % Stopping criteria: largest entry of the gradient relative to the
    % starting gradient, and largest entry of the change in B.
    relGrad = max(max(abs(grad_B))) / max(max(abs(grad_B0)));
    B_error = max(max(abs(delta_B)));

    if B_error < optsB_DA.tolerror || relGrad < optsB_DA.tolgradnorm
        break
    end

    B_old = B_new;

end

R = fHandle_GLM_res(X, B_new);

if optsV_afterB.numOfInitValues == -1 % No further optimization. just use the current V
    [fval] = Funct_ObjGradHessian_V(X, [], V, R, zeta); %% the second output of  Funct_01DepthVal_zeta(X, R, V, zeta)
else % Perform another optimization over V
    [fval, V] = Func_InAnn_VOpt(X, Xt, V, R, zeta, optsV_afterB, ZeroOut); % we can use subsampling to re-compute the depth and the associated V (given B)
end
end



function [fval, VOpt, Vres] = Func_InAnn_VOpt(X, Xt, V, R, zeta, optsV, ZeroOut)
%==========================================================================
% This function computes the data depth given B using a phi with parameter
% zeta.  It is used in the deepest fit estimation.
% It runs Func_ManOpt from one or several starting values of V and keeps
% the run with the smallest function value.
%------------------------ Input Variables ---------------------------------
% X               - design matrix
% Xt              - transpose of X
% V               - current V; may be empty. When non-empty it is used as a
%                   starting value (in addition to any sampled ones)
% R               - residual matrix
% zeta            - steepness for the approximation of the sign function
% optsV           - options for V optimization. optsV.numOfInitValues:
%                   >0 : that many starting values are drawn by Funct_InitVs
%                        (plus V itself when V is not empty)
%                   <=0: V is the single starting value
% ZeroOut         - (optional) forwarded to Funct_InitVs and Func_ManOpt;
%                   defaults to []
%------------------------ Output Variables --------------------------------
% fval            - function value calculated based on the phi function
%                   with parameter zeta (smallest over all starting values)
% VOpt            - the V attaining fval
% Vres            - p x m x (number of starts) array holding the solution
%                   from every starting value
%==========================================================================
if ~exist('ZeroOut', 'var')
    ZeroOut = [];
end
NumInits = optsV.numOfInitValues;
p = size(X, 2); m = size(R, 2);
if NumInits == 0 && isempty(V)
    % Nothing to start from: fall back to one sampled starting value.
    warning('Wrong input')
    NumInits = 1;
end

if NumInits > 0
    Vs_init = Funct_InitVs(Xt, R, NumInits, ZeroOut);
    if ~isempty(V) % add V in addition to the sampled Vs
        Vs_init(:, :, NumInits + 1) = V;
        NumInits = NumInits + 1;
    end

    Vres = zeros(p, m, NumInits);
    fres = zeros(NumInits, 1);
    for j = 1:NumInits
        [Vres(:,:,j), fres(j), ~] = Func_ManOpt(X, Xt, Vs_init(:,:,j), R, zeta, optsV, ZeroOut);
    end

    % Index of the smallest function value (first one on ties). Unlike
    % find(fres == min(fres), 1), min still returns a valid index when every
    % entry of fres is NaN, so VOpt/fval are never silently empty.
    [fval, optInd] = min(fres);
    VOpt = Vres(:,:,optInd);
else % in this case, we use the V as the (single) initial value
    [VOpt, fval, ~] = Func_ManOpt(X, Xt, V, R, zeta, optsV, ZeroOut);
    % Keep the third output defined so callers that request it do not fail.
    Vres = VOpt;
end

end

function [fval, g] = Func_negative_ObjGivenB(B, X, Xt, V, R, zeta, optsV_BLinesearch, fHandle_GLM_res, ZeroOut)
%==========================================================================
% Negated objective (and, on request, negated gradient) as a function of B,
% so that a minimizer such as a line search can be applied to a
% maximization problem.
%------------------------ Input Variables ---------------------------------
% B                 - point at which the objective is evaluated
% X, Xt             - design matrix and its transpose
% V                 - current V; used as is, or as input to the V-optimization
% R                 - ignored: the residuals are recomputed from B below
% zeta              - steepness parameter forwarded to the callees
% optsV_BLinesearch - options for the V-optimization. numOfInitValues == -1
%                     means "no optimization, use the current V"
% fHandle_GLM_res   - function handle, called as fHandle_GLM_res(X, B)
% ZeroOut           - (optional) forwarded to the callees; defaults to []
%------------------------ Output Variables --------------------------------
% fval              - minus the first output of Funct_ObjGrad_B
% g                 - minus the second output of Funct_ObjGrad_B
%==========================================================================
if ~exist('ZeroOut', 'var')
    ZeroOut = [];
end

% Residuals at the requested B.
res = fHandle_GLM_res(X, B);

% Unless optimization is switched off, refresh V for these residuals first.
Vcur = V;
if optsV_BLinesearch.numOfInitValues ~= -1
    [~, Vcur] = Func_InAnn_VOpt(X, Xt, Vcur, res, zeta, optsV_BLinesearch, ZeroOut);
end

% Evaluate at (B, Vcur) and flip the sign; the gradient is only requested
% from Funct_ObjGrad_B when the caller did not ask for exactly one output.
if nargout == 1
    fval = -Funct_ObjGrad_B(X, Xt, Vcur, B, res, zeta, fHandle_GLM_res, ZeroOut);
else
    [objVal, objGrad] = Funct_ObjGrad_B(X, Xt, Vcur, B, res, zeta, fHandle_GLM_res, ZeroOut);
    fval = -objVal;
    g = -objGrad;
end
end
