%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% optimize_probitsem_pairwise_entropy_bound:
%
% Choose a subset of variables using a different approximation for H(Y) by
% pairwise entropies that will form a tree-structure.
%
% If an ILP solution is required, a Gurobi/MEX call can be made. Please
% check:
%
% - http://www.convexoptimization.com/wikimization/index.php/Gurobi_Mex:_A_MATLAB_interface_for_Gurobi
% - http://www.gurobi.com/
%
% Input:
%
% - L, S: parameters of probit model
% - sel_K_start: initialization vector
%
% Output:
%
% - sel_K: the selection of K variables
% - optim_found: returns TRUE if the optimizer is guaranteed to have found
%                the optimum
%
% Created by: Ricardo Silva, London, 22/05/2011
% University College London
% Current version: 22/05/2011

function [sel_K optim_found] = optimize_probitsem_pairwise_entropy_bound(L, S, sel_K_start)

% Builds a 0/1 integer linear program over indicator variables for the
% num_y rows of L and hands it to gurobi_mex for maximization.
%
% Input:
%
% - L: (num_y) x (num_x + 1) coefficient matrix; the last column is used as
%      an intercept
% - S: num_x x num_x matrix passed to chol() to draw the Monte Carlo samples
% - sel_K_start: indices of the initially selected variables; the number of
%                selected variables is kept fixed by the ILP
%
% Output:
%
% - sel_K: indices (among 1..num_y) selected by the solver
% - optim_found: true if the solver status flag equals 2
%
% ILP variable layout (num_vars columns):
%
%   1 .. num_y               selection indicators z(y)
%   num_y + 1 .. 2 * num_y   negations 1 - z(y)
%   2 * num_y + 1 .. end     product variables, (num_p + 1) per y, where
%                            num_p = length(entropy_sets{y}.parents)

% Preliminaries %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% The last column of L is the intercept, so it does not count as an input
[num_y num_x] = size(L); num_x = num_x - 1;

% Calculate expectation of log terms %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

W = zeros(num_y, 1);
M = 100000; % Number of Monte Carlo samples

% chol(S)' * (standard normal draws) gives zero-mean samples with
% covariance S, one sample per column
x = chol(S)' * randn(num_x, M);

for y = 1:num_y
  m = L(y, 1:num_x) * x + L(y, end);
  py0 = normcdf(-m); py1 = 1 - py0; 
  % log(0) = -Inf would turn 0 * log(0) into NaN; force those terms to 0
  log_py1 = log(py1); log_py1(py1 == 0) = 0;
  log_py0 = log(py0); log_py0(py0 == 0) = 0;
  % Sample average of p1 * log(p1) + p0 * log(p0)
  W(y) = mean(py1 .* log_py1 + py0 .* log_py0);
end

clear('x'); % num_x x M samples are no longer needed

% Get entropy terms %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% entropy_sets{y} is used below through its fields .parents (indices of
% other variables) and .values (one weight per parent plus a final weight
% used when no parent applies). NOTE(review): exact semantics are defined
% by the two helpers below -- confirm there.
fprintf('Caching entropy information...\n');
[entropy_order cond_entropy] = optimize_pairwise_entropy_order(L, S);
entropy_sets = get_pairwise_entropy_sets(entropy_order, cond_entropy);

% Initialization %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

sel_K = sel_K_start;
z = zeros(1, num_y);
z(sel_K) = 1;

fprintf('Initial score = %f\n', sum(W(sel_K)) + ...
        get_pairwise_entropy_score(z(1:num_y), entropy_sets));
    
% Linear integer programming formulation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

num_extra_vars = num_y; % Negation of original variables
for y = 1:num_y
  % Product variables
  num_extra_vars = num_extra_vars + length(entropy_sets{y}.parents) + 1;
end
num_vars = num_y + num_extra_vars;
z_start = zeros(1, num_vars); z_start(1:num_y) = z;    

% Objective function %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Selection indicators are weighted by W, negations get weight 0, and each
% product variable gets the matching entry of entropy_sets{y}.values
ilp_w = zeros(num_vars, 1);
ilp_w(1:num_y) = W;
col_pos = 2 * num_y + 1;
for y = 1:num_y
  num_p = length(entropy_sets{y}.parents);
  [~, idx_w] = sort(entropy_sets{y}.values(1:num_p), 'ascend');
  % The line break inside the brackets is a vertical concatenation
  % (assumes .values is a column vector -- TODO confirm in the helper)
  ilp_w(col_pos:(col_pos + num_p)) = [entropy_sets{y}.values(idx_w)
                                      entropy_sets{y}.values(end)];
  col_pos = col_pos + num_p + 1;
end

% Equality constraints %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Aeq = sparse(1 + num_y, num_vars); beq = zeros(1 + num_y, 1);
Aeq(1, 1:num_y) = 1; beq(1) = sum(z); % Sum to K
for y = 1:num_y
  % Relation to negated variables: I'm assuming Gurobi (or whatever library
  % is being used) is smart enough to preprocess these variables away. I'm
  % just making them explicitly to make the rest of the code easier to read
  Aeq(1 + y, [y (y + num_y)]) = [1 1]; beq(1 + y) = 1; %#ok<SPRIX>
  z_start(y + num_y) = 1 - z_start(y);
end

% Inequality constraints %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% First pass: count rows and nonzeros so that the triplet arrays can be
% preallocated. A product variable defined over v_set_size literals needs
% v_set_size two-entry rows plus one row with v_set_size + 1 entries.
num_constraints = 0; num_A_entries = 0; num_b_entries = 0;
for y = 1:num_y
  num_p = length(entropy_sets{y}.parents);
  for p = 1:(num_p + 1)    
    v_set_size = p + (p <= num_p);
    num_constraints = num_constraints + v_set_size + 1;
    num_A_entries = num_A_entries + 3 * v_set_size + 1;    
  end
  num_b_entries = num_b_entries + num_p + 1;
end

% Second pass: fill (row, column, value) triplets instead of indexing into
% a sparse matrix entry by entry
row_pos = 1; col_pos = 2 * num_y + 1;

A_idx = zeros(num_A_entries, 3); b_idx = zeros(num_b_entries, 2);
A_pos = 1; b_pos = 1;

for y = 1:num_y
    
  %fprintf('Adding constraints for %d out of %d\n', y, num_y);
  num_p = length(entropy_sets{y}.parents);
  % Same ordering as used for the objective weights above
  [~, pre_idx_p] = sort(entropy_sets{y}.values(1:num_p), 'ascend');  
  idx_p = entropy_sets{y}.parents(pre_idx_p);
  
  for i = 1:(num_p + 1)

    % Select the relevant variables corresponding to this extra variable:
    % for i <= num_p the product is z(y) * z(parent i) * prod of the
    % negations of parents 1..i-1; the last one is z(y) times the
    % negations of all parents.
    
    if i <= num_p
      v_set = [y idx_p(i) (num_y + idx_p(1:(i - 1)))];
    else
      v_set = [y (num_y + idx_p(1:(i - 1)))];  
    end
    
    % Enter constraints z(col_pos) == 1 --> z(v) == 1, where 
    % "v" is in the set of original/negated variables defining this
    % z(col_pos).
    %
    % (That is, z(col_pos) - z(v) <= 0) for all relevant v; the right-hand
    % side of these rows stays at the default 0
    
    for v = v_set
      A_idx(A_pos, :) = [row_pos col_pos  1]; A_pos = A_pos + 1;
      A_idx(A_pos, :) = [row_pos       v -1]; A_pos = A_pos + 1;
      row_pos = row_pos + 1; 
    end    
    
    % Enter constraint z(col_pos) == 0 --> at least one z(v) is 0.
    %
    % (That is, sum (1 - z(v)) >= 1 - extra_z(col_pos)), which rearranges
    % to sum z(v) - z(col_pos) <= length(v_set) - 1

    for v = v_set      
      A_idx(A_pos, :) = [row_pos v 1]; A_pos = A_pos + 1;
    end    
    A_idx(A_pos, :) = [row_pos col_pos -1]; A_pos = A_pos + 1;    
    b_idx(b_pos, :) = [row_pos length(v_set) - 1]; b_pos = b_pos + 1;    
    row_pos = row_pos + 1;  
  
    % Adjust initial feasible solution      
    
    z_start(col_pos) = prod(z_start(v_set));

    % Advance
    
    col_pos = col_pos + 1;
    
  end
  
end

A = sparse(A_idx(:, 1), A_idx(:, 2), A_idx(:, 3), num_constraints, num_vars);
b = sparse(b_idx(:, 1), ones(num_b_entries, 1), b_idx(:, 2), num_constraints, 1);
clear('A_idx', 'b_idx');

% Solve %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

objtype = -1; % 1 for minimize, -1 for maximize
% Inequality rows first, then the equality rows, matching the stacking below
contypes = [repmat('<', 1, size(A, 1)) repmat('=', 1, size(Aeq, 1))];
A = [A; Aeq];
b = [b; beq];
lb = [];
ub = [];
vtypes = repmat('B', 1, size(A, 2)); % All variables are binary

clear opts
opts.IterationLimit = 300000;
opts.FeasibilityTol = 1e-6;
opts.IntFeasTol = 1e-5;
opts.OptimalityTol = 1e-6;
opts.Method = 0; 
opts.Presolve = -1;
opts.Display = 1;
opts.Start = z_start; % Warm start from the initial selection
opts.LogFile = 'test_gurobi_mex_MIP.log';

[z, ~, flag] = gurobi_mex(ilp_w, objtype, A, b, contypes, lb, ub, vtypes, opts);
z = z';

% NOTE(review): 2 is taken to be the solver's "optimal" status code --
% confirm against the gurobi_mex documentation
optim_found = flag == 2;

% Without a full solution vector there is nothing to decode: keep the
% initial selection (already stored in sel_K) instead of failing on the
% indexing below.
if numel(z) ~= num_vars
  warning('optimize_probitsem_pairwise_entropy_bound:noSolution', ...
          'Solver returned no usable solution (flag = %d); keeping initial selection.', flag);
  optim_found = false;
  return;
end

% Binary variables are only integral up to opts.IntFeasTol, so a selected
% variable may come back as, e.g., 0.999999. Threshold instead of testing
% for exact equality with 1, which could silently drop selected variables.
sel_K = find(z(1:num_y) > 0.5);
fprintf('Best score = %f\n', z * ilp_w);
