#include <iostream>
#include <cmath>
#include <vector>
#include <algorithm>
#include <random>
#include <cfloat>
#include <chrono>

#include <eigen3/Eigen/Core>
#include <eigen3/Eigen/LU>
#include <eigen3/Eigen/Eigenvalues>

#include "alloc_alg.h"
#include "optimistic.h"
#include "shared.h"
#include "alloc.h"

using namespace std;
using namespace Eigen;





/***************************************************************
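* Compute the expected reward of an action: the sum over rows of the
* inner products of matching rows of nu and M, each clipped to [0, 1]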
***************************************************************/
float AllocAlgorithm::expected_reward(MatrixXf &nu, MatrixXf &M) {
  /* For every row r of nu, take the inner product of row r of nu with
     row r of M, clip it to the interval [0, 1], and return the sum of
     the clipped values. */
  const int num_rows = nu.rows();
  float total = 0.0;

  for (int r = 0; r < num_rows; ++r) {
    float term = nu.row(r) * M.row(r).transpose();

    /* clip the per-row contribution to [0, 1] */
    if (term < 0.0) {
      term = 0.0;
    } else if (term > 1.0) {
      term = 1.0;
    }

    total += term;
  }

  return total;
}




/***************************************************************
* Implements the optimistic allocation algorithm
***************************************************************/
void AllocAlgorithm::alloc_alg(vector<Data> &datas, bool gamma_fixed) {
  /*
   * Runs the optimistic allocation loop for n rounds. The member matrix
   * nu (one row per task) is used both to score allocations and to
   * sample the Bernoulli outcome of each task.
   *
   * datas:       output. One record is appended on every round t with
   *              t % 1000 == 0, laid out as
   *              [t+1, cumulative regret, gamma_0 .. gamma_{K-1},
   *               cumulative expected reward].
   * gamma_fixed: when true every observation is weighted with gamma = 4
   *              (the value of 1/(p(1-p)) at p = 0.5). Otherwise gamma
   *              is derived from the confidence interval around the
   *              estimated mean, falling back to 4 when that interval
   *              contains 0.5.
   */
  int K = nu.rows();
  int D = nu.cols();

  /* seed the generator from the current system-clock time */
  gen.seed(chrono::system_clock::now().time_since_epoch().count());

  /* per-task state: gram matrix, weighted sum of allocation * outcome,
     current estimate, and inverse of the gram matrix */
  vector<MatrixXf> Gs(K);
  vector<VectorXf> sum_g_M_Y(K);
  vector<VectorXf> hat_nu(K);
  vector<MatrixXf> Gs_inverse(K);

  /* weights used in the most recent update; all zero until the first
     round has been processed */
  vector<double> gammas(K);

  MatrixXf tilde_nu(K, D);


  /* start every gram matrix at alpha * I and every weighted sum at 0 */
  for (int k = 0; k != K; ++k) {
    Gs[k] = MatrixXf::Identity(D, D) * alpha;
    sum_g_M_Y[k] = VectorXf::Zero(D);
  }

  /* compute the optimal expected reward */
  MatrixXf optimalMt(K, D);
  Alloc::optimal(nu, optimalMt);

  /* initialise the pseudo-regret */
  float regret = 0.0;
  float reward = 0.0;
  float optimal_expected_reward = expected_reward(nu, optimalMt);

  for (uint64_t t = 0; t != n; ++t) {
    /* compute estimates: hat_nu[k] = Gs[k]^{-1} * sum_g_M_Y[k] */
    for (int k = 0; k != K; ++k) {
      Gs_inverse[k] = Gs[k].inverse();
      hat_nu[k] = Gs_inverse[k] * sum_g_M_Y[k];
    }

    /* compute the optimistic strategy */
    MatrixXf Mt(K, D);
    float best = 0.0;
    opt_handler.optimistic(hat_nu, Gs, root_beta, tilde_nu, Mt, best);

    /* update the regret; the expected reward of this round's allocation
       is evaluated once and used for both accumulators */
    const float round_reward = expected_reward(nu, Mt);
    regret += optimal_expected_reward - round_reward;
    reward += round_reward;

    /* record and output some data every 1000 steps; the gammas written
       here are the ones left over from the previous round's update */
    if (t % 1000 == 0) {
      Data data;
      data.push_back((double)(t+1));
      data.push_back(regret);
      for (int k = 0; k != K; ++k) {
        data.push_back(gammas[k]);
      }
      data.push_back(reward);
      datas.push_back(data);
    /* 
      cout << "-----------------------------------\n";
      cout << "t = " << t << "\n";
      cout << "regret = " << regret / (t + 1) << "\n";
      cout << tilde_nu << "\n\n";
      cout << Mt << "\n\n";*/
    }

    /* update gram matrices and get results */
    for (int k = 0; k != K; ++k) {
      float mean = Mt.row(k) * nu.row(k).transpose();
      if (mean > 1) mean = 1;
      assert(mean >= -0.01);
      /* values inside the small negative tolerance accepted above are
         treated as 0: std::bernoulli_distribution requires 0 <= p <= 1 */
      if (mean < 0) mean = 0;

      /* get the return for task k */
      int Yk = bernoulli(mean);

      /* compute the width of the confidence interval */
      float epsilon = 2 * root_beta * norm(Mt.row(k), Gs_inverse[k]);
      float expected = Mt.row(k) * hat_nu[k];

      /* compute gamma: 4 by default; when the whole interval
         [expected - epsilon, expected + epsilon] lies on one side of
         0.5, use 1/(p(1-p)) at the interval end closest to 0.5 */
      gammas[k] = 4.0;
      if (!gamma_fixed) {
        if (expected+epsilon <= 0.5) {
          gammas[k] = 1.0/((expected+epsilon)*(1-expected-epsilon));
        } else if (expected-epsilon >= 0.5) {
          gammas[k] = 1.0/((expected-epsilon)*(1-expected+epsilon));
        }
      }

      /* update the gram matrix and the weighted outcome sum */
      Gs[k] += gammas[k] * Mt.row(k).transpose() * Mt.row(k);
      sum_g_M_Y[k] += gammas[k] * Mt.row(k) * Yk;
    }
  }
}


/***************************************************************
* Returns a random Bernoulli variable: 1 with probability p, otherwise 0
***************************************************************/
int AllocAlgorithm::bernoulli(float p) {
  /* Draw one sample from a Bernoulli distribution with parameter p
     using the member generator gen, and report it as 1 or 0. */
  std::bernoulli_distribution coin(p);
  const bool success = coin(gen);
  return success ? 1 : 0;
}





