#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <random>
#include <set>
#include <vector>

using namespace std;

/*
 * Stores a simple interval of type [a,b], [a,b), (a,b] or (a,b).
 *
 * left / right are the endpoints; left_open / right_open are true when the
 * corresponding endpoint is excluded from the interval. No ordering of the
 * endpoints is enforced here.
 */
class SimpleInterval {
  public:
  double left = 0.0;
  double right = 0.0;
  bool left_open = false;
  bool right_open = false;

  /* l, r: endpoints; lo, ro: whether the left / right endpoint is excluded */
  SimpleInterval(double l, double r, bool lo, bool ro)
    : left(l), right(r), left_open(lo), right_open(ro) {
  }

  /* the closed degenerate interval [0,0]; every member has a defined value */
  SimpleInterval() = default;
};

/*
 * Stores a union of simple intervals as a list of pieces. The pieces are kept
 * in the order they were added; they are neither sorted nor merged, so they
 * may overlap.
 */
class Interval {
  public:
  std::vector<SimpleInterval> sub_intervals;

  /* prints every piece on a single line of stdout, writing "(" / ")" for an
     excluded endpoint and "[" / "]" for an included one */
  void print() const {
    for (const auto &piece : sub_intervals) {
      std::cout << (piece.left_open ? "(" : "[") << piece.left << ", "
                << piece.right << (piece.right_open ? ")" : "]") << " ";
    }
    std::cout << "\n";
  }

  /* the empty set (no pieces) */
  Interval() {
  }

  /* the single closed interval [left, right] */
  Interval(double left, double right) {
    sub_intervals.push_back(SimpleInterval(left, right, false, false));
  }

  /*
   * Returns the intersection of this union with i, built by intersecting
   * every piece of this object with every piece of i. Neither operand is
   * modified. An endpoint of a result piece is open when any operand piece
   * that supplies that endpoint is open there. A single-point result is kept
   * only when both of its ends are closed.
   */
  Interval intersect(const Interval &i) const {
    Interval n;
    for (const auto &x : sub_intervals) {
      for (const auto &y : i.sub_intervals) {
        const double left = std::max(x.left, y.left);
        const double right = std::min(x.right, y.right);

        /* the two pieces do not meet (also skips NaN endpoints) */
        if (!(left <= right)) {
          continue;
        }

        const bool left_open = (x.left == left && x.left_open)
                            || (y.left == left && y.left_open);
        const bool right_open = (x.right == right && x.right_open)
                             || (y.right == right && y.right_open);

        /* a single point survives only if neither side excludes it */
        if (left < right || (!left_open && !right_open)) {
          n.sub_intervals.push_back(SimpleInterval(left, right, left_open, right_open));
        }
      }
    }
    return n;
  }
};

/*
 * Stores a line segment from (x1,y1) to (x2,y2) together with its gradient
 * and y-intercept.
 *
 * The code assumes x1 < x2: the gradient is (y2 - y1) / (x2 - x1), so a
 * vertical segment divides by zero, and get_interval clips to [x1, x2].
 * Neither condition is checked.
 */
class Line {
  public:
  double x1,y1,x2,y2;
  double grad, intercept;
  bool left_open;       /* stores whether or not the line is open on the left/right ends */
  bool right_open;

  /* (a,b) and (c,d) are the two endpoints; l / r say whether the left / right
     end is open */
  Line(double a, double b, double c, double d, bool l, bool r)
    : x1(a), y1(b), x2(c), y2(d),
      grad((d - b) / (c - a)),
      intercept(b - grad * a),
      left_open(l), right_open(r) {
  }

  /* return value of line at point x (x is not restricted to [x1, x2]) */
  double value(double x) const {
    return grad * x + intercept;
  }

  /*
   * Computes the "level-set" of x values in [x1, x2] where the y-value is
   * between lower and upper, and stores it in i.
   *
   * Returns true when that set is non-empty; when it returns false the
   * contents of i are not meaningful. An end of i is marked open when the
   * segment itself is open at that end and the level set reaches it.
   */
  bool get_interval(double lower, double upper, SimpleInterval &i) const {
    if (grad == 0.0) {
      /* Horizontal segment: the level set is the whole segment or nothing.
         Handled separately because dividing by a zero gradient yields
         infinities, and NaN (0/0) when a bound equals the intercept, which
         previously made the whole segment look empty. */
      i.left = x1;
      i.right = x2;
      i.left_open = left_open;
      i.right_open = right_open;
      const bool in_band = std::min(lower, upper) <= intercept
                        && intercept <= std::max(lower, upper);
      return in_band && i.left <= i.right;
    }

    /* x coordinates where the (unbounded) line crosses the two y bounds; the
       order depends on the sign of the gradient */
    const double a = (lower - intercept) / grad;
    const double b = (upper - intercept) / grad;
    const double left = std::min(a, b);
    const double right = std::max(a, b);

    i.left_open = left_open && left <= x1;
    i.right_open = right_open && right >= x2;

    /* clip to the extent of the segment */
    i.left = std::max(left, x1);
    i.right = std::min(right, x2);

    return i.left <= i.right;
  }
};

/* stores a piecewise linear function as a set of lines */
class PiecewiseLinear : public vector<Line> {
  public:
  double maximise(Interval &i);

  /* get the set "level sets" of the piecewise linear function for y values between lower and upper */
  Interval get_interval(double lower, double upper) {
    Interval n;
    for (auto &line : *this) {
      SimpleInterval i;
      if (line.get_interval(lower, upper, i)) {
        n.sub_intervals.push_back(i);
      }
      
    }
    return n;
  }

  /* return the value of the piecewise linear function at x. Throws error if does not exist */
  double value(double x) {
    for (auto &line : *this) {
      if (x >= line.x1 && x <= line.x2) {
        return line.value(x);
      }
    }
    throw 1;
  }
};


/* runs structured UCB */
double alg_ucbd(vector<PiecewiseLinear> arms, Interval params,  double theta, int n) {
  /* setup randomisation */
  random_device rd;
  mt19937 gen(rd());

  /* vector of distributions for the returns */
  vector<normal_distribution<> > norms;

  /* store counts, true means and cumulative returns for each arm */
  vector<int> counts(arms.size(),0);
  vector<double> means;
  vector<double> sums(arms.size(),0.0);

  
  /* number of arms */
  int K = arms.size();

  /* theoretically optimal alpha */
  double alpha = 4.0;

  /* add the means and distributions to corresponding vectors */
  for (auto &a : arms) {
    double val = a.value(theta);
    normal_distribution<> norm(val, 1.0); /* variance 1 */
    means.push_back(val);
    norms.push_back(norm);
  }

  /* compute the best arm */
  double best = *max_element(means.begin(),means.end());

  /* initialise the regret */
  double regret = 0.0;

  /* iterate over time */
  for (int t = 0;t < n;t++) {
    /* initial hypothesis region */
    Interval i = params;

    /* compute \tilde\Theta by iterating over arms */
    for (int k = 0;k < K;k++) {
      if (counts[k] > 0) {    /* arms that have not been chosen do not restrict the region */
        double mean = sums[k] / counts[k];  
        double conf = sqrt(alpha * log(t) / counts[k]);
        Interval region = arms[k].get_interval(mean - conf, mean + conf);
        i = i.intersect(region);
      }
    }
    /* maximise the value over the region */
    double best_val = -100;
    int action = 0;
    for (int k = 0;k < K;k++) {
      for (auto &x : i.sub_intervals) {
        double val = max(arms[k].value(x.left), arms[k].value(x.right));
        if (val > best_val) {
          best_val = val;
          action = k;
        }
      }
    }
    /* increment the return, counts, and regret */
    sums[action]+=norms[action](gen);
    counts[action]++;
    regret+=best - means[action];
    
  }

  return regret;
}

/*
 * Plain UCB on a two-armed bandit whose arms have true means mean1 and mean2
 * and normally distributed rewards with standard deviation 1.
 *
 * Both arms are pulled once up front, and the regret of those two pulls is
 * always counted, even when n < 2. The remaining rounds t = 2 .. n-1 pull the
 * arm with the larger index; ties go to the second arm. Returns the
 * cumulative (pseudo-)regret.
 */
double alg_ucb(double mean1, double mean2, int n) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::normal_distribution<> norm1(mean1, 1.0);
  std::normal_distribution<> norm2(mean2, 1.0);
  const double alpha = 2.0;

  /* per-pull regret of each arm relative to the better one */
  const double best = std::max(mean1, mean2);
  const double gap1 = best - mean1;
  const double gap2 = best - mean2;

  /* initial pull of each arm: slot 0 is arm 1, slot 1 is arm 2 */
  int pulls[2] = {1, 1};
  double totals[2];
  totals[0] = norm1(gen);
  totals[1] = norm2(gen);

  /* regret of the two initial pulls */
  double regret = 2*best - mean1 - mean2;

  /* t counts the actions already taken, hence the start at 2 */
  for (int t = 2; t < n; ++t) {
    const double log_t = std::log(t);
    const double idx1 = totals[0] / pulls[0] + std::sqrt(alpha / pulls[0] * log_t);
    const double idx2 = totals[1] / pulls[1] + std::sqrt(alpha / pulls[1] * log_t);

    const bool pick_first = idx1 > idx2;
    if (pick_first) {
      ++pulls[0];
      totals[0] += norm1(gen);
      regret += gap1;
      continue;
    }
    ++pulls[1];
    totals[1] += norm2(gen);
    regret += gap2;
  }

  return regret;
}



/* Compute tests on the problem with:
  \Theta = [-1, 1]
  \mu_1(\theta) = \theta \ind{\theta > 0}
  \mu_2(\theta) = -\theta \ind{\theta < 0}

  For theta = -1.0, -0.9, ..., 1.0 the regret of structured UCB and of plain
  UCB is averaged over N runs and written to exp-c-1.txt and to stdout.
*/
void test_c() {
  const int N = 500;            /* number of runs averaged for each data point */
  const int n = 50000;          /* horizon */

  /* initialise the parameter space and arms */
  Interval Theta(-1,1);
  PiecewiseLinear arm1;

  arm1.push_back(Line(-1,0,0,0,false,false));
  arm1.push_back(Line(0,0,1,1,false,false));

  PiecewiseLinear arm2;

  arm2.push_back(Line(-1,1,0,0,false,false));
  arm2.push_back(Line(0,0,1,0,false,false));

  std::vector<PiecewiseLinear> arms;
  arms.push_back(arm1);
  arms.push_back(arm2);

  /* generator for permuting the arms; std::random_shuffle was removed in
     C++17, so std::shuffle with an explicit engine is used instead */
  std::random_device rd;
  std::mt19937 shuffle_gen(rd());

  std::ofstream out1("exp-c-1.txt", std::ios::out);
  out1 << "theta ucbd_regret ucb_regret\n";

  /* iterate over theta \in [-1,1]; an integer counter avoids the rounding
     drift of repeatedly adding 0.1 (which never lands exactly on 0) */
  for (int step = 0; step <= 20; step++) {
    const double theta = -1.0 + step / 10.0;
    double total_ucbd = 0.0;                                                /* store the regret for UCB-S and UCB respectively */
    double total_ucb = 0.0;
    for (int i = 0; i < N; i++) {
      std::shuffle(arms.begin(), arms.end(), shuffle_gen);                  /* randomly permute the arms before each test */
      total_ucbd += alg_ucbd(arms, Theta, theta, n);                        /* run structured UCB */
      total_ucb += alg_ucb(arms[0].value(theta), arms[1].value(theta), n);  /* run standard UCB */
    }
    out1 << theta << " " << total_ucbd / N << " " << total_ucb / N << "\n";
    std::cout << theta << " " << total_ucbd / N << " " << total_ucb / N << "\n";
  }
  out1.close();
}

/* Compute tests on the problem with:
  \Theta = [-1, 1]
  \mu_1(\theta) = 0
  \mu_2(\theta) = \theta

  For theta = -1.0, -0.9, ..., 1.0 the regret of structured UCB and of plain
  UCB is averaged over N runs and written to exp-b-1.txt and to stdout.
*/
void test_b() {
  const int N = 500;            /* number of runs averaged for each data point */
  const int n = 50000;          /* horizon */

  /* initialise the parameter space and arms */
  Interval Theta(-1,1);
  PiecewiseLinear arm1;

  arm1.push_back(Line(-1,0,1,0,false,false));

  PiecewiseLinear arm2;

  arm2.push_back(Line(-1,-1,1,1,false,false));

  std::vector<PiecewiseLinear> arms;
  arms.push_back(arm1);
  arms.push_back(arm2);

  /* generator for permuting the arms; std::random_shuffle was removed in
     C++17, so std::shuffle with an explicit engine is used instead */
  std::random_device rd;
  std::mt19937 shuffle_gen(rd());

  std::ofstream out1("exp-b-1.txt", std::ios::out);
  out1 << "theta ucbd_regret ucb_regret\n";

  /* iterate over theta \in [-1,1]; an integer counter avoids the rounding
     drift of repeatedly adding 0.1 */
  for (int step = 0; step <= 20; step++) {
    const double theta = -1.0 + step / 10.0;
    double total_ucbd = 0.0;
    double total_ucb = 0.0;
    for (int i = 0; i < N; i++) {
      std::shuffle(arms.begin(), arms.end(), shuffle_gen);                  /* randomly permute the arms */
      total_ucbd += alg_ucbd(arms, Theta, theta, n);                        /* run structured UCB */
      total_ucb += alg_ucb(arms[0].value(theta), arms[1].value(theta), n);  /* run standard UCB */
    }
    out1 << theta << " " << total_ucbd / N << " " << total_ucb / N << "\n";
    std::cout << theta << " " << total_ucbd / N << " " << total_ucb / N << "\n";
  }
  out1.close();
}

/* Compute tests on the problem with:
  \Theta = [-1, 1]
  \mu_1(\theta) = \theta
  \mu_2(\theta) = -\theta

  Two experiments are run, each averaging the regret of structured UCB and of
  plain UCB over N runs and writing the result to a file and to stdout:
    exp-a-1.txt : theta = -0.20, -0.19, ..., 0.20 at a fixed horizon
    exp-a-2.txt : theta = 0.04 with horizon 10, 5010, ... (below 100000)
*/
void test_a() {
  const int N = 500;          /* number of samples for each data point */
  const int n = 50000;        /* horizon */

  /* initialise parameter space and arms */
  Interval Theta(-1,1);
  PiecewiseLinear arm1;

  arm1.push_back(Line(-1,-1,1,1,false,false));

  PiecewiseLinear arm2;

  arm2.push_back(Line(-1,1,1,-1,false,false));

  std::vector<PiecewiseLinear> arms;
  arms.push_back(arm1);
  arms.push_back(arm2);

  /* generator for permuting the arms; std::random_shuffle was removed in
     C++17, so std::shuffle with an explicit engine is used instead */
  std::random_device rd;
  std::mt19937 shuffle_gen(rd());

  /* average regret of both algorithms over N runs for one (theta, horizon) */
  struct Averages {
    double ucbd;
    double ucb;
  };
  auto average_regret = [&](double theta, int horizon) -> Averages {
    double total_ucbd = 0.0;
    double total_ucb = 0.0;
    for (int i = 0; i < N; i++) {
      std::shuffle(arms.begin(), arms.end(), shuffle_gen);                        /* randomly permute the arms */
      total_ucbd += alg_ucbd(arms, Theta, theta, horizon);                        /* run UCB-S */
      total_ucb += alg_ucb(arms[0].value(theta), arms[1].value(theta), horizon);  /* run regular UCB */
    }
    Averages avg;
    avg.ucbd = total_ucbd / N;
    avg.ucb = total_ucb / N;
    return avg;
  };

  /* estimate the expected regret with varying theta; an integer counter
     avoids the rounding drift of repeatedly adding 0.01, which could make the
     final point theta = 0.2 appear or disappear */
  std::ofstream out1("exp-a-1.txt", std::ios::out);
  out1 << "theta ucbd_regret ucb_regret\n";
  for (int step = 0; step <= 40; step++) {
    const double theta = -0.2 + step / 100.0;
    const Averages avg = average_regret(theta, n);
    out1 << theta << " " << avg.ucbd << " " << avg.ucb << "\n";
    std::cout << theta << " " << avg.ucbd << " " << avg.ucb << "\n";
  }
  out1.close();

  /* estimate the expected regret for fixed \theta = 0.04, while increasing the horizon */
  std::ofstream out2("exp-a-2.txt", std::ios::out);
  out2 << "n ucbd_regret ucb_regret\n";
  const double theta = 0.04;
  for (int t = 10; t < 100000; t += 5000) {
    const Averages avg = average_regret(theta, t);
    out2 << t << " " << avg.ucbd << " " << avg.ucb << "\n";
    std::cout << t << " " << avg.ucbd << " " << avg.ucb << "\n";
  }
  out2.close();
}



int main() {
  /* the experiments, in the order they are run */
  void (*const experiments[])() = { test_a, test_b, test_c };
  for (auto run_experiment : experiments) {
    run_experiment();
  }
  return 0;
}









