"""
We implement additional hyperparameter optimization methods not present in
https://scikit-optimize.github.io/.

Gist: https://gist.github.com/Deepblue129/2c5fae9daf0529ed589018c6353c9f7b
"""

import math
import logging
import random

from tqdm import tqdm

logger = logging.getLogger(__name__)


def _random_points(dimensions, n_points, random_seed=None, test=False):
    """ Generate a random sample of points from dimensions """
    # NOTE: We supply as `randint` to `random_state`; otherwise, dimensions with the same distribution would
    # recive the same sequence of random numbers.
    # We seed `random` so the random seeds generated are deterministic.
    random.seed(random_seed)
    points = {
        d.name: d.rvs(n_samples=n_points, random_state=random.randint(0, 2**32))
        for d in dimensions
    }
    if test == True:
        points = [{k: i for k in points} for i in range(n_points) if i >= 10 or i ==4 or i ==5 or i == 6]
#        points = [{k: i for k in points} for i in range(n_points) if i >= 0 or i ==4 or i ==5 or i == 6]
    else:
        points = [{k: points[k][i] for k in points} for i in range(n_points)]
	
    return points

### functions for confidence curve

###### normal distribution

# import required libraries
from scipy.stats import norm
from scipy.stats import expon
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
 
# Reference normal curve: a grid from 1 up to (but excluding) 10 in steps of 0.01 ...
data = np.arange(1, 10, 0.01)
# ... and the density of a normal distribution with mean 5.3 and sd 1 on that grid.
pdf = norm.pdf(data, loc=5.3, scale=1)

# Visualizing the distribution (disabled):
# sb.set_style('whitegrid')
# sb.lineplot(data, pdf , color = 'black')
# plt.xlabel('Heights')
# plt.ylabel('Probability Density')

def Phi(x, mean=0, sd=1):
    """Cumulative distribution function of a normal(mean, sd) evaluated at ``x``."""
    return norm.cdf(x, loc=mean, scale=sd)

def phi(x, mean=0, sd=1):
    """Probability density function of a normal(mean, sd) evaluated at ``x``."""
    return norm.pdf(x, loc=mean, scale=sd)

import time
from timeit import default_timer as timer
def get_topk_confidence(mu_array, m, sigma, threshold=0.85):
    """Estimate the probability that the best of ``n`` noisy scores is among the first ``m``.

    Every score ``i`` is modelled as an independent normal variable with mean ``mu_array[i]``
    and standard deviation ``sigma``. For each grid point ``y`` the function accumulates

        sum_{i < m} pdf_i(y) / cdf_i(y)  *  prod_{all i} cdf_i(y)  *  delta_y

    i.e. a Riemann sum of the density that the overall maximum equals ``y`` and is attained by
    one of the first ``m`` entries. The grid runs over ``2 * delta_y, 3 * delta_y, ...`` for
    ``total_iteration`` steps with ``delta_y = 1 / total_iteration``, so it ends at
    ``1 + delta_y``.

    Args:
        mu_array (sequence of float): mean score per model, best candidates first.
        m (int): number of leading entries of ``mu_array`` treated as the kept set.
        sigma (float): common standard deviation of the scores; must be positive.
        threshold (float): unused; kept for backward compatibility.

    Returns:
        float: the estimated probability (also printed, together with the same sum restricted
        to the first ``m`` cdf factors, labelled "total pdf").

    Raises:
        ValueError: if ``sigma`` is not positive.
        IndexError: if ``m`` exceeds ``len(mu_array)``.
    """
    if sigma <= 0:
        raise ValueError('sigma must be positive, got {}'.format(sigma))

    means = np.asarray(mu_array, dtype=float)
    n = len(means)

    # Smaller sigma => finer grid. Always take at least one step: for sigma >= 2,
    # `round(1 / sigma)` is 0 and would otherwise cause a division by zero below.
    total_iteration = max(1, round(1.0 / sigma))
    if sigma < 0.05:
        total_iteration *= 2
    delta_y = 1.0 / total_iteration

    # Build the grid with the same running sum the integration uses (first point: 2 * delta_y).
    grid = []
    y = delta_y
    for _ in range(total_iteration):
        y += delta_y
        grid.append(y)
    ys = np.array(grid).reshape(-1, 1)

    # Evaluate every (grid point, model) pair in two vectorised calls instead of building a
    # frozen distribution per pair.
    pdf_table = norm.pdf(ys, loc=means, scale=sigma)
    cdf_table = norm.cdf(ys, loc=means, scale=sigma)

    probability = 0.0
    total_pdf = 0

    for pdf_row, cdf_row in zip(pdf_table, cdf_table):
        part_1 = 0.0
        part_2 = 1.0

        for i in range(m):
            # Skip the ratio when the cdf underflows to avoid dividing by ~0.
            if cdf_row[i] > 1e-120:
                part_1 += pdf_row[i] / cdf_row[i]
            part_2 *= cdf_row[i]

        total_pdf += part_1 * part_2 * delta_y

        for i in range(m, n):
            part_2 *= cdf_row[i]

        probability += part_1 * part_2 * delta_y

    print("probability = ", probability)
    print("total pdf = ", total_pdf)

    return probability

'''
### for testing
mu_array = []
mu_array = [0.28, 0.25, .24, .213, .19]
print(get_confidence_curve(mu_array, 0.2))
#print(get_topk_confidence(mu_array, 2, 0.2))
exit()
'''



def get_topk_confidence_backup(mu_array, m, sigma):
    """Legacy variant of the top-``m`` confidence estimate on a fixed 100-step grid.

    Scores are standardised as ``(y - mu) / sigma`` and evaluated with the standard normal
    ``phi`` / ``Phi``. The running ratio sum and cdf product are NOT reset between grid
    points; they keep accumulating across the whole grid.

    Args:
        mu_array (sequence of float): mean score per model.
        m (int): number of leading entries treated as the kept set.
        sigma (float): common standard deviation of the scores.

    Returns:
        float: the accumulated sum divided by ``sigma * 100``.
    """
    count = len(mu_array)
    steps = 100
    step = 1.0 / steps

    ratio_sum = 0.0
    cdf_product = 1.0
    top_mass = 0
    confidence = 0.0

    y = step
    for _ in range(steps):
        y += step

        for idx in range(m):
            z = (y - mu_array[idx]) / sigma
            density = phi(z)
            cumulative = Phi(z)
            ratio_sum += density / cumulative
            cdf_product *= cumulative

        top_mass += ratio_sum * cdf_product * step

        for idx in range(m, count):
            print(idx)
            z = (y - mu_array[idx]) / sigma
            cdf_product *= Phi(z)

        confidence += ratio_sum * cdf_product * step

    print("probability = ", confidence)
    print("total pdf = ", top_mass)
    return confidence / (sigma * 100)

def get_confidence_curve(mu_array, sigma, n_models=5, threshold=0.85):
    """Collect ``get_topk_confidence`` for k = 1, 2, ... until it exceeds ``threshold``.

    Args:
        mu_array (sequence of float): mean score per model.
        sigma (float): standard deviation passed on to ``get_topk_confidence``.
        n_models (int): upper bound on k (k never exceeds ``len(mu_array)`` either).
        threshold (float): the curve stops right after the first value above it.

    Returns:
        list of float: confidence for k = 1..K; the elapsed time is printed.
    """
    limit = min(len(mu_array), n_models)

    started = timer()
    curve = []
    for k in range(1, limit + 1):
        confidence = get_topk_confidence(mu_array, k, sigma)
        curve.append(confidence)
        if confidence > threshold:
            break

    print("Processing time (sec) : {}".format(timer() - started))
    return curve

'''	
print(get_confidence_curve(mu_array, 0.2))
mu_array = []
mu_array = [0.5, 0.25, .24, .213, .19]
#print(get_topk_confidence(mu_array, 1, 0.1))
exit()
'''

def get_next_round_n_model(mu_array, sigma, threshold, n_models=5):
    """Return how many leading models to keep for the next round.

    The answer is the smallest k whose entry in the confidence curve exceeds ``threshold``,
    capped at ``min(len(mu_array), n_models)``.

    Args:
        mu_array (sequence of float): mean score per model.
        sigma (float): standard deviation of the scores.
        threshold (float): confidence level to reach.
        n_models (int): maximum number of models that may be kept.

    Returns:
        int: number of models to keep (the confidence curve is printed as a side effect).
    """
    limit = min(len(mu_array), n_models)
    curve = get_confidence_curve(mu_array, sigma, n_models, threshold)

    print("confidence_curve = ", curve)

    keep = 1
    # Stop at the cap first so the curve is never indexed past the cap.
    while keep != limit and not curve[keep - 1] > threshold:
        keep += 1
    return keep

'''
mu_array = []
mu_array = [.253, .2526, .2529, .213, .19]
print(get_next_round_n_model(mu_array, sigma = 0.01, threshold = 0.85, n_models=3))
exit()
'''

def get_new_sigma(total_resources_per_model):
    """Piecewise (segment-based) approximation of sigma from the resources spent per model.

    Segments:
        * up to 5 resources: constant 0.035
        * above 5 and below 50: ``expon.pdf((x - 5) / 45) / 27``
        * 50 and above: ``0.0023 + 0.000033 * (x - 50)``
    """
    spent = total_resources_per_model
    if spent <= 5:
        sigma = 0.035
    elif spent < 50:
        sigma = expon.pdf((spent - 5) / 45) / 27
    else:
        sigma = 0.0023 + 0.000033 * (spent - 50)
    return sigma

def get_sigma(total_resources_per_model):
    """Return the sigma to assume after ``total_resources_per_model`` resources per model.

    Delegates to ``get_new_sigma``. The statements that used to follow the ``return``
    (an ``expon.pdf(x / 50) / 5`` model) were unreachable and have been removed.
    """
    return get_new_sigma(total_resources_per_model)

def successive_discarding(
        objective,
        dimensions,
        max_resources_per_round=45*3,
        total_budgets=45*3*5,
        threshold=0.9,
        initial_resources=3,
        n_models=45,
        random_seed=None,
        progress_bar=True,
        test=False):
    """
    Successive Discarding algorithm.

    Every round splits `max_resources_per_round` evenly over the remaining models, ranks the
    models by their best score so far (higher is better) and keeps only the number of top
    models returned by `get_next_round_n_model` for the given `threshold`. At most 5 rounds are
    run and no model receives more than 200 resources in total.

    Args:
        objective (callable): objective function to maximize
            Named Args:
                resources (int): cumulative resources of the model after this round, minus one
                checkpoint (any): saved data from past run
                **hyperparameters (any): hyperparameters to run
            Returns:
                score (float): score to maximize
                checkpoint (any): saved data from run
        dimensions (list of skopt.Dimensions): list of dimensions to search over
        max_resources_per_round: Max number of resources (e.g. epochs) to use per round
        total_budgets: Max number of resources to use over all rounds and models
        threshold: the confidence threshold to keep while discarding
        initial_resources: unused; kept for backward compatibility
        n_models (int): number of models to evaluate
        random_seed (int, optional): Random seed for generating hyperparameters
        progress_bar (boolean or tqdm): Iff to use or update a progress bar.
        test (bool): forwarded to `_random_points` to generate synthetic points
    Returns:
        scores (list of floats): Scores of the top objective executions
        hyperparameters (list of lists of dict): Hyperparameters with a one to one correspondence to scores.
    """

    round_n_models = lambda n: max(round(n), 1)

    total_resources_per_model = 0
    total_resources = 0
    round_number = 0
    hyperparameters = _random_points(dimensions, round_n_models(n_models), random_seed, test)
    checkpoints = [None for _ in range(round_n_models(n_models))]
    scores = [0 for _ in range(round_n_models(n_models))]

    # Create a new progress bar
    remember_to_close = False
    if not isinstance(progress_bar, tqdm) and progress_bar:
        remember_to_close = True
        progress_bar = tqdm()
    # Also covers a caller-supplied bar that has no stats yet.
    if isinstance(progress_bar, tqdm) and not hasattr(progress_bar, 'stats'):
        setattr(progress_bar, 'stats', {'max_score': 0, 'models_evaluated': 0})

    while round_number < 5 and n_models > 1 and total_resources < total_budgets:
        round_number += 1
        # Compute the number of resources to continue running each model with
        update_n_resources = round_n_models(max_resources_per_round / n_models)

        # Do not plan beyond the overall budget (still at least one resource per model).
        if total_resources + update_n_resources * n_models > total_budgets:
            update_n_resources = round_n_models((total_budgets - total_resources) / n_models)

        if total_resources_per_model + update_n_resources > 200:  # TODO: max_resources_per_model
            update_n_resources = 200 - total_resources_per_model

        print("this round update resource : ", update_n_resources)
        print("total resources per model: ", total_resources_per_model)
        results = []
        cnt = 0
        for score, checkpoint, params in zip(scores, checkpoints, hyperparameters):
            new_score, new_checkpoint = objective(
                resources=total_resources_per_model + update_n_resources - 1,
                checkpoint=checkpoint,
                **params)
            # Keep the best score seen so far for this model.
            new_score = max(score, new_score)
            results.append(tuple([new_score, new_checkpoint]))
            if isinstance(progress_bar, tqdm):
                progress_bar.update(update_n_resources)
                if progress_bar.stats['max_score'] < new_score:
                    progress_bar.stats['max_score'] = new_score
                    progress_bar.set_postfix(progress_bar.stats)
            cnt += 1

        print("cnt ---------", cnt)

        total_resources += update_n_resources * n_models
        total_resources_per_model += update_n_resources

        is_last_iteration = total_resources >= total_budgets or round_number >= 5 or n_models == 1

        if not is_last_iteration:
            # Sort by descending score `k[0][0]'
            results = sorted(zip(results, hyperparameters), key=lambda k: -k[0][0])
            # Update `hyperparameters' lists
            results, hyperparameters = zip(*results)

            mu_array = [result[0] for result in results]
            print("results ~~~ ", results)
            sigma = get_sigma(total_resources_per_model)

            print(" --- mu : ", mu_array)
            print(" --- sigma : ", sigma)
            print(" --- n_models : ", n_models)
            if sigma == 0:
                # `hyperparameters` was just re-sorted; refresh `scores` / `checkpoints` so
                # the returned lists stay aligned one to one.
                scores, checkpoints = zip(*results)
                break

            n_models = get_next_round_n_model(mu_array, sigma, threshold, n_models)
            # Models discarded this round are finished.
            models_evaluated = len(results) - n_models

            results = results[: n_models]
            hyperparameters = hyperparameters[: n_models]
        else:
            models_evaluated = len(results)

        scores, checkpoints = zip(*results)
        print("scores:", scores[: n_models])
        print("checkpoints:", checkpoints[: n_models])

        print("Theses are the hyperparameters for the next round -------- ", hyperparameters)

        print(" [----] total_iterations:", total_resources_per_model)

        if isinstance(progress_bar, tqdm):
            progress_bar.stats['models_evaluated'] += models_evaluated
            progress_bar.set_postfix(progress_bar.stats)

    if remember_to_close:
        progress_bar.close()

    # final accuracy curve
    sigma = get_sigma(total_resources_per_model)

    # Default keeps the print below valid when sigma is not positive.
    confidence_curve = []
    if sigma > 0:
        confidence_curve = get_confidence_curve(scores, sigma, n_models, threshold)

    print("final confidence curve :", confidence_curve)

    return scores, hyperparameters

### TEST ###
import unittest

import random
from skopt.space import Real, Integer

from lib.utils import config_logging

# Hand-written 5 x 5 accuracy table for experiments.
# NOTE(review): presumably indexed as [resources][model index] -- confirm against its users.
synthesized_accuracy = [
    [0.28, 0.25, 0.24, 0.213, 0.19],
    [0.30, 0.27, 0.26, 0.24, 0.20],
    [0.33, 0.31, 0.29, 0.23, 0.19],
    [0.34, 0.35, 0.31, 0.29, 0.22],
    [0.35, 0.37, 0.32, 0.30, 0.24],
]

import pandas as pd
def synthesized_test():
    """Run ``successive_discarding`` against scores stored in ``data/syn_v0.xlsx``.

    The spreadsheet is read into a DataFrame and used as a lookup table: the objective returns
    the cell at row ``resources`` and column ``integer``. The resulting scores and
    hyperparameters are printed.
    """
    df = pd.read_excel('data/syn_v0.xlsx')

    synthesized_dimensions = [Integer(0, 4, name='integer')]

    def mock_objective(resources, integer=0, checkpoint=None):
        """Return the tabulated score for (resources, integer); the checkpoint is ``integer``."""
        return df.iloc[resources, integer], integer

    scores, hyperparameters = successive_discarding(
        objective=mock_objective,
        dimensions=synthesized_dimensions,
        max_resources_per_round=250,
        total_budgets=300*5,  # 250*5
        threshold=0.8,  # 0.85
        initial_resources=3,
        n_models=50,
        random_seed=None,
        progress_bar=True,
        test=True)

    print(scores, hyperparameters)

'''

from data.nasbench2.nats import get_accuracy

def objective(resources, integer=0, checkpoint=None):
	return get_accuracy(integer, resources, is_tss=True) / 100, integer
#	return get_accuracy(integer, resources, is_tss=True) / 100, integer

synthesized_dimensions = [Integer(0, 4, name='integer')]


scores, hyperparameters = successive_discarding(
			objective = objective,
			dimensions = synthesized_dimensions,
			max_resources_per_round = 250,
			total_budgets = 250*2, # decrease total budget
			threshold = 0.9,
			initial_resources=3,
			n_models=50,
			random_seed=None,
			progress_bar=True,
			test=True)


print(scores, hyperparameters)
'''


def successive_halving(
        objective,
        dimensions,
        max_resources_per_model=81,
        downsample=3,  # Test random downsamples work and check boundaries
        initial_resources=3,
        n_models=45,
        random_seed=None,
        progress_bar=True):
    """
    Adaptation of the Successive Halving algorithm.

    tl;dr keep the best models every iteration of the `initial_models` downsampling each time

    Adaptation: Instead of running N / 2 models for 2T, we run N / downsample models until each
    has used T * downsample resources in total (capped at `max_resources_per_model`).
    This adaptation is the same adaptation of Successive Halving in hyperband.

    Reference: http://proceedings.mlr.press/v51/jamieson16.pdf
    Reference: http://www.argmin.net/2016/06/23/hyperband/

    TODO: Splitting in half is a fairly random number. We could possibly look for a better split
    point. For example, we could use the large margin to split points. Or predict the performance
    of hyperparameters would do well in the future.

    Args:
        objective (callable): objective function to minimize
            Named Args:
                resources (int): number of resources (e.g. epochs) to use while training model
                checkpoint (any): saved data from past run
                **hyperparameters (any): hyperparameters to run
            Returns:
                score (float): score to minimize
                checkpoint (any): saved data from run
        dimensions (list of skopt.Dimensions): list of dimensions to minimize under
        max_resources_per_model: Max number of resources (e.g. epochs) to use per model
        downsample: Downsampling of models (e.g. halving is a downsampling of 2)
        initial_resources: Number of resources (e.g. epochs) to use initially to evaluate first
          round.
        n_models (int): Number of models to evaluate
        random_seed (int, optional): Random seed for generating hyperparameters
        progress_bar (boolean or tqdm): Iff to use or update a progress bar.
    Returns:
        scores (list of floats): Scores of the top objective executions
        hyperparameters (list of lists of dict): Hyperparameters with a one to one correspondence
            to scores.
    Raises:
        ValueError: if `downsample` is not greater than 1.
    """
    if downsample <= 1:
        raise ValueError('Downsample must be > 1; otherwise, the number of resources allocated '
                         'does not grow')

    round_n_models = lambda n: max(round(n), 1)

    total_resources_per_model = 0
    hyperparameters = _random_points(dimensions, round_n_models(n_models), random_seed)
    checkpoints = [None for _ in range(round_n_models(n_models))]
    scores = [math.inf for _ in range(round_n_models(n_models))]

    # Create a new progress bar
    remember_to_close = False
    if not isinstance(progress_bar, tqdm) and progress_bar:
        remember_to_close = True
        # TODO: Compute the tqdm total
        progress_bar = tqdm()
    # Keep tabs on a set of stats; also covers a caller-supplied bar that has none yet.
    if isinstance(progress_bar, tqdm) and not hasattr(progress_bar, 'stats'):
        setattr(progress_bar, 'stats', {'min_score': math.inf, 'models_evaluated': 0})

    while total_resources_per_model < max_resources_per_model:
        # Compute number of resources to continue running each model with
        if total_resources_per_model == 0:
            update_n_resources = initial_resources
        else:
            # Grow the per-model total by a factor of `downsample`, capped at the maximum.
            update_n_resources = min(
                total_resources_per_model * downsample - total_resources_per_model,
                max_resources_per_model - total_resources_per_model)

        results = []
        for score, checkpoint, params in zip(scores, checkpoints, hyperparameters):
            new_score, new_checkpoint = objective(
                resources=update_n_resources, checkpoint=checkpoint, **params)
            # Keep the best (lowest) score seen so far for this model.
            new_score = min(score, new_score)
            results.append(tuple([new_score, new_checkpoint]))
            if isinstance(progress_bar, tqdm):
                progress_bar.update(update_n_resources)
                if progress_bar.stats['min_score'] > new_score:
                    progress_bar.stats['min_score'] = new_score
                    progress_bar.set_postfix(progress_bar.stats)

        total_resources_per_model += update_n_resources

        # NOTE: Models are only discarded if another round follows
        is_last_iteration = total_resources_per_model >= max_resources_per_model
        if not is_last_iteration:
            # Sort by minimum score `k[0][0]`
            results = sorted(zip(results, hyperparameters), key=lambda k: k[0][0])
            models_evaluated = len(results) - round_n_models(n_models / downsample)
            results = results[:round_n_models(n_models / downsample)]
            # Update `hyperparameters` lists
            results, hyperparameters = zip(*results)
            n_models = n_models / downsample
        else:
            models_evaluated = len(results)

        # Update `scores` and `checkpoints` lists
        scores, checkpoints = zip(*results)

        if isinstance(progress_bar, tqdm):
            progress_bar.stats['models_evaluated'] += models_evaluated
            progress_bar.set_postfix(progress_bar.stats)

    if remember_to_close:
        progress_bar.close()

    return scores, hyperparameters


def hyperband(objective,
              dimensions,
              max_resources_per_model=81,
              downsample=3,
              total_resources=None,
              random_seed=None,
              progress_bar=True):
    """
    Adaptation of the Hyperband algorithm

    tl;dr search over the space of successive halving hyperparameters

    Adaptation: Originally Hyperband was implemented with the assumption that we cannot reuse
    models. We redid the math allowing for reusing models. This is particularly helpful in speeding
    up 1 GPU hyperparameter optimization. Just to clarify, by reusing models, we mean that
    given hyperparameters `x` and epochs `y`, we can use one model to evaluate all `y` integers
    with hyperparameters `x`.

    Reference: https://arxiv.org/pdf/1603.06560.pdf
    Reference: http://www.argmin.net/2016/06/23/hyperband/

    TODO: Implement extension to hyperband purporting an increase of x4:
    https://arxiv.org/pdf/1705.10823.pdf
    http://www.ijcai.org/Proceedings/15/Papers/487.pdf

    Args:
        objective (callable): objective function to minimize
            Named Args:
                resources (int): number of resources (e.g. epochs) to use while training model
                checkpoint (any): saved data from past run
                **hyperparameters (any): hyperparameters to run
            Returns:
                score (float): score to minimize
                checkpoint (any): saved data from run
        dimensions (list of skopt.Dimensions): list of dimensions to minimize under
        max_resources_per_model (float): Max number of resources (e.g. epochs) to use per model
        downsample (int): Downsampling of models (e.g. halving is a downsampling of 2)
        total_resources (optional): Max number of resources hyperband is allowed to use over the
            entirety of the algorithm.
        random_seed (int, optional): Random seed for generating hyperparameters
        progress_bar (boolean, optional): Boolean for displaying tqdm
    Returns:
        scores (list of floats): Scores of the top objective executions
        hyperparameters (list of lists of dict): Hyperparameters with a one to one correspondence
            to scores.
    Raises:
        ValueError: if `downsample` is not greater than 1.
    """
    if downsample <= 1:
        raise ValueError('Downsample must be > 1; otherwise, the number of resources allocated '
                         'does not grow')

    all_scores = []
    all_hyperparameters = []

    # Number of times to run hyperband
    # Ex. `max_resources_per_model = 81 and downsample = 3`
    #     Then => initial_resources = [1, 3, 9, 27, 81]
    #     And => `hyperband_rounds = 5`
    #     And => `successive_halving_rounds = [5, 4, 3, 2, 1]`
    n_hyperband_rounds = math.floor(math.log(max_resources_per_model, downsample)) + 1
    # `math.log` with an arbitrary base can land a hair off an exact integer
    # (e.g. `math.log(1000, 10) == 2.9999999999999996`); correct the count with exact powers.
    if downsample**n_hyperband_rounds <= max_resources_per_model:
        n_hyperband_rounds += 1
    elif downsample**(n_hyperband_rounds - 1) > max_resources_per_model:
        n_hyperband_rounds -= 1

    if total_resources is None:
        # TODO: Multiply by the number of dimensions so it scales the number of models
        # given the large space
        total_resources_per_round = max_resources_per_model * n_hyperband_rounds
    else:
        total_resources_per_round = total_resources / n_hyperband_rounds
    total_models_evaluated = 0

    if progress_bar:
        progress_bar = tqdm(total=total_resources_per_round * n_hyperband_rounds)
        setattr(progress_bar, 'stats', {'min_score': math.inf, 'models_evaluated': 0})

    for i in reversed(range(n_hyperband_rounds)):
        n_successive_halving_rounds = i + 1

        # NOTE: Attained by running the below code on https://sandbox.open.wolframcloud.com:
        #   Reduce[Power[d, j - 1] * (x / Power[d, j]) +
        #   Sum[(Power[d, i] - Power[d, i - 1]) * (x / Power[d, i]), {i, j, k}] == e
        #   && k >=j>=1 && k>=1 && d>=1, {x}]
        # `e` is `total_resources_per_round`
        # `x` is `n_models`
        # `k - j` is `i`
        # `d` is downsample
        # The summation is similar to the successive halving rounds loop. It computes the number
        # of resources with reuse run in total. This is different from hyperband that assumes
        # no reuse.
        n_models = downsample * total_resources_per_round
        n_models /= downsample * (1 + i) - i
        n_models /= downsample**(-i + n_hyperband_rounds - 1)
        total_models_evaluated += n_models

        scores, hyperparameters = successive_halving(
            objective=objective,
            dimensions=dimensions,
            max_resources_per_model=max_resources_per_model,
            downsample=downsample,
            initial_resources=max_resources_per_model / downsample**i,
            n_models=n_models,
            random_seed=random_seed,
            progress_bar=progress_bar)
        logger.info('Finished hyperband round: %d of %d', n_hyperband_rounds - i - 1,
                    n_hyperband_rounds - 1)
        all_scores.extend(scores)
        all_hyperparameters.extend(hyperparameters)

    if isinstance(progress_bar, tqdm):
        progress_bar.close()

    total_resources_used = total_resources_per_round * n_hyperband_rounds
    logger.info('Total models evaluated: %f', total_models_evaluated)
    logger.info('Total resources used: %f', total_resources_used)
    if total_models_evaluated:
        # Average = resources / models (not models / resources).
        logger.info('Total resources used per model on average: %f',
                    total_resources_used / total_models_evaluated)

    return all_scores, all_hyperparameters


### TEST ###
# Runs at import time: set up logging through the project helper `config_logging`.
config_logging()

# Search space shared by the unit tests below: a single skopt `Integer` dimension named
# 'integer' with bounds 1 and 100.
mock_dimensions = [Integer(1, 100, name='integer')]


def mock(resources, integer=0, checkpoint=None):
    """Mock objective: score and checkpoint are both the ``integer`` hyperparameter.

    On the first call (no checkpoint) the ``integer`` hyperparameter is returned; on later
    calls the checkpoint is echoed back, so the score never changes. ``resources`` is ignored.
    """
    value = integer if checkpoint is None else checkpoint
    return value, value


class TestHyperparameterOptimization(unittest.TestCase):
    """Smoke tests for `hyperband` and `successive_halving` using the `mock` objective."""

    def _check_scores_equal_integers(self, scores, hyperparameters):
        # With `mock`, every score must equal the 'integer' hyperparameter that produced it.
        for score, hyperparameter in zip(scores, hyperparameters):
            self.assertEqual(score, hyperparameter['integer'])

    def test_hyperband_simple(self):
        # Basic check on hyperband
        outcome = hyperband(objective=mock, dimensions=mock_dimensions)
        self._check_scores_equal_integers(*outcome)

    def test_successive_halving_simple(self):
        # Basic check on successive halving
        outcome = successive_halving(objective=mock, dimensions=mock_dimensions)
        self._check_scores_equal_integers(*outcome)

    def test_hyperband_no_progress_bar(self):
        # Basic check on hyperband without a progress bar
        outcome = hyperband(objective=mock, dimensions=mock_dimensions, progress_bar=False)
        self._check_scores_equal_integers(*outcome)

    def test_successive_halving_no_progress_bar(self):
        # Basic check on successive halving without a progress bar
        outcome = successive_halving(
            objective=mock, dimensions=mock_dimensions, progress_bar=False)
        self._check_scores_equal_integers(*outcome)

    def test_successive_halving_downsample(self):
        # A downsample of 1 must be rejected
        self.assertRaises(
            ValueError,
            successive_halving,
            objective=mock,
            dimensions=mock_dimensions,
            progress_bar=False,
            downsample=1,
            n_models=45)


# Run the unit tests defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
