import tensorflow as tf

"""
    The hyper parameter of the ddpg algorithm
    also used for ddpg-pbrs
"""
ddpg_hyper_params = {
    "batch_size": 1024,
    "replay_buffer_size": 1000000,
    "update_freq": 100,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1
}

"""
    The hyper parameter of the ddpg-dpba algorithm
"""
ddpg_dpba_hyper_params = {
    "batch_size": 1024,
    "replay_buffer_size": 1000000,
    "update_freq": 100,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_phi": 1e-5,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "phi_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "phi_gradient_norm_clip": 50.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "phi_net_layers": [16, 8],
    "phi_hidden_layer_act_func": tf.nn.tanh
}

"""
    The hyper parameter of the ddpg-oprs-v1 algorithm
    also used for ddpg-oprs-v1-freeze
"""
ddpg_oprs_v1_hyper_params = {
    "batch_size": 512, #1024,
    "replay_buffer_size": 1000000,
    "update_freq": 50,
    "update_num_per_switch": 400,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 5e-4,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0, #10.0 for neg,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
    "f_output_layer_act_func": None,
    "update_pi_f_simutaneously": False,
    "f_phi_min": None,#-1,
    "f_phi_max": None,#1
}


"""
    The hyper parameter of the ddpg-oprs-v1-fop algorithm
"""
ddpg_oprs_v1_fop_hyper_params = {
    "batch_size": 1024,
    "replay_buffer_size": 1000000,
    "update_freq": 100,
    "truncation_size": 1000,
    "update_num_per_switch_atof": 1000,
    "update_num_per_switch_ftoa": 4,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 1e-5,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "f_optim_epochs": 50,
    "f_optim_batch_size": 1024,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
    "f_output_layer_act_func": None,
    "f_phi_min": None,
    "f_phi_max": None
}

"""
    The hyper parameter of the ddpg-oprs-v2 algorithm
"""
ddpg_oprs_v2_hyper_params = {
    "policy_batch_size": 40,
    "weight_func_batch_size": 40,
    "replay_buffer_size": 50000,
    "update_freq": 100,
    "update_num_per_switch": 5,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 1e-5,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh
}

"""
    The hyper parameter of the ddpg-oprs-v2-approx algorithm
"""
ddpg_oprs_v2_approx_hyper_params = {
    "policy_batch_size": 40,
    "weight_func_batch_size": 40,
    "replay_buffer_size": 50000,
    "update_freq": 100,
    "update_num_per_switch": 5,
    "nabla_theta_wrt_phi_sam_num": 4,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 1e-5,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
    "f_output_layer_act_func": None,
    "f_phi_min": None,
    "f_phi_max": None
}

"""
    The hyper parameter of the ddpg-oprs-v2-fop algorithm
"""
ddpg_oprs_v2_fop_hyper_params = {
    "policy_batch_size": 40,
    "weight_func_batch_size": 40,
    "replay_buffer_size": 50000,
    "update_freq": 50,
    "update_num_per_switch_atof": 800,
    "update_num_per_switch_ftoa": 20,
    "nabla_theta_wrt_phi_sam_num": 20,#4,
    "episode_truncation_size": 40,
    "gamma": 0.999,
    "tau": 0.001, #0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 2e-4,#1e-5,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "f_optim_epochs": 50,
    "f_optim_batch_size": 1024,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
    "f_output_layer_act_func": None,
    "f_phi_min": None,
    "f_phi_max": None
}

"""
    The hyper parameter of the ddpg-oprs-v2-fsa algorithm
    2020-01-13, 9:57, positive test, switch_atof: 800, switch_ftoa: 4, lr_f: 1e-5, foutput: None, result: 160
    2020-01-14, 10:51, positive test, switch_atof: 800, switch_ftoa: 4, lr_f: 1e-5, foutput: None,
                        negative gradient, 
    
    
    2020-01-14, 10:49, negative test, switch_atof: 800, switch_ftoa: 4, lr_f: 1e-3, foutput: None,
                        negative gradient, bad
    2020-01-14, 17:32, negative test, switch_atof: 800, switch_ftoa: 4, lr_f: 1e-3, foutput: [-1,1],
                        negative gradient, f_norm: 1.0, bad
    
    2020-01-15, 09:29, negative test, switch_atof: 800, switch_ftoa: 10, lr_f: 1e-3, foutput: None,
                        positive gradient,
"""
ddpg_oprs_v2_fsa_hyper_params = {
    "policy_batch_size": 40,
    "weight_func_batch_size": 40,
    "replay_buffer_size": 50000,
    "update_freq": 50,#100,
    "update_num_per_switch_atof": 800,#400,
    "update_num_per_switch_ftoa": 1,#4,#30,#12 for pos,
    "nabla_theta_wrt_phi_sam_num": 80, #40,#20, #20 for pos,#4,
    "episode_truncation_size": 40,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 1e-3,#1e-5,#5e-4,#1e-5 for pos,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": False,#True for pos,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0, #50.0 for pos,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8], #[4, 4], #[16, 8],
    "f_optim_epochs": 50,
    "f_optim_batch_size": 1024,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,#tf.nn.relu,#tf.nn.tanh,
    "f_output_layer_act_func": None,
    "f_phi_min": None,#None for pos,
    "f_phi_max": None,#None for pos
}

"""
    The hyper parameter of the ddpg-oprs-v2-fsaqin algorithm
"""
ddpg_oprs_v2_fsaqin_hyper_params = {
    "policy_batch_size": 40,
    "weight_func_batch_size": 40,
    "replay_buffer_size": 50000,
    "update_freq": 50,
    "update_num_per_switch_atof": 800,#5,
    "update_num_per_switch_ftoa": 1,
    "nabla_theta_wrt_phi_sam_num": 100,
    "episode_truncation_size": 40,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 2e-3,#5e-4, #1e-5 for pos,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,#10.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "f_optim_epochs": 50,
    "f_optim_batch_size": 1024,
    "enable_grad_q_shaped_wrt_a": True,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
    "f_output_layer_act_func": None,
    "f_phi_min": None,
    "f_phi_max": None
}

"""
    The hyper parameter of the ddpg-oprs-v3-fsaqin algorithm
"""
ddpg_oprs_v3_fsaqin_hyper_params = {
    "policy_batch_size": 40,
    "weight_func_batch_size": 40,
    "replay_buffer_size": 50000,
    "update_freq": 50, #100,
    "update_num_per_switch_atof": 800, #400,
    "update_num_per_switch_ftoa": 1,
    "nabla_theta_wrt_phi_sam_num": 40,#80, #100,
    "episode_truncation_size": 40,
    "gamma": 0.999,
    "tau": 0.01,
    "lr_actor": 1e-4,
    "lr_critic": 2e-4,
    "lr_f": 2e-3,#1e-5,#5e-4,#1e-6,#1e-5,#1e-3,
    "lr_h": 1e-3,
    "explo_method": "OU",
    "ou_noise_theta": 0.15,
    "ou_noise_sigma": 0.5,
    "gaussian_explo_sigma_ratio_fix": 0.2,
    "gaussian_explo_sigma_ratio_max": 1.0,
    "gaussian_explo_sigma_ratio_min": 1e-5,
    "gaussian_explo_sigma_ratio_decay_ep": 60000,
    "actor_gradient_clip": True,
    "critic_gradient_clip": True,
    "f_gradient_clip": True,
    "actor_gradient_norm_clip": 1.0,
    "critic_gradient_norm_clip": 1.0,
    "f_gradient_norm_clip": 50.0,
    "actor_net_layers": [4, 4],
    "critic_net_layers": [32, 32],
    "critic_action_input_layer_index": 1,
    "f_net_layers": [16, 8],
    "f_optim_epochs": 50,
    "f_optim_batch_size": 1024,
    "enable_hessian_computing": True, # if opg is disabled, we require actor output activation is tanh
    "hessian_opg_approx": True,
    "enable_grad_q_shaped_wrt_a": True,
    "net_add_one": True,
    "f_hidden_layer_act_func": tf.nn.tanh,
    "f_output_layer_act_func": None,
    "f_phi_min": None,
    "f_phi_max": None
}
