import numpy as np
import tensorflow as tf
print(tf.__version__)
import random as rn

import joblib
import itertools
import pandas as pd
from matplotlib import pyplot as plt

from data_engineering.analyze_data import analyze
from data_engineering.generate_synthetic_dataset import generate_brownian_asset_dynamics
from models.dos_model import train_dos_model
from models.rrlsm_model import train_rrlsm_model
from models.fqi_model import train_mc_fqi_model
from models.ospg_model import train_mc_ospg_model

# REPRODUCIBILITY: seed every random number generator this script touches.
SEED = 42
import os
import time
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# assigning it here presumably only affects child processes -- confirm intent.
os.environ['PYTHONHASHSEED'] = '0'
# Expose only GPU 0 to CUDA.
# NOTE(review): this runs after `import tensorflow` above -- confirm the
# setting is still honoured at that point.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
np.random.seed(SEED)  # NumPy global RNG
rn.seed(SEED)  # Python standard-library `random` module
tf.random.set_seed(SEED)  # TensorFlow global seed

# Directory prefix for result files. It is joined by plain string
# concatenation below, so it must end with a path separator.
RESULTS_DIR = './'

# Stored under 'option_type' in the option parameters passed to the trainers.
OPTION_TYPE='american_geometric_call'
# Used as 'batch_size' in both model configs.
BATCH_SIZE = 128
# Fold counts passed to the training functions; NUM_TRAIN_FOLDS also
# determines how many result rows are recorded per algorithm and experiment.
NUM_TRAIN_FOLDS = 1
NUM_TEST_FOLDS = 1
# Used as 'train_samples_per_epoch' / 'test_samples_per_epoch' in the configs.
NUM_TRAIN_BATCHES = 10000
NUM_TEST_BATCHES = 3000

# Experiment grid: main() runs the Cartesian product of the three lists below.
NUM_ASSETS = [7, 13, 20, 100,200]
CURRENT_PRICE = [90, 100, 110]
# Only written to the 'num_trajectories' result column; the model configs are
# built from BATCH_SIZE and NUM_TRAIN_BATCHES directly.
NUM_TRAJECTORIES = [BATCH_SIZE*NUM_TRAIN_BATCHES]


# Full path of the results CSV (already includes RESULTS_DIR).
EXPERIMENT_FNAME = RESULTS_DIR + 'american_geometric_call_experiment.csv'


def _record_fold_results(experiment_df, next_row, algorithm, option_parameters,
                         num_trajectories, result, mean_key, std_key):
    """Append one row per training fold of ``result`` to ``experiment_df``.

    Args:
        experiment_df: Results table, modified in place via ``.loc``.
        next_row: Row label to use for the first appended row.
        algorithm: Value for the 'algorithm' column (e.g. 'DNN_FQI').
        option_parameters: Option parameter dict; 'num_assets' and
            'current_price' are copied into the row.
        num_trajectories: Value for the 'num_trajectories' column.
        result: Dict returned by a training function; indexed per fold under
            ``mean_key``, ``std_key``, 'train_times' and
            'prediction_time_per_ts'.
        mean_key: Key in ``result`` holding the per-fold reward means.
        std_key: Key in ``result`` holding the per-fold reward stds.

    Returns:
        The next unused row label.
    """
    for fold in range(NUM_TRAIN_FOLDS):
        # Value order must match the column order of experiment_df.
        experiment_df.loc[next_row] = [
            algorithm,
            option_parameters['num_assets'],
            option_parameters['current_price'],
            fold,
            result[mean_key][fold],
            result[std_key][fold],
            num_trajectories,
            result['train_times'][fold],
            result['prediction_time_per_ts'][fold],
        ]
        next_row += 1
    return next_row


def main():
    """Run the FQI and OSPG experiments over the parameter grid and save a CSV.

    For every combination in NUM_ASSETS x CURRENT_PRICE x NUM_TRAJECTORIES the
    function builds the option parameters and both model configs, trains the
    FQI model and then the OSPG model, and records one result row per training
    fold and algorithm. The collected table is written to EXPERIMENT_FNAME and
    the total wall-clock run time is printed in minutes.
    """
    start_time = time.time()
    experiment_df = pd.DataFrame(
    {
        'algorithm': [],
        'num_assets': [],
        'current_price': [],
        'fold': [],
        'reward_mean': [],
        'reward_std': [],
        'num_trajectories': [],
        'train_time': [],
        'prediction_time_per_ts': []
    })

    experiments = list(itertools.product(NUM_ASSETS, CURRENT_PRICE, NUM_TRAJECTORIES))
    exp_row = 0
    for exp_ctr, (num_assets, current_price, num_trajectories) in enumerate(experiments, start=1):
        option_parameters = {
            'risk_free_rate': 0.0,
            'dividend_yield': 0.02,
            'volatility_sigma': 0.25,
            'time_horizon_yrs': 2,
            'num_exercise_opportunities': 100,
            'option_strike_price': 100,
            'num_assets': num_assets,
            'current_price': current_price,
            'rhoij': 0.75,
            'option_type': OPTION_TYPE
        }

        ospg_config = {
            'batch_size': BATCH_SIZE,
            'os_epochs': 1,
            'train_samples_per_epoch': NUM_TRAIN_BATCHES,
            'test_samples_per_epoch': NUM_TEST_BATCHES,
            'os_lr': 0.001,
            'L': option_parameters['num_exercise_opportunities'],
            'F': option_parameters['num_assets'],
            'clipnorm': 5,
            'use_DNN': True,
            'include_R': True,
            'num_stacked_layers': 2,
            'units_hidden': (20 + option_parameters['num_assets'])
        }

        fqi_config = {
            'batch_size': BATCH_SIZE,
            'os_epochs': 1,
            'train_samples_per_epoch': NUM_TRAIN_BATCHES,
            'test_samples_per_epoch': NUM_TEST_BATCHES,
            'q_lr': 0.001,
            'L': option_parameters['num_exercise_opportunities'],
            'F': option_parameters['num_assets'],
            'clipnorm': 5,
            'skip_connect': False,
            'use_DNN': True,
            'include_R': True,
            'num_stacked_layers': 2,
            'units_hidden': (20 + option_parameters['num_assets'])
        }

        ## FQI
        q_result = train_mc_fqi_model(fqi_config, option_parameters, NUM_TRAIN_FOLDS, NUM_TEST_FOLDS)
        exp_row = _record_fold_results(
            experiment_df, exp_row, 'DNN_FQI', option_parameters,
            num_trajectories, q_result, 'q_reward_mean', 'q_reward_std')

        ## OSPG
        os_result = train_mc_ospg_model(ospg_config, option_parameters, NUM_TRAIN_FOLDS, NUM_TEST_FOLDS)
        exp_row = _record_fold_results(
            experiment_df, exp_row, 'DNN_OSPG', option_parameters,
            num_trajectories, os_result, 'os_reward_mean', 'os_reward_std')

        print('done_experiment %d of %d' % (exp_ctr, len(experiments)))

    # EXPERIMENT_FNAME already contains RESULTS_DIR; prefixing it a second time
    # would write to e.g. 'results/results/...' for any directory but './'.
    output_dir = os.path.dirname(EXPERIMENT_FNAME)
    if output_dir:
        # Make sure a long run is not lost because the target directory is missing.
        os.makedirs(output_dir, exist_ok=True)
    experiment_df.to_csv(EXPERIMENT_FNAME)
    end_time=time.time()
    print('run_time(min): ' + str((end_time-start_time)/60))
    print("done")


# Run the experiment sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()