################################################################################
# Script to simulate cosine and/or polynomial datasets and save the data in 
# .npy format.
################################################################################

import sys
from pathlib import Path
import random
import numpy as np
import math

def generate_polynomial(data_path, n_samples=10000, n_features=1000):
    """
    Generate a degree-2 polynomial dataset and save it as ``polynomial.npy``.

    Each sample is the function f(x) = ax^2 + bx + c evaluated at a shared set
    of x values, using parameters a, b, and c that are fixed within a sample
    and drawn uniformly from [-10, 10]. The x values are drawn uniformly from
    [-10, 10] once and reused for every sample. If the output file already
    exists, nothing is generated and the file is left untouched.

    Parameters
    ----------
    data_path : str or pathlib.Path
        Path to folder to save data. This function does not create the folder.
    n_samples : int, default = 10000
        Number of samples to generate. This is equivalent to the number of
        values to try for the a, b, and c parameters.
    n_features : int, default = 1000
        Number of features to generate. This is equivalent to the number of x
        values used to evaluate the function f(x) for each sample.

    Notes
    -----
    Seeds the global ``random`` module with 0 so repeated runs produce the
    same dataset.
    """
    def f(x, a, b, c):
        return a*x**2 + b*x + c

    # Set the random seed for reproducibility
    random.seed(0)

    # Accept plain strings as well as Path objects for the output folder
    final_filename = Path(data_path)/'polynomial.npy'
    if final_filename.exists():
        print(f'{final_filename} file already exists. Will not '
              'overwrite. Terminating.')
        return

    print('Generating Polynomial Dataset')
    # Generate x values which are fixed across all samples
    x_values = [random.uniform(-10, 10) for _ in range(n_features)]
    samples = []
    for _ in range(n_samples):
        # Draw a, b, c in this exact order so the seeded sequence (and hence
        # the dataset) stays identical from run to run
        a = random.uniform(-10, 10)
        b = random.uniform(-10, 10)
        c = random.uniform(-10, 10)
        samples.append([f(x, a, b, c) for x in x_values])

    # Force a 2-D (n_samples, n_features) array even when no samples were
    # generated; np.asarray([]) alone would be 1-D
    dataset = np.asarray(samples, dtype=float)
    dataset = dataset.reshape(len(samples), len(x_values))

    # Save in .npy format
    np.save(final_filename, dataset)

    print(f'Polynomial dataset with {dataset.shape[0]} rows and '
          f'{dataset.shape[1]} columns saved at {final_filename}')
        

def generate_cosine(data_path, n_samples=10000, n_features=1000):
    """
    Generate a cosine dataset and save it as ``cosine.npy``.

    Each sample is the function f(x) = a*cos(bx + c) evaluated at a shared set
    of x values, using parameters that are fixed within a sample: the
    amplitude a is drawn uniformly from [-10, 10], while b and c are drawn
    uniformly from [-pi, pi]. The x values are drawn uniformly from [-10, 10]
    once and reused for every sample. If the output file already exists,
    nothing is generated and the file is left untouched.

    Parameters
    ----------
    data_path : str or pathlib.Path
        Path to folder to save data. This function does not create the folder.
    n_samples : int, default = 10000
        Number of samples to generate. This is equivalent to the number of
        values to try for the a, b, and c parameters.
    n_features : int, default = 1000
        Number of features to generate. This is equivalent to the number of x
        values used to evaluate the function f(x) for each sample.

    Notes
    -----
    Seeds the global ``random`` module with 0 so repeated runs produce the
    same dataset.
    """
    def f(x, a, b, c):
        return a*np.cos(b*x + c)

    # Set the random seed for reproducibility
    random.seed(0)

    # Accept plain strings as well as Path objects for the output folder
    final_filename = Path(data_path)/'cosine.npy'
    if final_filename.exists():
        print(f'{final_filename} file already exists. Will not '
              'overwrite. Terminating.')
        return

    print('Generating Cosine Dataset')
    # Generate x values which are fixed across all samples
    x_values = [random.uniform(-10, 10) for _ in range(n_features)]
    samples = []
    for _ in range(n_samples):
        # Draw a, b, c in this exact order so the seeded sequence (and hence
        # the dataset) stays identical from run to run.
        # Amplitude 'a' lies between -10 and 10
        a = random.uniform(-10, 10)
        # 'b' and 'c' lie between -pi and pi
        b = random.uniform(-math.pi, math.pi)
        c = random.uniform(-math.pi, math.pi)
        samples.append([f(x, a, b, c) for x in x_values])

    # Force a 2-D (n_samples, n_features) array even when no samples were
    # generated; np.asarray([]) alone would be 1-D
    dataset = np.asarray(samples, dtype=float)
    dataset = dataset.reshape(len(samples), len(x_values))

    # Save in .npy format
    np.save(final_filename, dataset)
    print(f'Univariate-cosine dataset with {dataset.shape[0]} rows and '
          f'{dataset.shape[1]} columns saved at {final_filename}')

def main():
    """
    Command-line entry point.

    Expects exactly two arguments after the script name: the dataset to
    simulate ('all', 'polynomial', or 'cosine') and the folder in which to
    save it. The folder, including any missing parent folders, is created if
    needed.

    Raises
    ------
    SystemExit
        With exit status 1 when the number of arguments is wrong.
    ValueError
        When the dataset name is not recognized. Raised before anything is
        created on disk.
    """
    # Exactly three entries are required: the unpacking below would fail on
    # any other count
    if len(sys.argv) != 3:
        print(len(sys.argv), file=sys.stderr)
        print("Error - Incorrect input", file=sys.stderr)
        print("Expecting python3 simulate_data.py [dataset_name] [data_path]",
              file=sys.stderr)
        # Non-zero status so shells and pipelines can detect the failure
        sys.exit(1)

    # Parse the input arguments
    # E.g. dataset_name = 'all'
    # E.g. data_path = '../../data/'
    _, dataset_name, data_path = sys.argv

    # Validate before touching the filesystem so a typo leaves no stray folder
    if dataset_name not in ('all', 'polynomial', 'cosine'):
        raise ValueError(f'{dataset_name} not recognized, please pass either: '
                         'all, polynomial or cosine.')

    data_path = Path(data_path)
    # Create nested folders as needed; tolerate the folder already existing
    data_path.mkdir(parents=True, exist_ok=True)

    # Simulate data
    if dataset_name in ('all', 'polynomial'):
        generate_polynomial(data_path, 10000, 1000)
    if dataset_name in ('all', 'cosine'):
        generate_cosine(data_path, 10000, 1000)
    
# Run the command-line entry point only when this file is executed as a
# script, so importing the module does not trigger dataset generation.
if __name__ == "__main__":
    main()