import pandas as pd
import numpy as np

# Load the three headline CSV files from the current working directory.
data1 = pd.read_csv("./headline_S1.csv")
data2 = pd.read_csv("./headline_S2.csv")
data3 = pd.read_csv("./headline_S3.csv")

# One-hot encode data3's "Ethnicity" column (codes 1..6) into six indicator
# columns Ethnicity_1 .. Ethnicity_6, then drop the original column.
# int() is applied per value so non-integer input (e.g. NaN) still fails
# loudly; dtype=int keeps the array usable as an index even when data3 is empty.
ethnicity_codes = np.array([int(code) for code in data3["Ethnicity"]], dtype=int)

# Reject codes outside 1..6 explicitly: a code of 0 (or a small negative
# number) would otherwise become a negative index and silently set the
# wrong indicator column instead of raising.
out_of_range = (ethnicity_codes < 1) | (ethnicity_codes > 6)
if out_of_range.any():
    bad_codes = sorted(set(ethnicity_codes[out_of_range].tolist()))
    raise ValueError(
        "Ethnicity codes in headline_S3.csv must be in 1..6, got: "
        + str(bad_codes)
    )

ethnicity_matrix = np.zeros((len(data3), 6))
# Row r gets a 1 in column (code - 1); all other entries stay 0.
ethnicity_matrix[np.arange(len(data3)), ethnicity_codes - 1] = 1
data3[["Ethnicity_" + str(i) for i in range(1, 7)]] = ethnicity_matrix
del data3["Ethnicity"]

# Lower-case all column names so identically named columns line up on concat.
for frame in (data1, data2, data3):
    frame.columns = frame.columns.str.lower()

# Merge the datasets row-wise with a fresh 0..n-1 index.
data = pd.concat([data1, data2, data3], ignore_index=True)

# Drop every column that contains a missing value (this includes columns
# absent from any one input, which concat fills with NaN), then cast the
# remaining columns to int.
data_drop = data.dropna(axis=1).astype(int)

# Write the merged table without the index column.
data_drop.to_csv("./headline_total.csv", index=False)