# Combine all the results for Cluster SEV into a single table
import pandas as pd
import numpy as np

# read the table
df = pd.read_csv("../Results/csv/Exp1_flexclust_summary_final.csv")
df_original = pd.read_csv("../Results/csv/Exp1_cluster_summary_final.csv")
df_original = df_original.groupby(["Dataset","Model"]).mean().round(2)
df["Tolerance"] = df["Tolerance"].apply(lambda x: round(x,2))
df_original["Tolerance"] = 0.2
# # remove the flexible tolerance with 0.2
# df = df[df["Tolerance"] != 0.2]
# remove the headline1, headline2, headline3
df = df[df["Dataset"] != "headline1"]
df = df[df["Dataset"] != "headline2"]
df = df[df["Dataset"] != "headline3"]

# df["Proportion of using flexible SEV"] = df["Proportion of using flexible SEV"] * 100
df_mean = df.groupby(["Dataset","Model","Tolerance"]).mean().round(2)
del df_mean["Iteration"]
# remove the last row
df_mean = df_mean.iloc[:-1,:]
df_value = df_mean.copy()
df_std = df.groupby(["Dataset","Model","Tolerance"]).std().round(2)
del df_std["Iteration"]
df_std.columns = [i+"_std" for i in df_std.columns]


# calculate the percentage of improvement with the same Dataset and Model for Mean SEV and L_inf
for dataset in df_mean.index.get_level_values(0).unique():
    for model in df_mean.index.get_level_values(1).unique():
        for tolerance in df_mean.index.get_level_values(2).unique():
            try:
                df_value.loc[(dataset,model,tolerance),"Average SEV"] = "(" + str(round((df_mean.loc[(dataset,model,tolerance),"Average SEV"] - df_original.loc[(dataset,model),"Average SEV"])/df_original.loc[(dataset,model),"Average SEV"]*100,2)) + "\%)"
                df_value.loc[(dataset,model,tolerance),"Average L_inf"] = "(" + str(round((df_mean.loc[(dataset,model,tolerance),"Average L_inf"] - df_original.loc[(dataset,model),"Median L_inf"])/df_original.loc[(dataset,model),"Median L_inf"]*100,2)) + "\%)"
                df_value.loc[(dataset,model,tolerance),"Average GMM"] = "(" + str(round((df_mean.loc[(dataset,model,tolerance),"Average GMM"] - df_original.loc[(dataset,model),"Mean Likelihood"])/df_original.loc[(dataset,model),"Mean Likelihood"]*100,2)) + "\%)"
            except:
                print(dataset,model,tolerance,"is not in the original table")

print(df_value)


# for the one with the same column name make it mean+/-std in the same block
df_final = df_mean.copy()
# remove all the elements in df_final
df_final.iloc[:,:] = ""
for i in df_mean.index:
    for j in df_mean.columns:
        if j == "Average SEV" or j == "Average L_inf" or j=="Average GMM":
            # print(df_value.loc[i,j])
            try:
                df_final.loc[i,j] = "$" + str(df_mean.loc[i,j]) + "$" + df_value.loc[i,j]
            except:
                pass
                # print(df_value.loc[i,:])
        # elif j == "Proportion of using flexible SEV":
        #     df_final.loc[i,j] = str(df_mean.loc[i,j])+"\\%"
        else:
            df_final.loc[i,j] = "$" + str(df_mean.loc[i,j])+"\\pm"+str(df_std.loc[i,j+"_std"])+ "$"

# switch the order of the columns
df_final = df_final[["Train Accuracy","Test Accuracy","Train AUC","Test AUC","Average SEV","Average L_inf", "Average GMM","Average Time"]]
# rename the columns
df_final.columns = ["Train Accuracy","Test Accuracy","Train AUC","Test AUC","Average SEV","Average $L_\infty$","Mean GMM","Average Time $\\(10^{-2})$"]

print(df_final.to_latex(escape=False))


# print(df_final.to_latex(escape=False))
# df_final.to_csv("../Results/csv/Exp1_flexible_summary_latex.csv")