# Script setup: start from an empty workspace, attach the plotting and
# data-manipulation packages, and move into the results directory.
# NOTE(review): clearing the global environment and hard-coding a working
# directory make the script machine-specific; both are kept because the
# relative CSV names and "../figures/..." output paths used further down are
# resolved against this directory.
rm(list = ls())

# library() stops immediately when a package is missing; require() would only
# warn and return FALSE, deferring the failure to the first ggplot() call.
library(ggplot2)
library(dplyr)

setwd("~/Documents/research/kiv/kiv_fix/supplement/code/results")

# Negated `%in%`: for each element of `x`, TRUE when it has no match in `y`
# and FALSE otherwise.
`%!in%` <- function(x, y) {
  !(x %in% y)
}

#############
# read in csv
#############

# Algorithm keys as they appear in the result file names, and the matching
# display names (paired by position).
alg_options <- c("kernel_ridge", "sieve", "sieve_ridge", "deep", "KIV")
alg_names <- c("KernelReg", "SieveIV", "SieveIV(ridge)", "DeepIV", "KernelIV")

# Design keys as they appear in the result file names, and the matching
# display names (paired by position).
design_options <- c("NP", "HLLT90", "HLLT75", "HLLT50", "HLLT25", "HLLT10", "CC")
design_names <- c(
  "Sigmoid",
  paste("Demand", c("0.9", "0.75", "0.5", "0.25", "0.1")),
  "Linear"
)

# Load the MSE draws for every (algorithm, design) combination.
# Each combination has three header-less CSV files named
# "<alg>_<design>_<size>.csv" with size 1000, 5000 and 10000; only the first
# column of each file is used.
# Result: `dfs`, a list holding one data frame per combination with columns
# alg_name, design_name, mse and n (n is 1, 5 or 10, matching the file size
# suffix divided by 1000).
dfs <- vector("list", length(alg_options) * length(design_options))
i <- 1

for (a in seq_along(alg_options)) {
  alg <- alg_options[a]
  alg_name <- alg_names[a]

  for (d in seq_along(design_options)) {
    design <- design_options[d]
    design_name <- design_names[d]

    mse1 <- read.csv(paste0(alg, "_", design, "_1000.csv"), header = FALSE)
    mse5 <- read.csv(paste0(alg, "_", design, "_5000.csv"), header = FALSE)
    mse10 <- read.csv(paste0(alg, "_", design, "_10000.csv"), header = FALSE)

    mse <- c(mse1[[1]], mse5[[1]], mse10[[1]])
    # Label every draw from the row count of the file it came from. A fixed
    # count per file would mislabel draws whenever the files differ in length
    # but still add up to the same total.
    n <- rep(c(1, 5, 10), times = c(nrow(mse1), nrow(mse5), nrow(mse10)))

    # Column names are taken from the variable names; later code relies on them.
    dfs[[i]] <- data.frame(alg_name, design_name, mse, n)
    i <- i + 1
  }
}

# Load the MSE draws for the "kernelsmooth" algorithm (displayed as
# "SmoothIV"). Only the "_1000" file is read for each design, so every row
# gets n = 1.
# Result: `dfs_ks`, one data frame per design with the same four columns
# (alg_name, design_name, mse, n) as the entries of `dfs`.
dfs_ks <- vector("list", length(design_options))
alg <- "kernelsmooth"
alg_name <- "SmoothIV"

for (d in seq_along(design_options)) {
  design <- design_options[d]
  design_name <- design_names[d]

  mse1 <- read.csv(paste0(alg, "_", design, "_1000.csv"), header = FALSE)

  mse <- mse1[[1]]
  # One label per draw actually present in the file, not a fixed count.
  n <- rep(1, length(mse))

  # Column names are taken from the variable names; later code relies on them.
  dfs_ks[[d]] <- data.frame(alg_name, design_name, mse, n)
}

# Stack the per-combination data frames into one long table.
df <- do.call(rbind, dfs)
df_ks <- do.call(rbind, dfs_ks)
df_all <- rbind(df, df_ks)

# Put MSE on a log10 scale and make the sample size a discrete variable.
df_all[["mse"]] <- log10(df_all[["mse"]])
df_all[["n"]] <- as.factor(df_all[["n"]])

# Fix the ordering of the algorithm levels.
df_all[["alg_name"]] <- factor(
  df_all[["alg_name"]],
  levels = c("KernelReg", "SieveIV", "SieveIV(ridge)", "DeepIV", "KernelIV", "SmoothIV")
)

# Rename the columns positionally: the first one becomes "Algorithm", the
# other three keep their names.
colnames(df_all) <- c("Algorithm", "design_name", "mse", "n")

# Algorithms, in the order passed to the manual scales as breaks and labels.
plot_order <- c("KernelReg", "SmoothIV", "SieveIV(ridge)", "DeepIV", "KernelIV")

###########
# make plot
###########

##

# Five colours passed, unnamed, to both manual scales together with
# `breaks = plot_order`.
# NOTE(review): how unnamed `values` pair up with the Algorithm levels has
# changed between ggplot2 releases (order of `breaks` vs. order of the scale
# limits) -- confirm the legend colours, or name this vector explicitly.
algorithm_colours <- c("#F8766D", "#00BF7D", "#00B0F6", "#E76BF3", "#F0E442")

# Build the overlaid log-MSE boxplot for `data` and write it to `filename`.
#
# data      data frame with columns n, mse, Algorithm and design_name
# filename  output path handed to ggsave()
# width     figure width handed to ggsave()
# x_label   x-axis label
# height    figure height handed to ggsave() (2 for every figure below)
#
# Returns the plot visibly, so a top-level call still auto-prints it exactly
# where the bare `myplot + ...` expression used to.
save_mse_boxplot <- function(data, filename, width, x_label, height = 2) {
  p <- ggplot(data) +
    geom_boxplot(
      aes(x = n, y = mse, fill = Algorithm, color = Algorithm),
      # Boxes of different algorithms are drawn on top of each other, hence
      # the transparency.
      position = "identity",
      alpha = 0.3
    ) +
    # Formula passed positionally: the `facets =` argument name is deprecated.
    facet_grid(. ~ design_name) +
    theme_bw() +
    xlab(x_label) +
    ylab("Out-of-Sample MSE (log)") +
    scale_fill_manual(
      values = algorithm_colours, labels = plot_order, breaks = plot_order
    ) +
    scale_color_manual(
      values = algorithm_colours, labels = plot_order, breaks = plot_order
    )

  # Pass the plot explicitly instead of relying on last_plot().
  ggsave(
    filename = filename, plot = p, device = cairo_ps,
    fallback_resolution = 600, width = width, height = height
  )

  p
}

# "SieveIV" is left out of every figure.
plot_data <- filter(df_all, Algorithm != "SieveIV")

# The single-panel figures use an x label padded with leading spaces.
padded_x_label <- "               Training Sample Size (1000)"

## All "Demand" designs side by side

save_mse_boxplot(
  filter(plot_data, design_name %!in% c("Sigmoid", "Linear")),
  filename = "../figures/demand.eps",
  width = 8,
  x_label = "Training Sample Size (1000)"
)

## Sigmoid and Linear designs, one figure each

save_mse_boxplot(
  filter(plot_data, design_name == "Sigmoid"),
  filename = "../figures/sigmoid.eps",
  width = 3,
  x_label = padded_x_label
)

save_mse_boxplot(
  filter(plot_data, design_name == "Linear"),
  filename = "../figures/linear.eps",
  width = 3,
  x_label = padded_x_label
)

## "Demand 0.5" design on its own

save_mse_boxplot(
  filter(plot_data, design_name == "Demand 0.5"),
  filename = "../figures/demand_5.eps",
  width = 3,
  x_label = padded_x_label
)