import pandas as pd
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.express as px
import plotly.io as pio
# Disable MathJax on the Kaleido export scope before any figure is written.
# NOTE(review): presumably the usual workaround for the "Loading [MathJax]..."
# box leaking into exported PDFs -- confirm this is still needed.
pio.kaleido.scope.mathjax = None

def table_1():
    """Return the hard-coded mean and std rows for the four Flan-T5 pairings.

    Both returned lists contain rows of the form ``[method, budget, value]``.
    Rows are grouped by method (in the order listed below) and, within a
    method, ordered by budget 0, 20, 40, 60, 80, 100. The second list holds
    the standard deviations and is row-aligned with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Flan-T5-Large + human', (58.91, 63.76, 66.96, 73.94, 78.02, 81.95)),
        ('Flan-T5-XL + human', (68.12, 72.05, 75.98, 80.20, 84.13, 87.77)),
        ('Flan-T5-Large + Flan-T5-XL', (58.91, 60.52, 59.78, 61.48, 62.36, 62.97)),
        ('Flan-T5-XL + Flan-T5-Large', (68.12, 67.69, 65.65, 64.05, 62.88, 61.86)),
    )
    std_by_method = (
        ('Flan-T5-Large + human', (1.39, 0.44, 2.20, 2.77, 2.41, 1.65)),
        ('Flan-T5-XL + human', (2.62, 2.73, 2.31, 1.65, 1.01, 0.75)),
        ('Flan-T5-Large + Flan-T5-XL', (1.39, 1.64, 1.86, 2.03, 3.13, 2.48)),
        ('Flan-T5-XL + Flan-T5-Large', (2.62, 2.73, 3.39, 3.64, 1.16, 0.67)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_2():
    """Return the hard-coded mean and std rows for the four LLama pairings.

    Both returned lists contain rows of the form ``[method, budget, value]``.
    Rows are grouped by method (in the order listed below) and, within a
    method, ordered by budget 0, 20, 40, 60, 80, 100. The second list holds
    the standard deviations and is row-aligned with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('LLama-7B + human', (63.44, 68.12, 69.29, 75.25, 79.18, 82.39)),
        ('LLama-13B + human', (64.63, 68.41, 70.89, 76.56, 78.75, 82.24)),
        ('LLama-7B + LLama-13B', (63.44, 64.00, 63.63, 64.32, 64.75, 65.69)),
        ('LLama-13B + LLama-7B', (64.63, 63.90, 64.77, 63.17, 62.59, 61.28)),
    )
    std_by_method = (
        ('LLama-7B + human', (2.06, 1.57, 1.10, 1.65, 1.33, 0.91)),
        ('LLama-13B + human', (3.15, 2.48, 3.96, 1.33, 1.40, 1.26)),
        ('LLama-7B + LLama-13B', (3.39, 2.06, 3.16, 4.11, 2.59, 3.12)),
        ('LLama-13B + LLama-7B', (3.15, 3.96, 3.76, 2.06, 2.15, 0.67)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_3():
    """Return a second hard-coded result set for the four Flan-T5 pairings.

    Both returned lists contain rows of the form ``[method, budget, value]``.
    Rows are grouped by method (in the order listed below) and, within a
    method, ordered by budget 0, 20, 40, 60, 80, 100. The second list holds
    the standard deviations and is row-aligned with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Flan-T5-Large + human', (84.79, 86.87, 88.71, 90.77, 93.20, 95.42)),
        ('Flan-T5-XL + human', (92.38, 92.52, 92.43, 92.23, 92.41, 92.22)),
        ('Flan-T5-Large + Flan-T5-XL', (84.79, 85.79, 86.80, 87.46, 88.52, 89.72)),
        ('Flan-T5-XL + Flan-T5-Large', (92.38, 90.92, 89.74, 87.98, 86.70, 85.20)),
    )
    std_by_method = (
        ('Flan-T5-Large + human', (0.41, 0.77, 0.95, 0.45, 0.47, 0.18)),
        ('Flan-T5-XL + human', (0.17, 0.20, 0.29, 0.62, 1.13, 1.07)),
        ('Flan-T5-Large + Flan-T5-XL', (0.41, 0.48, 0.84, 0.20, 0.39, 0.69)),
        ('Flan-T5-XL + Flan-T5-Large', (0.17, 0.39, 0.40, 0.90, 1.61, 1.62)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_4():
    """Return a second hard-coded result set for the four LLama pairings.

    Both returned lists contain rows of the form ``[method, budget, value]``.
    Rows are grouped by method (in the order listed below) and, within a
    method, ordered by budget 0, 20, 40, 60, 80, 100. The second list holds
    the standard deviations and is row-aligned with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('LLama-7B + human', (9.63, 11.98, 13.85, 16.33, 18.73, 21.05)),
        ('LLama-13B + human', (16.45, 18.45, 20.34, 22.42, 24.92, 26.89)),
        ('LLama-7B + LLama-13B', (9.63, 10.21, 10.69, 11.25, 11.93, 12.26)),
        ('LLama-13B + LLama-7B', (16.45, 15.87, 15.57, 14.89, 14.68, 14.28)),
    )
    std_by_method = (
        ('LLama-7B + human', (1.53, 0.80, 1.02, 0.57, 0.79, 0.65)),
        ('LLama-13B + human', (1.81, 2.16, 1.60, 2.46, 2.08, 2.34)),
        ('LLama-7B + LLama-13B', (1.53, 1.06, 0.82, 0.50, 1.16, 0.95)),
        ('LLama-13B + LLama-7B', (1.81, 1.63, 1.44, 1.90, 1.88, 1.71)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_5():
    """Return hard-coded mean and std rows for eight selection strategies.

    Both returned lists contain rows of the form ``[method, x, value]``.
    Rows are grouped by method (in the order listed below) and, within a
    method, ordered by x = 0, 20, 40, 60, 80, 100. The second list holds the
    standard deviations and is row-aligned with the first.
    """
    x_values = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Random', (58.91, 60.52, 59.78, 61.48, 62.36, 62.97)),
        ('Teacher Conf', (58.91, 58.66, 60.12, 57.35, 61.43, 62.97)),
        ('Simulated Student Conf (Pre)', (58.91, 64.19, 66.67, 66.81, 65.36, 62.97)),
        ('Simulated Student Conf (Post)', (58.91, 64.77, 68.27, 69.72, 68.27, 62.97)),
        ('Simulated Utility', (58.91, 67.83, 71.32, 71.18, 69.87, 62.97)),
        ('True Student Conf (Pre)', (58.91, 68.27, 80.20, 74.38, 68.56, 62.97)),
        ('True Student Conf (Post)', (58.91, 65.65, 72.63, 80.06, 72.2, 62.97)),
        ('True Utility', (58.91, 76.56, 80.79, 81.51, 78.60, 62.97)),
    )
    std_by_method = (
        ('Random', (2.00, 1.64, 1.86, 2.03, 3.13, 4.20)),
        ('Teacher Conf', (2.00, 2.41, 2.91, 3.31, 3.91, 4.20)),
        ('Simulated Student Conf (Pre)', (2.00, 2.00, 0.25, 1.57, 2.41, 4.20)),
        ('Simulated Student Conf (Post)', (2.00, 1.76, 0.67, 2.02, 2.63, 4.20)),
        ('Simulated Utility', (2.00, 1.53, 1.33, 1.16, 2.43, 4.20)),
        ('True Student Conf (Pre)', (2.00, 1.65, 1.26, 2.84, 3.88, 4.20)),
        ('True Student Conf (Post)', (2.00, 1.40, 1.10, 0.91, 4.40, 4.20)),
        ('True Utility', (2.00, 0.50, 1.16, 1.76, 3.30, 4.20)),
    )

    # Expand each (method, values) pair into one row per x step.
    mean_scores = [[method, x, value]
                   for method, values in mean_by_method
                   for x, value in zip(x_values, values)]
    std_scores = [[method, x, value]
                  for method, values in std_by_method
                  for x, value in zip(x_values, values)]

    return mean_scores, std_scores

def table_5_part1():
    """Return hard-coded mean and std rows for four selection strategies.

    Covers 'Random', 'Teacher Conf', 'Expected Utility' and 'True Utility'.
    Both returned lists contain rows of the form ``[method, budget, value]``,
    grouped by method and ordered by budget 0, 20, 40, 60, 80, 100 within a
    method. The second list holds the standard deviations and is row-aligned
    with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Random', (58.91, 60.52, 59.78, 61.48, 62.36, 62.97)),
        ('Teacher Conf', (58.91, 58.66, 60.12, 57.35, 61.43, 62.97)),
        ('Expected Utility', (58.91, 67.83, 71.32, 71.18, 69.87, 62.97)),
        ('True Utility', (58.91, 76.56, 80.79, 81.51, 78.60, 62.97)),
    )
    std_by_method = (
        ('Random', (2.00, 1.64, 1.86, 2.03, 3.13, 2.47)),
        ('Teacher Conf', (2.00, 2.41, 2.91, 3.31, 3.91, 2.47)),
        ('Expected Utility', (2.00, 1.53, 1.33, 1.16, 2.43, 2.47)),
        ('True Utility', (2.00, 0.50, 1.16, 1.76, 3.30, 2.47)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_5_part2():
    """Return hard-coded mean and std rows for the three 'Expected' strategies.

    Covers 'Expected Student Conf (Pre)', 'Expected Student Conf (Post)' and
    'Expected Utility'. Both returned lists contain rows of the form
    ``[method, budget, value]``, grouped by method and ordered by budget
    0, 20, 40, 60, 80, 100 within a method. The second list holds the
    standard deviations and is row-aligned with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Expected Student Conf (Pre)', (58.91, 64.19, 66.67, 66.81, 65.36, 62.97)),
        ('Expected Student Conf (Post)', (58.91, 64.77, 68.27, 69.72, 68.27, 62.97)),
        ('Expected Utility', (58.91, 67.83, 71.32, 71.18, 69.87, 62.97)),
    )
    std_by_method = (
        ('Expected Student Conf (Pre)', (2.00, 2.00, 0.25, 1.57, 2.41, 2.47)),
        ('Expected Student Conf (Post)', (2.00, 1.76, 0.67, 2.02, 2.63, 2.47)),
        ('Expected Utility', (2.00, 1.53, 1.33, 1.16, 2.43, 2.47)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_6():
    """Return hard-coded mean and std rows for three selection strategies.

    Covers 'Random', 'Expected Utility' and 'True Utility'. Both returned
    lists contain rows of the form ``[method, budget, value]``, grouped by
    method and ordered by budget 0, 20, 40, 60, 80, 100 within a method. The
    second list holds the standard deviations and is row-aligned with the
    first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Random', (68.12, 67.69, 65.65, 64.05, 62.88, 61.86)),
        ('Expected Utility', (68.12, 70.89, 71.91, 72.63, 69.00, 61.86)),
        ('True Utility', (68.12, 79.91, 80.93, 80.64, 78.17, 61.86)),
    )
    std_by_method = (
        ('Random', (2.62, 2.73, 3.39, 3.64, 1.16, 0.67)),
        ('Expected Utility', (2.62, 3.28, 2.84, 2.24, 1.16, 0.67)),
        ('True Utility', (2.62, 2.00, 2.06, 2.24, 2.00, 0.67)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_7():
    """Return hard-coded mean and std rows for three selection strategies.

    Covers 'Random', 'Least Confidence' and 'Expected Utility'. Both returned
    lists contain rows of the form ``[method, budget, value]``, grouped by
    method and ordered by budget 0, 20, 40, 60, 80, 100 within a method. The
    second list holds the standard deviations and is row-aligned with the
    first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Random', (58.91, 60.52, 59.78, 61.48, 62.36, 62.97)),
        ('Least Confidence', (58.91, 60.55, 60.55, 60.84, 62.15, 62.97)),
        ('Expected Utility', (58.91, 62.30, 62.45, 62.45, 62.95, 62.97)),
    )
    std_by_method = (
        ('Random', (2.00, 1.51, 1.97, 2.48, 1.97, 2.48)),
        ('Least Confidence', (2.00, 1.82, 1.33, 2.24, 3.10, 2.48)),
        ('Expected Utility', (2.00, 0.50, 1.51, 3.88, 2.78, 2.48)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores

def table_8():
    """Return hard-coded mean and std rows for three explanation variants.

    Covers 'Unpersonalized-Rationales', 'Unpersonalized-CoT' and 'ToM'. Both
    returned lists contain rows of the form ``[method, budget, value]``,
    grouped by method and ordered by budget 0, 20, 40, 60, 80, 100 within a
    method. The second list holds the standard deviations and is row-aligned
    with the first.
    """
    budgets = (0, 20, 40, 60, 80, 100)

    mean_by_method = (
        ('Unpersonalized-Rationales', (58.51, 66.52, 69.14, 70.16, 67.98, 60.41)),
        ('Unpersonalized-CoT', (58.51, 67.83, 71.32, 71.18, 69.87, 62.96)),
        ('ToM', (58.51, 69.29, 71.62, 72.63, 68.56, 62.74)),
        # Disabled series (mean values only; no std values are recorded):
        # ('ToM-Contrastive', (58.91, 67.10, 73.07, 72.34, 68.56, 62.74)),
        # ('Human', (58.91, 72.34, 77.73, 81.51, 82.10, 81.37)),
    )
    std_by_method = (
        ('Unpersonalized-Rationales', (2.00, 2.97, 1.76, 1.10, 0.50, 0.50)),
        ('Unpersonalized-CoT', (2.00, 1.53, 1.33, 1.16, 2.43, 2.47)),
        ('ToM', (2.00, 1.26, 1.16, 1.33, 1.90, 2.81)),
    )

    # Expand each (method, values) pair into one row per budget step.
    mean_scores = [[method, budget, value]
                   for method, values in mean_by_method
                   for budget, value in zip(budgets, values)]
    std_scores = [[method, budget, value]
                  for method, values in std_by_method
                  for budget, value in zip(budgets, values)]

    return mean_scores, std_scores


def plot_line_chart_with_error_bars_and_scatter(mean_scores, std_scores, title='', xaxis_title='', yaxis_title='',
                                                marker_color=px.colors.qualitative.Plotly,
                                                plot_bgcolor='rgba(255, 255, 255, 0.95)',
                                                xaxis_showgrid=False, yaxis_showgrid=True,
                                                yaxis_gridcolor='rgba(200, 200, 200, 0.4)',
                                                xaxis_tickfont_size=30, yaxis_tickfont_size=30,
                                                width=800, height=600,
                                                savename=None):
    """Draw one line-with-markers trace per method, with std error bars.

    Args:
        mean_scores: Rows of ``[method, num_datasets, score]`` giving the
            y value of each point.
        std_scores: Rows of the same shape giving the error-bar half-height
            of each point. Within a method the rows are used in the order
            given, so they must follow the same order as ``mean_scores``.
        title: Figure title, centred horizontally.
        xaxis_title: X axis title text.
        yaxis_title: Y axis title text.
        marker_color: Sequence of colours; trace ``i`` uses entry
            ``i % len(marker_color)`` for both its line and its markers.
        plot_bgcolor: Background colour of the plotting area.
        xaxis_showgrid: Whether to draw vertical grid lines.
        yaxis_showgrid: Whether to draw horizontal grid lines.
        yaxis_gridcolor: Colour of the horizontal grid lines.
        xaxis_tickfont_size: Font size of the x tick labels.
        yaxis_tickfont_size: Font size of the y tick labels.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
        savename: When ``None`` the figure is shown interactively; otherwise
            it is written to this path as a PDF.
    """
    import os

    columns = ['method', 'num_datasets', 'score']
    mean_scores = pd.DataFrame(mean_scores, columns=columns)
    std_scores = pd.DataFrame(std_scores, columns=columns)

    # One trace per method, in order of first appearance.
    categories = mean_scores['method'].unique()

    fig = go.Figure(layout=go.Layout(width=width, height=height))

    # The first four symbols are the original ones; the rest only come into
    # play for tables with more than four methods.
    marker_shapes = ['circle', 'square', 'diamond', 'cross',
                     'x', 'triangle-up', 'triangle-down', 'star']

    for i, method in enumerate(categories):
        method_mean_data = mean_scores[mean_scores['method'] == method]
        method_std_data = std_scores[std_scores['method'] == method]

        # Wrap around rather than raise IndexError when there are more
        # methods than available symbols or colours.
        symbol = marker_shapes[i % len(marker_shapes)]
        color = marker_color[i % len(marker_color)]

        fig.add_trace(go.Scatter(x=method_mean_data['num_datasets'],
                                 y=method_mean_data['score'],
                                 mode='lines+markers',
                                 text=[str(j) for j in method_mean_data['score']],
                                 textposition='bottom right',
                                 name=f'{method}',
                                 marker=dict(symbol=symbol, size=10, color=color),
                                 error_y=dict(type='data',
                                              array=method_std_data['score'],
                                              visible=True),
                                 line=dict(color=color, width=3)))

    # Axis title size lives in title.font; the separate `titlefont` attribute
    # is deprecated, so family and size are set together here.
    axis_title_font = dict(family='Arial, bold', size=35)

    fig.update_layout(
        title_text=title,
        title_x=0.5,
        plot_bgcolor=plot_bgcolor,
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            font=dict(size=20),
            xanchor='right',
            x=1
        ),
        xaxis=dict(
            showgrid=xaxis_showgrid,
            zeroline=True,
            showline=True,
            linewidth=2,
            linecolor='black',
            mirror=False,
            ticks='outside',
            tickfont=dict(size=xaxis_tickfont_size, family='Arial, bold'),
            title=dict(text=xaxis_title, font=dict(axis_title_font)),
        ),
        yaxis=dict(
            showgrid=yaxis_showgrid,
            gridcolor=yaxis_gridcolor,
            zeroline=True,
            showline=True,
            linewidth=2,
            linecolor='black',
            mirror=False,
            ticks='outside',
            tickfont=dict(size=yaxis_tickfont_size, family='Arial, bold'),
            title=dict(text=yaxis_title, font=dict(axis_title_font)),
        ),
    )

    if savename is None:
        # Show the plot
        fig.show()
    else:
        # Create the target directory first so a fresh checkout without it
        # does not fail on write. Skipped for non-path targets.
        if isinstance(savename, (str, os.PathLike)):
            out_dir = os.path.dirname(os.fspath(savename))
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
        pio.write_image(fig, savename, format='pdf')

# Render one PDF per results table, in a fixed order. table_5 itself is not
# plotted here; only its two parts are.
_AXIS_TITLES = dict(xaxis_title='Intervention Budget (%)', yaxis_title='Accuracy')

for _build_table, _pdf_path in (
    (table_1, "../plots/plot_rq1_1.pdf"),
    (table_2, "../plots/plot_rq1_2.pdf"),
    (table_3, "../plots/plot_rq1_3.pdf"),
    (table_4, "../plots/plot_rq1_4.pdf"),
    (table_5_part1, "../plots/plot_rq2_part1.pdf"),
    (table_5_part2, "../plots/plot_rq2_part2.pdf"),
    (table_6, "../plots/plot_rq2_2.pdf"),
    (table_7, "../plots/plot_rq2_3.pdf"),
    (table_8, "../plots/plot_rq3.pdf"),
):
    mean_scores, std_scores = _build_table()
    plot_line_chart_with_error_bars_and_scatter(mean_scores, std_scores, savename=_pdf_path, **_AXIS_TITLES)






