import os

'''
=====================================================================================================================
                                                    简单任务
=====================================================================================================================
'''
# # GPT 3x to x
# target = '3x_to_x'
# dir_suffix = '3x_to_x_task_demo'
# lr = 2e-5
# gpu_id = 2
# batch_size = 100
# scheduler = 'StepLR'
# model = 'GPT'
# data_size = 1000 # 各类数据总共的数据量

# # xm0表示x mod seq_len = 0为测试集，xel表示x else，即训练集
# dname = ['train', 'test']
# dmode = ['train', 'test']
# dtrain = [1, 0]
# dshow = [1, 1]
# dpercent = [9, 1] # 90%训练集，10%测试集，即训练集大小为900，测试集大小为100

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))

# L = 4
# suffix = f'{L}L1H'

# # 正常训练
# os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} /bin/python -m main -data_size {data_size} -seed 1 -func {target} -lr {lr} -m {model}\
#                 -scheduler {scheduler} -ne 300 -nl {L} -nh 1 -bs {batch_size} -dir_suffix {dir_suffix} \
#                 -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                 -ple 1 -pae 10 -plae 10 -sme 500 -lds 100 -ldr 0.95')



'''
=====================================================================================================================
                                                    多anchor任务
=====================================================================================================================
'''
# # GPT 复合函数
# target = 'composition'
# dir_suffix = 'composition_task_demo'
# lr = 2e-5
# gpu_id = 2
# batch_size = 100
# scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
# model = 'GPT'
# data_size = 9600

# # xm0表示x mod seq-1 = 0为测试集，xel表示x else，即训练集
# dname = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']
# dmode = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']
# dtrain = [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0] \
#        + [1, 1, 1, 1, 1, 0] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1]
# dshow = [1, 0, 0, 1, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1] \
#       + [1, 0, 0, 1, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1]
# dpercent = [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1] \
#          + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1]

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))

# L = 4
# suffix = f'{L}L1H'

# # 正常训练
# os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} /bin/python -m main -data_size {data_size} -seed 1 -func {target} -lr {lr} -m {model}\
#                 -scheduler {scheduler} -ne 100 -nl {L} -nh 1 -bs {batch_size} -dir_suffix {dir_suffix} \
#                 -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                 -ple 1 -pae 10 -plae 10 -sme 500\
#                 --optim_T_max 4000 --optim_eta_min 1e-5 --optim_multiplier 5 --optim_total_epoch 400')





'''
=====================================================================================================================
                                                    思维链任务
=====================================================================================================================
'''
# # GPT chain search
# seed_list = [1]
# target = 'chain_search'
# dir_suffix = '思维链'
# lr = 2e-5
# gpu_id = 1
# batch_size = 100
# scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
# model = 'GPT'
# data_size = 10000

# dname = ['train', 'test']
# dmode = ['train', 'test']
# dtrain = [0, 1]
# dshow = [1, 1]
# dpercent = [9, 1]

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))

# L, H = 3, 1
# suffix = f'{L}L{H}H'

# # 正常训练
# os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} /bin/python -m main -data_size {data_size} -seed 1 -func {target} -lr {lr} -m {model}\
#                 -scheduler {scheduler} -ne 4000 -nl {L} -nh {H} -bs {batch_size} -dir_suffix {dir_suffix} \
#                 -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                 -ple 1 -pae 10 -plae 10 -sme 50 -sl 13\
#                 --optim_T_max 4000 --optim_eta_min 1e-5 --optim_multiplier 5 --optim_total_epoch 400')




'''
=====================================================================================================================
                                                    初始凝聚
=====================================================================================================================
'''
# GPT 3x to x
# target = '3x_to_x'
# dir_suffix = '3x_to_x_task_condense'
# lr = 2e-5
# gpu_id = 0
# batch_size = 100
# scheduler = 'StepLR'
# model = 'GPT_condense'
# data_size = 10000 # 各类数据总共的数据量

# # xm0表示x mod seq_len = 0为测试集，xel表示x else，即训练集
# dname = ['train', 'test']
# dmode = ['train', 'test']
# dtrain = [1, 0]
# dshow = [1, 1]
# dpercent = [9, 1] # 90% training set, 10% test set; with data_size = 10000 that is 9000 training samples and 1000 test samples

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))

# L = 4
# suffix = f'{L}L256H_dk1'

# # 正常训练
# os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} python3 -m main -data_size {data_size} -seed 1 -func {target} -lr {lr} -m {model}\
#                 -scheduler {scheduler} -ne 300 -nl {L} -nh 256 -bs {batch_size} -dir_suffix {dir_suffix} \
#                 -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                 -ple 1 -pae 10 -plae 10 -sme 1 -lds 100 -ldr 0.95 -dm 32 -dk 1 -dv 1 -d_ff 32' )



# '''
# =====================================================================================================================
#                                                     复合任务对称or推断任务
# =====================================================================================================================
# '''
# GPT 复合函数
import argparse
import subprocess
import sys

# ---------------------------------------------------------------------------
# Composition-task sweep: launches `python3 -m main` once per (seed, layer
# count) pair.  Run this file as a script; importing it has no side effects.
# ---------------------------------------------------------------------------

# Fixed hyper-parameters forwarded verbatim to `main`.
target = 'composition'
lr = 1e-5
batch_size = 2048
scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
model = 'GPT_normal_init'
data_size = 900000
proj_name = 'refine_lr__epoch_207_data_90w_warmup_normal_init_34_wrong_43_unseen_diff_ini'

# Anchor pairs, in the exact order expected on the command line.
ANCHOR_PAIRS = (
    '13', '23', '43', '31', '32', '34',
    '12', '14', '21', '41', '24', '42',
    '11', '22', '33', '44',
)
# "xm0" = x mod (seq - 1) == 0, used as the test set;
# "xel" = every other x, used as the training set.
TEST_SPLIT = 'xm0'
TRAIN_SPLIT = 'xel'
# Training-split subsets whose -dtrain flag is 0 instead of 1.
# NOTE: the dataset selection was changed relative to earlier experiments.
UNTRAINED_SUBSETS = frozenset({'43_xel'})
# Subsets whose -dshow flag is 1 (presumably the ones reported/plotted by
# `main` -- TODO confirm against its argument parser).
SHOWN_SUBSETS = frozenset({'43_xel'})

# Sweep grid.  (seed=1, L=2) is skipped on purpose, as in the original script
# (presumably that run already exists -- TODO confirm).
SEEDS = (1, 2, 3, 4, 5)
LAYER_COUNTS = (2, 3, 4, 5, 6, 7)
SKIPPED_RUNS = frozenset({(1, 2)})


def build_dataset_spec():
    """Return the per-subset lists passed as -dn/-dmode/-dtrain/-dshow/-dp.

    Returns:
        dict with keys ``dname``, ``dmode``, ``dtrain``, ``dshow`` and
        ``dpercent``.  Every value is a list with one entry per subset: all
        test-split ("xm0") subsets first, then all training-split ("xel")
        subsets, each following ``ANCHOR_PAIRS`` order.
    """
    names = [f'{pair}_{split}'
             for split in (TEST_SPLIT, TRAIN_SPLIT)
             for pair in ANCHOR_PAIRS]
    is_train_split = [name.endswith(f'_{TRAIN_SPLIT}') for name in names]
    return {
        'dname': names,
        # Mode labels are identical to the subset names.
        'dmode': list(names),
        'dtrain': [int(in_train and name not in UNTRAINED_SUBSETS)
                   for name, in_train in zip(names, is_train_split)],
        'dshow': [int(name in SHOWN_SUBSETS) for name in names],
        # Relative weights: 1 for each test subset, 9 for each training subset.
        'dpercent': [9 if in_train else 1 for in_train in is_train_split],
    }


def iter_runs():
    """Yield every ``(seed, n_layers)`` pair of the sweep, seeds outermost."""
    for seed in SEEDS:
        for n_layers in LAYER_COUNTS:
            if (seed, n_layers) not in SKIPPED_RUNS:
                yield seed, n_layers


def build_command(seed, n_layers, std_rate, optim_multiplier, dir_suffix,
                  spec=None):
    """Build the argv list for one training run.

    The tokens and their order are exactly those of the shell string the
    script used to hand to ``os.system`` (minus the ``CUDA_VISIBLE_DEVICES``
    prefix, which the caller now supplies through the environment).

    Args:
        seed: value for ``-seed``; also embedded in the ``-suffix`` tag.
        n_layers: value for ``-nl``; also embedded in the ``-suffix`` tag.
        std_rate: value for ``-sr``.
        optim_multiplier: value for ``--optim_multiplier``.
        dir_suffix: value for ``-dir_suffix``.
        spec: dataset spec as returned by ``build_dataset_spec``; built on
            demand when omitted.

    Returns:
        list of str suitable for ``subprocess.run``.
    """
    if spec is None:
        spec = build_dataset_spec()
    suffix = f'{n_layers}L1H_seed{seed}'
    tokens = [
        'python3', '-m', 'main',
        '-data_size', data_size,
        '-seed', seed,
        '-func', target,
        '-lr', lr,
        '-m', model,
        '-scheduler', scheduler,
        '-ne', 210,
        '-nl', n_layers,
        '-nh', 1,
        '-bs', batch_size,
        '-dir_suffix', dir_suffix,
        '-pname', proj_name,
        '-dk', 200,
        '-dv', 200,
        '-dmode', *spec['dmode'],
        '-dp', *spec['dpercent'],
        '-dn', *spec['dname'],
        '-dtrain', *spec['dtrain'],
        '-dshow', *spec['dshow'],
        '-suffix', suffix,
        '-ple', 1,
        '-pae', 3,
        '-plae', 3,
        '-sme', 20,
        '-sr', std_rate,
        '--optim_T_max', 200,
        # Kept as the literal text '1e-5' (str(1e-5) would give '1e-05').
        '--optim_eta_min', '1e-5',
        '--optim_multiplier', optim_multiplier,
        '--optim_total_epoch', 10,
    ]
    # str() formats numbers exactly as the original f-string interpolation did.
    return [str(token) for token in tokens]


def parse_args(argv=None):
    """Parse the launcher's own options (``argv=None`` reads ``sys.argv``)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--std_rate', type=float, default=0.5)
    # The default stays the int 10 (argparse does not coerce non-string
    # defaults), so the default directory name still ends in "_10".
    parser.add_argument('--optim_multiplier', type=float, default=10)
    parser.add_argument('--gpu_id', type=int, default=4)
    return parser.parse_args(argv)


def main(argv=None):
    """Run the whole sweep sequentially.

    A failing run is reported on stderr and the sweep continues with the next
    configuration.

    Returns:
        0 if every run exited with status 0, otherwise 1.
    """
    args = parse_args(argv)
    # NOTE(review): the directory tag says "lr_5e-4" while lr is 1e-5; the
    # string is left untouched so existing result directories keep matching.
    dir_suffix = (f'diff_lr_5e-4_composition_task_34_unseen_43_unseen_diff_ini_'
                  f'{args.std_rate}_optim_multiplier_{args.optim_multiplier}')
    spec = build_dataset_spec()
    # Pin the child process to the requested GPU without going through a shell.
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(args.gpu_id))

    failed = []
    for seed, n_layers in iter_runs():
        command = build_command(seed, n_layers, args.std_rate,
                                args.optim_multiplier, dir_suffix, spec)
        completed = subprocess.run(command, env=env)
        if completed.returncode != 0:
            failed.append((seed, n_layers, completed.returncode))
            print(f'[sweep] seed={seed} L={n_layers} exited with status '
                  f'{completed.returncode}', file=sys.stderr)

    if failed:
        print(f'[sweep] {len(failed)} run(s) failed: {failed}', file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())




# '''
# =====================================================================================================================
#                                                     复合任务对称or推断任务, 查看数据复杂度对训练快慢的影响
# =====================================================================================================================
# '''
# # GPT 复合函数
# import argparse

# parser = argparse.ArgumentParser()
# parser.add_argument('--std_rate', type=float, default=0.3)
# parser.add_argument('--optim_multiplier', type=float, default=10)
# parser.add_argument('--gpu_id', type=int, default=0)
# parser.add_argument('--random_data_num', type=int, default=0)
# parser.add_argument('--L', type=int, default=2)
# args = parser.parse_args()

# std_rate = args.std_rate
# optim_multiplier = args.optim_multiplier
# random_data_num=args.random_data_num
# # import os
# # os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# target = 'composition_random'
# # std_rate=0.6
# # beta2=0.999
# # eps=1e-8
# # weight_decay=1e-2
# # optim_multiplier=10
# dir_suffix = f'diff_lr_5e-4_composition_task_34_unseen_43_unseen_random_num_{random_data_num}_diff_ini_{std_rate}_optim_multiplier_{optim_multiplier}'
# lr = 1e-5
# gpu_id = args.gpu_id
# batch_size = 300
# scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
# # scheduler = 'StepLR'
# model = 'GPT_normal_init'
# # model='GPT'
# data_size = 100000

# # xm0表示x mod seq-1 = 0为测试集，xel表示x else，即训练集
# dname = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']
# dmode = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']

# dtrain = [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0] \
#        + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1]

# ###数据集更改过了！！！！！！！！！！！！！！！！！！！

# dshow = [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1] \
#       + [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1]
# dpercent = [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1] \
#          + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9]

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))

# L=args.L


# proj_name='data_complexity_epoch_207_data_90w_warmup_normal_init_34_wrong_43_unseen_diff_ini'

# # proj_name='test'

# for seed in [2,3,4,5,6,7,8,9]:
#        # if seed==2:
#        #        L_list=[6,7]
#        # else:
#        # L_list=[2,3,4,5,6,7]
#        # L_list=[2,4,6]
#        # L_list=[2,3,4]
#        # for L in L_list:
#        # for L in [2,4,6, 8]:
#        # for L in [3,5,7]:
#        # 
#        # for L in [2,7]:
#        # for L in [3,6]:
#        # for L in [4,5]:
#        # L = 3
#        suffix = f'{L}L1H_seed{seed}'
#        # 正常训练
#        os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} python3 -m main -data_size {data_size} -seed {seed} -func {target} -lr {lr} -m {model}\
#                      -scheduler {scheduler} -ne 21 -nl {L} -nh 1 -bs {batch_size} -dir_suffix {dir_suffix} -pname {proj_name} -dk 200 -dv 200\
#                      -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                      -ple 1 -pae 3 -plae 3 -sme 20 -sr {std_rate} -rdm {random_data_num} \
#                      --optim_T_max 200 --optim_eta_min 1e-5 --optim_multiplier {optim_multiplier} --optim_total_epoch 10')
    

# '''
# =====================================================================================================================
#                                                     很小初始化找一个研究的对象
# =====================================================================================================================
# '''
# # GPT 复合函数
# # import argparse

# # parser = argparse.ArgumentParser()
# # parser.add_argument('--std_rate', type=float, default=0.6)
# # parser.add_argument('--optim_multiplier', type=float, default=10)
# # parser.add_argument('--gpu_id', type=int, default=0)
# # args = parser.parse_args()

# std_rate = 3
# optim_multiplier = 10
# # import os
# # os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# target = 'composition'
# # std_rate=0.6
# # beta2=0.999
# # eps=1e-8
# # weight_decay=1e-2
# # optim_multiplier=10
# dir_suffix = f'no_34_diff_lr_5e-4_composition_task_34_unseen_43_unseen_diff_ini_{std_rate}_optim_multiplier_{optim_multiplier}'
# lr = 1e-5
# gpu_id = 1
# batch_size = 2048
# scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
# # scheduler = 'StepLR'
# model = 'GPT_normal_init'
# # model='GPT'
# data_size = 900000

# # xm0表示x mod seq-1 = 0为测试集，xel表示x else，即训练集
# dname = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']
# dmode = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']

# dtrain = [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0] \
#        + [1, 1, 0, 1, 1, 0] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1]

# ###数据集更改过了！！！！！！！！！！！！！！！！！！！

# dshow = [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1] \
#       + [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1]
# dpercent = [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1] \
#          + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9]

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))



# proj_name='refine_lr__epoch_207_data_90w_warmup_normal_init_34_wrong_43_unseen_diff_ini'

# # proj_name='test'

# # for seed in [2,3,4,5,6,7]:
# #        if seed==2:
# #               L_list=[6,7]
# #        else:
# #               L_list=[2,3,4,5,6,7]
# #        for L in L_list:
#        # for L in [2,4,6, 8]:
#        # for L in [3,5,7]:
#        # 
#        # for L in [2,7]:
#        # for L in [3,6]:
#        # for L in [4,5]:
#        # L = 3
# L=2
# seed=1
# suffix = f'{L}L1H_seed{seed}'
# # 正常训练
# os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} python3 -m main -data_size {data_size} -seed {seed} -func {target} -lr {lr} -m {model}\
#               -scheduler {scheduler} -ne 210 -nl {L} -nh 1 -bs {batch_size} -dir_suffix {dir_suffix} -pname {proj_name} -dk 200 -dv 200\
#               -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#               -ple 1 -pae 3 -plae 3 -sme 20 -sr {std_rate} \
#               --optim_T_max 200 --optim_eta_min 1e-5 --optim_multiplier {optim_multiplier} --optim_total_epoch 10')

# '''
# =====================================================================================================================
#                                                     复合任务对称or推断任务 每个epoch保存,看看condense形成
# =====================================================================================================================
# '''
# target = 'composition'
# std_rate=0.8
# beta2=0.999
# eps=1e-8
# weight_decay=1e-2
# dir_suffix = f'diff_lr_1e-5_composition_task_34_unseen_43_unseen_diff_ini_{std_rate}_test_eps_{eps}_wd_{weight_decay}_beta2_{beta2}_for_test_diff_epoch_condense'
# lr = 1e-5
# gpu_id = 4
# batch_size = 2048
# scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
# # scheduler = 'StepLR'
# model = 'GPT_normal_init'
# # model='GPT'
# data_size = 900000

# # xm0表示x mod seq-1 = 0为测试集，xel表示x else，即训练集
# dname = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']
# dmode = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
#        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']

# dtrain = [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0] \
#        + [1, 1, 0, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1]

# ###数据集更改过了！！！！！！！！！！！！！！！！！！！

# dshow = [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1] \
#       + [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1]
# dpercent = [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1] \
#          + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9]

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))



# proj_name='refine_adam_epoch_207_data_90w_warmup_normal_init_34_wrong_43_unseen_diff_ini'

# # proj_name='test'


# for L in [2]:
# # for L in [2,4,6, 8]:
# # for L in [3,5,7]:
# # 
# # for L in [2,7]:
# # for L in [3,6]:
# # for L in [4,5]:
# # L = 3
#        suffix = f'{L}L1H_seed1'
#        # 正常训练
#        os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} python3 -m main -data_size {data_size} -seed 1 -func {target} -lr {lr} -m {model}\
#                      -scheduler {scheduler} -ne 20 -nl {L} -nh 1 -bs {batch_size} -dir_suffix {dir_suffix} -pname {proj_name} -dk 200 -dv 200\
#                      -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                      -ple 1 -pae 1 -plae 1 -sme 1 -sr {std_rate} -beta2 {beta2} -eps {eps} -wd {weight_decay} \
#                      --optim_T_max 200 --optim_eta_min 1e-5 --optim_multiplier 20 --optim_total_epoch 10')



'''
=====================================================================================================================
                                                    复合任务对称or推断任务
=====================================================================================================================
'''
# # GPT 复合函数
# import argparse

# parser = argparse.ArgumentParser()
# parser.add_argument('--std_rate', type=float, default=0.5)
# parser.add_argument('--optim_multiplier', type=float, default=20)
# parser.add_argument('--gpu_id', type=int, default=5)

# args = parser.parse_args()

# std_rate = args.std_rate
# optim_multiplier = args.optim_multiplier
# # import os
# # os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# target = 'composition'
# # std_rate=0.6
# # beta2=0.999
# # eps=1e-8
# # weight_decay=1e-2
# # optim_multiplier=10
# dir_suffix = f'diff_lr_5e-4_composition_task_34_unseen_43_unseen_diff_ini_{std_rate}_optim_multiplier_{optim_multiplier}'
# lr = 1e-5
# gpu_id = args.gpu_id
# batch_size = 2048
# scheduler = 'GradualWarmupScheduler_CosineAnnealingLR'
# # scheduler = 'StepLR'
# model = 'GPT_normal_init'
# # model='GPT'
# data_size = 900000

# # 初始化列表
# dname, dmode, dtrain, dshow, dpercent = [], [], [], [], []

# # 定义前缀和后缀
# # prefixes = ['13', '23', '43', '31', '32', '34', '12', '14', '21', '41', '24', '42', '11', '22', '33', '44']
# prefixes = [f"{i}{j}" for i in range(1, 5) for j in range(1, 5) ]
# suffixes = ['xm0', 'xel']

# # 生成dname和dmode
# for suffix in suffixes:
#     for prefix in prefixes:
#         dname.append(f"{prefix}_{suffix}")
# dmode = dname.copy()


# # 初始化train_values和show_values
# dtrain = [0 if 'xm0' in name else 1 for name in dname]
# dshow = [0] * len(dname)

# # 特定列表，需要更新的名字
# specific_list = ['43_xel', '34_xel']  # 示例，根据需要进行替换

# # 更新train_values和show_values
# for name in specific_list:
#     index = dname.index(name)
#     # 假设这里我们要将train_values对应位置改为2，show_values对应位置改为1
#     # 实际上根据需要来设置这些值
#     dtrain[index] = 0  # 示例值

# specific_list2 = ['43_xel']  # 示例，根据需要进行替换

# # 更新train_values和show_values
# for name in specific_list2:
#     index = dname.index(name)
#     # 假设这里我们要将train_values对应位置改为2，show_values对应位置改为1
#     # 实际上根据需要来设置这些值
# #     dtrain[index] = 0  # 示例值
#     dshow[index] = 1  # 示例值


# # 定义dtrain, dshow, dpercent的值
# # train_values = [0]*6 + [0]*6 + [0]*4 + [1]*6 + [1]*6 + [1]*4
# # show_values = [0, 0, 1, 0, 0, 1] + [0]*6 + [0, 0, 0, 1] + [0, 0, 1, 0, 0, 1] + [0]*6 + [0, 0, 0, 1]
# dpercent = [1]*int(len(dname)/2) + [9]*int(len(dname)/2)



# # # xm0表示x mod seq-1 = 0为测试集，xel表示x else，即训练集
# # dname = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
# #        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']
# # # dmode = ['13_xm0', '23_xm0', '43_xm0', '31_xm0', '32_xm0', '34_xm0'] + ['12_xm0', '14_xm0', '21_xm0', '41_xm0', '24_xm0', '42_xm0'] + ['11_xm0', '22_xm0', '33_xm0', '44_xm0']\
# # #        +['13_xel', '23_xel', '43_xel', '31_xel', '32_xel', '34_xel'] + ['12_xel', '14_xel', '21_xel', '41_xel', '24_xel', '42_xel'] + ['11_xel', '22_xel', '33_xel', '44_xel']

# # dmode=dname
# # dtrain = [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 0] \
# #        + [1, 1, 0, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1]

# # ###数据集更改过了！！！！！！！！！！！！！！！！！！！

# # dshow = [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1] \
# #       + [0, 0, 1, 0, 0, 1] + [0, 0, 0, 0, 0, 0] + [0, 0, 0, 1]
# # dpercent = [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1] \
# #          + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9, 9, 9] + [9, 9, 9, 9]

# dn = ' '.join(map(str, dname))
# dp = ' '.join(map(str, dpercent))
# dmode = ' '.join(map(str, dmode))
# dtrain = ' '.join(map(str, dtrain))
# dshow = ' '.join(map(str, dshow))



# proj_name='phase_diagram_for_no34_43_4head_epoch_207_data_90w_warmup_normal_init_34_wrong_43_unseen_diff_ini'

# # proj_name='test'

# for seed in [1, 2, 3]:
#        for L in [2, 3, 4, 5, 6, 7]:
#        # for L in [2,4,6, 8]:
#        # for L in [3,5,7]:
#        # 
#        # for L in [2,7]:
#        # for L in [3,6]:
#        # for L in [4,5]:
#        # L = 3
#               suffix = f'{L}L1H_seed{seed}'
#               # 正常训练
#               os.system(f'CUDA_VISIBLE_DEVICES={gpu_id} python3 -m main -data_size {data_size} -seed {seed} -func {target} -lr {lr} -m {model}\
#                             -scheduler {scheduler} -ne 210 -nl {L} -nh 4 -bs {batch_size} -dir_suffix {dir_suffix} -pname {proj_name} -dk 64 -dv 64\
#                             -dmode {dmode} -dp {dp} -dn {dn} -dtrain {dtrain} -dshow {dshow} -suffix {suffix}\
#                             -ple 1 -pae 3 -plae 3 -sme 20 -sr {std_rate} \
#                             --optim_T_max 200 --optim_eta_min 1e-5 --optim_multiplier {optim_multiplier} --optim_total_epoch 10')

