#!/bin/bash
#
# Launches vit/train_lot.py through torchrun on a single node. The job is
# started in the background under nohup, with stdout and stderr redirected
# to a per-experiment log file.
#
# All paths used here (vit/train_lot.py, ../model/*.npz, logs/, ckpt/) are
# relative, so the script has to be started from the directory those paths
# are relative to.

# Stop on the first failing command, on any unset variable, and on failures
# inside pipelines, so a broken setup never reaches the launch step.
set -euo pipefail

# ---- Device / rendezvous settings (exported for the launched processes) ----
export CUDA_VISIBLE_DEVICES='4,5,6,7'
# Keep in sync with the number of entries in CUDA_VISIBLE_DEVICES.
export NUM_GPUS=4
export MASTER_ADDR='localhost'
export WORLD_SIZE="${NUM_GPUS}"
export MASTER_PORT=6012

# ---- Experiment configuration ----
prefix='LoT_ViT_CIFAR'
teacher_network='ViT-L_16'
teacher_pretrain='../model/ViT-L_16.npz'
student_network='ViT-L_16'
student_pretrain='../model/ViT-L_16.npz'
dataset='cifar100'
shuffle=1
lr=0.02
alpha=1
seed=1
per_gpu_batch_size=128   # forwarded as --train_batch_size
gradient_accumulation_steps=8
num_steps=5000
warmup_steps=200

# The experiment name encodes the main hyper-parameters; it is used for the
# --exp_name argument, the checkpoint path and the log file name.
# NOTE: "aplha" is a misspelling of "alpha". It is kept on purpose so that
# generated file names stay identical to those of earlier runs.
experiment_name="${teacher_network}_${student_network}_${dataset}"
experiment_name+="_lr${lr}_numsteps${num_steps}_warmupsteps${warmup_steps}"
experiment_name+="_numgpus${NUM_GPUS}_port${MASTER_PORT}_aplha${alpha}"
experiment_name+="_per_gpu_batch_size${per_gpu_batch_size}"
experiment_name+="_gradient_accumulation_steps${gradient_accumulation_steps}"
experiment_name+="_seed${seed}"

# ---- Output locations ----
log_folder_name="logs/${prefix}_${dataset}"
ckpt_folder_name="ckpt/${prefix}_${dataset}"
# mkdir -p is a no-op for directories that already exist, so no explicit
# existence test is required; with `set -e` a failure here aborts the script.
mkdir -p -- "${log_folder_name}" "${ckpt_folder_name}"

save="${ckpt_folder_name}/${experiment_name}"
log_filename="${log_folder_name}/${experiment_name}.log"

# ---- Launch ----
# The command is assembled as an array so every value is passed as exactly
# one argument, regardless of its content.
launch_cmd=(
    torchrun
    --nproc_per_node "${NUM_GPUS}"
    --nnodes 1
    --node_rank 0
    --master_addr "${MASTER_ADDR}"
    --master_port "${MASTER_PORT}"
    vit/train_lot.py
    --exp_name "${experiment_name}"
    --teacher_network "${teacher_network}"
    --teacher_pretrain "${teacher_pretrain}"
    --student_network "${student_network}"
    --student_pretrain "${student_pretrain}"
    --dataset "${dataset}"
    --seed "${seed}"
    --learning_rate "${lr}"
    --alpha "${alpha}"
    --save "${save}"
    --num_steps "${num_steps}"
    --warmup_steps "${warmup_steps}"
    --train_batch_size "${per_gpu_batch_size}"
    --gradient_accumulation_steps "${gradient_accumulation_steps}"
    --shuffle "${shuffle}"
)

# Run detached from the terminal; both output streams go to the log file.
nohup "${launch_cmd[@]}" > "${log_filename}" 2>&1 &
train_pid=$!

# Report the handle needed to monitor or stop the background job.
printf 'Started training in background (PID %s); log: %s\n' \
    "${train_pid}" "${log_filename}"
