# set -x
#######################################
# Send the default kill signal to every process lsof reports on a port,
# then pause for five seconds.
# Arguments: $1 - port number (passed to lsof as ":PORT")
# Outputs:   progress messages on stdout, failure messages on stderr
# Returns:   0 if nothing was found on the port or every kill succeeded;
#            1 if the port argument is missing or any kill failed
#######################################
function killp(){
    local port=$1
    local pid pid_list one_pid
    local rc=0
    if [ -z "$port" ]; then
        echo "killp: missing port argument" >&2
        return 1
    fi
    # lsof -t prints bare PIDs, one per line; more than one process may match.
    pid=$(lsof -ti ":$port")
    if [ -n "$pid" ]; then
        # Flatten the PID list onto one line for the log messages.
        pid_list=$(printf '%s' "$pid" | tr '\n' ' ')
        echo "Found process with PID $pid_list on port $port. Killing process..."
        # Kill each PID separately so one failure does not hide the others.
        while IFS= read -r one_pid; do
            kill "$one_pid" || rc=1
        done <<EOF
$pid
EOF
        if [ "$rc" -eq 0 ]; then
            echo "Process killed."
        else
            echo "Failed to kill PID $pid_list on port $port." >&2
        fi
    else
        echo "No process found on port $port."
    fi
    # Fixed pause kept from the original (it runs whether or not anything was
    # killed); presumably it gives the port time to be released - TODO confirm.
    sleep 5
    return "$rc"
}
#######################################
# Type a command into a named tmux session, creating the session (detached)
# first when it does not exist yet.
# Before the command itself, three setup lines are typed into the session:
# "conda activate llora", "CUDA_HOME=/usr/local/cuda-11.8", "cd /xx/analysis".
# Arguments: $1 - tmux session name
#            $2 - command line to type into the session
# Outputs:   one status line on stdout saying whether the session existed
# Returns:   1 if the session name is missing or the session cannot be
#            created; otherwise the status of the last send-keys call
#######################################
function tmux_send(){
    local tmux_name=$1
    local comm=$2
    if [ -z "$tmux_name" ]; then
        echo "tmux_send: missing session name" >&2
        return 1
    fi
    # Ask tmux directly whether the session exists. Grepping `tmux ls` matched
    # any substring of the listing (other session names, window counts,
    # creation dates), so a short name such as "2" looked present whenever any
    # session existed and was then never created. The "=" prefix requests an
    # exact session-name match.
    if tmux has-session -t "=$tmux_name" 2>/dev/null; then
        echo "there is already a tmux named $tmux_name"
    else
        echo "new a tmux named $tmux_name"
        tmux new -d -s "$tmux_name" || return 1
    fi
    # The same setup lines are typed whether or not the session was just created.
    tmux send-keys -t "$tmux_name" "conda activate llora" C-m
    # NOTE(review): this types a plain assignment (no `export`); confirm that
    # CUDA_HOME is already exported in the session if child processes need it.
    tmux send-keys -t "$tmux_name" "CUDA_HOME=/usr/local/cuda-11.8" C-m
    tmux send-keys -t "$tmux_name" "cd /xx/analysis" C-m
    tmux send-keys -t "$tmux_name" "$comm" C-m
}

#######################################
# Launch the single-expert evaluation with TinyLlama as the small base model.
# Calls killp for ports 11454 and 11455, then sends one
# "<dataset>_eval_multi.py" command per model to tmux sessions "2" (large
# base), "3" (small base) and "4" (dataset-specific expert).
# Arguments: $1 - dataset: gsm | truthfulqa | mmlu | cnn | triviaqa
#            $2 - downa (only used by the disabled gather_socket.py command)
#            $3 - upa   (only used by the disabled gather_socket.py command)
#            $4 - value for --max-tokens
#            $5 - value for --max-num-seqs
# Globals:   base_large_model, base_small_model, expert_model, systemp,
#            model_list, max_token, max_seqs, temperature, topp, icl, upa,
#            downa are written and deliberately not made local, in case code
#            outside this function reads them.
# Returns:   1 (before killing or launching anything) on a missing argument
#            or unknown dataset; otherwise the status of the last tmux_send
#######################################
function single_tiny_expert(){
    local dataset=$1
    local chosen_expert
    local chosen_systemp=1
    local eval_flags
    local launch
    if [ -z "$dataset" ] || [ -z "$4" ] || [ -z "$5" ]; then
        echo "usage: single_tiny_expert <dataset> <downa> <upa> <max_tokens> <max_seqs>" >&2
        return 1
    fi
    # Resolve the expert before touching anything: an unknown dataset used to
    # fall through with expert_model unset or left over from an earlier call.
    case "$dataset" in
        gsm)
            chosen_expert="/xx/models/tinyllama2-gsm-7b"
            ;;
        truthfulqa)
            chosen_expert="habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-4epochs-oasst1-top1-instruct-V1"
            chosen_systemp=0
            ;;
        mmlu)
            chosen_expert="/data/llama-1B-tmp"
            ;;
        cnn)
            chosen_expert="/xx/models/tinyllama2-cnn-7b"
            ;;
        triviaqa)
            chosen_expert="/xx/models/tinyllama2-triviaqa-7b"
            ;;
        *)
            echo "single_tiny_expert: unknown dataset '$dataset' (expected gsm, truthfulqa, mmlu, cnn or triviaqa)" >&2
            return 1
            ;;
    esac

    killp 11454
    killp 11455
    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" # alternative: "meta-llama/Llama-2-7b-hf"
    systemp=$chosen_systemp
    expert_model=$chosen_expert
    # Manual overrides previously tried for expert_model (beyond the choices above):
    #   "xxx/llama-1.1B-fft" "/xx/models/llama2-cnn-7b" "/xx/models/llama2-triviaqa-7b"
    #   "codellama/CodeLlama-7b-Python-hf" "/xx/alt/models/llama2-gsm-7b"
    #   "meta-llama/Llama-2-7b-chat-hf" "/xx/models/llama2-mmlu-7b" "xxx/llama2_7b_mmlu"
    #   "/xx/models/llama2-math-7b"
    #   "/data/.cache/huggingface/hub/models--xxx--llama2_7b_mmlu/snapshots/28baabb52dee6492484304339e5a154cee12ee41"
    model_list="[$base_large_model,$base_small_model,$expert_model]"
    max_token=$4 # e.g. 512
    max_seqs=$5
    temperature=0.05  # code: 0.9, other: 0.05
    topp=1.0          # code: 0.95, other: 1.0
    icl=0
    upa=$3
    downa=$2
    # Disabled socket launcher, kept for reference:
    # tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket.py --model-str-list $model_list --alpha 1.0  --upa $upa --downa $downa"

    eval_flags="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature $temperature --top-p $topp --system-prompt-type $systemp"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for launch in \
        "2 $base_large_model" \
        "3 $base_small_model" \
        "4 $expert_model"; do
        tmux_send "${launch%% *}" "echo tiny $dataset & CUDA_VISIBLE_DEVICES=0 python ${dataset}_eval_multi.py --model-name ${launch#* } $eval_flags"
    done
}

#######################################
# Launch the single-expert evaluation with Llama-2-7b as the small base model.
# Calls killp for ports 11454 and 11455, sends the gather_socket.py command to
# tmux session "socket", then sends one "<dataset>_eval_multi.py" command per
# model to sessions "2" (large base), "3" (small base) and "4" (expert).
# Arguments: $1 - dataset: gsm | truthfulqa | mmlu | cnn | triviaqa
#            $2 - value for gather_socket.py --downa
#            $3 - value for gather_socket.py --upa
#            $4 - value for --max-tokens
#            $5 - value for --max-num-seqs
# Globals:   base_large_model, base_small_model, expert_model, model_list,
#            max_token, max_seqs, temperature, topp, icl, upa, downa are
#            written and deliberately not made local, in case code outside
#            this function reads them.
# Returns:   1 (before killing or launching anything) on a missing argument
#            or unknown dataset; otherwise the status of the last tmux_send
#######################################
function single_expert(){
    local dataset=$1
    local chosen_expert
    local eval_flags
    local launch
    if [ -z "$dataset" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ] || [ -z "$5" ]; then
        echo "usage: single_expert <dataset> <downa> <upa> <max_tokens> <max_seqs>" >&2
        return 1
    fi
    # Resolve the expert before touching anything: an unknown dataset used to
    # fall through with expert_model unset or left over from an earlier call.
    case "$dataset" in
        gsm)
            chosen_expert="/xx/alt/models/llama2-gsm-7b"
            ;;
        truthfulqa)
            chosen_expert="meta-llama/Llama-2-7b-chat-hf"
            ;;
        mmlu)
            chosen_expert="/data/.cache/huggingface/hub/models--xxx--llama2_7b_mmlu/snapshots/28baabb52dee6492484304339e5a154cee12ee41"
            ;;
        cnn)
            chosen_expert="/xx/models/llama2-cnn3-7b"
            ;;
        triviaqa)
            chosen_expert="/xx/models/llama2-triviaqa-7b"
            ;;
        *)
            echo "single_expert: unknown dataset '$dataset' (expected gsm, truthfulqa, mmlu, cnn or triviaqa)" >&2
            return 1
            ;;
    esac

    killp 11454
    killp 11455
    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="meta-llama/Llama-2-7b-hf"
    expert_model=$chosen_expert
    # Manual overrides previously tried for expert_model (beyond the choices above):
    #   "/xx/models/llama2-mmlu2-7b" "/xx/models/llama2-mmlu-7b" "xxx/llama2_7b_mmlu"
    #   "codellama/CodeLlama-7b-Python-hf" "/xx/models/llama2-math-7b"
    model_list="[$base_large_model,$base_small_model,$expert_model]"
    max_token=$4 # e.g. 512
    max_seqs=$5
    temperature=0.05  # code: 0.9, other: 0.05
    topp=1.0          # code: 0.95, other: 1.0
    icl=0
    upa=$3
    downa=$2
    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket.py --model-str-list $model_list --alpha 1.0  --upa $upa --downa $downa"

    eval_flags="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature $temperature --top-p $topp --icl $icl"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for launch in \
        "2 $base_large_model" \
        "3 $base_small_model" \
        "4 $expert_model"; do
        tmux_send "${launch%% *}" "echo $1 $2 $3 $4 $5 & CUDA_VISIBLE_DEVICES=0 python ${dataset}_eval_multi.py --model-name ${launch#* } $eval_flags"
    done
}

# Launch the two-expert run (Llama-2 base models, gsm + triviaqa experts).
# Calls killp for ports 11454 and 11455, sends the gather_socket_two.py
# command to tmux session "socket", then sends one "<dataset>_eval_multi.py"
# command per model to sessions "1".."4".
#   $1 - dataset name (prefix of the eval script, also echoed as "7b <dataset>")
#   $2 - value for --max-tokens
#   $3 - value for --max-num-seqs
# The configuration variables assigned below are not declared local.
two_expert() {
    local dataset=$1
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="meta-llama/Llama-2-7b-hf"
    # Alternative previously used: "/data/outs/logits-tuning/llama_merged_ab_2024-05-15"
    expert_model1="/xx/alt/models/llama2-gsm-7b"
    # Alternatives previously used: "meta-llama/Llama-2-7b-chat-hf",
    # "/data/outs/logits-tuning/llama_merged_cd_2024-05-15", "/xx/models/llama2-cnn3-7b",
    # "/xx/models/llama2-mmlu-7b", and the cached llama2_7b_mmlu snapshot.
    expert_model2="/xx/models/llama2-triviaqa-7b"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2]"
    max_token=$2
    max_seqs=$3
    icl=0

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_two.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2"; do
        tmux_send "${entry%% *}" "echo 7b $dataset & CUDA_VISIBLE_DEVICES=0 python ${dataset}_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}

# Launch the three-expert run using mmlu_eval_multi.py.
# Calls killp for ports 11454 and 11455, sends the gather_socket_three.py
# command to tmux session "socket", then sends one eval command per model to
# sessions "1".."5" (large base, small base, experts 1-3).
# Takes no arguments; --max-tokens is 40 and --max-num-seqs is 130.
# The configuration variables assigned below are not declared local.
three_expert() {
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="meta-llama/Llama-2-7b-hf"
    expert_model1="/xx/alt/models/llama2-gsm-7b"
    expert_model2="meta-llama/Llama-2-7b-chat-hf"
    # Alternative previously used: "/xx/models/llama2-mmlu-7b"
    expert_model3="/data/.cache/huggingface/hub/models--xxx--llama2_7b_mmlu/snapshots/28baabb52dee6492484304339e5a154cee12ee41"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2,$expert_model3]"
    max_token=40
    max_seqs=130

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_three.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2" \
        "5 $expert_model3"; do
        tmux_send "${entry%% *}" "CUDA_VISIBLE_DEVICES=0 python mmlu_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}

# Launch the three-expert run using gsm_eval_multi.py.
# Calls killp for ports 11454 and 11455, sends the gather_socket_three.py
# command to tmux session "socket", then sends one eval command per model to
# sessions "1".."5" (large base, small base, experts 1-3).
# Takes no arguments; --max-tokens is 256 and --max-num-seqs is 130.
# The configuration variables assigned below are not declared local.
three_expert_gsm() {
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="meta-llama/Llama-2-7b-hf"
    expert_model1="/xx/alt/models/llama2-gsm-7b"
    expert_model2="meta-llama/Llama-2-7b-chat-hf"
    # Alternative previously used: "/xx/models/llama2-mmlu-7b"
    expert_model3="/data/.cache/huggingface/hub/models--xxx--llama2_7b_mmlu/snapshots/28baabb52dee6492484304339e5a154cee12ee41"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2,$expert_model3]"
    max_token=256
    max_seqs=130

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_three.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2" \
        "5 $expert_model3"; do
        tmux_send "${entry%% *}" "CUDA_VISIBLE_DEVICES=0 python gsm_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}


# Launch the three-expert run using truthfulqa_eval_multi.py.
# Calls killp for ports 11454 and 11455, sends the gather_socket_three.py
# command to tmux session "socket", then sends one eval command per model to
# sessions "1".."5" (large base, small base, experts 1-3).
# Takes no arguments; --max-tokens is 40 and --max-num-seqs is 130. Unlike the
# sibling launchers, no --top-p flag is passed to the eval script here.
# The configuration variables assigned below are not declared local.
three_expert_truthfulqa() {
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="meta-llama/Llama-2-7b-hf"
    expert_model1="/xx/alt/models/llama2-gsm-7b"
    expert_model2="meta-llama/Llama-2-7b-chat-hf"
    # Alternative previously used: "/xx/models/llama2-mmlu-7b"
    expert_model3="/data/.cache/huggingface/hub/models--xxx--llama2_7b_mmlu/snapshots/28baabb52dee6492484304339e5a154cee12ee41"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2,$expert_model3]"
    max_token=40
    max_seqs=130

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_three.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2" \
        "5 $expert_model3"; do
        tmux_send "${entry%% *}" "CUDA_VISIBLE_DEVICES=0 python truthfulqa_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}


# Launch a three-expert run with a cnn-dailymail model as the third expert.
# Calls killp for ports 11454 and 11455, sends the gather_socket_three.py
# command to tmux session "socket", then sends one eval command per model to
# sessions "1".."5" (large base, small base, experts 1-3).
# Takes no arguments; --max-tokens is 10 and --max-num-seqs is 50.
# NOTE(review): despite the function name, the script launched is
# truthfulqa_eval_multi.py - confirm whether triviaqa_eval_multi.py was meant.
# The configuration variables assigned below are not declared local.
three_expert_triviaqa() {
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    # Alternative previously used: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
    base_small_model="meta-llama/Llama-2-7b-hf"
    expert_model1="/xx/alt/models/llama2-gsm-7b"
    expert_model2="meta-llama/Llama-2-7b-chat-hf"
    # Alternatives previously used: "/xx/models/llama2-triviaqa-7b", "/xx/models/llama2-mmlu-7b"
    expert_model3="squaresnapper/llama-2-7b-cnn-dailymail"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2,$expert_model3]"
    max_token=10
    max_seqs=50

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_three.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2" \
        "5 $expert_model3"; do
        tmux_send "${entry%% *}" "CUDA_VISIBLE_DEVICES=0 python truthfulqa_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}

# Launch the two-expert TinyLlama run (gsm expert + oasst1 instruct expert).
# Calls killp for ports 11454 and 11455, sends the gather_socket_two.py
# command to tmux session "socket", then sends one "<dataset>_eval_multi.py"
# command per model: large base -> session "5", small base -> "2",
# expert 1 -> "3", expert 2 -> "4".
#   $1 - dataset name (prefix of the eval script, echoed as "tinytru <dataset>")
#   $2 - value for --max-tokens
#   $3 - value for --max-num-seqs
# A third expert ("squaresnapper/llama-2-7b-cnn-dailymail" with
# cnn_eval_multi.py on session "5") was commented out in the original.
# The configuration variables assigned below are not declared local.
two_tiny_expert_tru() {
    local dataset=$1
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
    expert_model1="/xx/models/tinyllama2-gsm-7b"
    # Alternative previously used: "/xx/models/tinyllama2-cnn-7b"
    expert_model2="habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-4epochs-oasst1-top1-instruct-V1"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2]"
    max_token=$2
    max_seqs=$3

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_two.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0 --system-prompt-type 0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "5 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2"; do
        tmux_send "${entry%% *}" "echo tinytru $dataset & CUDA_VISIBLE_DEVICES=0 python ${dataset}_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}

# Launch the two-expert TinyLlama run (gsm expert + cnn expert).
# Calls killp for ports 11454 and 11455, sends the gather_socket_two.py
# command to tmux session "socket", then sends one "<dataset>_eval_multi.py"
# command per model: large base -> session "6", small base -> "2",
# expert 1 -> "3", expert 2 -> "4".
#   $1 - dataset name (prefix of the eval script, echoed as "tinycnn <dataset>")
#   $2 - value for --max-tokens
#   $3 - value for --max-num-seqs
# A third expert ("squaresnapper/llama-2-7b-cnn-dailymail" with
# cnn_eval_multi.py on session "5") was commented out in the original.
# The configuration variables assigned below are not declared local.
two_tiny_expert_cnn() {
    local dataset=$1
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
    expert_model1="/xx/models/tinyllama2-gsm-7b"
    # Alternative previously used:
    # "habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-4epochs-oasst1-top1-instruct-V1"
    expert_model2="/xx/models/tinyllama2-cnn-7b"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2]"
    max_token=$2
    max_seqs=$3

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_two.py --model-str-list $model_list --alpha 1.0"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0 --system-prompt-type 0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "6 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2"; do
        tmux_send "${entry%% *}" "echo tinycnn $dataset & CUDA_VISIBLE_DEVICES=0 python ${dataset}_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}


# Launch the four-expert Llama-2 run.
# Calls killp for ports 11454 and 11455, sends the gather_socket_four.py
# command to tmux session "socket", then sends one eval command per model to
# sessions "1".."6" (large base, small base, experts 1-4).
#   $1 - value for gather_socket_four.py --fuse-type (also echoed in each session)
# --max-tokens is 40 and --max-num-seqs is 100.
# NOTE(review): despite the "_gsm" suffix, the script launched is
# mmlu_eval_multi.py - confirm this is intended.
# The configuration variables assigned below are not declared local.
four_expert_gsm() {
    local fuse_kind=$1
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="meta-llama/Llama-2-7b-hf"
    expert_model1="/xx/alt/models/llama2-gsm-7b"
    expert_model2="meta-llama/Llama-2-7b-chat-hf"
    # Alternatives previously used: "/xx/models/llama2-mmlu-7b" and the cached
    # llama2_7b_mmlu snapshot under /data/.cache/huggingface/hub.
    expert_model3="/xx/models/llama2-triviaqa-7b"
    expert_model4="/xx/models/llama2-cnn3-7b"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2,$expert_model3,$expert_model4]"
    max_token=40
    max_seqs=100

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_four.py --model-str-list $model_list --alpha 1.0 --fuse-type $fuse_kind"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2" \
        "5 $expert_model3" \
        "6 $expert_model4"; do
        tmux_send "${entry%% *}" "echo $fuse_kind & CUDA_VISIBLE_DEVICES=0 python mmlu_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}
# Launch the four-expert TinyLlama run.
# Calls killp for ports 11454 and 11455, sends the gather_socket_four.py
# command to tmux session "socket", then sends one eval command per model to
# sessions "1".."6" (large base, small base, experts 1-4).
#   $1 - value for gather_socket_four.py --fuse-type (also echoed in each session)
# --max-tokens is 256 and --max-num-seqs is 50.
# NOTE(review): despite the "_gsm" suffix, the script launched is
# cnn_eval_multi.py - confirm this is intended.
# The configuration variables assigned below are not declared local.
tiny_four_expert_gsm() {
    local fuse_kind=$1
    local eval_opts
    local entry

    killp 11454
    killp 11455

    base_large_model="meta-llama/Llama-2-13b-hf"
    base_small_model="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
    expert_model1="/xx/models/tinyllama2-gsm-7b"
    expert_model2="habanoz/TinyLlama-1.1B-intermediate-step-715k-1.5T-lr-5-4epochs-oasst1-top1-instruct-V1"
    # Alternatives previously used: "/xx/models/llama2-mmlu-7b" and the cached
    # llama2_7b_mmlu snapshot under /data/.cache/huggingface/hub.
    expert_model3="/xx/models/tinyllama2-triviaqa-7b"
    expert_model4="/xx/models/tinyllama2-cnn-7b"
    model_list="[$base_large_model,$base_small_model,$expert_model1,$expert_model2,$expert_model3,$expert_model4]"
    max_token=256
    max_seqs=50

    tmux_send "socket" "CUDA_VISIBLE_DEVICES=0 python gather_socket_four.py --model-str-list $model_list --alpha 1.0 --fuse-type $fuse_kind"

    eval_opts="--tensor-parallel-size 1 --max-num-seqs $max_seqs --max-tokens $max_token --batch-size 2048 --temperature 0.05 --top-p 1.0"
    # Each entry is "<tmux session> <model>"; split on the first space.
    for entry in \
        "1 $base_large_model" \
        "2 $base_small_model" \
        "3 $expert_model1" \
        "4 $expert_model2" \
        "5 $expert_model3" \
        "6 $expert_model4"; do
        tmux_send "${entry%% *}" "echo $fuse_kind & CUDA_VISIBLE_DEVICES=0 python cnn_eval_multi.py --model-name ${entry#* } $eval_opts"
    done
}
