import os
import sys
import yaml
import tiktoken

# API key associated with the LiteLLM endpoint below (the code that sends it
# is not visible in this module — confirm against callers).
# NOTE(review): the credential is hard-coded in source; consider whether it
# should come from the environment or a secrets store instead.
LITELLM_API_KEY = "easy2hard"

# Base URL of the LiteLLM endpoint: plain HTTP, address 0.0.0.0, port 4000.
# NOTE(review): 0.0.0.0 is normally a listen/bind address; presumably the
# server runs on the same host as the client — confirm.
LITELLM_API_BASE = "http://0.0.0.0:4000"

# Short aliases of the models in the "test" tier.
TEST_MODEL_NAMES = [
    "gpt-35",
    "claude-haiku",
    "gemini-1.0",
    "llama3-8b",
    "qwen-7b",
    "mistral-7b",
]

# Short aliases of the models in the "production" tier.
PRODUCTION_MODEL_NAMES = [
    "gpt-4",
    "claude-opus",
    "gemini-1.5",
    "llama3-70b",
    "qwen-110b",
    "mixtral-8x22b",
]

# Default model selection. This is the very same list object as
# TEST_MODEL_NAMES (not a copy), so in-place mutation of one is visible
# through the other.
DEFAULT_MODEL_NAMES = TEST_MODEL_NAMES

# Backward-compatible alias: the original constant name was misspelled.
# It is kept, bound to the same list object, so existing imports keep working.
DEFALUT_MODEL_NAMES = DEFAULT_MODEL_NAMES

# Maps each short model alias to its fully qualified model string
# (with a provider prefix such as "azure/" or "ollama_chat/" where the entry
# has one). Built tier by tier; insertion order is test tier first, then
# production tier.
MODEL_NAME_DICT = {}

# Test-tier aliases.
MODEL_NAME_DICT.update(
    {
        "gpt-35": "azure/gpt-35-turbo-1106",
        "claude-haiku": "claude-3-haiku-20240307",
        "gemini-1.0": "gemini/gemini-1.0-pro",
        "llama3-8b": "ollama_chat/llama3:instruct",
        "qwen-7b": "ollama_chat/qwen:7b-chat",
        "mistral-7b": "ollama_chat/mistral:7b-instruct",
    }
)

# Production-tier aliases.
MODEL_NAME_DICT.update(
    {
        "gpt-4": "azure/gpt-4-0409",
        "claude-opus": "claude-3-opus-20240229",
        "gemini-1.5": "gemini/gemini-1.5-pro-latest",
        "llama3-70b": "ollama_chat/llama3:70b-instruct-q5_K_M",
        "qwen-110b": "ollama_chat/qwen:110b-chat-v1.5-q5_K_M",
        "mixtral-8x22b": "ollama_chat/mixtral:8x22b-instruct-v0.1-q5_K_M",
    }
)


# Parsed contents of vps.yaml, loaded once at import time. The path is
# anchored on this module's own location, so it does not depend on the
# current working directory.
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes, rather than being left open until garbage collection.
with open(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "vps.yaml")
) as _vps_config_file:
    VPS_CONFIGS = yaml.safe_load(_vps_config_file)
# Do not leave the (now closed) file object in the module namespace.
del _vps_config_file


# Upper bound on concurrent calls (presumably in-flight model requests; the
# consumer is not visible in this module — confirm against callers).
MAX_CONCURRENT_CALLS = 32

# tiktoken's "cl100k_base" encoding, looked up once at import time and shared.
# NOTE(review): presumably used for token counting — confirm against callers.
OPENAI_TOKENIZER = tiktoken.get_encoding("cl100k_base")


# Safety settings for Gemini: one {"category", "threshold"} dict per harm
# category listed below, every one with threshold "BLOCK_NONE". Each entry is
# its own dict object, in the order the categories are listed.
GEMINI_SAFETY_SETTING = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Backward-compatible alias: the original constant name was misspelled
# ("SAFTY"). It is kept, bound to the same list, so existing imports work.
GEMINI_SAFTY_SETTING = GEMINI_SAFETY_SETTING

# Per-model pair of limits for the Ollama-served aliases; going by the name,
# presumably (requests per minute, tokens per minute) — confirm against the
# consumer. The first group shares (80, 40000), the second group (8, 4000).
OLLAMA_RPM_TPM_DICT = {
    **dict.fromkeys(("llama3-8b", "qwen-7b", "mistral-7b"), (80, 40000)),
    **dict.fromkeys(("llama3-70b", "qwen-110b", "mixtral-8x22b"), (8, 4000)),
}


# Timeout for an Ollama model pull (the unit is not visible in this module;
# presumably seconds — confirm against the consumer).
OLLAMA_PULL_TIMEOUT = 120

# Backward-compatible alias: the original constant name was misspelled
# ("TIMNEOUT"). It is kept so existing imports keep working.
OLLAMA_PULL_TIMNEOUT = OLLAMA_PULL_TIMEOUT

# Retry limit for Ollama operations (which operations are retried is not
# visible in this module — confirm against the consumer).
OLLAMA_MAX_RETRY = 5

# Fixed prompt text; going by the name, presumably sent to the models during
# a test run — confirm against callers.
TEST_PROMPT = "Hello, how are you?"

# Number of calls; going by the name, presumably how many test requests are
# issued — confirm against callers.
TEST_NUM_CALLS = 32
