import numpy as np

def offline_mabs_ltc_ctl(data, K, epsilon, alpha, delta, c, k_, LTC=True):
    """Pick the arm with the highest penalized empirical mean from offline data.

    For every arm ``a`` in ``range(K)`` the estimate is
    ``empirical_mean[a] - penalty[a]``:

    * If the arm has fewer than ``3 * log(1 / delta) / alpha`` samples, its
      empirical mean is set to 0 and its penalty to 1.
    * Otherwise the empirical mean is ``np.mean(data[a])`` and the penalty is
      ``c * bias + c * gamma`` with
      ``gamma = ((1 / epsilon) * sqrt(log(2 * K / delta) / N[a])) ** (1 - 1 / k_)``
      and ``bias = (alpha / epsilon) ** (1 - 1 / k_)`` when ``LTC`` is true,
      or ``alpha ** (1 - 1 / k_)`` when it is false.

    Args:
        data: Indexable by arm ``0 .. K - 1``; each ``data[a]`` must support
            ``len()`` and ``np.mean()``.
        K: Number of arms.
        epsilon: Scale used in ``gamma`` and, when ``LTC`` is true, in the
            bias term.
        alpha: Parameter of the minimum-sample threshold and of the bias term.
        delta: Parameter of the logarithmic terms (threshold and ``gamma``).
        c: Multiplier applied to both penalty terms.
        k_: Sets the exponent ``1 - 1 / k_`` of both penalty terms.
        LTC: Selects which of the two bias terms above is used.

    Returns:
        A tuple ``(best_arm, final_estimates)``: the index returned by
        ``np.argmax`` over the penalized estimates, and the array of those
        estimates (length ``K``).
    """
    # Number of offline samples available for each arm.
    N = np.array([len(data[a]) for a in range(K)], dtype=int)
    empirical_means = np.zeros(K)
    penalties = np.zeros(K)

    # Arms with fewer samples than this get the fixed fallback estimate.
    min_samples = 3 * np.log(1 / delta) / alpha

    for a in range(K):
        if N[a] < min_samples:
            empirical_means[a] = 0
            penalties[a] = 1
            continue

        empirical_means[a] = np.mean(data[a])
        # Use the k_ argument for every exponent; the previous code read a
        # module-level `k` for the bias term and ignored the caller's value.
        exponent = 1 - 1 / k_
        gamma = ((1 / epsilon) * np.sqrt(np.log(2 * K / delta) / N[a])) ** exponent
        bias_base = alpha / epsilon if LTC else alpha
        penalties[a] = c * bias_base ** exponent + c * gamma

    final_estimates = empirical_means - penalties

    # np.argmax returns the first index on ties.
    best_arm = np.argmax(final_estimates)
    return best_arm, final_estimates


# Module-level parameter values. No call that uses them is visible here;
# presumably they are meant as arguments for offline_mabs_ltc_ctl — confirm.
K = 10  # number of arms
epsilon = 0.5  # scale factor appearing as 1 / epsilon in the penalty terms
alpha = 0.1  # used in the minimum-sample threshold and the bias term
delta = 0.05  # used inside the logarithmic terms
c = 1.0  # multiplier applied to both penalty terms
k = 2  # penalty exponent is 1 - 1 / k
