import numpy as np
from abc import ABC, abstractmethod

from comab.environment import greedy_allocation


class CoMABAlgo(ABC):
    """Abstract base class for CoMAB algorithms.

    Stores the problem dimensions and exposes ``n``, a length-``K`` integer
    vector that concrete algorithms overwrite in :meth:`update`.
    """

    def __init__(self, K, N, p, **kwargs):
        """Record the problem dimensions and start from an all-zero ``n``.

        Args:
            K: number of arms (auctions).
            N: number of players.
            p: number of competitors per arm; must be a non-empty iterable
                because its maximum is taken.
            **kwargs: accepted and ignored by this base class.
        """
        self._K, self._N, self._p = K, N, p
        self._P = max(p)  # largest entry of p
        # One integer slot per arm, describing the next play; all zeros
        # until a subclass fills it in update().
        self.n = np.zeros(K, dtype=int)

    @abstractmethod
    def update(self, arms_with_observation, observed_gains, observed_costs, t):
        """Consume the latest feedback and set ``self.n`` for the next play.

        Concrete subclasses must override this method; the base
        implementation does nothing and returns ``None``.
        """


class IndependentRandomExploration(CoMABAlgo):
    """Baseline that ignores all feedback and plays one random arm per round."""

    def __init__(self, K, N, p, **kwargs):
        """Forward every argument unchanged to the base-class constructor."""
        super().__init__(K, N, p, **kwargs)

    def update(self, arms_with_observation, observed_gains, observed_costs, t):
        """Redraw ``self.n`` at random; none of the arguments are used.

        After the call ``self.n`` is zero everywhere except at one uniformly
        chosen arm, which holds a uniformly drawn positive integer.
        """
        chosen_arm = np.random.randint(self._K)
        # Clear the previous play in place before writing the new one.
        self.n[:] = 0
        # NOTE(review): the upper bound of randint is exclusive, so the value
        # N itself is never drawn and N == 1 raises ValueError -- confirm
        # whether the intended range is [1, N] rather than [1, N - 1].
        drawn_count = np.random.randint(1, self._N)
        self.n[chosen_arm] = drawn_count


class Greedy(CoMABAlgo):
    """Algorithm that plays the greedy allocation for the current estimates.

    Each round the feedback is handed to a reward estimator, and the next
    play is whatever ``greedy_allocation`` returns for its estimates.
    """

    def __init__(self, K, N, p, reward_estimator, **kwargs):
        """Initialise the base class and keep a reference to the estimator.

        Args:
            K: number of arms (auctions).
            N: number of players.
            p: number of competitors per arm.
            reward_estimator: object providing ``update_estimator(...)`` and
                ``r_hat()``, as called in :meth:`update`.
            **kwargs: forwarded to the base-class constructor.
        """
        super().__init__(K, N, p, **kwargs)
        self._reward_estimator = reward_estimator

    def update(self, arms_with_observation, observed_gains, observed_costs, t):
        """Update the estimator with the last round, then re-plan ``self.n``."""
        estimator = self._reward_estimator
        # The estimator is told which play (the current self.n) produced
        # the feedback it is being given.
        estimator.update_estimator(
            self.n, arms_with_observation, observed_gains, observed_costs, t
        )
        # Replace the play with the greedy allocation for the fresh estimates.
        self.n = greedy_allocation(estimator.r_hat(), self._p, self._N)
