from abc import ABC, abstractmethod

import numpy as np


class AbstractAgent(ABC):
    """Base class for agents that interact with an environment step by step.

    The environment object must provide ``action(action)``,
    ``weak_regret(action)`` and ``strong_regret(action)``. Subclasses
    implement ``sample_action``, ``learn`` and ``reset_learning``.

    History kept per step:
        actions        -- array of shape (2, t); column i holds
                          ``action[0]`` and ``action[1]`` of step i
        observations   -- values returned by ``env.action``
        weak_regret    -- values returned by ``env.weak_regret``
        strong_regret  -- values returned by ``env.strong_regret``
    """

    def __init__(self, env, name='Unknown'):
        """Store the environment and name, then reset all state."""
        self.env = env
        self.name = name
        self.reset()

    def reset(self):
        """Zero the step counters, empty the history, reset the learner."""
        self.t = 0
        self.T = 0
        self.actions = np.zeros((2, 0))
        self.observations = np.array([], dtype=float)
        self.weak_regret = np.array([], dtype=float)
        self.strong_regret = np.array([], dtype=float)
        # Called last so the subclass hook sees freshly cleared state.
        self.reset_learning()

    @property
    def cumulative_weak_regret(self):
        """Running sum of the per-step weak regret."""
        per_step = self.weak_regret
        return np.cumsum(per_step)

    @property
    def cumulative_strong_regret(self):
        """Running sum of the per-step strong regret."""
        per_step = self.strong_regret
        return np.cumsum(per_step)

    def play(self, steps=0, T=0, reset=True):
        """Run the agent until the step counter reaches the horizon.

        The horizon ``self.T`` becomes the larger of ``self.T + steps``
        and ``T``.

        Args:
            steps: number of steps added to the current horizon.
            T: lower bound for the horizon.
            reset: when true, ``reset()`` is called before anything else.
        """
        if reset:
            self.reset()
        self.T = max(self.T + steps, T)

        while self.t < self.T:
            chosen = self.sample_action()
            feedback = self.env.action(chosen)
            # The learner is updated before the step is added to history.
            self.learn(chosen, feedback)
            self._record_step(chosen, feedback)
            self.t += 1

    def _record_step(self, action, observation):
        """Append one step's action, observation and regrets to history."""
        column = [[action[0]], [action[1]]]
        self.actions = np.concatenate((self.actions, column), axis=1)
        self.observations = np.append(self.observations, observation)
        weak = self.env.weak_regret(action)
        self.weak_regret = np.append(self.weak_regret, weak)
        strong = self.env.strong_regret(action)
        self.strong_regret = np.append(self.strong_regret, strong)

    def print(self):
        """Print the recorded history and the cumulative regrets."""
        report = (
            ('actions:', self.actions),
            ('observations:', self.observations),
            ('weak_regret:', self.weak_regret),
            ('cumulative_weak_regret:', self.cumulative_weak_regret),
            ('strong_regret', self.strong_regret),
            ('cumulative_strong_regret:', self.cumulative_strong_regret),
        )
        for label, values in report:
            print(label, values)

    @abstractmethod
    def sample_action(self):
        """Return the action for the current step.

        ``play`` reads ``action[0]`` and ``action[1]`` of the result.
        """

    @abstractmethod
    def learn(self, action, observation):
        """Update the learner from an action and its observation."""

    @abstractmethod
    def reset_learning(self):
        """Reset subclass-specific learning state; called by ``reset``."""
