# Hedge with fixed learning rate η
# The best tuning for η given range σ, cardinality K and horizon T is
# η = sqrt(2*log(K)/T)/σ for a regret bound of σ*sqrt(2T*log(K))
# State of the fixed-rate learner: one cumulative-loss entry per action plus
# the learning rate that is used on every round.
mutable struct Hedge_η
    L::Vector{Float64}  # cumulative loss per action
    η::Float64          # fixed learning rate

    # Start with K actions, all at zero cumulative loss.
    Hedge_η(K, η) = new(zeros(Float64, K), η)
end

# Return the weights over actions: entries proportional to exp(-η * (L + m)),
# normalised so that they sum to one. `m` is added to the cumulative losses
# before weighting; the default 0 leaves them unchanged.
function act(h::Hedge_η, m=0)
    shifted = h.L .+ m
    # Subtracting the smallest entry rescales every weight by the same factor,
    # which the normalisation cancels; for η ≥ 0 it keeps all exponents ≤ 0.
    gaps = shifted .- minimum(shifted)
    weights = exp.(-h.η .* gaps)
    return weights ./ sum(weights)
end

# Add the loss vector ∇ elementwise into the cumulative losses, in place, and
# return the updated array. `m` is accepted but not used here.
function incur!(h::Hedge_η, ∇, m=0)
    totals = h.L
    totals .= totals .+ ∇
    return totals
end



# Hedge with learning rate of order 1/√t
# The constant factor is tuned (in terms of the range σ and K)
# to get the best leading factor in the regret bound, namely
# ∀T: Regret_T ≤ 2σ*sqrt(T*log(K))  (note: √2 times the bound for fixed T)

# State of the decreasing-rate learner: cumulative losses, the number of
# rounds recorded so far, and the scale parameter σ.
mutable struct Hedge
    L::Vector{Float64}  # cumulative loss per action
    t::Int64            # number of rounds recorded so far
    σ::Float64          # maximum range

    # Start with K actions at zero cumulative loss and no rounds recorded.
    Hedge(K, σ) = new(zeros(Float64, K), 0, σ)
end

# Return the weights over actions for the upcoming round: entries proportional
# to exp(-η * (L + m)), normalised to sum to one, where
# η = sqrt(log(K) / (t + 1)) / σ and K is the number of actions. `m` is added
# to the cumulative losses before weighting; the default 0 leaves them
# unchanged.
function act(h::Hedge, m=0)
    # Rate for the next round; it shrinks as more rounds are recorded.
    rate = sqrt(log(length(h.L)) / (h.t + 1)) / h.σ
    shifted = h.L .+ m
    # Subtracting the smallest entry rescales every weight by the same factor,
    # which the normalisation cancels.
    gaps = shifted .- minimum(shifted)
    weights = exp.(-rate .* gaps)
    return weights ./ sum(weights)
end

# Record one round: check that every entry of ∇ is at most σ in absolute
# value, add ∇ elementwise into the cumulative losses in place, and advance
# the round counter. Returns the updated counter. `m` is accepted but not
# used here.
function incur!(h::Hedge, ∇, m=0)
    # The check runs before any state is touched, so a failure leaves h as is.
    @assert all(abs.(∇) .≤ h.σ)
    totals = h.L
    totals .= totals .+ ∇
    h.t = h.t + 1
    return h.t
end
