function [Qstar,r,P] = Q_generation(S,A,d,gamma)
%Q_GENERATION Build a random MDP with low-rank structure and solve it.
%
%   [Qstar,r,P] = Q_generation(S,A,d,gamma) draws a random reward matrix
%   and a random transition tensor that factor through d latent
%   components, then runs value iteration until successive Q iterates
%   differ by at most 1e-6 in the max norm.
%
%   Inputs:
%     S     - number of states (positive integer)
%     A     - number of actions (positive integer)
%     d     - latent dimension / rank, must satisfy d <= min(S,A)
%     gamma - discount factor. For gamma >= 1 the iteration is not a
%             contraction and the loop is not guaranteed to terminate.
%
%   Outputs:
%     Qstar - S-by-A matrix, the final value-iteration iterate
%     r     - S-by-A reward matrix built from the top-d singular vectors
%             of a random matrix, with all d singular values set to 1
%     P     - S-by-A-by-S tensor; P(s,a,:) sums to 1 over the third index
%
%   The function uses the global random stream (randi/rand); seed it
%   beforehand for reproducible output.

    if d > min(S,A)
        error('Q_generation:rankTooLarge', ...
              'd (%d) must not exceed min(S,A) = %d.', d, min(S,A));
    end

    % ---- Reward: keep the top-d singular directions of a random matrix,
    % replacing the singular values by ones.
    rlow = (randi(100,[S,A])-10)/100 + 20*rand([S,A]).*(rand([S,A])>0.99);
    Sigmar = eye([d,d]);
    [Utmp, ~, Vtmp] = svds(rlow,d);
    Ur = Utmp(:,1:d);
    Vr = Vtmp(:,1:d);
    r = Ur*Sigmar*Vr';

    % ---- Low-rank transition factors
    Pddd = zeros([d,d,d]);   % Pddd(i,j,:) sums to 1 over the third index
    Pdss = zeros([d,S]);     % each column sums to 1 over the d latent rows
    Psds = zeros([S,d]);     % each column sums to 1 over the S state rows
    Pdaa = zeros([d,A]);     % each column sums to 1 over the d latent rows

    for i = 1:d
        for j = 1:d
            tmp = randi(15,[1,d]).*(rand([1,d])>0.5);
            tmp(i) = tmp(i) + 15;          % guarantees a strictly positive sum
            Pddd(i,j,:) = tmp/sum(tmp);
        end
    end

    for i = 1:S
        Pdss(:,i) = randi(15,[1,d]).*(rand([1,d])>0.2);
        ind = ceil(i/S*d);                 % latent index in 1..d tied to state i
        Pdss(ind,i) = Pdss(ind,i) + d*10;  % heavy mass keeps the column sum positive
        Pdss(:,i) = Pdss(:,i)/sum(Pdss(:,i));
    end

    for i = 1:d
        Psds(:,i) = randi(25,[1,S]).*(rand([1,S])>0.3);
        ind = ceil(i/d*S);                 % state index in 1..S tied to latent i
        Psds(ind,i) = Psds(ind,i) + d*25;
        Psds(:,i) = Psds(:,i)/sum(Psds(:,i));
    end

    for i = 1:A
        Pdaa(:,i) = randi(15,[1,d]).*(rand([1,d])>0.4);
        ind = ceil(i/A*d);                 % latent index in 1..d tied to action i
        Pdaa(ind,i) = Pdaa(ind,i) + d*10;
        Pdaa(:,i) = Pdaa(:,i)/sum(Pdaa(:,i));
    end

    % ---- Assemble the full tensor:
    %   P(s,a,sp) = sum_{i,j,ip} Pdss(i,s) * Pdaa(j,a) * Pddd(i,j,ip) * Psds(sp,ip)
    % For a fixed action the sum over j yields a d-by-d core, and the
    % remaining sums over i and ip are two ordinary matrix products.
    P = zeros([S,A,S]);
    for a = 1:A
        core = zeros(d,d);                 % core(i,ip) = sum_j Pdaa(j,a)*Pddd(i,j,ip)
        for j = 1:d
            core = core + Pdaa(j,a)*reshape(Pddd(:,j,:), [d,d]);
        end
        P(:,a,:) = reshape(Pdss'*core*Psds', [S,1,S]);
    end

    % ---- Value iteration
    % Row s+(a-1)*S of Pflat holds P(s,a,:), so Pflat*V is the expected
    % next-state value for every (s,a) pair at once.
    Pflat = reshape(P, [S*A, S]);
    V = zeros(S,1);
    Qprev = zeros([S,A]);
    tol = 1e-6;
    err = inf;

    while err > tol
        Qstar = r + gamma*reshape(Pflat*V, [S,A]);
        % Reduce explicitly along the action dimension so that V stays
        % S-by-1 even when there is a single action.
        V = max(Qstar, [], 2);
        err = max(abs(Qprev(:) - Qstar(:)));
        Qprev = Qstar;
    end
end

