Source code for Environment.MAB

# -*- coding: utf-8 -*-
""" :class:`MAB`, :class:`MarkovianMAB`, :class:`ChangingAtEachRepMAB`, :class:`IncreasingMAB`, :class:`PieceWiseStationaryMAB` and :class:`NonStationaryMAB` classes to wrap the arms of some Multi-Armed Bandit problems.

Such a class must have *at least* these methods:

- ``draw(armId, t)`` to draw *one* sample from that ``armId`` at time ``t``,
- and ``reprarms()`` to pretty print the arms (for titles of a plot),
- and more, see below.

.. warning:: FIXME it is still a work in progress, I need to add continuously varying environments. See https://github.com/SMPyBandits/SMPyBandits/issues/71
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

import numpy as np
import matplotlib.pyplot as plt

try:
    from .pykov import Chain
except ImportError as e:
    try:
        from pykov import Chain
    except ImportError:
        print("Warning: 'pykov' module seems to not be available. But it is shipped with SMPyBandits. Weird.")
        print("Dou you want to try to install it from https://github.com/riccardoscalco/Pykov ?")
        print("Warning: the 'MarkovianMAB' class will not work...")

# Local imports
try:
    from .plotsettings import signature, wraptext, wraplatex, palette, makemarkers, legend, show_and_save
except ImportError:
    from plotsettings import signature, wraptext, wraplatex, palette, makemarkers, legend, show_and_save


class MAB(object):
    """ Basic Multi-Armed Bandit problem, for stochastic and i.i.d. arms.

    - configuration can be a dict with 'arm_type' and 'params' keys. 'arm_type' is a class from the Arms module, and 'params' is a dict, used as a list/tuple/iterable of named parameters given to 'arm_type'. Example::

        configuration = {
            'arm_type': Bernoulli,
            'params':   [0.1, 0.5, 0.9]
        }

        configuration = {  # for fixed variance Gaussian
            'arm_type': Gaussian,
            'params':   [0.1, 0.5, 0.9]
        }

    - But it can also accept a list of already created arms::

        configuration = [
            Bernoulli(0.1),
            Bernoulli(0.5),
            Bernoulli(0.9),
        ]

    - Both will create three Bernoulli arms, of parameters (means) 0.1, 0.5 and 0.9.
    """
    def __init__(self, configuration):
        """New MAB."""
        print("\n\nCreating a new MAB problem ...")  # DEBUG
        self.isChangingAtEachRepetition = False  #: Flag to know if the problem is changing at each repetition or not.
        self.isDynamic = False  #: Flag to know if the problem is static or not.
        self.isMarkovian = False  #: Flag to know if the problem is Markovian or not.
        self.arms = []  #: List of arms
        self._sparsity = None

        if isinstance(configuration, dict):
            print(" Reading arms of this MAB problem from a dictionary 'configuration' = {} ...".format(configuration))  # DEBUG
            arm_type = configuration["arm_type"]
            print(" - with 'arm_type' =", arm_type)  # DEBUG
            params = configuration["params"]
            print(" - with 'params' =", params)  # DEBUG
            # Each 'param' could be one value (eg. 'mean' = probability for a Bernoulli) or a tuple (eg. '(mu, sigma)' for a Gaussian) or a dictionary
            for param in params:
                self.arms.append(arm_type(*param) if isinstance(param, (dict, tuple, list)) else arm_type(param))
            # XXX try to read sparsity
            self._sparsity = configuration["sparsity"] if "sparsity" in configuration else None
        else:
            print(" Taking arms of this MAB problem from a list of arms 'configuration' = {} ...".format(configuration))  # DEBUG
            for arm in configuration:
                self.arms.append(arm)

        # Compute the means and stats
        print(" - with 'arms' =", self.arms)  # DEBUG
        self.means = np.array([arm.mean for arm in self.arms])  #: Means of arms
        print(" - with 'means' =", self.means)  # DEBUG
        self.nbArms = len(self.arms)  #: Number of arms
        print(" - with 'nbArms' =", self.nbArms)  # DEBUG
        if self._sparsity is not None:
            print(" - with 'sparsity' =", self._sparsity)  # DEBUG
        self.maxArm = np.max(self.means)  #: Max mean of arms
        print(" - with 'maxArm' =", self.maxArm)  # DEBUG
        self.minArm = np.min(self.means)  #: Min mean of arms
        print(" - with 'minArm' =", self.minArm)  # DEBUG
        # Print lower bound and HOI factor
        print("\nThis MAB problem has: \n - a [Lai & Robbins] complexity constant C(mu) = {:.3g} ... \n - a Optimal Arm Identification factor H_OI(mu) = {:.2%} ...".format(self.lowerbound(), self.hoifactor()))  # DEBUG
        print(" - with 'arms' represented as:", self.reprarms(1, latex=True))  # DEBUG
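    # Example (illustrative sketch, not part of the original module): building a
    # MAB problem from a dict configuration and querying it, assuming the
    # Bernoulli arm class can be imported from the Arms module (as in the class
    # docstring above):
    #
    #     from Arms import Bernoulli
    #     problem = MAB({'arm_type': Bernoulli, 'params': [0.1, 0.5, 0.9]})
    #     problem.nbArms        # -> 3
    #     problem.means         # -> array([0.1, 0.5, 0.9])
    #     problem.draw(2)       # one sample from the Bernoulli(0.9) arm
    #     problem.lowerbound()  # Lai & Robbins complexity constant C(mu)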
    def new_order_of_arm(self, arms):
        """ Feed a new order of the arms to the environment.

        - Updates :attr:`means` correctly.
        - Return the new position(s) of the best arm (to count and plot ``BestArmPulls`` correctly).

        .. warning:: This is a very limited support of non-stationary environments: only permutations of the arms are allowed, see :class:`NonStationaryMAB` for more.
        """
        assert sorted([arm.mean for arm in self.arms]) == sorted([arm.mean for arm in arms]), "Error: the new list of arms = {} does not have the same means as the previous ones.".format(arms)  # DEBUG
        assert set(self.arms) == set(arms), "Error: the new list of arms = {} is not a permutation of the previous ones.".format(arms)  # DEBUG
        self.arms = arms
        self.means = np.array([arm.mean for arm in self.arms])
        self.maxArm = np.max(self.means)
        self.minArm = np.min(self.means)
        return np.nonzero(np.isclose(self.means, self.maxArm))[0]
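    # Example (illustrative sketch, assuming Bernoulli arms from the Arms module):
    # permuting the arms of an existing problem; the returned indices locate the
    # best arm(s) in the new order:
    #
    #     arms = [Bernoulli(0.1), Bernoulli(0.5), Bernoulli(0.9)]
    #     problem = MAB(arms)
    #     problem.new_order_of_arm([arms[2], arms[0], arms[1]])  # -> array([0])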
    def __repr__(self):
        return "{}(nbArms: {}, arms: {}, minArm: {:.3g}, maxArm: {:.3g})".format(self.__class__.__name__, self.nbArms, self.arms, self.minArm, self.maxArm)
    def reprarms(self, nbPlayers=None, openTag='', endTag='^*', latex=True):
        """ Return a str representation of the list of the arms (like `repr(self.arms)` but better).

        - If nbPlayers > 0, it surrounds the representation of the best arms by openTag, endTag (for plot titles, in a multi-player setting).

        - Example: openTag = '', endTag = '^*' for LaTeX tags to put a star exponent.
        - Example: openTag = '<red>', endTag = '</red>' for HTML-like tags.
        - Example: openTag = r'\textcolor{red}{', endTag = '}' for LaTeX tags.
        """
        if nbPlayers is None:
            text = repr(self.arms)
        else:
            assert nbPlayers >= 0, "Error, the 'nbPlayers' argument for reprarms method of a MAB object has to be a non-negative integer."  # DEBUG
            append_to_repr = ""

            means = self.means
            bestmean = np.max(means)
            bestArms = np.argsort(means)[-min(nbPlayers, self.nbArms):]
            repr_arms = [repr(arm) for arm in self.arms]

            # WARNING improve display for Gaussian arms that all have same variance
            if all("Gaussian" in str(type(arm)) for arm in self.arms) and len({arm.sigma for arm in self.arms}) == 1:
                sigma = self.arms[0].sigma
                repr_arms = [s.replace(', {:.3g}'.format(sigma), '') for s in repr_arms]
                append_to_repr = r", \sigma^2={:.3g}".format(sigma) if latex else ", sigma2={:.3g}".format(sigma)

            if nbPlayers == 0:
                bestArms = []
            text = '[{}]'.format(', '.join(
                openTag + repr_arms[armId] + endTag
                if (nbPlayers > 0 and (armId in bestArms or np.isclose(arm.mean, bestmean)))
                else repr_arms[armId]
                for armId, arm in enumerate(self.arms))
            )
            text += append_to_repr
        return wraplatex('$' + text + '$') if latex else wraptext(text)
# --- Draw samples
    def draw(self, armId, t=1):
        """ Return a random sample from the armId-th arm, at time t. Usually t is not used."""
        return self.arms[armId].draw(t)
    def draw_nparray(self, armId, shape=(1,)):
        """ Return a numpy array of random samples from the armId-th arm, of a certain shape."""
        return self.arms[armId].draw_nparray(shape)
    def draw_each(self, t=1):
        """ Return a random sample from each arm, at time t. Usually t is not used."""
        return np.array([self.draw(armId, t) for armId in range(self.nbArms)])
    def draw_each_nparray(self, shape=(1,)):
        """ Return a numpy array of random samples from each arm, of a certain shape."""
        return np.array([self.draw_nparray(armId, shape) for armId in range(self.nbArms)])
# # --- Helper to compute sets Mbest and Mworst
    def Mbest(self, M=1):
        """ Set of M best means."""
        sortedMeans = np.sort(self.means)
        return sortedMeans[-M:]
    def Mworst(self, M=1):
        """ Set of worst means, i.e., the K-M means not among the M best."""
        sortedMeans = np.sort(self.means)
        return sortedMeans[:-M]
    def sumBestMeans(self, M=1):
        """ Sum of the M best means."""
        return np.sum(self.Mbest(M=M))
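    # Example (illustrative sketch): for a problem with means [0.1, 0.5, 0.9],
    # Mbest(2) returns the 2 best means, Mworst(2) returns the means left outside
    # of the 2 best, and sumBestMeans(2) sums the 2 best:
    #
    #     problem.Mbest(2)         # -> array([0.5, 0.9])
    #     problem.Mworst(2)        # -> array([0.1])
    #     problem.sumBestMeans(2)  # -> 1.4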
# # --- Helper to compute vector of min arms, max arms, all arms
    def get_minArm(self, horizon=None):
        """Return the vector of min mean of the arms.

        - It is a vector of length horizon.
        """
        return np.full(horizon, self.minArm)
        # return self.minArm  # XXX Nope, it's not a constant!
    def get_maxArm(self, horizon=None):
        """Return the vector of max mean of the arms.

        - It is a vector of length horizon.
        """
        return np.full(horizon, self.maxArm)
        # return self.maxArm  # XXX Nope, it's not a constant!
    def get_maxArms(self, M=1, horizon=None):
        """Return the vector of sum of the M-best means of the arms.

        - It is a vector of length horizon.
        """
        return np.full(horizon, self.sumBestMeans(M))
    def get_allMeans(self, horizon=None):
        """Return the vector of means of the arms.

        - It is a numpy array of shape (nbArms, horizon).
        """
        # allMeans = np.tile(self.means, (horizon, 1)).T
        allMeans = np.zeros((self.nbArms, horizon))
        for t in range(horizon):
            allMeans[:, t] = self.means
        return allMeans
    # --- Estimate sparsity

    @property
    def sparsity(self):
        """ Estimate the sparsity of the problem, i.e., the number of arms with positive means."""
        if self._sparsity is not None:
            return self._sparsity
        else:
            return np.count_nonzero(self.means > 0)
    def str_sparsity(self):
        """ Empty string if ``sparsity = nbArms``, or a small string ', $s={}$' if the sparsity is strictly less than the number of arms."""
        s, K = self.sparsity, self.nbArms
        assert 0 <= s <= K, "Error: sparsity s = {} has to be 0 <= s <= K = {} ...".format(s, K)  # WARNING
        # disable this feature when not working on sparse simulations
        # return ""
        # or bring back this feature when working on sparse simulations
        return "" if s == K else ", $s={}$".format(s)
# # --- Compute lower bounds
    def lowerbound(self):
        r""" Compute the constant :math:`C(\mu)`, for the [Lai & Robbins] lower-bound for this MAB problem (complexity), using functions from ``kullback.py`` or ``kullback.so`` (see :mod:`Arms.kullback`)."""
        return sum(a.oneLR(self.maxArm, a.mean) for a in self.arms if a.mean != self.maxArm)
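    # For reference (illustrative sketch, assuming Bernoulli arms, for which
    # oneLR(mumax, mu) is (mumax - mu) / kl(mu, mumax) with kl the binary
    # Kullback-Leibler divergence), C(mu) can be computed directly from the means:
    #
    #     import numpy as np
    #     def klBern(x, y, eps=1e-15):
    #         x = min(max(x, eps), 1 - eps)
    #         y = min(max(y, eps), 1 - eps)
    #         return x * np.log(x / y) + (1 - x) * np.log((1 - x) / (1 - y))
    #     means = [0.1, 0.5, 0.9]
    #     mumax = max(means)
    #     C_mu = sum((mumax - mu) / klBern(mu, mumax) for mu in means if mu != mumax)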
    def lowerbound_sparse(self, sparsity=None):
        r""" Compute the constant :math:`C(\mu)`, for the [Kwon et al, 2017] lower-bound for sparse bandits for this MAB problem (complexity).

        - I recomputed suboptimal solution to the optimization problem, and found the same as in [["Sparse Stochastic Bandits", by J. Kwon, V. Perchet & C. Vernade, COLT 2017](https://arxiv.org/abs/1706.01383)].
        """
        if hasattr(self, "sparsity") and sparsity is None:
            sparsity = self._sparsity
        if sparsity is None:
            sparsity = self.nbArms

        try:
            try:
                from Policies.OSSB import solve_optimization_problem__sparse_bandits
            except ImportError:  # WARNING ModuleNotFoundError is only Python 3.6+
                from SMPyBandits.Policies.OSSB import solve_optimization_problem__sparse_bandits
            ci = solve_optimization_problem__sparse_bandits(self.means, sparsity=sparsity, only_strong_or_weak=False)
            # now we use these ci to compute the lower-bound
            gaps = [self.maxArm - a.mean for a in self.arms]
            lowerbound = sum(delta * c for (delta, c) in zip(gaps, ci))
        except (ImportError, ValueError, AssertionError):  # WARNING this is dirty!
            lowerbound = np.nan
        return lowerbound
    def hoifactor(self):
        """ Compute the HOI factor H_OI(mu), the Optimal Arm Identification (OI) factor, for this MAB problem (complexity).

        Cf. (3.3) in Navikkumar MODI's thesis, "Machine Learning and Statistical Decision Making for Green Radio" (2017).
        """
        return sum(a.oneHOI(self.maxArm, a.mean) for a in self.arms if a.mean != self.maxArm) / float(self.nbArms)
    def lowerbound_multiplayers(self, nbPlayers=1):
        """ Compute our multi-players lower bound for this MAB problem (complexity), using functions from :mod:`kullback`. """
        sortedMeans = sorted(self.means)
        assert nbPlayers <= len(sortedMeans), "Error: this lowerbound_multiplayers() for a MAB problem is only valid when there are fewer users than arms. Here M = {} > K = {} ...".format(nbPlayers, len(sortedMeans))  # DEBUG
        # FIXME it is highly suboptimal to have a lowerbound = 0 if nbPlayers == nbArms! We have to finish the theoretical analysis!
        bestMeans = sortedMeans[-nbPlayers:]
        worstMeans = sortedMeans[:-nbPlayers]
        worstOfBestMean = bestMeans[0]

        # Our lower bound is this:
        oneLR = self.arms[0].oneLR
        centralized_lowerbound = sum(oneLR(worstOfBestMean, oneOfWorstMean) for oneOfWorstMean in worstMeans)
        print(" - For {} players, Anandtharam et al. centralized lower-bound gave = {:.3g} ...".format(nbPlayers, centralized_lowerbound))  # DEBUG

        our_lowerbound = nbPlayers * centralized_lowerbound
        print(" - For {} players, our lower bound gave = {:.3g} ...".format(nbPlayers, our_lowerbound))  # DEBUG

        # The initial lower bound in Theorem 6 from [Anandkumar et al., 2010]
        kl = self.arms[0].kl
        anandkumar_lowerbound = sum(sum((worstOfBestMean - oneOfWorstMean) / kl(oneOfWorstMean, oneOfBestMean) for oneOfWorstMean in worstMeans) for oneOfBestMean in bestMeans)
        print(" - For {} players, the initial lower bound in Theorem 6 from [Anandkumar et al., 2010] gave = {:.3g} ...".format(nbPlayers, anandkumar_lowerbound))  # DEBUG

        # Check that our bound is better (ie bigger)
        if anandkumar_lowerbound > our_lowerbound:
            print("Error, our lower bound is worse than the one in Theorem 6 from [Anandkumar et al., 2010], but it should always be better...")
        return our_lowerbound, anandkumar_lowerbound, centralized_lowerbound
    def upperbound_collisions(self, nbPlayers, times):
        """ Compute Anandkumar et al. multi-players upper bound for this MAB problem (complexity), for UCB only. Warning: it is HIGHLY asymptotic! """
        sortedMeans = sorted(self.means)
        assert nbPlayers <= len(sortedMeans), "Error: this upperbound_collisions() for a MAB problem is only valid when there are fewer users than arms. Here M = {} > K = {} ...".format(nbPlayers, len(sortedMeans))  # DEBUG
        bestMeans = sortedMeans[-nbPlayers:][::-1]

        def worstMeans_of_a(a):
            """ Give the worst means, excluding the a+1 best ones."""
            return sortedMeans[:-(a + 1)]

        # First, the bound in Lemma 2 from [Anandkumar et al., 2010] uses this Upsilon(U, U)
        Upsilon = binomialCoefficient(nbPlayers, 2 * nbPlayers - 1)
        print(" - For {} players, Upsilon(M,M) = (2M-1 choose M) = {} ...".format(nbPlayers, Upsilon))

        # First, the constant term
        from math import pi
        boundOnExpectedTprime_cstTerm = nbPlayers * sum(
            sum(
                (1 + pi**2 / 3.)
                for (b, mu_star_b) in enumerate(worstMeans_of_a(a))
            )
            for (a, mu_star_a) in enumerate(bestMeans)
        )
        print(" - For {} players, the bound with (1 + pi^2 / 3) = {:.3g} ...".format(nbPlayers, boundOnExpectedTprime_cstTerm))

        # And the term to multiply with log(t)
        boundOnExpectedTprime_logT = nbPlayers * sum(
            sum(
                8. / (mu_star_b - mu_star_a)**2
                for (b, mu_star_b) in enumerate(worstMeans_of_a(a))
            )
            for (a, mu_star_a) in enumerate(bestMeans)
        )
        print(" - For {} players, the bound with (8 / (mu_b^* - mu_a^*)^2) = {:.3g} ...".format(nbPlayers, boundOnExpectedTprime_logT))

        # Add them up
        boundOnExpectedTprime = boundOnExpectedTprime_cstTerm + boundOnExpectedTprime_logT * np.log(2 + times)

        # The upper bound in Theorem 3 from [Anandkumar et al., 2010]
        upperbound = nbPlayers * (Upsilon + 1) * boundOnExpectedTprime
        print(" - For {} players, Anandkumar et al. upper bound for the total cumulated number of collisions is {:.3g} here ...".format(nbPlayers, upperbound[-1]))  # DEBUG
        return upperbound
# --- Plot methods
    def plotComparison_our_anandkumar(self, savefig=None):
        """Plot a comparison of our lowerbound and their lowerbound."""
        nbPlayers = self.nbArms
        lowerbounds = np.zeros((3, nbPlayers))
        for i in range(nbPlayers):
            lowerbounds[:, i] = self.lowerbound_multiplayers(i + 1)
        fig = plt.figure()
        X = np.arange(1, 1 + nbPlayers)
        plt.plot(X, lowerbounds[0, :], 'ro-', label="Besson & Kaufmann lowerbound")
        plt.plot(X, lowerbounds[1, :], 'bd-', label="Anandkumar et al. lowerbound")
        legend()
        plt.xlabel("Number $M$ of players in the multi-players game{}".format(signature))
        plt.ylabel("Lowerbound on the centralized cumulative normalized regret")
        plt.title("Comparison of our lowerbound and the one from [Anandkumar et al., 2010].\n{} arms: {}".format(self.nbArms, self.reprarms(0, latex=True)))
        show_and_save(showplot=True, savefig=savefig, fig=fig, pickleit=False)
        return fig
    def plotHistogram(self, horizon=10000, savefig=None, bins=50, alpha=0.9, density=None):
        """Plot a histogram of ``horizon`` = 10000 draws of each arm."""
        arms = self.arms
        rewards = np.zeros((len(arms), horizon))
        colors = palette(len(arms))
        for armId, arm in enumerate(arms):
            if hasattr(arm, 'draw_nparray'):  # XXX Use this method to speed up computation
                rewards[armId] = arm.draw_nparray((horizon,))
            else:  # Slower
                for t in range(horizon):
                    rewards[armId, t] = arm.draw(t)
        # Now plot
        fig = plt.figure()
        for armId, arm in enumerate(arms):
            plt.hist(rewards[armId, :], bins=bins, density=density, color=colors[armId], label='$%s$' % repr(arm), alpha=alpha)
        legend()
        plt.xlabel("Rewards")
        if density:
            plt.ylabel("Empirical density of the rewards")
        else:
            plt.ylabel("Empirical count of observations of the rewards")
        plt.title("{} draws of rewards from these arms.\n{} arms: {}{}".format(horizon, self.nbArms, self.reprarms(latex=True), signature))
        show_and_save(showplot=True, savefig=savefig, fig=fig, pickleit=False)
        return fig
# --- MarkovianMAB

RESTED = True  #: Default is rested Markovian.
def dict_of_transition_matrix(mat):
    """ Convert a transition matrix (list of lists or numpy array) to a dictionary mapping (state, state) to probabilities (as used by :class:`pykov.Chain`)."""
    if isinstance(mat, list):
        return {(i, j): mat[i][j] for i in range(len(mat)) for j in range(len(mat[i]))}
    else:
        return {(i, j): mat[i, j] for i in range(len(mat)) for j in range(len(mat[i]))}
def transition_matrix_of_dict(dic):
    """ Convert a dictionary mapping (state, state) to probabilities (as used by :class:`pykov.Chain`) to a transition matrix (numpy array)."""
    keys = list(dic.keys())
    xkeys = sorted(list({i for i, _ in keys}))
    ykeys = sorted(list({j for _, j in keys}))
    return np.array([[dic[(i, j)] for i in xkeys] for j in ykeys])
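# Example (illustrative sketch, not part of the original module): round-tripping
# a symmetric 2-state transition matrix through the two helpers above:
#
#     P = [[0.9, 0.1], [0.1, 0.9]]
#     d = dict_of_transition_matrix(P)
#     # d == {(0, 0): 0.9, (0, 1): 0.1, (1, 0): 0.1, (1, 1): 0.9}
#     transition_matrix_of_dict(d)
#     # -> array([[0.9, 0.1],
#     #           [0.1, 0.9]])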
class MarkovianMAB(MAB):
    """ Classic MAB problem but the rewards are drawn from a rested/restless Markov chain.

    - configuration is a dict with ``rested`` and ``transitions`` keys.
    - ``rested`` is a Boolean. See [Kalathil et al., 2012](https://arxiv.org/abs/1206.3582) page 2 for a description.
    - ``transitions`` is a list of K transition matrices *or* dictionaries (to specify non-integer states), one for each arm.

    Example::

        configuration = {
            "arm_type": "Markovian",
            "params": {
                "rested": True,  # or False
                # Example from [Kalathil et al., 2012](https://arxiv.org/abs/1206.3582) Table 1
                "transitions": [
                    # 1st arm, Either a dictionary
                    {   # Mean = 0.375
                        (0, 0): 0.7, (0, 1): 0.3,
                        (1, 0): 0.5, (1, 1): 0.5,
                    },
                    # 2nd arm, Or a right transition matrix
                    [[0.2, 0.8], [0.6, 0.4]],  # Mean = 0.571
                ],
                # FIXME make this by default! include it in MAB.py and not in the configuration!
                "steadyArm": Bernoulli
            }
        }

    - This class requires the [pykov](https://github.com/riccardoscalco/Pykov) module to represent and use Markov chains.
    """
    def __init__(self, configuration):
        """New MarkovianMAB."""
        print("\n\nCreating a new MarkovianMAB problem ...")  # DEBUG
        self.isChangingAtEachRepetition = False  #: The problem is not changing at each repetition.
        self.isDynamic = False  #: The problem is static.
        self.isMarkovian = True  #: The problem is Markovian.
        self._sparsity = None

        assert isinstance(configuration, dict), "Error: 'configuration' for a MarkovianMAB must be a dictionary."  # DEBUG
        assert "params" in configuration and \
            isinstance(configuration["params"], dict) and \
            "transitions" in configuration["params"], \
            "Error: 'configuration.params' for a MarkovianMAB must be a dictionary with keys 'transitions' and 'rested'."
        # Use input configuration
        transitions = configuration["params"]["transitions"]
        dict_transitions = []
        matrix_transitions = []
        for t in transitions:
            if isinstance(t, dict):
                dict_transitions.append(t)
                matrix_transitions.append(transition_matrix_of_dict(t))
            else:
                dict_transitions.append(dict_of_transition_matrix(t))
                matrix_transitions.append(np.asarray(t))

        self.matrix_transitions = matrix_transitions
        print(" - Using these transition matrices:", matrix_transitions)  # DEBUG
        self.dict_transitions = dict_transitions
        print(" - Using these transition dictionaries:", dict_transitions)  # DEBUG

        # FIXED this will fail harshly if Pykov is not installed/present
        self.chains = [Chain(d) for d in dict_transitions]
        print(" - For these Markov chains:", self.chains)  # DEBUG

        self.rested = configuration["params"].get("rested", RESTED)  #: Rested or not Markovian model?
        print(" - Rested:", self.rested)  # DEBUG

        self.nbArms = len(self.matrix_transitions)  #: Number of arms
        print(" - with 'nbArms' =", self.nbArms)  # DEBUG

        # # Make every transition matrix a right stochastic transition matrix
        # for c in self.chains:
        #     c.stochastic()

        # Means of arms = steady distribution
        self.states = [np.array(list(c.states())) for c in self.chains]
        print(" - and states:", self.states)  # DEBUG
        try:
            self.steadys = [np.array(list(c.steady().values())) for c in self.chains]
        except ValueError:
            for c in self.chains:
                if len(c.steady()) == 0:
                    print("[ERROR] the steady state of the Markov chain {} was not found because it is non-ergodic...".format(c))
                    raise ValueError("The Markov chain {} is non-ergodic, and so does not have a steady state distribution... Please choose another transition matrix, that has to be irreducible, aperiodic, and reversible.".format(c))
        # If the steady state exists, go on
        print(" - and steady state distributions:", self.steadys)  # DEBUG

        self.means = np.array([np.dot(s, p) for s, p in zip(self.states, self.steadys)])  #: Means of each arm, from their steady distributions.
        print(" - so it gives arms of means:", self.means)  # DEBUG

        self.arms = [configuration["params"]["steadyArm"](mean) for mean in self.means]
        print(" - so arms asymptotically equivalent to:", self.arms)  # DEBUG
        print(" - represented as:", self.reprarms(1, latex=True))  # DEBUG

        self.maxArm = np.max(self.means)  #: Max mean of arms
        print(" - with 'maxArm' =", self.maxArm)  # DEBUG
        self.minArm = np.min(self.means)  #: Min mean of arms
        print(" - with 'minArm' =", self.minArm)  # DEBUG
        #: States of each arm, initially they are all busy
        self.states = np.zeros(self.nbArms)
        print("DONE for creating this MarkovianMAB problem...")  # DEBUG
    def __repr__(self):
        return "{}(nbArms: {}, chains: {}, arms: {})".format(self.__class__.__name__, self.nbArms, self.matrix_transitions, self.arms)
    def reprarms(self, nbPlayers=None, openTag='', endTag='^*', latex=True):
        """ Return a str representation of the list of the arms (like `repr(self.arms)` but better).

        - For a Markovian MAB, the chain and the steady Bernoulli arm are represented.
        - If nbPlayers > 0, it surrounds the representation of the best arms by openTag, endTag (for plot titles, in a multi-player setting).

        - Example: openTag = '', endTag = '^*' for LaTeX tags to put a star exponent.
        - Example: openTag = '<red>', endTag = '</red>' for HTML-like tags.
        - Example: openTag = r'\textcolor{red}{', endTag = '}' for LaTeX tags.
        """
        if nbPlayers is None:
            text = repr(self.matrix_transitions)
        else:
            assert nbPlayers >= 0, "Error, the 'nbPlayers' argument for reprarms method of a MAB object has to be a non-negative integer."  # DEBUG
            means = self.means
            bestArms = np.argsort(means)[-min(nbPlayers, self.nbArms):]
            if nbPlayers == 0:
                bestArms = []
            dollar = '$' if latex else ''
            text = r'{} Markovian rewards, {}[{}]{}'.format(
                "Rested" if self.rested else "Restless",
                dollar,
                ', '.join(
                    r"{}P: {}, \pi: {}{}{}".format(
                        openTag if armId in bestArms else "",
                        np.asarray(mat).tolist(), st, repr(arm),
                        endTag if armId in bestArms else ""
                    )
                    for armId, (arm, mat, st) in enumerate(zip(self.arms, self.matrix_transitions, self.steadys))
                ),
                dollar
            )
        return wraplatex(text) if latex else wraptext(text)
    def draw(self, armId, t=1):
        """ Move on the Markov chain and return its state as a reward (0 or 1, or else).

        - If *rested* Markovian, only the state of the Markov chain of arm `armId` changes. It is the simpler model, and the default model.
        - But if *restless* (non rested) Markovian, the states of the Markov chains of all arms change (not only `armId`).
        """
        # 1. Get current state for that arm, and its Markov chain
        state, chain = self.states[armId], self.chains[armId]
        # 2. Sample from that Markov chain
        nextState = chain.move(state)
        # 3. Update the state
        self.states[armId] = nextState
        # print("- For the arm #{}, previously in the state {}, the Markov chain moved to state {} ...".format(armId, state, nextState))  # DEBUG

        if not self.rested:
            # print("- Non-rested Markovian model, every other arm is also moving...")  # DEBUG
            for armId2 in range(self.nbArms):
                # For each other arm, they evolve
                if armId2 != armId:
                    state, chain = self.states[armId2], self.chains[armId2]
                    nextState = chain.move(state)
                    # print("  - For the arm #{}, previously in the state {}, the Markov chain moved to state {} ...".format(armId2, state, nextState))  # DEBUG
                    self.states[armId2] = nextState

        # Return the new state of the drawn arm as the reward
        return float(self.states[armId])
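    # Example (illustrative sketch, assuming pykov is available and Bernoulli can
    # be imported from the Arms module): the 2-armed rested problem from the class
    # docstring, then a few binary rewards obtained by moving the first chain:
    #
    #     from Arms import Bernoulli
    #     problem = MarkovianMAB({"arm_type": "Markovian", "params": {
    #         "rested": True,
    #         "transitions": [
    #             {(0, 0): 0.7, (0, 1): 0.3, (1, 0): 0.5, (1, 1): 0.5},
    #             [[0.2, 0.8], [0.6, 0.4]],
    #         ],
    #         "steadyArm": Bernoulli,
    #     }})
    #     [problem.draw(0) for _ in range(5)]  # e.g. [1.0, 0.0, 1.0, 1.0, 0.0]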
# --- ChangingAtEachRepMAB

VERBOSE = True
VERBOSE = False  #: Whether to be verbose when generating new arms for Dynamic MAB
class ChangingAtEachRepMAB(MAB):
    """Like a stationary MAB problem, but the arms are (randomly) regenerated for each repetition, with the :meth:`newRandomArms` method.

    - ``M.arms`` and ``M.means`` are changed after each call to :meth:`newRandomArms`, but not ``nbArms``. All the other methods are carefully written to still make sense (``Mbest``, ``Mworst``, ``minArm``, ``maxArm``).

    .. warning:: It works perfectly fine, but it is still experimental, be careful when using this feature.

    .. note:: Testing bandit algorithms against randomly generated problems at each repetition is usually referred to as *"Bayesian problems"* in the literature: a prior is set on problems (eg. uniform on :math:`[0,1]^K` or less obvious for instance if a ``mingap`` is set), and the performance is assessed against this prior. It differs from the *frequentist* point of view of having one fixed problem and doing eg. ``n=1000`` repetitions on the same problem.
    """
    def __init__(self, configuration, verbose=VERBOSE):
        """New ChangingAtEachRepMAB."""
        self.isChangingAtEachRepetition = True  #: The problem is changing at each repetition or not.
        self.isDynamic = False  #: The problem is static.
        self.isMarkovian = False  #: The problem is not Markovian.
        self._sparsity = None

        assert isinstance(configuration, dict) \
            and "arm_type" in configuration and "params" in configuration \
            and "newMeans" in configuration["params"] and "args" in configuration["params"], \
            "Error: this ChangingAtEachRepMAB is not really a dynamic MAB, you should use a simple MAB instead!"  # DEBUG
        self._verbose = verbose

        print(" Special MAB problem, changing at every repetition, read from a dictionary 'configuration' = {} ...".format(configuration))  # DEBUG

        self.arm_type = arm_type = configuration["arm_type"]  #: Kind of arm (ChangingAtEachRepMAB are homogeneous)
        print(" - with 'arm_type' =", arm_type)  # DEBUG
        params = configuration["params"]
        print(" - with 'params' =", params)  # DEBUG
        self.newMeans = params["newMeans"]  #: Function to generate the means
        print(" - with 'newMeans' =", self.newMeans)  # DEBUG
        self.args = params["args"]  #: Args to give to function
        print(" - with 'args' =", self.args)  # DEBUG

        # XXX try to read sparsity
        self._sparsity = configuration["sparsity"] if "sparsity" in configuration else None

        print("\n\n ==> Creating the dynamic arms ...")  # DEBUG
        # Keep track of the successive mean vectors
        self._historyOfMeans = []  # History of the mean vectors
        self._t = 0  # nb of calls to the function for generating new arms
        # Generate a first mean vector
        self.newRandomArms()
        print(" - drawing a random set of arms")
        self.nbArms = len(self.arms)  #: Number of arms
        print(" - with 'nbArms' =", self.nbArms)  # DEBUG
        print(" - with 'arms' =", self.arms)  # DEBUG
        print(" - Example of initial draw of 'means' =", self.means)  # DEBUG
        print(" - with 'maxArm' =", self.maxArm)  # DEBUG
        print(" - with 'minArm' =", self.minArm)  # DEBUG
    def __repr__(self):
        if self._arms is not None:
            return "{}(nbArms: {}, arms: {}, minArm: {:.3g}, maxArm: {:.3g})".format(self.__class__.__name__, self.nbArms, self._arms, self.minArm, self.maxArm)
        else:
            return "{}(nbArms: {}, armType: {})".format(self.__class__.__name__, self.nbArms, self.arm_type)
    def reprarms(self, nbPlayers=None, openTag='', endTag='^*', latex=True):
        """Cannot represent the dynamic arms, so print the ChangingAtEachRepMAB object."""
        # print("reprarms of a ChangingAtEachRepMAB object...")  # DEBUG
        # print("  It has self._historyOfMeans =\n{}".format(self._historyOfMeans))  # DEBUG
        # print("  It has self.means =\n{}".format(self.means))  # DEBUG
        text = "{text}, {K} with uniform means on [{dollar}{lower:.3g}, {upper:.3g}{dollar}]{mingap}{sparsity}".format(
            text="Bayesian MAB",
            K=str(self._arms[0]),
            lower=self.args["lower"],
            upper=self.args["lower"] + self.args["amplitude"],
            mingap="" if self.args["mingap"] is None or self.args["mingap"] == 0 else r", min gap=$%.3g$" % self.args["mingap"],
            sparsity="" if self._sparsity is None else ", sparsity = {dollar}{s}{dollar}".format(s=self._sparsity, dollar="$" if latex else ""),
            dollar="$" if latex else "",
        )
        return wraptext(text)
# # --- Dynamic arms and means
    def newRandomArms(self, t=None, verbose=VERBOSE):
        """Generate a new list of arms, from ``arm_type(params['newMeans'](*params['args']))``."""
        one_draw_of_means = self.newMeans(**self.args)
        self._arms = [self.arm_type(mean) for mean in one_draw_of_means]
        self.nbArms = len(self._arms)  # useless
        self._t += 1  # new draw!
        self._historyOfMeans.append(one_draw_of_means)
        if verbose or self._verbose:
            print("\n - Creating a new dynamic list of means = {} for arms: ChangingAtEachRepMAB = {} ...".format(np.array(one_draw_of_means), repr(self)))  # DEBUG
            # print("Currently self._t = {} and self._historyOfMeans = {} ...".format(self._t, self._historyOfMeans))  # DEBUG
        return one_draw_of_means
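    # Example (illustrative sketch; the 'uniformMeans' helper below is
    # hypothetical, not part of this module): a 'newMeans' function drawing K
    # means uniformly in [lower, lower + amplitude], as expected by the
    # configuration of a ChangingAtEachRepMAB:
    #
    #     def uniformMeans(nbArms=3, lower=0., amplitude=1., mingap=None):
    #         return lower + amplitude * np.random.rand(nbArms)
    #
    #     configuration = {
    #         "arm_type": Bernoulli,  # assuming Bernoulli from the Arms module
    #         "params": {
    #             "newMeans": uniformMeans,
    #             "args": {"nbArms": 3, "lower": 0., "amplitude": 1., "mingap": None},
    #         },
    #     }
    #     problem = ChangingAtEachRepMAB(configuration)
    #     problem.newRandomArms()  # regenerate the arms, e.g. at the start of a repetition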
    # All these properties arms, means, minArm, maxArm cannot be attributes, as the means of arms change at every experiment

    @property
    def arms(self):
        """Return the *current* list of arms."""
        return self._arms

    @property
    def means(self):
        """ Return the list of means of arms for this ChangingAtEachRepMAB: after :math:`x` calls to :meth:`newRandomArms`, the returned mean of arm :math:`k` is the mean of the :math:`x` means of that arm.

        .. warning:: Highly experimental!
        """
        return np.mean(np.array(self._historyOfMeans), axis=0)

    # --- Helper to compute sets Mbest and Mworst
    def Mbest(self, M=1):
        """ Set of M best means (averaged on all the draws of new means)."""
        sortedMeans = np.mean(np.sort(np.array(self._historyOfMeans), axis=1), axis=0)
        return sortedMeans[-M:]
    def Mworst(self, M=1):
        """ Set of worst means, i.e., the K-M means not among the M best (averaged on all the draws of new means)."""
        sortedMeans = np.mean(np.sort(np.array(self._historyOfMeans), axis=1), axis=0)
        return sortedMeans[:-M]
    @property
    def minArm(self):
        """Return the smallest mean of the arms, for a dynamic MAB (averaged on all the draws of new means)."""
        return np.mean(np.min(np.array(self._historyOfMeans), axis=1))

    @property
    def maxArm(self):
        """Return the largest mean of the arms, for a dynamic MAB (averaged on all the draws of new means)."""
        return np.mean(np.max(np.array(self._historyOfMeans), axis=1))

    # --- Compute lower bounds
    def lowerbound(self):
        """ Compute the constant C(mu), for [Lai & Robbins] lower-bound for this MAB problem (complexity), using functions from :mod:`kullback` (averaged on all the draws of new means)."""
        oneLR = self.arms[0].oneLR
        return np.mean([
            sum(
                oneLR(np.max(means), m)
                for m in means
                if m != np.max(means)
            )
            for means in self._historyOfMeans
        ])
    def hoifactor(self):
        """ Compute the HOI factor H_OI(mu), the Optimal Arm Identification (OI) factor, for this MAB problem (complexity). Cf. (3.3) in Navikkumar MODI's thesis, "Machine Learning and Statistical Decision Making for Green Radio" (2017) (averaged on all the draws of new means)."""
        oneHOI = self.arms[0].oneHOI
        return np.mean([
            sum(
                oneHOI(np.max(means), m)
                for m in means
                if m != np.max(means)
            ) / float(len(means))
            for means in self._historyOfMeans
        ])
    def lowerbound_multiplayers(self, nbPlayers=1):
        """ Compute our multi-players lower bound for this MAB problem (complexity), using functions from :mod:`kullback`. """
        oneLR = self.arms[0].oneLR
        kl = self.arms[0].kl
        avg_our_lowerbound, avg_anandkumar_lowerbound, avg_centralized_lowerbound = 0.0, 0.0, 0.0
        for means in self._historyOfMeans:
            sortedMeans = sorted(means)
            assert nbPlayers <= len(sortedMeans), "Error: this lowerbound_multiplayers() for a MAB problem is only valid when there are fewer users than arms. Here M = {} > K = {} ...".format(nbPlayers, len(sortedMeans))  # DEBUG
            # FIXME it is highly suboptimal to have a lowerbound = 0 if nbPlayers == nbArms! We have to finish the theoretical analysis!
            bestMeans = sortedMeans[-nbPlayers:]
            worstMeans = sortedMeans[:-nbPlayers]
            worstOfBestMean = bestMeans[0]

            # Our lower bound is this:
            centralized_lowerbound = sum(oneLR(worstOfBestMean, oneOfWorstMean) for oneOfWorstMean in worstMeans)
            our_lowerbound = nbPlayers * centralized_lowerbound
            # The initial lower bound in Theorem 6 from [Anandkumar et al., 2010]
            anandkumar_lowerbound = sum(sum((worstOfBestMean - oneOfWorstMean) / kl(oneOfWorstMean, oneOfBestMean) for oneOfWorstMean in worstMeans) for oneOfBestMean in bestMeans)
            # Store them
            avg_our_lowerbound += our_lowerbound
            avg_anandkumar_lowerbound += anandkumar_lowerbound
            avg_centralized_lowerbound += centralized_lowerbound

        # Done, compute the averages of the lower-bounds
        avg_our_lowerbound /= float(len(self._historyOfMeans))
        avg_anandkumar_lowerbound /= float(len(self._historyOfMeans))
        avg_centralized_lowerbound /= float(len(self._historyOfMeans))
        print(" - For {} players, Anandtharam et al. centralized lower-bound gave = {:.3g} ...".format(nbPlayers, avg_centralized_lowerbound))  # DEBUG
        print(" - For {} players, our lower bound gave = {:.3g} ...".format(nbPlayers, avg_our_lowerbound))  # DEBUG
        print(" - For {} players, the initial lower bound in Theorem 6 from [Anandkumar et al., 2010] gave = {:.3g} ...".format(nbPlayers, avg_anandkumar_lowerbound))  # DEBUG
        # Check that our bound is better (ie bigger)
        if avg_anandkumar_lowerbound > avg_our_lowerbound:
            print("Error, our lower bound is worse than the one in Theorem 6 from [Anandkumar et al., 2010], but it should always be better...")
        return avg_our_lowerbound, avg_anandkumar_lowerbound, avg_centralized_lowerbound
# --- PieceWiseStationaryMAB
class PieceWiseStationaryMAB(MAB):
    r"""Like a stationary MAB problem, but piece-wise stationary.

    - Give it a list of vectors of means, and a list of change-point locations.
    - You can use :meth:`plotHistoryOfMeans` to see a nice plot of the history of means.

    .. note:: This is a generic class to implement one "easy" kind of non-stationary bandits, abruptly changing non-stationary bandits, if change points are fixed and decided in advance.

    .. warning:: It works fine, but it is still experimental, be careful when using this feature.

    .. warning:: The number of arms is fixed, see https://github.com/SMPyBandits/SMPyBandits/issues/123 if you are curious about bandit problems with a varying number of arms (or sleeping bandits where some arms can be enabled or disabled at each time).
    """
    def __init__(self, configuration, verbose=VERBOSE):
        """New PieceWiseStationaryMAB."""
        self.isChangingAtEachRepetition = False  #: The problem is not changing at each repetition.
        self.isDynamic = True  #: The problem is dynamic.
        self.isMarkovian = False  #: The problem is not Markovian.
        self._sparsity = None

        assert isinstance(configuration, dict) \
            and "arm_type" in configuration and "params" in configuration \
            and "listOfMeans" in configuration["params"] \
            and "changePoints" in configuration["params"], \
            "Error: this PieceWiseStationaryMAB is not really a non-stationary MAB, you should use a simple MAB instead!"  # DEBUG
        self._verbose = verbose

        print(" Special MAB problem, with arms (possibly) changing at every time step, read from a dictionary 'configuration' = {} ...".format(configuration))  # DEBUG

        self.arm_type = arm_type = configuration["arm_type"]  #: Kind of arm (PieceWiseStationaryMAB are homogeneous)
        print(" - with 'arm_type' =", arm_type)  # DEBUG
        params = configuration["params"]
        print(" - with 'params' =", params)  # DEBUG

        self.listOfMeans = np.array(params["listOfMeans"])  #: The list of means
        self.nbArms = len(self.listOfMeans[0])  #: Number of arms
        assert all(len(arms) == self.nbArms for arms in self.listOfMeans), "Error: the number of arms cannot be different between change-points."  # DEBUG
        print(" - with 'listOfMeans' =", self.listOfMeans)  # DEBUG

        self.changePoints = params["changePoints"]  #: List of the change points
        print(" - with 'changePoints' =", self.changePoints)  # DEBUG
        # XXX Maybe we need to add 0 in the list of changePoints
        if 0 not in self.changePoints and len(self.listOfMeans) == len(self.changePoints) + 1:
            self.changePoints = [0] + self.changePoints
        assert len(self.listOfMeans) == len(self.changePoints), "Error: the list of means {} does not have the same length as the list of change points {} ...".format(self.listOfMeans, self.changePoints)  # DEBUG

        # XXX try to read sparsity
        self._sparsity = configuration["sparsity"] if "sparsity" in configuration else None

        print("\n\n ==> Creating the dynamic arms ...")  # DEBUG
        self.listOfArms = [
            [self.arm_type(mean) for mean in means]
            for means in self.listOfMeans
        ]
        self.currentInterval = 0  # current number of the interval we are in

        print(" - with 'nbArms' =", self.nbArms)  # DEBUG
        print(" - with 'arms' =", self.arms)  # DEBUG
        print(" - Initial draw of 'means' =", self.means)  # DEBUG
    def __repr__(self):
        if len(self.listOfArms) > 0:
            return "{}(nbArms: {}, arms: {})".format(self.__class__.__name__, self.nbArms, self.arms)
        else:
            return "{}(nbArms: {}, armType: {})".format(self.__class__.__name__, self.nbArms, self.arm_type)
    def reprarms(self, nbPlayers=None, openTag='', endTag='^*', latex=True):
        """Cannot represent the dynamic arms, so print the PieceWiseStationaryMAB object."""
        text = r"{text}, {arm} with $\Upsilon={M}$ break-points".format(
            text="Non-Stationary MAB",
            arm=str(self.arms[0]),
            M=len([tau for tau in self.changePoints if tau > 0]),  # we do not count 0 and horizon
        )
        return wraptext(text)
    def newRandomArms(self, t=None, onlyOneArm=None, verbose=VERBOSE):
        """Fake function, there is nothing random here, it is just to tell the piece-wise stationary MAB problem to maybe use the next interval."""
        if t > 0 and t in self.changePoints:
            if verbose:
                print(" - BREAKPOINT For a PieceWiseStationaryMAB object, the function newRandomArms was called, with t = {}, and current interval was {}, so means was = {} and will be = {} ...".format(t, self.currentInterval, self.listOfMeans[self.currentInterval], self.listOfMeans[self.currentInterval + 1]))  # DEBUG
            self.currentInterval += 1  # next interval!
        else:
            if verbose:
                print(" - For a PieceWiseStationaryMAB object, the function newRandomArms was called, with t = {}, and current interval is {}, so means is = {} ...".format(t, self.currentInterval, self.listOfMeans[self.currentInterval]))  # DEBUG
        # return the latest generated means
        return self.listOfMeans[self.currentInterval]
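    # Example (illustrative sketch, assuming Bernoulli can be imported from the
    # Arms module): a 3-armed piece-wise stationary problem with two break-points:
    #
    #     configuration = {
    #         "arm_type": Bernoulli,
    #         "params": {
    #             "listOfMeans": [[0.1, 0.5, 0.9],   # for 0 <= t < 1000
    #                             [0.9, 0.5, 0.1],   # for 1000 <= t < 2000
    #                             [0.5, 0.1, 0.9]],  # for t >= 2000
    #             "changePoints": [0, 1000, 2000],
    #         },
    #     }
    #     problem = PieceWiseStationaryMAB(configuration)
    #     problem.newRandomArms(1000)  # switch to the second interval
    #     problem.means                # -> array([0.9, 0.5, 0.1])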
# --- Plot utility
    def plotHistoryOfMeans(self, horizon=None, savefig=None, forceTo01=False, showplot=True, pickleit=False):
        """Plot the history of means, as a plot with x axis being the time, y axis the mean rewards, and K curves one for each arm."""
        if horizon is None:
            horizon = max(self.changePoints)
        allMeans = self.get_allMeans(horizon=horizon)
        colors = palette(self.nbArms)
        markers = makemarkers(self.nbArms)
        # Now plot
        fig = plt.figure()
        for armId in range(self.nbArms):
            meanOfThisArm = allMeans[armId, :]
            plt.plot(meanOfThisArm, color=colors[armId], marker=markers[armId], markevery=(armId / 50., 0.1), label='Arm #{}'.format(armId), lw=4, alpha=0.9)
        legend()
        ymin, ymax = plt.ylim()
        if forceTo01:
            ymin, ymax = min(0, ymin), max(1, ymax)
            plt.ylim(ymin, ymax)
        if len(self.changePoints) > 20:
            print("WARNING: Adding vlines for the change points with more than 20 change points will be ugly on the plots...")  # DEBUG
        if len(self.changePoints) < 30:  # add the vlines only if not too many change points
            for tau in self.changePoints:
                if tau > 0 and tau < horizon:
                    plt.vlines(tau, ymin, ymax, linestyles='dotted', alpha=0.7)
        plt.xlabel(r"Time steps $t = 1...T$, horizon $T = {}${}".format(horizon, signature))
        plt.ylabel(r"Successive means of the $K = {}$ arms".format(self.nbArms))
        plt.title("History of means for {}".format(self.reprarms(latex=True)))
        show_and_save(showplot=showplot, savefig=savefig, fig=fig, pickleit=pickleit)
        return fig
    # All these properties arms, means, minArm, maxArm cannot be attributes, as the means of arms change at every experiment

    @property
    def arms(self):
        """Return the *current* list of arms: at time :math:`t`, the arms are those of the time interval containing :math:`t`."""
        return self.listOfArms[self.currentInterval]

    @property
    def means(self):
        """ Return the list of means of arms for this PieceWiseStationaryMAB: at time :math:`t`, the returned mean of arm :math:`k` is its mean during the time interval containing :math:`t`."""
        return self.listOfMeans[self.currentInterval]

    # --- Helper to compute values minArm and maxArm

    @property
    def minArm(self):
        """Return the smallest mean of the arms, for the current vector of means."""
        return np.min(self.means)

    @property
    def maxArm(self):
        """Return the largest mean of the arms, for the current vector of means."""
        return np.max(self.means)

    # --- Helper to compute vector of min arms, max arms, all arms
    def get_minArm(self, horizon=None):
        """Return the smallest mean of the arms, for a piece-wise stationary MAB.

        - It is a vector of length horizon.
        """
        if horizon is None:
            horizon = np.max(self.changePoints)
        mapOfMinArms = [np.min(means) for means in self.listOfMeans]
        meansOfMinArms = np.zeros(horizon)
        nbChangePoint = 0
        for t in range(horizon):
            if nbChangePoint < len(self.changePoints) - 1 and t >= self.changePoints[nbChangePoint + 1]:
                nbChangePoint += 1
            meansOfMinArms[t] = mapOfMinArms[nbChangePoint]
        return meansOfMinArms
    def get_minArms(self, M=1, horizon=None):
        """Return the vector of sum of the M-worst means of the arms, for a piece-wise stationary MAB.

        - It is a vector of length horizon.
        """
        if horizon is None:
            horizon = np.max(self.changePoints)

        def Mworst(unsorted_list):
            sorted_list = np.sort(unsorted_list)
            return np.sum(sorted_list[:-M])

        mapOfMworstMaxArms = [Mworst(means) for means in self.listOfMeans]
        meansOfMworstMaxArms = np.ones(horizon)
        nbChangePoint = 0
        for t in range(horizon):
            if nbChangePoint < len(self.changePoints) - 1 and t >= self.changePoints[nbChangePoint + 1]:
                nbChangePoint += 1
            meansOfMworstMaxArms[t] = mapOfMworstMaxArms[nbChangePoint]
        return meansOfMworstMaxArms
    def get_maxArm(self, horizon=None):
        """Return the vector of max mean of the arms, for a piece-wise stationary MAB.

        - It is a vector of length horizon.
        """
        if horizon is None:
            horizon = np.max(self.changePoints)
        mapOfMaxArms = [np.max(means) for means in self.listOfMeans]
        meansOfMaxArms = np.ones(horizon)
        nbChangePoint = 0
        for t in range(horizon):
            if nbChangePoint < len(self.changePoints) - 1 and t >= self.changePoints[nbChangePoint + 1]:
                nbChangePoint += 1
            meansOfMaxArms[t] = mapOfMaxArms[nbChangePoint]
        return meansOfMaxArms
    def get_maxArms(self, M=1, horizon=None):
        """Return the vector of sum of the M-best means of the arms, for a piece-wise stationary MAB.

        - It is a vector of length horizon.
        """
        if horizon is None:
            horizon = np.max(self.changePoints)

        def Mbest(unsorted_list):
            sorted_list = np.sort(unsorted_list)
            return np.sum(sorted_list[-M:])

        mapOfMBestMaxArms = [Mbest(means) for means in self.listOfMeans]
        meansOfMBestMaxArms = np.ones(horizon)
        nbChangePoint = 0
        for t in range(horizon):
            if nbChangePoint < len(self.changePoints) - 1 and t >= self.changePoints[nbChangePoint + 1]:
                nbChangePoint += 1
            meansOfMBestMaxArms[t] = mapOfMBestMaxArms[nbChangePoint]
        return meansOfMBestMaxArms
    def get_allMeans(self, horizon=None):
        """Return the vector of means of the arms, for a piece-wise stationary MAB.

        - It is a numpy array of shape (nbArms, horizon).
        """
        if horizon is None:
            horizon = np.max(self.changePoints)
        meansOfArms = np.ones((self.nbArms, horizon))
        for armId in range(self.nbArms):
            nbChangePoint = 0
            for t in range(horizon):
                if nbChangePoint < len(self.changePoints) - 1 and t >= self.changePoints[nbChangePoint + 1]:
                    nbChangePoint += 1
                meansOfArms[armId][t] = self.listOfMeans[nbChangePoint][armId]
        return meansOfArms
    # --- Compute lower bounds
    # TODO include knowledge of piece-wise stationarity in the lower-bounds

    # def lowerbound(self):
    #     """ Compute the constant C(mu), for [Lai & Robbins] lower-bound for this MAB problem (complexity), using functions from :mod:`kullback` (averaged on all the draws of new means)."""
    #     raise NotImplementedError

    # def hoifactor(self):
    #     """ Compute the HOI factor H_OI(mu), the Optimal Arm Identification (OI) factor, for this MAB problem (complexity). Cf. (3.3) in Navikkumar MODI's thesis, "Machine Learning and Statistical Decision Making for Green Radio" (2017) (averaged on all the draws of new means)."""
    #     raise NotImplementedError

    # def lowerbound_multiplayers(self, nbPlayers=1):
    #     """ Compute our multi-players lower bound for this MAB problem (complexity), using functions from :mod:`kullback`. """
    #     raise NotImplementedError


# --- NonStationaryMAB
class NonStationaryMAB(PieceWiseStationaryMAB):
    r"""Like a stationary MAB problem, but the arms *can* be modified *at each time step*, with the :meth:`newRandomArms` method.

    - ``M.arms`` and ``M.means`` are changed after each call to :meth:`newRandomArms`, but not ``nbArms``. All the other methods are carefully written to still make sense (``Mbest``, ``Mworst``, ``minArm``, ``maxArm``).

    .. note:: This is a generic class to implement different kinds of non-stationary bandits:

        - Abruptly changing non-stationary bandits, in different variants: change points are randomly drawn (once for all ``n`` repetitions or at a different location for each repetition).
        - Slowly varying non-stationary bandits, where the underlying mean of each arm is slowly and randomly modified, and a bound on the speed of change (e.g., the Lipschitz constant of :math:`t \mapsto \mu_i(t)`) is known.

    .. warning:: It works fine, but it is still experimental, be careful when using this feature.

    .. warning:: The number of arms is fixed, see https://github.com/SMPyBandits/SMPyBandits/issues/123 if you are curious about bandit problems with a varying number of arms (or sleeping bandits where some arms can be enabled or disabled at each time).
    """
    def __init__(self, configuration, verbose=VERBOSE):
        """New NonStationaryMAB."""
        self.isChangingAtEachRepetition = False  #: The problem is not changing at each repetition.
        self.isDynamic = True  #: The problem is dynamic.
        self.isMarkovian = False  #: The problem is not Markovian.
        self._sparsity = None

        assert isinstance(configuration, dict) \
            and "arm_type" in configuration and "params" in configuration \
            and "newMeans" in configuration["params"] \
            and "changePoints" in configuration["params"] \
            and "args" in configuration["params"], \
            "Error: this NonStationaryMAB is not really a non-stationary MAB, you should use a simple MAB instead!"  # DEBUG
        self._verbose = verbose

        print(" NonStationary MAB problem, with arms (possibly) changing at every time step, read from a dictionary 'configuration' = {} ...".format(configuration))  # DEBUG

        self.arm_type = arm_type = configuration["arm_type"]  #: Kind of arm (NonStationaryMAB are homogeneous)
        print(" - with 'arm_type' =", arm_type)  # DEBUG
        params = configuration["params"]
        print(" - with 'params' =", params)  # DEBUG
        self.newMeans = params["newMeans"]  #: Function to generate the means
        print(" - with 'newMeans' =", self.newMeans)  # DEBUG
        self.changePoints = params["changePoints"]  #: List of the change points
        print(" - with 'changePoints' =", self.changePoints)  # DEBUG
        self.onlyOneArm = params.get("onlyOneArm", None)  #: None by default, but can be "uniform" to only change *one* arm at each change point.
        print(" - with 'onlyOneArm' =", self.onlyOneArm)  # DEBUG
        self.args = params["args"]  #: Args to give to function
        print(" - with 'args' =", self.args)  # DEBUG

        # XXX try to read sparsity
        self._sparsity = configuration["sparsity"] if "sparsity" in configuration else None

        print("\n\n ==> Creating the dynamic arms ...")  # DEBUG
        # Keep track of the successive mean vectors
        self._historyOfMeans = dict()  # History of the mean vectors, storing {changepoint: newMeans}
        self._historyOfChangePoints = []  # History of the change points
        self._t = 0  # nb of calls to the function for generating new arms
        # Generate a first mean vector
        self.newRandomArms(0)
        print(" - drawing a random set of arms")
        self.nbArms = len(self.arms)  #: Number of arms
        print(" - with 'nbArms' =", self.nbArms)  # DEBUG
        print(" - with 'arms' =", self.arms)  # DEBUG
        print(" - Example of initial draw of 'means' =", self.means)  # DEBUG
    def reprarms(self, nbPlayers=None, openTag='', endTag='^*', latex=True):
        """Cannot represent the dynamic arms, so print the NonStationaryMAB object."""
        # print("reprarms of a NonStationaryMAB object...")  # DEBUG
        # print("  It has self._historyOfMeans =\n{}".format(self._historyOfMeans))  # DEBUG
        # print("  It has self.means =\n{}".format(self.means))  # DEBUG
        text = "{text}, {arm} with uniform means on [{dollar}{lower:.3g}, {upper:.3g}{dollar}]{mingap}{sparsity}".format(
            text="Non-Stationary MAB",
            arm=str(self._arms[0]),
            lower=self.args["lower"],
            upper=self.args["lower"] + self.args["amplitude"],
            mingap="" if self.args["mingap"] is None or self.args["mingap"] == 0 else r", min gap=$%.3g$" % self.args["mingap"],
            sparsity="" if self._sparsity is None else ", sparsity = {dollar}{s}{dollar}".format(s=self._sparsity, dollar="$" if latex else ""),
            dollar="$" if latex else "",
        )
        return wraptext(text)
# # --- Dynamic arms and means
    def newRandomArms(self, t=None, onlyOneArm=None, verbose=VERBOSE):
        """Generate a new list of arms, from ``arm_type(params['newMeans'](t, **params['args']))``.

        - If ``onlyOneArm`` is given and is an integer, the change of mean only occurs for this arm and the others stay the same.
        - If ``onlyOneArm="uniform"``, the change of mean only occurs for one arm and the others stay the same, and the changing arm is chosen uniformly at random.

        .. note:: Only the *means* of the arms change (and so, their order), not their family.

        .. warning:: TODO? So far the only change points we consider are when the means of arms change, but the family of distributions stays the same. I could implement a more generic way, for instance to be able to test algorithms that detect changes between different families of distributions (e.g., from a Gaussian of variance=1 to a Gaussian of variance=2, with different or not means).
        """
        if (t > 0 and t not in self.changePoints) or (t in self._historyOfChangePoints):
            # return the latest generated means
            return self._historyOfMeans[self._historyOfChangePoints[-1]]
        self._historyOfChangePoints.append(t)
        one_draw_of_means = self.newMeans(**self.args)
        self._t += 1  # new draw!
        if onlyOneArm is not None and len(self._historyOfMeans) > 0:
            if onlyOneArm == "uniform":  # - Handling the option to change only one arm
                onlyOneArm = np.random.randint(self.nbArms)
            elif isinstance(onlyOneArm, int):  # - Or a set of arms
                onlyOneArm = np.random.choice(self.nbArms, min(onlyOneArm, self.nbArms), False)
            if np.ndim(onlyOneArm) == 0:
                onlyOneArm = [onlyOneArm]
            elif np.ndim(onlyOneArm) == 1 and np.size(onlyOneArm) == 1:
                onlyOneArm = [onlyOneArm[0]]  # force to extract the list then wrap it back
            # - If only one arm, and not the first random means, change only one
            # print("onlyOneArm =", onlyOneArm)  # DEBUG
            for arm in range(self.nbArms):
                if arm not in onlyOneArm:
                    one_draw_of_means[arm] = self._historyOfMeans[self._historyOfChangePoints[-2]][arm]
        self._historyOfMeans[t] = one_draw_of_means
        self._arms = [self.arm_type(mean) for mean in one_draw_of_means]
        self.nbArms = len(self._arms)  # useless
        if verbose or self._verbose:
            print("\n - Creating a new dynamic list of means = {} for arms: NonStationaryMAB = {} ...".format(np.array(one_draw_of_means), repr(self)))  # DEBUG
            # print("Currently self._t = {} and self._historyOfMeans = {} ...".format(self._t, self._historyOfMeans))  # DEBUG
        return one_draw_of_means
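    # Example (illustrative sketch; the 'uniformMeans' helper is hypothetical, as
    # in the ChangingAtEachRepMAB example above): a non-stationary problem where
    # only one (uniformly chosen) arm changes at each change point:
    #
    #     configuration = {
    #         "arm_type": Bernoulli,  # assuming Bernoulli from the Arms module
    #         "params": {
    #             "newMeans": uniformMeans,
    #             "args": {"nbArms": 3, "lower": 0., "amplitude": 1., "mingap": None},
    #             "changePoints": [0, 1000, 2000],
    #             "onlyOneArm": "uniform",
    #         },
    #     }
    #     problem = NonStationaryMAB(configuration)
    #     problem.newRandomArms(1000, onlyOneArm="uniform")  # redraw the mean of one arm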
    def get_minArm(self, horizon=None):
        """Return the smallest mean of the arms, for a non-stationary MAB.

        - It is a vector of length horizon.
        """
        if horizon is None:
            horizon = np.max(self._historyOfChangePoints)
        mapOfMinArms = [np.min(self._historyOfMeans[tau]) for tau in sorted(self._historyOfChangePoints)]
        meansOfMinArms = np.zeros(horizon)
        nbChangePoint = 0
        for t in range(horizon):
            if nbChangePoint < len(self._historyOfChangePoints) - 1 and t >= self._historyOfChangePoints[nbChangePoint + 1]:
                nbChangePoint += 1
            meansOfMinArms[t] = mapOfMinArms[nbChangePoint]
        return meansOfMinArms
    def get_maxArm(self, horizon=None):
        """Return the vector of max mean of the arms, for a non-stationary MAB.

        - It is a vector of length horizon.
        """
        if horizon is None:
            horizon = np.max(self._historyOfChangePoints)
        mapOfMaxArms = [np.max(self._historyOfMeans[tau]) for tau in sorted(self._historyOfChangePoints)]
        meansOfMaxArms = np.ones(horizon)
        nbChangePoint = 0
        for t in range(horizon):
            if nbChangePoint < len(self._historyOfChangePoints) - 1 and t >= self._historyOfChangePoints[nbChangePoint + 1]:
                nbChangePoint += 1
            meansOfMaxArms[t] = mapOfMaxArms[nbChangePoint]
        return meansOfMaxArms
    def get_allMeans(self, horizon=None):
        """Return the vector of means of the arms, for a non-stationary MAB.

        - It is a numpy array of shape (nbArms, horizon).
        """
        if horizon is None:
            horizon = np.max(self._historyOfChangePoints)
        mapOfArms = [self._historyOfMeans[tau] for tau in sorted(self._historyOfChangePoints)]
        meansOfArms = np.ones((self.nbArms, horizon))
        for armId in range(self.nbArms):
            nbChangePoint = 0
            for t in range(horizon):
                if nbChangePoint < len(self._historyOfChangePoints) - 1 and t >= self._historyOfChangePoints[nbChangePoint + 1]:
                    nbChangePoint += 1
                meansOfArms[armId][t] = mapOfArms[nbChangePoint][armId]
        return meansOfArms
# --- IncreasingMAB
def static_change_lower_amplitude(t, l_t, a_t):
    r"""A function called by :class:`IncreasingMAB` *at every time t*, to compute the (possibly) new values for :math:`l_t` and :math:`a_t`.

    - First argument is a boolean, `True` if a change occurred, `False` otherwise.
    """
    return False, l_t, a_t
#: Default value for the :func:`doubling_change_lower_amplitude` function.
L0, A0, DELTA, T0, DELTA_T, ZOOM = None, None, 0, 100, 500, 1.1
L0, A0, DELTA, T0, DELTA_T, ZOOM = None, None, 0, 100, 500, 1.05
L0, A0, DELTA, T0, DELTA_T, ZOOM = None, None, 1, 2500, 5000, 2
L0, A0, DELTA, T0, DELTA_T, ZOOM = None, None, 0, -1, 1000, 2
L0, A0, DELTA, T0, DELTA_T, ZOOM = -1, 1, 0, -1, 1000, 2
L0, A0, DELTA, T0, DELTA_T, ZOOM = -1, 2, 0, -1, -1, 2
def doubling_change_lower_amplitude(t, l_t, a_t, l0=L0, a0=A0, delta=DELTA, T0=T0, deltaT=DELTA_T, zoom=ZOOM):
    r"""A function called by :class:`IncreasingMAB` *at every time t*, to compute the (possibly) new values for :math:`l_t` and :math:`a_t`.

    - At time 0, it forces the use of :math:`l_0, a_0` if they are given and not ``None``.
    - At step `T0`, it reduces :math:`l_t` by `delta` (typically from `0` to `-1`).
    - Every `deltaT` steps, it multiplies both :math:`l_t` and :math:`a_t` by `zoom`.
    - First argument is a boolean, `True` if a change occurred, `False` otherwise.
    """
    if t == 0 and (l0 is not None or a0 is not None):
        different_starting = (l_t != l0) or (a_t != a0)
        if l0 is not None:
            l_t = l0
        if a0 is not None:
            a_t = a0
        return different_starting, l_t, a_t
    elif t > 0:
        if t == T0:
            return (delta != 0), l_t - delta, a_t
        elif deltaT > 0 and t % deltaT == 0:
            return (zoom != 1), zoom * l_t, zoom * a_t
    return False, l_t, a_t
default_change_lower_amplitude = doubling_change_lower_amplitude
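# Example (illustrative sketch): with the default constants above
# (L0, A0, DELTA, T0, DELTA_T, ZOOM = -1, 2, 0, -1, -1, 2), this function forces
# (l_t, a_t) = (-1, 2) at t = 0 and then never changes them again, so an
# IncreasingMAB using it simply rescales rewards from their original support to [-1, 1]:
#
#     doubling_change_lower_amplitude(0, 0., 1.)   # -> (True, -1, 2)
#     doubling_change_lower_amplitude(100, -1, 2)  # -> (False, -1, 2)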
class IncreasingMAB(MAB):
    """Like a stationary MAB problem, but the range of the rewards is increased from time to time, to test the :class:`Policy.WrapRange` policy.

    - ``M.arms`` and ``M.means`` are NOT changed after each call to ``newRandomArms()``, and neither is ``nbArms``.

    .. warning:: It is purely experimental, be careful when using this feature.
    """
    def __init__(self, configuration):
        """New MAB."""
        super(IncreasingMAB, self).__init__(configuration)
        # XXX Expects a function of (t, lower, amplitude) that gives the new (lower, amplitude)
        self.isDynamic = True  #: Flag to know if the problem is static or not.
        # WARNING the hash function used on configuration dictionaries doesn't like non-hashable parts in the dictionary keys, I need to fix that
        if isinstance(configuration, dict):
            self._change_lower_amplitude = configuration.get("change_lower_amplitude", default_change_lower_amplitude)
            if self._change_lower_amplitude is True:
                self._change_lower_amplitude = default_change_lower_amplitude
        else:
            self._change_lower_amplitude = default_change_lower_amplitude
        # Compute the first lower and amplitude values
        lowers, amplitudes = [], []
        for a in self.arms:
            l, a = a.lower_amplitude
            lowers.append(l)
            amplitudes.append(a)
        self._first_lowers = np.array(lowers)
        self._first_amplitudes = np.array(amplitudes)
        self._lowers = np.array(lowers)
        self._amplitudes = np.array(amplitudes)
    def draw(self, armId, t=1):
        """ Return a random sample from the armId-th arm, at time t. Usually t is not used."""
        l_t, a_t = self._lowers[armId], self._amplitudes[armId]
        haschanged, l_tp1, a_tp1 = self._change_lower_amplitude(t, l_t, a_t)
        reward = self.arms[armId].draw(t)
        if haschanged:
            print("Warning: for {}, current l_t, a_t values for arm {} have changed, from {}, {} to {}, {} ...".format(self, self.arms[armId], l_t, a_t, l_tp1, a_tp1))  # DEBUG
            self._lowers[armId], self._amplitudes[armId] = l_tp1, a_tp1
        l_of_a, a_of_a = self._first_lowers[armId], self._first_amplitudes[armId]
        # scale it to [0, 1]
        reward = (reward - l_of_a) / a_of_a
        # now unscale it in the new interval
        reward = l_tp1 + reward * a_tp1
        # finally, be done and return it
        assert l_tp1 <= reward <= l_tp1 + a_tp1, "Error: the new rescaled reward {:.3g} is not in [{:.3g}, {:.3g}]... that shouldn't be possible!".format(reward, l_tp1, l_tp1 + a_tp1)  # DEBUG
        return reward
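    # Worked example (illustrative sketch) of the rescaling done in draw(): if an
    # arm originally lives on [0, 1] (l_of_a = 0, a_of_a = 1) and the current
    # range is l_tp1 = -1, a_tp1 = 2, then a raw reward r = 0.75 is mapped to
    # -1 + 2 * (0.75 - 0) / 1 = 0.5, which indeed lies in [-1, 1].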
# --- Utility functions
def binomialCoefficient(k, n):
    r""" Compute a binomial coefficient :math:`C^n_k = {n \choose k}` by a direct multiplicative method.

    - Exact, using integers, not like https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.binom.html#scipy.special.binom which uses float numbers.
    - Complexity: :math:`\mathcal{O}(1)` in memory, :math:`\mathcal{O}(n)` in time.
    - From https://en.wikipedia.org/wiki/Binomial_coefficient#Binomial_coefficient_in_programming_languages
    - From: http://userpages.umbc.edu/~rcampbel/Computers/Python/probstat.html#ProbStat-Combin-Combinations

    - Examples:

    >>> binomialCoefficient(-3, 10)
    0
    >>> binomialCoefficient(1, -10)
    0
    >>> binomialCoefficient(1, 10)
    10
    >>> binomialCoefficient(5, 10)
    252
    >>> binomialCoefficient(5, 20)
    15504
    >>> binomialCoefficient(10, 30)
    30045015
    """
    if k < 0 or k > n:
        return 0
    if k == 0 or k == n:
        return 1
    k = min(k, n - k)  # take advantage of symmetry
    c = 1
    for i in range(k):
        # multiply first, then divide, so the integer division stays exact
        c = c * (n - i) // (i + 1)
    return c
# --- Debugging

if __name__ == "__main__":
    # Code for debugging purposes.
    from doctest import testmod
    print("\nTesting automatically all the docstrings written in each function of this module :")
    testmod(verbose=True)