Source code for PoliciesMultiPlayers.ALOHA

# -*- coding: utf-8 -*-
""" ALOHA: generalized implementation of the single-player policy from [Concurrent bandits and cognitive radio network, O.Avner & S.Mannor, 2014](https://arxiv.org/abs/1404.5421), for a generic single-player policy.

This policy uses the collision avoidance mechanism that is inspired by the classical ALOHA protocol, and any single-player policy.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

from random import random
import numpy as np
import numpy.random as rn

try:
    from .BaseMPPolicy import BaseMPPolicy
    from .ChildPointer import ChildPointer
    from .with_proba import with_proba
except ImportError:
    from BaseMPPolicy import BaseMPPolicy
    from ChildPointer import ChildPointer
    from with_proba import with_proba



# --- Functions to define [t, t + tnext] intervals

[docs]def tnext_beta(t, beta=0.5):
    r""" Simple function, as used in MEGA: ``upper_tnext(t)`` = :math:`t^{\beta}`. Default to :math:`t^{0.5}`.

    >>> tnext_beta(100, beta=0.1)  # doctest: +ELLIPSIS
    1.584...
    >>> tnext_beta(100, beta=0.5)
    10.0
    >>> tnext_beta(100, beta=0.9)  # doctest: +ELLIPSIS
    63.095...
    >>> tnext_beta(1000)  # doctest: +ELLIPSIS
    31.622...
    """
    return t ** beta


[docs]def make_tnext_beta(beta=0.5):
    r""" Returns the function :math:`t \mapsto t^{\beta}`.

    >>> tnext = make_tnext_beta(0.5)
    >>> tnext(100)
    10.0
    >>> tnext(1000)  # doctest: +ELLIPSIS
    31.622...
    """
    def tnext(t):
        return t ** beta
    return tnext


[docs]def tnext_log(t, scaling=1.):
    r""" Other function, not the one used in MEGA, but our proposal: ``upper_tnext(t)`` = :math:`\text{scaling} * \log(1 + t)`.

    >>> tnext_log(100, scaling=1)  # doctest: +ELLIPSIS
    4.615...
    >>> tnext_log(100, scaling=10)  # doctest: +ELLIPSIS
    46.151...
    >>> tnext_log(100, scaling=100)  # doctest: +ELLIPSIS
    461.512...
    >>> tnext_log(1000)  # doctest: +ELLIPSIS
    6.908...
    """
    return scaling * np.log(1 + t)


[docs]def make_tnext_log_scaling(scaling=1.):
    r""" Returns the function :math:`t \mapsto \text{scaling} * \log(1 + t)`.

    >>> tnext = make_tnext_log_scaling(1)
    >>> tnext(100)  # doctest: +ELLIPSIS
    4.615...
    >>> tnext(1000)  # doctest: +ELLIPSIS
    6.908...
    """
    def tnext(t):
        return scaling * np.log(1 + t)
    return tnext


# --- Class oneALOHA, for children

[docs]class oneALOHA(ChildPointer):
    """ Class that acts as a child policy, but in fact it pass all its method calls to the mother class, who passes it to its i-th player.

    - Except for the handleCollision method: the ALOHA collision avoidance protocol is implemented here.
    """

[docs]    def __init__(self, nbPlayers, mother, playerId, nbArms,
                 p0=0.5, alpha_p0=0.5, ftnext=tnext_beta, beta=None):
        super(oneALOHA, self).__init__(mother, playerId)
        self.nbPlayers = nbPlayers  #: Number of players
        # Parameters for the ALOHA protocol
        assert 0 <= p0 <= 1, "Error: parameter 'p0' for a ALOHA player should be in [0, 1]."  # DEBUG
        self.p0 = p0  #: Initial probability, should not be modified
        self.p = p0   #: Current probability, can be modified
        assert 0 < alpha_p0 <= 1, "Error: parameter 'alpha_p0' for a ALOHA player should be in (0, 1]."  # DEBUG
        self.alpha_p0 = alpha_p0  #: Parameter alpha for the recurrence equation for probability p(t)
        # Parameters for the ftnext function
        self.beta = beta  #: Parameter beta
        self._ftnext = ftnext  # Function to know how long arms are tagged as unavailable. Can be a callable or None
        # Find the name of the function
        if ftnext is None:
            if beta > 1:
                self._ftnext_name = "t^{%.3g}" % beta
            elif 0 < beta < 1:
                self._ftnext_name = r"\sqrt[%.3g]{t}" % (1. / beta)
            else:
                self._ftnext_name = "t"
        elif ftnext == tnext_log:
            self._ftnext_name = r"\log(t)"
        elif ftnext == tnext_beta:
            self._ftnext_name = r"\sqrt{t}"
        else:
            self._ftnext_name = self._ftnext.__name__.replace("tnext_", "")
        # Internal memory
        self.tnext = np.zeros(nbArms, dtype=int)  #: Only store the delta time
        self.t = -1  #: Internal time
        self.chosenArm = None  #: Last chosen arm

[docs]    def __str__(self):
        return r"#{}<ALOHA({}, $p_0={:.3g}$, $\alpha={:.3g}$, $f(t)={}$)>".format(self.playerId + 1, self.mother._players[self.playerId], self.p0, self.alpha_p0, self._ftnext_name)

[docs]    def startGame(self):
        """Start game."""
        self.mother._startGame_one(self.playerId)
        self.t = 0
        self.p = self.p0
        self.tnext.fill(0)
        self.chosenArm = None

[docs]    def ftnext(self, t):
        """Time until the arm is removed from list of unavailable arms."""
        if self.beta is not None:
            return t ** self.beta
        else:
            return self._ftnext(t)

[docs]    def getReward(self, arm, reward):
        """ Receive a reward on arm of index 'arm', as described by the ALOHA protocol.

        - If not collision, receive a reward after pulling the arm.
        """
        # print(" - A oneALOHA player received reward = {} on arm {}, at time t = {}...".format(reward, arm, self.t))  # DEBUG
        self.mother._getReward_one(self.playerId, arm, reward)
        self.p = self.p * self.alpha_p0 + (1 - self.alpha_p0)  # Update proba p

[docs]    def handleCollision(self, arm, reward=None):
        """ Handle a collision, on arm of index 'arm'.

        .. warning:: This method has to be implemented in the collision model, it is NOT implemented in the EvaluatorMultiPlayers.

        .. note:: We do not care on which arm the collision occured.
        """
        # print(" ---------> A oneALOHA player saw a collision on arm {}, at time t = {} ... Currently, p = {} ...".format(arm, self.t, self.p))  # DEBUG
        # self.getReward(arm, self.mother.lower)  # FIXED should we give a 0 reward ? Not in this model!
        # 1. With proba 1 - p, give up
        if with_proba(1 - self.p):
            # Random time offset until when this arm self.chosenArm is not sampled
            delta_tnext_k = rn.randint(low=0, high=1 + int(self.ftnext(self.t)))
            self.tnext[self.chosenArm] = self.t + 1 + delta_tnext_k
            # print("   - Reaction to collision on arm {}, at time t = {} : delta_tnext_k = {}, tnext[{}] = {} ...".format(arm, self.t, delta_tnext_k, self.chosenArm, self.tnext[self.chosenArm]))  # DEBUG
            self.p = self.p0  # Reinitialize the proba p
            self.chosenArm = None  # We give up this arm
        # 2. With proba p, persist: nothing to do
        # else:
        #     pass

[docs]    def choice(self):
        """ Identify the available arms, and use the underlying single-player policy (UCB, Thompson etc) to choose an arm from this sub-set of arms.
        """
        self.t += 1
        if self.chosenArm is not None:
            # We can still exploit that arm
            pass
        else:
            # We have to chose a new arm
            availableArms = np.nonzero(self.tnext <= self.t)[0]  # Identify available arms
            result = self.mother._choiceFromSubSet_one(self.playerId, availableArms)
            # print("\n - A oneALOHA player {} had to choose an arm among the set of available arms = {}, her choice was : {}, at time t = {} ...".format(self, availableArms, result, self.t))  # DEBUG
            self.chosenArm = result
        return self.chosenArm


# --- Class ALOHA

[docs]class ALOHA(BaseMPPolicy):
    """ ALOHA: implementation of the multi-player policy from [Concurrent bandits and cognitive radio network, O.Avner & S.Mannor, 2014](https://arxiv.org/abs/1404.5421), for a generic single-player policy.
    """

[docs]    def __init__(self, nbPlayers, nbArms, playerAlgo,
                 p0=0.5, alpha_p0=0.5, ftnext=tnext_beta, beta=None,
                 *args, **kwargs):  # Named argument to give them in any order
        """
        - nbPlayers: number of players to create (in self._players).
        - playerAlgo: class to use for every players.
        - nbArms: number of arms, given as first argument to playerAlgo.

        - p0: initial probability p(0); p(t) is the probability of persistance on the chosenArm at time t
        - alpha_p0: scaling in the update for p[t+1] <- alpha_p0 p[t] + (1 - alpha_p0)
        - ftnext: general function, default to t -> t^beta, to know from where to sample a random time t_next(k), until when the chosenArm is unavailable. t -> log(1 + t) is also possible.
        - (optional) beta: if present, overwrites ftnext, which will be t --> t^beta.

        - `*args`, `**kwargs`: arguments, named arguments, given to playerAlgo.

        Example:

        >>> from Policies import *
        >>> import random; random.seed(0); import numpy as np; np.random.seed(0)
        >>> nbArms = 17
        >>> nbPlayers = 6
        >>> p0, alpha_p0 = 0.6, 0.5
        >>> s = ALOHA(nbPlayers, nbArms, Thompson, p0=p0, alpha_p0=alpha_p0, ftnext=tnext_log)
        >>> [ child.choice() for child in s.children ]
        [6, 11, 8, 4, 8, 8]
        >>> s = ALOHA(nbPlayers, nbArms, UCBalpha, p0=p0, alpha_p0=alpha_p0, beta=0.5, alpha=1)
        >>> [ child.choice() for child in s.children ]
        [1, 0, 5, 2, 15, 3]

        - To get a list of usable players, use ``s.children``.
        - Warning: ``s._players`` is for internal use ONLY!
        """
        assert nbPlayers > 0, "Error, the parameter 'nbPlayers' for rhoRand class has to be > 0."
        self.nbPlayers = nbPlayers  #: Number of players
        self.nbArms = nbArms  #: Number of arms
        # Internal memory
        self._players = [None] * nbPlayers
        self.children = [None] * nbPlayers  #: List of children, fake algorithms
        for playerId in range(nbPlayers):
            # Initialize internal algorithm (eg. UCB, Thompson etc)
            self._players[playerId] = playerAlgo(nbArms, *args, **kwargs)
            # Initialize proxy child
            self.children[playerId] = oneALOHA(nbPlayers, self, playerId, nbArms, p0=p0, alpha_p0=alpha_p0, ftnext=ftnext, beta=beta)

[docs]    def __str__(self):
        return "ALOHA({} x {})".format(self.nbPlayers, str(self._players[0]))