Source code for Policies.TrekkingTSN

# -*- coding: utf-8 -*-
r""" TrekkingTSN: implementation of the decentralized multi-player policy from [R.Kumar, A.Yadav, S.J.Darak, M.K.Hanawal, Trekking based Distributed Algorithm for Opportunistic Spectrum Access in Infrastructure-less Network, 2018](XXX).

- Each player has 3 states, 1st is channel characterization, 2nd is Trekking phase
- 1st step
    + FIXME
- 2nd step:
    + FIXME
"""
from __future__ import division, print_function  # Python 2 compatibility, division

__author__ = "Lilian Besson"
__version__ = "0.9"

from enum import Enum  # For the different states
import numpy as np

try:
    from .BasePolicy import BasePolicy
except ImportError:
    from BasePolicy import BasePolicy


# --- Functions to compute the optimal choice of Time0 proposed in [Kumar et al., 2018]

def special_times(nbArms=10, theta=0.01, epsilon=0.1, delta=0.05):
    r""" Compute the lower bounds suggesting "large-enough" values for the different parameters :math:`T_{RH}`, :math:`T_{SH}` and :math:`T_{TR}`, which should guarantee constant regret with probability at least :math:`1 - \delta`, if the gap :math:`\Delta` is larger than :math:`\epsilon` and the smallest mean is larger than :math:`\theta`.

    .. math::

        T_{RH} &= \frac{\log\left(\frac{\delta}{3 K}\right)}{\log\left(1 - \theta (1 - \frac{1}{K})^{K-1}\right)} \\
        T_{SH} &= \frac{2 K}{\varepsilon^2} \log\left(\frac{2 K^2}{\delta / 3}\right) \\
        T_{TR} &= \left\lceil \frac{\log((\delta / 3) K \cdot \mathrm{XXX})}{\log(1 - \theta)} \right\rceil \frac{(K - 1) K}{2}.

    - Cf. Theorem 1 of [Kumar et al., 2018](XXX).

    - Examples:

    >>> nbArms = 8
    >>> theta = Delta = 0.07
    >>> epsilon = theta
    >>> delta = 0.1
    >>> special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)  # doctest: +ELLIPSIS
    (197, 26949, -280)
    >>> delta = 0.01
    >>> special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)  # doctest: +ELLIPSIS
    (279, 34468, 616)
    >>> delta = 0.001
    >>> special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)  # doctest: +ELLIPSIS
    (362, 41987, 1512)
    """
    K = nbArms
    XXX = K  # FIXME this is unspecified in their paper (see equation (5))
    T_RH = int((np.log(delta / (3 * K))) / (np.log(1 - theta * (1 - 1/K)**(K-1))))
    T_SH = int((2 * K / epsilon**2) * np.log(2 * K**2 / (delta / 3)))
    T_TR = int(np.ceil((np.log((delta / 3) * K * XXX)) / (np.log(1 - theta))) * ((K - 1) * K) / 2.)  # XXX this T_TR CANNOT be negative!
    assert T_RH > 0, "Error: time T_RH cannot be <= 0 but was found = {}...".format(T_RH)  # DEBUG
    assert T_SH > 0, "Error: time T_SH cannot be <= 0 but was found = {}...".format(T_SH)  # DEBUG
    # assert T_TR > 0, "Error: time T_TR cannot be <= 0 but was found = {}...".format(T_TR)  # DEBUG
    return T_RH, T_SH, T_TR
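

# The doctests above show that the T_TR given by equation (5) can be negative when delta is
# too large. The small helper below is only an illustrative sketch, it is NOT from
# [Kumar et al., 2018]: it simply shrinks delta geometrically until :func:`special_times`
# returns a positive T_TR.

def smallest_delta_with_positive_T_TR(nbArms=10, theta=0.01, epsilon=0.1, delta=0.05, factor=0.5, maxIterations=100):
    r""" Illustrative helper (not in the paper): shrink ``delta`` by ``factor`` until :func:`special_times` gives a positive :math:`T_{TR}`.

    >>> delta, (T_RH, T_SH, T_TR) = smallest_delta_with_positive_T_TR(nbArms=8, theta=0.07, epsilon=0.07, delta=0.1)
    >>> T_TR > 0
    True
    """
    times = special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)
    for _ in range(maxIterations):
        if times[2] > 0:
            break
        delta *= factor
        times = special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)
    return delta, times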


def boundOnFinalRegret(T_RH, T_SH, T_TR, nbPlayers, nbArms):
    r""" Use the upper bound on the regret when :math:`T_{RH}`, :math:`T_{SH}`, :math:`T_{TR}` and :math:`M` are known.

    - The "constant" regret of course grows linearly with :math:`T_{RH}`, :math:`T_{SH}` and :math:`T_{TR}`, as:

    .. math:: \forall T \geq T_{RH} + T_{SH} + T_{TR}, \;\; R_T \leq M \left( T_{RH} + (1 - \frac{M}{K}) T_{SH} + T_{TR} \right).

    .. warning:: This bound is not a deterministic result, it is only valid with a certain probability (at least :math:`1 - \delta`, if :math:`T_{RH}`, :math:`T_{SH}` and :math:`T_{TR}` are chosen as given by :func:`special_times`).

    - Cf. Theorem 1 of [Kumar et al., 2018](XXX).

    - Examples, for :math:`M=2` and :math:`K=8`:

    >>> boundOnFinalRegret(197, 26949, -280, 2, 8)  # doctest: +ELLIPSIS
    40257.5
    >>> boundOnFinalRegret(279, 34468, 616, 2, 8)  # doctest: +ELLIPSIS
    53492.0
    >>> boundOnFinalRegret(362, 41987, 1512, 2, 8)  # doctest: +ELLIPSIS
    66728.5

    + For :math:`M=5`:

    >>> boundOnFinalRegret(197, 26949, -280, 5, 8)  # doctest: +ELLIPSIS
    50114.375
    >>> boundOnFinalRegret(279, 34468, 616, 5, 8)  # doctest: +ELLIPSIS
    69102.5
    >>> boundOnFinalRegret(362, 41987, 1512, 5, 8)  # doctest: +ELLIPSIS
    88095.625

    + For :math:`M=K=8` (the first value is negative: there is something wrong with that :math:`T_{TR}`!):

    >>> boundOnFinalRegret(197, 26949, -280, 8, 8)  # doctest: +ELLIPSIS
    -664.0
    >>> boundOnFinalRegret(279, 34468, 616, 8, 8)  # doctest: +ELLIPSIS
    7160.0
    >>> boundOnFinalRegret(362, 41987, 1512, 8, 8)  # doctest: +ELLIPSIS
    14992.0
    """
    return nbPlayers * (T_RH + T_SH * (1. - float(nbPlayers) / nbArms) + T_TR)
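

# The small wrapper below is also only an illustrative sketch, it is NOT part of
# [Kumar et al., 2018]: it chains :func:`special_times` and :func:`boundOnFinalRegret`,
# so that the regret bound can be computed directly from the problem parameters.

def boundFromParameters(nbPlayers, nbArms, theta=0.01, epsilon=0.1, delta=0.05):
    r""" Illustrative helper (not in the paper): compute the regret bound directly from the parameters, by chaining :func:`special_times` and :func:`boundOnFinalRegret`.

    >>> boundFromParameters(2, 8, theta=0.07, epsilon=0.07, delta=0.1)  # doctest: +ELLIPSIS
    40257.5
    """
    T_RH, T_SH, T_TR = special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)
    return boundOnFinalRegret(T_RH, T_SH, T_TR, nbPlayers, nbArms)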


#: Different states during the TSN algorithm
State = Enum('State', ['NotStarted', 'ChannelCharacterization', 'TrekkingTSN'])


# --- Class TrekkingTSN
class TrekkingTSN(BasePolicy):
    """ TrekkingTSN: implementation of the single-player policy from [R.Kumar, A.Yadav, S.J.Darak, M.K.Hanawal, Trekking based Distributed Algorithm for Opportunistic Spectrum Access in Infrastructure-less Network, 2018](XXX).
    """

    def __init__(self, nbArms,
                 theta=0.01, epsilon=0.1, delta=0.05,
                 lower=0., amplitude=1.
                 ):  # Named argument to give them in any order
        """
        - nbArms: number of arms.

        Example:

        >>> nbArms = 8
        >>> theta, epsilon, delta = 0.01, 0.1, 0.05
        >>> player1 = TrekkingTSN(nbArms, theta=theta, epsilon=epsilon, delta=delta)

        For multi-players use:

        >>> configuration["players"] = Selfish(NB_PLAYERS, TrekkingTSN, nbArms, theta=theta, epsilon=epsilon, delta=delta).children
        """
        super(TrekkingTSN, self).__init__(nbArms, lower=lower, amplitude=amplitude)
        self.state = State.NotStarted  #: Current state
        # Compute the durations of the phases
        T_RH, T_SH, T_TR = special_times(nbArms=nbArms, theta=theta, epsilon=epsilon, delta=delta)
        self.theta = theta  #: Parameter :math:`\theta`.
        self.epsilon = epsilon  #: Parameter :math:`\epsilon`.
        self.delta = delta  #: Parameter :math:`\delta`.
        # Store parameters
        self.T_RH = T_RH  #: Parameter :math:`T_{RH}` computed from :func:`special_times`
        self.T_SH = T_SH  #: Parameter :math:`T_{SH}` computed from :func:`special_times`
        self.T_CC = T_RH + T_SH  #: Parameter :math:`T_{CC} = T_{RH} + T_{SH}`
        self.T_TR = T_TR  #: Parameter :math:`T_{TR}` computed from :func:`special_times`
        # Internal memory
        self.last_was_successful = False  #: That's the l of the paper
        self.last_choice = None  #: Keep memory of the last choice for CC phase
        self.cumulatedRewards = np.zeros(nbArms)  #: That's the V_n of the paper
        self.nbObservations = np.zeros(nbArms, dtype=int)  #: That's the S_n of the paper
        self.J = -1  #: That's the J of the paper (current channel, in the sorted order)
        self.lock_channel = False  #: That's the L of the paper
        self.Y = np.zeros(nbArms, dtype=int)  # That's the Y_i of the paper
        self.M = np.zeros(nbArms)  # That's the M_j of the paper
        self.index_sort = None  # Implementation detail
        self.t = -1  #: Internal time

    def __str__(self):
        return r"TSN(${} = {:.3g}, {} = {:.3g}$)".format(r"T_{RH}", self.T_RH, r"T_{SH}", self.T_SH)  # Use current estimate

    def startGame(self):
        """ Just reinitialize all the internal memory, and start the Channel Characterization phase."""
        self.t = -1  # -1 because t += 1 is done in self.choice()
        self.last_was_successful = False  # reset the flag l as well
        self.last_choice = None           # and forget the last choice
        self.cumulatedRewards.fill(0)
        self.nbObservations.fill(0)
        self.J = -1
        self.lock_channel = False
        self.Y.fill(0)
        self.M.fill(0)
        self.index_sort = None
        self.state = State.ChannelCharacterization

    def choice(self):
        """ Choose an arm, as described by the TSN algorithm."""
        self.t += 1
        i = self.last_choice  # by default, stay on the current arm
        if self.state == State.ChannelCharacterization:
            if self.last_was_successful:
                # choose next channel sequentially
                i = (self.last_choice + 1) % self.nbArms
            else:
                # randomly choose channel
                i = np.random.randint(self.nbArms)
        elif self.state == State.TrekkingTSN:
            if self.t == self.T_CC or np.sum(self.Y) == 0:
                i = (self.J - 1) % self.nbArms
            elif self.lock_channel:
                i = self.last_choice
            elif self.Y[self.J] <= self.M[self.J]:
                i = self.last_choice
            else:
                # take next best channel
                i = (self.last_choice - 1) % self.nbArms
                self.J = self.last_choice
            # finally, in Trekking phase, increase Y_J by 1
            self.Y[self.J] += 1
        # Keep in memory the last choice
        self.last_choice = i
        # and use the permutation to aim at the sorted arm
        if self.state == State.TrekkingTSN and self.index_sort is not None:
            return self.index_sort[i]
        else:
            return i

    def getReward(self, arm, reward):
        """ Receive a reward on arm of index 'arm', as described by the TSN algorithm.

        - If not collision, receive a reward after pulling the arm.
        """
        # print("- A TrekkingTSN player receives reward = {} on arm {}, in state {} and time t = {}...".format(reward, arm, self.state, self.t))  # DEBUG
        # If not collision, receive a reward after pulling the arm
        if self.state == State.ChannelCharacterization:
            # Count the observation, update arm cumulated reward
            self.nbObservations[arm] += 1      # One observation of this arm
            r_t = (reward - self.lower) / self.amplitude
            self.cumulatedRewards[arm] += r_t  # More reward
            if r_t > 0:
                self.last_was_successful = True
        # else:
        #     assert self.state == State.TrekkingTSN, "Error: state = {} should be TrekkingTSN here.".format(self.state)  # DEBUG
        # And if t = T_CC, we are done with the initial CC phase
        if self.t >= self.T_CC and self.state == State.ChannelCharacterization:
            self._endCCPhase()

    def _endCCPhase(self):
        """ Small computation needed at the end of the initial CC phase."""
        # print("\n- A TrekkingTSN player has to switch from ChannelCharacterization to TrekkingTSN ...")  # DEBUG
        self.state = State.TrekkingTSN  # Switch ONCE to state 2
        # First, we compute the empirical means mu_i
        # empiricalMeans = (1. + self.cumulatedRewards) / (1. + self.nbObservations)
        empiricalMeans = self.cumulatedRewards / self.nbObservations
        # Then, sort the indexes by empirical means, in decreasing order
        self.index_sort = np.argsort(-empiricalMeans)
        self.J = self.index_sort[self.last_choice]
        # Compute the thresholds M_j used in the Trekking phase (compared to the counters Y_j)
        for j in range(self.nbArms):
            self.M[j] = sum(np.ceil(np.log(self.delta / 3.) / (1 - empiricalMeans[self.index_sort[j]])) for i in range(j - 1))

    def handleCollision(self, arm, reward=None):
        """ Handle a collision, on arm of index 'arm'.

        - Warning: this method has to be implemented in the collision model, it is NOT implemented in the EvaluatorMultiPlayers.
        """
        # print("- A TrekkingTSN player saw a collision on arm {}, in state {}, and time t = {} ...".format(arm, self.state, self.t))  # DEBUG
        if self.state == State.ChannelCharacterization:
            # one more collision in this initial phase (no matter the arm), nothing to do
            pass
        else:
            assert self.state == State.TrekkingTSN, "Error: state = {} should be TrekkingTSN here.".format(self.state)  # DEBUG
            self.lock_channel = True
            if self.last_choice == self.J:
                print("Warning: TrekkingTSN algorithm saw a collision while playing {} and has J = {}, but will lock this channel (forever?), that's not smart... (L = {}).".format(self.last_choice, self.J, self.lock_channel))  # DEBUG
            self.last_choice = self.J
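

# --- Debugging

if __name__ == "__main__":
    # Illustrative demo only, it is NOT from [Kumar et al., 2018]: simulate a single
    # TrekkingTSN player on Bernoulli rewards with arbitrary means, just to see the
    # Channel Characterization phase end and the Trekking phase start.
    np.random.seed(0)
    nbArms = 4
    means = [0.1, 0.3, 0.5, 0.7]  # arbitrary means, only for this demo
    player = TrekkingTSN(nbArms, theta=0.1, epsilon=0.1, delta=0.1)
    player.startGame()
    horizon = player.T_CC + 100  # a bit more than the CC phase
    for _ in range(horizon):
        arm = player.choice()
        reward = float(np.random.random_sample() < means[arm])  # Bernoulli reward
        player.getReward(arm, reward)
    print("After {} steps: state = {}, T_CC = {}, J = {}".format(horizon, player.state, player.T_CC, player.J))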