Source code for PoliciesMultiPlayers.Scenario1

# -*- coding: utf-8 -*-
""" Scenario1: make a set of M experts with the following behavior, for K = 2 arms: at every round, one of them is chosen uniformly to predict arm 0, and the rest predict 1.

- Reference: Beygelzimer, A., Langford, J., Li, L., Reyzin, L., & Schapire, R. E. (2011, April). Contextual Bandit Algorithms with Supervised Learning Guarantees. In AISTATS (pp. 19-26).
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.1"

import numpy as np

from .BaseMPPolicy import BaseMPPolicy
from .ChildPointer import ChildPointer

# --- Class for a child player

class OneScenario1(ChildPointer):
    """ OneScenario1: child player of the Scenario1 policy.

    At every round, one of the children is chosen uniformly to predict arm 0, and the rest predict 1.
    """

    def __init__(self, mother, playerId):
        """ Forward ``mother`` and ``playerId`` unchanged to the ``ChildPointer`` constructor."""
        # Explicit two-argument super() keeps the Python 2 compatibility the module declares.
        super(OneScenario1, self).__init__(mother, playerId)

    def __str__(self):
        """ Label of this child: its 1-based rank followed by the class tag."""
        rank = self.playerId + 1  # playerId is shown shifted by one
        return "#{}<OneScenario1>".format(rank)

    def __repr__(self):
        """ Constant short name, the same for every child."""
        return "OneScenario1"
# --- Class for the mother
class Scenario1(BaseMPPolicy):
    """ Scenario1: make a set of M experts with the following behavior, for K = 2 arms: at every round, one of them is chosen uniformly to predict arm 0, and the rest predict 1.

    - Reference: Beygelzimer, A., Langford, J., Li, L., Reyzin, L., & Schapire, R. E. (2011, April). Contextual Bandit Algorithms with Supervised Learning Guarantees. In AISTATS (pp. 19-26).
    """

    def __init__(self, nbPlayers, nbArms, lower=0., amplitude=1.):
        """
        - nbPlayers: number of players to create (stored in ``self.children``), has to be > 0.
        - nbArms: number of arms, has to be >= 2.
        - lower, amplitude: accepted in the signature but not used by this constructor.

        Examples:

        >>> s = Scenario1(10, 2)

        - To get a list of usable players, use ``s.children``.
        """
        assert nbPlayers > 0, "Error, the parameter 'nbPlayers' for Scenario1 class has to be > 0."
        assert nbArms >= 2, "Error, the parameter 'nbArms' for Scenario1 class can only be >= 2."
        # Attributes
        self.nbPlayers = nbPlayers
        self.nbArms = nbArms
        self.chosenOne = None  # index of the only child allowed to pick arm 0; drawn later
        # Internal object memory.
        # The attributes above are assigned first, so every child constructor
        # receives a mother object on which they already exist.
        self.children = [None] * nbPlayers
        for playerId in range(nbPlayers):
            self.children[playerId] = OneScenario1(self, playerId)

    def __str__(self):
        return "Scenario1({})".format(self.nbPlayers)

    def _startGame_one(self, playerId):
        """ Draw a new ``chosenOne`` uniformly among the players (``playerId`` is not used)."""
        self.chosenOne = np.random.randint(self.nbPlayers)  # New random choice

    def _getReward_one(self, playerId, arm, reward):
        """ Nothing is learned from rewards: the feedback is ignored."""
        pass

    def _choice_one(self, playerId):
        """ Return the arm chosen by child ``playerId``.

        - When called for player 0, a new ``chosenOne`` is drawn first.
        - The chosen child returns arm 0.
        - Every other child returns arm 1 if ``nbArms == 2``, or a uniformly random arm in ``1 .. nbArms - 1`` otherwise.
        """
        if playerId == 0:  # For the first player, choose a new chosenOne
            self.chosenOne = np.random.randint(self.nbPlayers)  # New random choice
        if self.chosenOne == playerId:
            return 0  # Noted as the "worse arm" in the original code comments
        if self.nbArms > 2:
            # np.random.randint excludes `high`: the upper bound has to be nbArms
            # (not 1 + nbArms), otherwise the invalid index nbArms could be returned.
            return np.random.randint(low=1, high=self.nbArms)  # to be general for nbArms > 2 setting
        return 1  # Noted as the "best arm" in the original code comments