# -*- coding: utf-8 -*-
""" Scenario1: make a set of M experts with the following behavior, for K = 2 arms: at every round, one of them is chosen uniformly to predict arm 0, and the rest predict 1.
- Reference: Beygelzimer, A., Langford, J., Li, L., Reyzin, L., & Schapire, R. E. (2011, April). Contextual Bandit Algorithms with Supervised Learning Guarantees. In AISTATS (pp. 19-26).
"""
from __future__ import division, print_function # Python 2 compatibility
__author__ = "Lilian Besson"
__version__ = "0.1"
import numpy as np
from .BaseMPPolicy import BaseMPPolicy
from .ChildPointer import ChildPointer
# --- Class for a child player
class OneScenario1(ChildPointer):
    """ OneScenario1: child player of a Scenario1 mother policy.

    At every round, one of the children is chosen uniformly to predict arm 0, and the rest predict 1.
    The decision itself is taken by the mother; this class only adds string representations on top of ChildPointer.
    """

    def __init__(self, mother, playerId):
        """ Forward ``mother`` and ``playerId`` unchanged to the ChildPointer constructor."""
        # Explicit two-argument super() keeps the Python 2 compatibility the module asks for.
        super(OneScenario1, self).__init__(mother, playerId)

    def __str__(self):
        """ Readable name of this child, with a 1-based player number."""
        # NOTE(review): self.playerId is presumably stored by ChildPointer.__init__ -- confirm.
        displayedId = self.playerId + 1
        return "#{}<OneScenario1>".format(displayedId)

    def __repr__(self):
        """ Short name of the class, independent of the player index."""
        return "OneScenario1"
# --- Class for the mother
class Scenario1(BaseMPPolicy):
    """ Scenario1: make a set of M experts with the following behavior, for K = 2 arms: at every round, one of them is chosen uniformly to predict arm 0, and the rest predict 1.

    - For K > 2 arms, the experts that are not chosen predict an arm drawn uniformly among arms 1, ..., K - 1.
    - Reference: Beygelzimer, A., Langford, J., Li, L., Reyzin, L., & Schapire, R. E. (2011, April). Contextual Bandit Algorithms with Supervised Learning Guarantees. In AISTATS (pp. 19-26).
    """

    def __init__(self, nbPlayers, nbArms, lower=0., amplitude=1.):
        """
        - nbPlayers: number of players to create (stored in ``self.children``), has to be > 0.
        - nbArms: number of arms, has to be >= 2.
        - lower, amplitude: accepted for signature compatibility, not used by this class.

        Examples:

        >>> s = Scenario1(10, 2)

        - To get a list of usable players, use ``s.children``.
        - NOTE(review): an earlier version of this docstring warned that ``s._players`` is for internal use; this class never sets that attribute -- confirm against BaseMPPolicy.
        """
        assert nbPlayers > 0, "Error, the parameter 'nbPlayers' for Scenario1 class has to be > 0."
        assert nbArms >= 2, "Error, the parameter 'nbArms' for Scenario1 class can only be >= 2."  # DEBUG
        # Attributes
        self.nbPlayers = nbPlayers
        self.nbArms = nbArms
        # Index of the only child currently allowed to pull arm 0 (None until a game starts)
        self.chosenOne = None
        # Internal object memory: the list exists (filled with None) before any child is built,
        # so a child constructor that looks at mother.children never fails on a missing attribute.
        self.children = [None] * nbPlayers
        for playerId in range(nbPlayers):
            self.children[playerId] = OneScenario1(self, playerId)

    def __str__(self):
        """ Name of the policy, with its number of players."""
        return "Scenario1({})".format(self.nbPlayers)

    def _startGame_one(self, playerId):
        """ Draw a new ``chosenOne`` uniformly in {0, ..., nbPlayers - 1}; ``playerId`` is not used."""
        self.chosenOne = np.random.randint(self.nbPlayers)  # New random choice

    def _getReward_one(self, playerId, arm, reward):
        """ Ignore the reward: the experts of this scenario do not learn."""
        pass

    def _choice_one(self, playerId):
        """ Return the arm predicted by the child of index ``playerId``.

        - A new ``chosenOne`` is drawn only when ``playerId == 0``, so one draw is shared by all children of a round as long as child 0 is asked first.
        - The chosen child returns arm 0; any other child returns arm 1 if ``nbArms == 2``, or an arm drawn uniformly in {1, ..., nbArms - 1} otherwise.
        """
        if playerId == 0:  # For the first player, choose a new chosenOne
            self.chosenOne = np.random.randint(self.nbPlayers)  # New random choice
        if self.chosenOne == playerId:
            return 0  # Choose worse arm
        if self.nbArms > 2:
            # The upper bound of np.random.randint is exclusive: high=nbArms keeps the draw
            # inside the valid arm indexes 1, ..., nbArms - 1 (high=1 + nbArms could return nbArms).
            return np.random.randint(low=1, high=self.nbArms)  # to be general for nbArms > 2 setting
        return 1  # Choose best arm