# -*- coding: utf-8 -*-
""" CentralizedMultiplePlay: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step.
"""
from __future__ import division, print_function # Python 2 compatibility
__author__ = "Lilian Besson"
__version__ = "0.2"
import numpy as np
try:
    from .BaseMPPolicy import BaseMPPolicy
    from .ChildPointer import ChildPointer
except ImportError:
    # The relative import failed (e.g. the module is not loaded as part of its
    # package): fall back to importing the sibling modules by their plain names.
    from BaseMPPolicy import BaseMPPolicy
    from ChildPointer import ChildPointer
# --- Class for a child player
class CentralizedChildPointer(ChildPointer):
    """ Centralized version of the ChildPointer class.

    A child is a thin proxy standing for one player. It reads two attributes:

    - ``self.mother``: the centralized policy shared by all the children,
    - ``self.playerId``: the 0-based index of this child.

    (Both are presumably set by ``ChildPointer.__init__`` -- to be confirmed against that class.)
    """

    def __str__(self):
        """Return ``#<id><MotherClass(player)>``, where ``<id>`` is the 1-based index of this child."""
        return "#{}<{}({})>".format(self.playerId + 1, self.mother.__class__.__name__, self.mother.player)

    def __repr__(self):
        """Return ``MotherClass(player)``: same as ``__str__`` but without the player index."""
        return "{}({})".format(self.mother.__class__.__name__, self.mother.player)
# --- Class for the mother
class CentralizedMultiplePlay(BaseMPPolicy):
    """ CentralizedMultiplePlay: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step.

    - The single underlying policy is stored in ``self.player``.
    - The ``nbPlayers`` children in ``self.children`` are proxies (:class:`CentralizedChildPointer`) that call back the ``_*_one`` methods below with their own ``playerId``.
    """

    def __init__(self, nbPlayers, nbArms, playerAlgo, uniformAllocation=False, *args, **kwargs):
        """
        - nbPlayers: number of players to create (in self.children), has to be > 0.
        - nbArms: number of arms, given as first argument to playerAlgo.
        - playerAlgo: class to use for the (unique) centralized policy.
        - uniformAllocation: Should the affectations of users always be uniform, or fixed when UCB indexes have converged? First choice is more fair, but linear nb of switches, second choice is not fair, but cst nb of switches.
        - `*args`, `**kwargs`: arguments, named arguments, given to playerAlgo.

        Examples:

        >>> from Policies import *
        >>> s = CentralizedMultiplePlay(2, 3, UCB)
        >>> [ child.choice() for child in s.children ]
        [2, 0]

        - To get a list of usable players, use ``s.children``.
        - Warning: ``s.player`` is the shared internal policy, the children all delegate to it.
        """
        assert nbPlayers > 0, "Error, the parameter 'nbPlayers' for CentralizedMultiplePlay class has to be > 0."
        self.nbPlayers = nbPlayers  #: Number of players
        self.player = playerAlgo(nbArms, *args, **kwargs)  #: Only one policy
        self.children = [None] * nbPlayers  #: But nbPlayers children, fake algorithms
        self.nbArms = nbArms  #: Number of arms
        for playerId in range(nbPlayers):
            self.children[playerId] = CentralizedChildPointer(self, playerId)
            print(" - One new child, of index {}, and class {} ...".format(playerId, self.children[playerId]))  # DEBUG
        #: Option: in case of multiplay plays, should the affectations of users always be uniform, or fixed when UCB indexes have converged? First choice is more fair, but linear nb of switches, second choice is not fair, but cst nb of switches
        self.uniformAllocation = uniformAllocation
        # Internal memory
        # Placeholder filled with -10000, replaced by the first call to _choice_one(0).
        # NOTE(review): it is sized nbArms although it is later indexed by playerId -- confirm this is intended.
        self.choices = (-10000) * np.ones(nbArms, dtype=int)  #: Choices, given by first call to internal algorithm
        self.affectation_order = np.random.permutation(nbPlayers)  #: Affectation of choices to players

    def __str__(self):
        """Return ``ClassName(nbPlayers x player)``, with a ``, shuffle`` suffix if ``uniformAllocation`` is set."""
        return "{}({} x {}{})".format(self.__class__.__name__, self.nbPlayers, str(self.player), ", shuffle" if self.uniformAllocation else "")

    # --- Proxy methods

    def _startGame_one(self, playerId):
        """Pass the call to the player algorithm.

        Only the child of index 0 actually (re)starts the shared policy and draws a new random ``affectation_order``; for the other children there is nothing to do.
        """
        if playerId == 0:  # For the first player, run the method
            self.player.startGame()
            # Draw the affectation of arms to players once per game, not once per child
            self.affectation_order = np.random.permutation(self.nbPlayers)
        # For the other players, nothing to do

    def _getReward_one(self, playerId, arm, reward):
        """Pass the call to the player algorithm: every child forwards its own (arm, reward) to the shared policy."""
        self.player.getReward(arm, reward)
        # FIXME? The shared policy receives one getReward call per child at each step: if it keeps
        # an internal time counter (e.g. an attribute ``t``), make sure it is not messed up by that.

    def _choice_one(self, playerId):
        """Use the player algorithm for the 1st decision, for each players, then use it.

        - For ``playerId == 0``, ask the shared policy for ``nbPlayers`` arms (``choiceMultiple``) and store them in ``self.choices``.
        - If ``uniformAllocation`` is false, the arms are sorted (increasing order) and then re-ordered by the fixed ``affectation_order``, so that the allocation of arms to players is stable, giving a fixed number of switches.
        - For every player, return the pre-computed ``self.choices[playerId]``: the child of index 0 is expected to be asked first at each step.
        """
        if playerId == 0:  # For the first player, run the method
            chosen_arms = self.player.choiceMultiple(self.nbPlayers)
            if not self.uniformAllocation:
                # Sort then apply affectation_order, to fix the order of the allocation (increasing order)
                chosen_arms = np.sort(chosen_arms)[self.affectation_order]
            self.choices = chosen_arms
        # For all the players, use the pre-computed result
        return self.choices[playerId]

    def _handleCollision_one(self, playerId, arm, reward=None):
        """Cannot be called! Always raise a ValueError, as a centralized policy should never observe a collision."""
        raise ValueError("Error: a {} policy should always aim at orthogonal arms, so no collision should be observed, but player {} saw a collision on arm {} ...".format(self.__class__.__name__, playerId, arm))

    def _estimatedOrder_one(self, playerId):
        """Use the centralized algorithm to estimate ranking of the arms (same answer for every ``playerId``)."""
        return self.player.estimatedOrder()