Source code for PoliciesMultiPlayers.CentralizedIMP

# -*- coding: utf-8 -*-
""" CentralizedIMP: a multi-player policy where ONE policy is used by a centralized agent; asking the policy to select nbPlayers arms at each step, using a hybrid strategy: choose nbPlayers-1 arms with maximal empirical averages, then 1 arm with maximal index. Cf. algorithm IMP-TS [Komiyama, Honda, Nakagawa, 2016, arXiv 1506.00779].
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.2"

import numpy as np

from .CentralizedMultiplePlay import CentralizedMultiplePlay


# --- Class for the mother

class CentralizedIMP(CentralizedMultiplePlay):
    """Centralized multi-player policy based on the IMP hybrid selection rule.

    ONE underlying policy is used by a centralized agent, which asks it to
    select ``nbPlayers`` arms at each step with a hybrid strategy: choose
    ``nbPlayers - 1`` arms with maximal empirical averages, then 1 arm with
    maximal index.

    Cf. algorithm IMP-TS [Komiyama, Honda, Nakagawa, 2016, arXiv 1506.00779].
    """

    def _choice_one(self, playerId):
        """Return the arm assigned to player ``playerId``, using `choiceIMP`.

        The joint selection is computed only when ``playerId == 0`` and is
        stored in ``self.choices``; every call then reads its own entry from
        that stored result.
        """
        if playerId == 0:
            # Read the allocation flag first, then ask the underlying policy
            # for the joint selection of nbPlayers arms.
            keep_policy_order = self.uniformAllocation
            selected_arms = self.player.choiceIMP(self.nbPlayers)
            # Without uniform allocation, sort the arms (increasing order) and
            # re-index them by affectation_order: this fixes their order, so
            # that the number of switches stays fixed.
            self.choices = (
                selected_arms
                if keep_policy_order
                else np.sort(selected_arms)[self.affectation_order]
            )
        # For all the players, use the pre-computed result.
        return self.choices[playerId]