# -*- coding: utf-8 -*-
""" Selfish: a multi-player policy where every player is selfish, playing on their side.
- without knowing how many players there is,
- and not even knowing that they should try to avoid collisions. When a collision happens, the algorithm simply receive a 0 reward for the chosen arm.
"""
from __future__ import division, print_function # Python 2 compatibility
__author__ = "Lilian Besson"
__version__ = "0.5"
try:
    from .BaseMPPolicy import BaseMPPolicy
    from .ChildPointer import ChildPointer
except ImportError:
    from BaseMPPolicy import BaseMPPolicy
    from ChildPointer import ChildPointer


class SelfishChildPointer(ChildPointer):
    """ Selfish version of the ChildPointer class (just pretty printed)."""

    def __str__(self):
        m, p = str(self.mother.__class__.__name__), str(self.mother._players[self.playerId])
        # XXX Small hack to give a better name to MEGA or MusicalChair
        if p.startswith("MEGA") or p.startswith("MusicalChair"):
            return "#{}<{}>".format(self.playerId + 1, p)
        else:
            return "#{}<{}-{}>".format(self.playerId + 1, m, p)
# PENALTY = -1
# PENALTY = 0
#: Customize here the value given to a user after a collision
#: XXX If it is None, then player.lower (which defaults to 0) is used instead
PENALTY = None
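
# For example (hypothetical, not in the original module), one could penalize
# collisions explicitly instead of relying on ``player.lower``:
#
# >>> s = Selfish(6, 17, Uniform, penalty=-1)  # doctest: +SKIP
#
# Every collision would then feed a reward of -1 to the colliding player's
# ``getReward()``.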


class Selfish(BaseMPPolicy):
    """ Selfish: a multi-player policy where every player is selfish, playing on their side,

    - without knowing how many players there are, and
    - not even knowing that they should try to avoid collisions. When a collision happens, the algorithm simply receives a 0 reward for the chosen arm (this can be changed with the ``penalty=`` argument).
    """

    def __init__(self, nbPlayers, nbArms, playerAlgo, penalty=PENALTY, *args, **kwargs):
        """
        - nbPlayers: number of players to create (in self._players).
        - playerAlgo: class to use for every player.
        - nbArms: number of arms, given as first argument to playerAlgo.
        - `*args`, `**kwargs`: arguments, named arguments, given to playerAlgo.

        Examples:

        >>> from Policies import *
        >>> import random; random.seed(0); import numpy as np; np.random.seed(0)
        >>> nbArms = 17
        >>> nbPlayers = 6
        >>> s = Selfish(nbPlayers, nbArms, Uniform)
        >>> [ child.choice() for child in s.children ]
        [12, 13, 1, 8, 16, 15]
        >>> [ child.choice() for child in s.children ]
        [12, 9, 15, 11, 6, 16]

        - To get a list of usable players, use ``s.children``.
        - Warning: ``s._players`` is for internal use ONLY!

        .. warning:: I want my code to stay compatible with Python 2, so I cannot use the `new syntax of keyword-only arguments <https://www.python.org/dev/peps/pep-3102/>`_. It would make more sense to have ``*args, penalty=PENALTY, lower=0., amplitude=1., **kwargs`` instead of ``penalty=PENALTY, *args, **kwargs``, but I can't.
        """
assert nbPlayers > 0, "Error, the parameter 'nbPlayers' for Selfish class has to be > 0."
self.nbPlayers = nbPlayers #: Number of players
self.penalty = penalty #: Penalty = reward given in case of collision
self._players = [None] * nbPlayers
self.children = [None] * nbPlayers #: List of children, fake algorithms
self.nbArms = nbArms #: Number of arms
for playerId in range(nbPlayers):
self._players[playerId] = playerAlgo(nbArms, *args, **kwargs) # Create it here!
self.children[playerId] = SelfishChildPointer(self, playerId)
if hasattr(self._players[playerId], 'handleCollision'): # XXX they should not have such method!
print("Warning: Selfish found a player #{} which has a method 'handleCollision' : Selfish should NOT be used with bandit algorithms aware of collision-avoidance!".format(playerId)) # DEBUG
# raise ValueError("Invalid child policy {} for Selfish algorithm! It should not have a collision avoidance protocol!".format(self._players[playerId]))

    def __str__(self):
        return "Selfish({} x {})".format(self.nbPlayers, str(self._players[0]))

    # --- Proxy methods

    def _handleCollision_one(self, playerId, arm, reward=None):
        """Give a reward of 0, or player.lower, or self.penalty, in case of collision."""
        # Selfish UCB indexes learn on the SUCCESSFUL TRANSMISSIONS (i.e., ACK), not on the sensing!
        if reward is not None:
            print("Warning: Selfish internal indexes do NOT get updated by the observed reward but by 0 in case of collision: learning is done on SUCCESSFUL TRANSMISSIONS (i.e., ACK), not on sensing!")  # DEBUG
        player = self._players[playerId]
        player.getReward(arm, getattr(player, 'lower', 0) if self.penalty is None else self.penalty)
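

# --- Debugging

if __name__ == "__main__":
    # A minimal sketch for manual testing (not part of the original module),
    # assuming the Policies package (e.g. ``Uniform``) is importable so that the
    # doctests written above can actually run.
    from doctest import testmod
    print("\nTesting automatically all the docstrings written in this module...")
    testmod(verbose=True)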