Source code for Policies.BasePolicy

# -*- coding: utf-8 -*-
""" Base class for any policy.

- If rewards are not in [0, 1], be sure to give the lower value and the amplitude. Eg, if rewards are in [-3, 3], lower = -3, amplitude = 6.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

import numpy as np

#: If True, every time a reward is received, a warning message is displayed if it lies outsides of ``[lower, lower + amplitude]``.
CHECKBOUNDS = True
CHECKBOUNDS = False


[docs]class BasePolicy(object): """ Base class for any policy."""
[docs] def __init__(self, nbArms, lower=0., amplitude=1.): """ New policy.""" # Parameters assert nbArms > 0, "Error: the 'nbArms' parameter of a {} object cannot be <= 0.".format(self) # DEBUG self.nbArms = nbArms #: Number of arms self.lower = lower #: Lower values for rewards assert amplitude > 0, "Error: the 'amplitude' parameter of a {} object cannot be <= 0.".format(self) # DEBUG self.amplitude = amplitude #: Larger values for rewards # Internal memory self.t = 0 #: Internal time self.pulls = np.zeros(nbArms, dtype=int) #: Number of pulls of each arms self.rewards = np.zeros(nbArms) #: Cumulated rewards of each arms
[docs] def __str__(self): """ -> str""" return self.__class__.__name__
# --- Start game, and receive rewards
[docs] def startGame(self): """ Start the game (fill pulls and rewards with 0).""" self.t = 0 self.pulls.fill(0) self.rewards.fill(0)
if CHECKBOUNDS: # XXX useless checkBounds feature def getReward(self, arm, reward): """ Give a reward: increase t, pulls, and update cumulated sum of rewards for that arm (normalized in [0, 1]).""" self.t += 1 self.pulls[arm] += 1 # XXX we could check here if the reward is outside the bounds if not 0 <= reward - self.lower <= self.amplitude: print("Warning: {} received on arm {} a reward = {:.3g} that is outside the interval [{:.3g}, {:.3g}] : the policy will probably fail to work correctly...".format(self, arm, reward, self.lower, self.lower + self.amplitude)) # DEBUG # else: # print("Info: {} received on arm {} a reward = {:.3g} that is inside the interval [{:.3g}, {:.3g}]".format(self, arm, reward, self.lower, self.lower + self.amplitude)) # DEBUG reward = (reward - self.lower) / self.amplitude self.rewards[arm] += reward else: # It's faster to define two methods and pick one # (one test in init, that's it) # rather than doing the test in the method
[docs] def getReward(self, arm, reward): """ Give a reward: increase t, pulls, and update cumulated sum of rewards for that arm (normalized in [0, 1]).""" self.t += 1 self.pulls[arm] += 1 reward = (reward - self.lower) / self.amplitude self.rewards[arm] += reward
# --- Basic choice() and handleCollision() method
[docs] def choice(self): """ Not defined.""" raise NotImplementedError("This method choice() has to be implemented in the child class inheriting from BasePolicy.")
# def handleCollision(self, arm, reward=None): # """ Default to give a 0 reward (or ``self.lower``).""" # # print("DEBUG BasePolicy.handleCollision({}, {}) was called...".format(arm, reward)) # DEBUG # # self.getReward(arm, self.lower if reward is None else reward) # self.getReward(arm, self.lower) # # raise NotImplementedError("This method handleCollision() has to be implemented in the child class inheriting from BasePolicy.") # --- Others choice...() methods, partly implemented
[docs] def choiceWithRank(self, rank=1): """ Not defined.""" if rank == 1: return self.choice() else: raise NotImplementedError("This method choiceWithRank(rank) has to be implemented in the child class inheriting from BasePolicy.")
[docs] def choiceFromSubSet(self, availableArms='all'): """ Not defined.""" if availableArms == 'all': return self.choice() else: raise NotImplementedError("This method choiceFromSubSet(availableArms) has to be implemented in the child class inheriting from BasePolicy.")
[docs] def choiceMultiple(self, nb=1): """ Not defined.""" if nb == 1: return np.array([self.choice()]) else: raise NotImplementedError("This method choiceMultiple(nb) has to be implemented in the child class inheriting from BasePolicy.")
[docs] def choiceIMP(self, nb=1, startWithChoiceMultiple=True): """ Not defined.""" if nb == 1: return np.array([self.choice()]) else: return self.choiceMultiple(nb=nb)
[docs] def estimatedOrder(self): """ Return the estimate order of the arms, as a permutation on [0..K-1] that would order the arms by increasing means. - For a base policy, it is completely random. """ return np.random.permutation(self.nbArms)