# Source code for Policies.GenericAggregation

# -*- coding: utf-8 -*-
r""" The GenericAggregation aggregation bandit algorithm: use a bandit policy A (master), managing several "slave" algorithms, :math:`A_1, ..., A_N`.

- At every step, one slave algorithm A_i is selected, by the master policy A.
- Then its decision is listened to, played by the master algorithm, and a feedback reward is received.
- All slaves receive the observation (arm, reward).
- The master also receives the same observation.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

from random import random
import numpy as np
import numpy.random as rn
try:
    from .BasePolicy import BasePolicy
    from .with_proba import with_proba
except ImportError:
    from BasePolicy import BasePolicy
    from with_proba import with_proba


# --- GenericAggregation algorithm

class GenericAggregation(BasePolicy):
    """ The GenericAggregation aggregation bandit algorithm.

    A master policy chooses, at every step, which one of the N slave
    algorithms ("children") is trusted for the decision. The reward is then
    given to every child (for the arm that was played) and to the master
    (for the index of the child that was trusted).
    """

    def __init__(self, nbArms, master=None, children=None, lower=0., amplitude=1.):
        """ Create the master policy and the slave algorithms.

        - `nbArms`: number of arms, given to every child created here.
        - `master`: the master policy, as an already created object, a class, or a dictionary ``{'archtype': cls, 'params': {...}}``. It is created with `nbChildren` "arms", one per child.
        - `children`: list of slave algorithms, each given in one of the three forms accepted for `master`.
        - `lower`, `amplitude`: given to every policy created here (a dictionary's ``'params'`` can override them).

        Raises a `TypeError` if `children` is None.
        """
        if children is None:
            # Same exception type as the former ``len(None)`` failure, but with an explicit message
            raise TypeError("Error: the parameter 'children' has to be a list of slave algorithms (objects, classes or dictionaries), but it was None ...")

        # Attributes
        self.nbArms = nbArms  #: Number of arms.
        self.lower = lower  #: Lower values for rewards.
        self.amplitude = amplitude  #: Amplitude of the rewards, given to the policies created here.
        self.last_choice = 0  #: Remember the index of the last child trusted for a decision.
        self.nbChildren = nbChildren = len(children)  #: Number N of slave algorithms.

        # Internal object memory
        #: Master policy, it plays on the indexes of the children.
        self.master = self._createPolicy(master, nbChildren, lower, amplitude, "master", "master")

        self.children = []  #: List of slave algorithms.
        for i, child in enumerate(children):
            self.children.append(
                self._createPolicy(child, nbArms, lower, amplitude, "child", "children[{}]".format(i))
            )

    @staticmethod
    def _createPolicy(spec, nbArms, lower, amplitude, role, name):
        """ Return a policy object from `spec`, creating it if needed.

        - If `spec` is a dictionary, ``spec['archtype']`` is called with `nbArms` and the keyword arguments `lower`, `amplitude`, updated with ``spec['params']`` (optional).
        - If `spec` is a class, it is called with `nbArms`, `lower` and `amplitude`.
        - Otherwise `spec` is returned unchanged (considered as already created).

        `role` and `name` are only used in the debug messages.
        """
        if isinstance(spec, dict):
            print("  Creating this {} player from a dictionnary '{}' = {} ...".format(role, name, spec))  # DEBUG
            localparams = {'lower': lower, 'amplitude': amplitude}
            # 'params' is optional: without it, only lower and amplitude are given
            localparams.update(spec.get('params', {}))
            return spec['archtype'](nbArms, **localparams)
        if isinstance(spec, type):
            print("  Using this not-yet created player '{}' = {} ...".format(name, spec))  # DEBUG
            return spec(nbArms, lower=lower, amplitude=amplitude)  # Create it here!
        print("  Using this already created player '{}' = {} ...".format(name, spec))  # DEBUG
        return spec

    def __str__(self):
        """ Nicely print the name of the algorithm with its relevant parameters."""
        return r"GenericAggr({}, $N={}$)".format(self.master, self.nbChildren)

    # --- Start the game

    def startGame(self):
        """ Start the game for the master, and then for each child."""
        self.master.startGame()
        for child in self.children:
            child.startGame()

    # --- Get a reward

    def getReward(self, arm, reward):
        """ Give reward for each child, and for the master.

        - The master receives `reward` for the index of the last trusted child (`last_choice`).
        - Every child receives `reward` for the `arm` that was played.
        """
        self.master.getReward(self.last_choice, reward)
        for child in self.children:
            child.getReward(arm, reward)

    # --- Choice of arm methods

    def _chooseChild(self):
        """ Ask the master who to listen to, remember this index in `last_choice`, and return this child."""
        self.last_choice = self.master.choice()
        return self.children[self.last_choice]

    def choice(self):
        """ Trust one of the slave and listen to his `choice`."""
        return self._chooseChild().choice()

    def choiceWithRank(self, rank=1):
        """ Trust one of the slave and listen to his `choiceWithRank`."""
        return self._chooseChild().choiceWithRank(rank=rank)

    def choiceFromSubSet(self, availableArms='all'):
        """ Trust one of the slave and listen to his `choiceFromSubSet`."""
        return self._chooseChild().choiceFromSubSet(availableArms=availableArms)

    def choiceMultiple(self, nb=1):
        """ Trust one of the slave and listen to his `choiceMultiple`."""
        return self._chooseChild().choiceMultiple(nb=nb)

    def choiceIMP(self, nb=1, startWithChoiceMultiple=True):
        """ Trust one of the slave and listen to his `choiceIMP`.

        - `startWithChoiceMultiple` is now given to the child (it was silently ignored before).
        """
        # NOTE(review): assumes every child accepts `startWithChoiceMultiple`, like this method does -- confirm against BasePolicy
        return self._chooseChild().choiceIMP(nb=nb, startWithChoiceMultiple=startWithChoiceMultiple)

    def estimatedOrder(self):
        r""" Trust one of the slave and listen to his `estimatedOrder`.

        - Return the estimate order of the arms, as a permutation on :math:`[0,...,K-1]` that would order the arms by increasing means.
        """
        # NOTE(review): this asks the master for a new choice, so it overwrites `last_choice`.
        # If called between `choice` and `getReward`, the master is rewarded for this child
        # and not for the one that actually played -- confirm that this is intended.
        return self._chooseChild().estimatedOrder()

    def estimatedBestArms(self, M=1):
        """ Return a (non-necessarily sorted) list of the indexes of the M-best arms. Identify the set M-best.

        - It is the last `M` values of `estimatedOrder()`.
        """
        assert 1 <= M <= self.nbArms, "Error: the parameter 'M' has to be between 1 and K = {}, but it was {} ...".format(self.nbArms, M)  # DEBUG
        order = self.estimatedOrder()
        return order[-M:]