Source code for Policies.BESA

# -*- coding: utf-8 -*-
""" The Best Empirical Sampled Average (BESA) algorithm.

- Reference: [[Sub-Sampling For Multi Armed Bandits, Baransi et al., 2014]](https://hal.archives-ouvertes.fr/hal-01025651)
- See also: https://github.com/SMPyBandits/SMPyBandits/issues/103 and https://github.com/SMPyBandits/SMPyBandits/issues/116

.. warning:: This algorithm works VERY well but it is looks weird at first sight. It sounds "too easy", so take a look to the article before wondering why it should work.

.. warning:: Right now, it is between 10 and 25 times slower than :class:`Policies.klUCB` and other single-player policies.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"


import numpy as np
try:
    from .IndexPolicy import IndexPolicy
except ImportError:
    from IndexPolicy import IndexPolicy


# --- Utility functions


[docs]def subsample_deterministic(n, m):
    r"""Returns :math:`\{1,\dots,n\}` if :math:`n < m` or :math:`\{1,\dots,m\}` if :math:`n \geq m` (*ie*, it is :math:`\{1,\dots,\min(n,m)\}`).

    .. warning:: The BESA algorithm is efficient only with the random sub-sampling, don't use this one except for comparing.

    >>> subsample_deterministic(5, 3)  # doctest: +ELLIPSIS
    array([0, 1, 2, 3])
    >>> subsample_deterministic(10, 20)  # doctest: +ELLIPSIS
    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
    """
    return np.arange(min(n, m) + 1)


[docs]def subsample_uniform(n, m):
    r"""Returns a uniform sub-set of size :math:`n`, from :math:`\{1,dots, m\}`.

    - Fails if n > m.

    .. note:: The BESA algorithm is efficient only with the random sub-sampling.

    >>> np.random.seed(1234)  # reproducible results
    >>> subsample_uniform(3, 5)  # doctest: +ELLIPSIS
    array([4, 0, 1])
    >>> subsample_uniform(10, 20)  # doctest: +ELLIPSIS
    array([ 7, 16,  2,  3,  1, 18,  5,  4,  0,  8])
    """
    return np.random.choice(m, size=n, replace=False)


# --- BESA core function, base case and recursive case


#: Numerical tolerance when comparing two means. Should not be zero!
TOLERANCE = 1e-6


[docs]def inverse_permutation(permutation, j):
    """ Inverse the permutation for given input j, that is, it finds i such that p[i] = j.

    >>> permutation = [1, 0, 3, 2]
    >>> inverse_permutation(permutation, 1)
    0
    >>> inverse_permutation(permutation, 0)
    1
    """
    for i, pi in enumerate(permutation):
        if pi == j:
            return i
    raise ValueError("inverse_permutation({}, {}) failed.".format(permutation, j))


[docs]def besa_two_actions(rewards, pulls, a, b, subsample_function=subsample_uniform):
    """ Core algorithm for the BESA selection, for two actions a and b:

    - N = min(Na, Nb),
    - Sub-sample N values from rewards of arm a, and N values from rewards of arm b,
    - Compute mean of both samples of size N, call them m_a, m_b,
    - If m_a > m_b, choose a,
    - Else if m_a < m_b, choose b,
    - And in case of a tie, break by choosing i such that Ni is minimal (or random [a, b] if Na=Nb).

    .. note:: ``rewards`` can be a numpy array of shape (at least) ``(nbArms, max(Na, Nb))`` or a dictionary maping ``a,b`` to lists (or iterators) of lengths ``>= max(Na, Nb)``.

    >>> np.random.seed(2345)  # reproducible results
    >>> pulls = [6, 10]; K = len(pulls); N = max(pulls)
    >>> rewards = np.random.randn(K, N)
    >>> np.mean(rewards, axis=1)  # arm 1 is better  # doctest: +ELLIPSIS
    array([0.154..., 0.158...])
    >>> np.mean(rewards[:, :min(pulls)], axis=1)  # arm 0 is better in the first 6 samples  # doctest: +ELLIPSIS
    array([0.341..., 0.019...])
    >>> besa_two_actions(rewards, pulls, 0, 1, subsample_function=subsample_deterministic)  # doctest: +ELLIPSIS
    0
    >>> [besa_two_actions(rewards, pulls, 0, 1, subsample_function=subsample_uniform) for _ in range(10)]  # doctest: +ELLIPSIS
    [0, 0, 1, 1, 0, 0, 1, 0, 0, 0]
    """
    if a == b:
        print("Error: no need to call 'besa_two_actions' if a = = {} = b = {}...".format(a, b))  # DEBUG
        return a
    Na, Nb = pulls[a], pulls[b]
    N = min(Na, Nb)
    Ia = subsample_function(N, Na)
    # assert all(0 <= i < Na for i in Ia), "Error: indexes in Ia should be between 0 and Na = {}".format(Na)  # DEBUG
    Ib = subsample_function(N, Nb)
    # assert all(0 <= i < Nb for i in Ib), "Error: indexes in Ib should be between 0 and Nb = {}".format(Nb)  # DEBUG
    # assert len(Ia) == len(Ib) == N, "Error in subsample_function, Ia of size = {} and Ib of size = {} should have size N = {} ...".format(len(Ia), len(Ib), N)  # DEBUG
    # Compute sub means
    if isinstance(rewards, np.ndarray):  # faster to compute this
        sub_mean_a = np.sum(rewards[a, Ia]) / N
        sub_mean_b = np.sum(rewards[b, Ib]) / N
    else:  # than this for other data type (eg. dict mapping int to list)
        sub_mean_a = sum(rewards[a][i] for i in Ia) / N
        sub_mean_b = sum(rewards[b][i] for i in Ib) / N
    # assert 0 <= min(sub_mean_a, sub_mean_b) <= max(sub_mean_a, sub_mean_b) <= 1
    # XXX I tested and these manual branching steps are the most efficient solution it is faster than using np.argmax()
    if sub_mean_a > (sub_mean_b + TOLERANCE):
        return a
    elif sub_mean_b > (sub_mean_a + TOLERANCE):
        return b
    else:  # 0 <= abs(sub_mean_a - sub_mean_b) <= TOLERANCE
        if Na < Nb:
            return a
        elif Na > Nb:
            return b
        else:  # if no way of breaking the tie, choose uniformly at random
            return np.random.choice([a, b])
            # chosen_arm = np.random.choice([a, b])
            # print("Warning: arms a = {} and b = {} had same sub-samples means = {:.3g} = {:.3g} and nb selections = {} = {}... so choosing uniformly at random {}!".format(a, b, sub_mean_a, sub_mean_b, Na, Nb, chosen_arm))  # WARNING
            # return chosen_arm


[docs]def besa_K_actions__non_randomized(rewards, pulls, left, right, subsample_function=subsample_uniform, depth=0):
    r""" BESA recursive selection algorithm for an action set of size :math:`\mathcal{K} \geq 1`.

    - I prefer to implement for a discrete action set :math:`\{\text{left}, \dots, \text{right}\}` (end *included*) instead of a generic ``actions`` vector, to speed up the code, but it is less readable.
    - The depth argument is just for pretty printing debugging information (useless).

    .. warning:: The binary tournament is NOT RANDOMIZED here, this version is only for testing.

    >>> np.random.seed(1234)  # reproducible results
    >>> pulls = [5, 6, 7, 8]; K = len(pulls); N = max(pulls)
    >>> rewards = np.random.randn(K, N)
    >>> np.mean(rewards, axis=1)  # arm 0 is better
    array([ 0.09876921, -0.18561207,  0.04463033,  0.0653539 ])
    >>> np.mean(rewards[:, :min(pulls)], axis=1)  # arm 1 is better in the first 6 samples
    array([-0.06401484,  0.17366346,  0.05323033, -0.09514708])
    >>> besa_K_actions__non_randomized(rewards, pulls, 0, K-1, subsample_function=subsample_deterministic)  # doctest: +ELLIPSIS
    3
    >>> [besa_K_actions__non_randomized(rewards, pulls, 0, K-1, subsample_function=subsample_uniform) for _ in range(10)]  # doctest: +ELLIPSIS
    [3, 3, 2, 3, 3, 0, 0, 0, 2, 3]
    """
    # assert left <= right, "Error: in 'besa_K_actions' function, left = {} was not <= right = {}...".format(left, right)  # DEBUG
    # print("In 'besa_K_actions', left = {} and right = {} for this call.".format(left, right))  # DEBUG
    if left == right:
        chosen_arm = left
    elif right == left + 1:
        chosen_arm = besa_two_actions(rewards, pulls, left, right, subsample_function=subsample_function)
    else:
        pivot = (left + right) // 2
        # print("Using pivot = {}, left = {} and right = {}...".format(pivot, left, right))  # DEBUG
        chosen_left = besa_K_actions__non_randomized(rewards, pulls, left, pivot, subsample_function=subsample_function, depth=depth+1)
        # assert left <= chosen_left <= pivot, "Error: the output chosen_left = {} from tournament from left = {} to pivot = {} should be between the two...".format(chosen_left, left, pivot)  # DEBUG
        chosen_right = besa_K_actions__non_randomized(rewards, pulls, pivot + 1, right, subsample_function=subsample_function, depth=depth+1)
        # assert pivot + 1 <= chosen_right <= right, "Error: the output chosen_right = {} from tournament from pivot + 1 = {} to right = {} should be between the two...".format(chosen_right, pivot + 1, right)  # DEBUG
        # print("The two recursive calls gave chosen_left = {}, chosen_right = {}...".format(chosen_left, chosen_right))  # DEBUG
        chosen_arm = besa_two_actions(rewards, pulls, chosen_left, chosen_right, subsample_function=subsample_function)
    # print("{}In 'besa_K_actions', left = {} and right = {} gave chosen_arm = {}.".format("\t" * depth, left, right, chosen_arm))  # DEBUG
    return chosen_arm


[docs]def besa_K_actions__smart_divideandconquer(rewards, pulls, left, right, random_permutation_of_arm=None, subsample_function=subsample_uniform, depth=0):
    r""" BESA recursive selection algorithm for an action set of size :math:`\mathcal{K} \geq 1`.

    - I prefer to implement for a discrete action set :math:`\{\text{left}, \dots, \text{right}\}` (end *included*) instead of a generic ``actions`` vector, to speed up the code, but it is less readable.
    - The depth argument is just for pretty printing debugging information (useless).

    .. note:: The binary tournament is RANDOMIZED here, as it should be.

    >>> np.random.seed(1234)  # reproducible results
    >>> pulls = [5, 6, 7, 8]; K = len(pulls); N = max(pulls)
    >>> rewards = np.random.randn(K, N)
    >>> np.mean(rewards, axis=1)  # arm 0 is better
    array([ 0.09876921, -0.18561207,  0.04463033,  0.0653539 ])
    >>> np.mean(rewards[:, :min(pulls)], axis=1)  # arm 1 is better in the first 6 samples
    array([-0.06401484,  0.17366346,  0.05323033, -0.09514708])
    >>> besa_K_actions__smart_divideandconquer(rewards, pulls, 0, K-1, subsample_function=subsample_deterministic)  # doctest: +ELLIPSIS
    3
    >>> [besa_K_actions__smart_divideandconquer(rewards, pulls, 0, K-1, subsample_function=subsample_uniform) for _ in range(10)]  # doctest: +ELLIPSIS
    [3, 3, 2, 3, 3, 0, 0, 0, 2, 3]
    """
    # assert left <= right, "Error: in 'besa_K_actions__smart_divideandconquer' function, left = {} was not <= right = {}...".format(left, right)  # DEBUG
    # print("In 'besa_K_actions__smart_divideandconquer', left = {} and right = {} for this call.".format(left, right))  # DEBUG
    if left == right:
        chosen_arm = left
    elif right == left + 1:
        chosen_arm = besa_two_actions(rewards, pulls, left, right, subsample_function=subsample_function)
    else:
        pivot = (left + right) // 2
        # print("Using pivot = {}, left = {} and right = {}...".format(pivot, left, right))  # DEBUG
        chosen_left = besa_K_actions__smart_divideandconquer(rewards, pulls, left, pivot, random_permutation_of_arm=random_permutation_of_arm, subsample_function=subsample_function, depth=depth+1)
        # chosen_left = inverse_permutation(random_permutation_of_arm, chosen_left)
        # assert left <= chosen_left <= pivot, "Error: the output chosen_left = {} from tournament from left = {} to pivot = {} should be between the two...".format(chosen_left, left, pivot)  # DEBUG
        chosen_right = besa_K_actions__smart_divideandconquer(rewards, pulls, pivot + 1, right, random_permutation_of_arm=random_permutation_of_arm, subsample_function=subsample_function, depth=depth+1)
        # chosen_right = inverse_permutation(random_permutation_of_arm, chosen_right)
        # assert pivot + 1 <= chosen_right <= right, "Error: the output chosen_right = {} from tournament from pivot + 1 = {} to right = {} should be between the two...".format(chosen_right, pivot + 1, right)  # DEBUG
        # print("The two recursive calls gave chosen_left = {}, chosen_right = {}...".format(chosen_left, chosen_right))  # DEBUG
        if random_permutation_of_arm is not None:
            chosen_left, chosen_right = random_permutation_of_arm[chosen_left], random_permutation_of_arm[chosen_right]
        chosen_arm = besa_two_actions(rewards, pulls, chosen_left, chosen_right, subsample_function=subsample_function)
    # print("{}In 'besa_K_actions__smart_divideandconquer', left = {} and right = {} gave chosen_arm = {}.".format("\t" * depth, left, right, chosen_arm))  # DEBUG
    if random_permutation_of_arm is not None:
        return inverse_permutation(random_permutation_of_arm, chosen_arm)
    else:
        return chosen_arm


[docs]def besa_K_actions(rewards, pulls, actions, subsample_function=subsample_uniform, depth=0):
    r""" BESA recursive selection algorithm for an action set of size :math:`\mathcal{K} \geq 1`.

    - The divide and conquer is implemented for a generic list of actions, it's slower but simpler to write! Left and right divisions are just ``actions[:len(actions)//2]`` and ``actions[len(actions)//2:]``.
    - Actions is assumed to be shuffled *before* calling this function!
    - The depth argument is just for pretty printing debugging information (useless).

    .. note:: The binary tournament is RANDOMIZED here, *as it should be*.

    >>> np.random.seed(1234)  # reproducible results
    >>> pulls = [5, 6, 7, 8]; K = len(pulls); N = max(pulls)
    >>> actions = np.arange(K)
    >>> rewards = np.random.randn(K, N)
    >>> np.mean(rewards, axis=1)  # arm 0 is better
    array([ 0.09876921, -0.18561207,  0.04463033,  0.0653539 ])
    >>> np.mean(rewards[:, :min(pulls)], axis=1)  # arm 1 is better in the first 6 samples
    array([-0.06401484,  0.17366346,  0.05323033, -0.09514708])
    >>> besa_K_actions(rewards, pulls, actions, subsample_function=subsample_deterministic)  # doctest: +ELLIPSIS
    3
    >>> [besa_K_actions(rewards, pulls, actions, subsample_function=subsample_uniform) for _ in range(10)]  # doctest: +ELLIPSIS
    [3, 3, 2, 3, 3, 0, 0, 0, 2, 3]
    """
    # print("In 'besa_K_actions', actions = {} for this call.".format(actions))  # DEBUG
    if len(actions) <= 1:
        chosen_arm = actions[0]
    elif len(actions) == 2:
        chosen_arm = besa_two_actions(rewards, pulls, actions[0], actions[1], subsample_function=subsample_function)
    else:
        # actions is already shuffled!
        actions_left = actions[:len(actions)//2]
        actions_right = actions[len(actions)//2:]
        # print("Using actions_left = {} and actions_right = {}...".format(actions_left, actions_right))  # DEBUG
        chosen_left = besa_K_actions(rewards, pulls, actions_left, subsample_function=subsample_function, depth=depth+1)
        chosen_right = besa_K_actions(rewards, pulls, actions_right, subsample_function=subsample_function, depth=depth+1)
        # print("The two recursive calls gave chosen_left = {}, chosen_right = {}...".format(chosen_left, chosen_right))  # DEBUG
        chosen_arm = besa_two_actions(rewards, pulls, chosen_left, chosen_right, subsample_function=subsample_function)
    # print("{}In 'besa_K_actions', actions = {} gave chosen_arm = {}.".format("\t" * depth, actions, chosen_arm))  # DEBUG
    return chosen_arm


[docs]def besa_K_actions__non_binary(rewards, pulls, actions, subsample_function=subsample_uniform, depth=0):
    r""" BESA recursive selection algorithm for an action set of size :math:`\mathcal{K} \geq 1`.

    - Instead of doing this binary tree tournaments (which results in :math:`\mathcal{O}(K^2)` calls to the 2-arm procedure), we can do a line tournaments: 1 vs 2, winner vs 3, winner vs 4 etc, winner vs K-1 (which results in :math:`\mathcal{O}(K)` calls),
    - Actions is assumed to be shuffled *before* calling this function!
    - The depth argument is just for pretty printing debugging information (useless).

    >>> np.random.seed(1234)  # reproducible results
    >>> pulls = [5, 6, 7, 8]; K = len(pulls); N = max(pulls)
    >>> actions = np.arange(K)
    >>> rewards = np.random.randn(K, N)
    >>> np.mean(rewards, axis=1)  # arm 0 is better
    array([ 0.09876921, -0.18561207,  0.04463033,  0.0653539 ])
    >>> np.mean(rewards[:, :min(pulls)], axis=1)  # arm 1 is better in the first 6 samples
    array([-0.06401484,  0.17366346,  0.05323033, -0.09514708])
    >>> besa_K_actions__non_binary(rewards, pulls, actions, subsample_function=subsample_deterministic)  # doctest: +ELLIPSIS
    3
    >>> [besa_K_actions__non_binary(rewards, pulls, actions, subsample_function=subsample_uniform) for _ in range(10)]  # doctest: +ELLIPSIS
    [3, 3, 3, 2, 0, 3, 3, 3, 3, 3]
    """
    # print("In 'besa_K_actions__non_binary', actions = {} for this call.".format(actions))  # DEBUG
    if len(actions) <= 1:
        chosen_arm = actions[0]
    elif len(actions) == 2:
        chosen_arm = besa_two_actions(rewards, pulls, actions[0], actions[1], subsample_function=subsample_function)
    else:
        chosen_arm = actions[0]
        for i in range(1, len(actions)):
            chosen_arm = besa_two_actions(rewards, pulls, chosen_arm, actions[i], subsample_function=subsample_function)
    # print("{}In 'besa_K_actions__non_binary', actions = {} gave chosen_arm = {}.".format("\t" * depth, actions, chosen_arm))  # DEBUG
    return chosen_arm


[docs]def besa_K_actions__non_recursive(rewards, pulls, actions, subsample_function=subsample_uniform, depth=0):
    r""" BESA non-recursive selection algorithm for an action set of size :math:`\mathcal{K} \geq 1`.

    - No calls to :func:`besa_two_actions`, just generalize it to K actions instead of 2.
    - Actions is assumed to be shuffled *before* calling this function!

    >>> np.random.seed(1234)  # reproducible results
    >>> pulls = [5, 6, 7, 8]; K = len(pulls); N = max(pulls)
    >>> rewards = np.random.randn(K, N)
    >>> np.mean(rewards, axis=1)  # arm 0 is better
    array([ 0.09876921, -0.18561207,  0.04463033,  0.0653539 ])
    >>> np.mean(rewards[:, :min(pulls)], axis=1)  # arm 1 is better in the first 6 samples
    array([-0.06401484,  0.17366346,  0.05323033, -0.09514708])
    >>> besa_K_actions__non_recursive(rewards, pulls, None, subsample_function=subsample_deterministic)  # doctest: +ELLIPSIS
    3
    >>> [besa_K_actions__non_recursive(rewards, pulls, None, subsample_function=subsample_uniform) for _ in range(10)]  # doctest: +ELLIPSIS
    [1, 3, 0, 2, 2, 3, 1, 1, 3, 1]
    """
    K = len(pulls)
    min_pulls = np.min(pulls)
    sub_means = np.zeros(K)
    for k in range(K):
        Ik = subsample_function(min_pulls, pulls[k])
        # sub_means[k] = np.mean(rewards[k, Ik])
        if isinstance(rewards, np.ndarray):  # faster to compute this
            sub_means[k] = np.sum(rewards[k, Ik]) / min_pulls
        else:  # than this for other data type (eg. dict mapping int to list)
            sub_means[k] = sum(rewards[k][i] for i in Ik) / min_pulls
    max_sub_means = np.max(sub_means)
    which_are_best = np.nonzero(sub_means == max_sub_means)[0]
    best_less_sampled = np.asarray(pulls)[which_are_best]
    # return which_are_best[np.argmin(best_less_sampled)]
    return which_are_best[np.random.choice(np.nonzero(best_less_sampled == np.min(best_less_sampled))[0])]


# --- The BESA policy


[docs]class BESA(IndexPolicy):
    r""" The Best Empirical Sampled Average (BESA) algorithm.

    - Reference: [[Sub-Sampling For Multi Armed Bandits, Baransi et al., 2014]](https://hal.inria.fr/hal-01025651)

    .. warning:: The BESA algorithm requires to store all the history of rewards, so its memory usage for :math:`T` rounds with :math:`K` arms is :math:`\mathcal{O}(K T)`, which is huge for large :math:`T`, be careful! Aggregating different BESA instances is probably a bad idea because of this limitation!
    """

[docs]    def __init__(self, nbArms, horizon=None,
                 minPullsOfEachArm=1, randomized_tournament=True, random_subsample=True,
                 non_binary=False, non_recursive=False,
                 lower=0., amplitude=1.):
        super(BESA, self).__init__(nbArms, lower=lower, amplitude=amplitude)
        # --- Arguments
        # XXX find a solution to not need to horizon?
        self.horizon = horizon  #: Just to know the memory to allocate for rewards. It could be implemented without knowing the horizon, by using lists to keep all the reward history, but this would be way slower!
        self.minPullsOfEachArm = max(1, int(minPullsOfEachArm))  #: Minimum number of pulls of each arm before using the BESA algorithm. Using 1 might not be the best choice
        self.randomized_tournament = randomized_tournament  #: Whether to use a deterministic or random tournament.
        self.random_subsample = random_subsample  #: Whether to use a deterministic or random sub-sampling procedure.
        self.non_binary = non_binary  #: Whether to use :func:`besa_K_actions` or :func:`besa_K_actions__non_binary` for the selection of K arms.
        self.non_recursive = non_recursive  #: Whether to use :func:`besa_K_actions` or :func:`besa_K_actions__non_recursive` for the selection of K arms.
        assert not (non_binary and non_recursive), "Error: BESA cannot use simultaneously non_binary and non_recursive option..."  # DEBUG
        self._subsample_function = subsample_uniform if random_subsample else subsample_deterministic
        self._besa_function = besa_K_actions
        if non_binary:
            self._besa_function = besa_K_actions__non_binary
        if non_recursive:
            self._besa_function = besa_K_actions__non_recursive
        # --- Internal memory
        assert nbArms >= 2, "Error: BESA algorithm can only work for at least 2 arms."
        self._left = 0  # just keep them in memory to increase readability
        self._right = nbArms - 1  # just keep them in memory to increase readability
        self._actions = np.arange(nbArms)  # just keep them in memory to increase readability

        # Memory to store all the rewards
        self._has_horizon = (self.horizon is not None) and (self.horizon > 1)
        if self._has_horizon:
            self.all_rewards = np.zeros((nbArms, horizon + 1))  #: Keep **all** rewards of each arms. It consumes a :math:`\mathcal{O}(K T)` memory, that's really bad!!
            self.all_rewards.fill(-1e5)  # Just security, to be sure they don't count as zero in some computation
        else:
            self.all_rewards = { k : [] for k in range(nbArms) }

[docs]    def __str__(self):
        """ -> str"""
        b1, b2, b3, b4, b5, b6 = not self.random_subsample, not self.randomized_tournament, self.minPullsOfEachArm > 1, not self._has_horizon, self.non_binary, self.non_recursive
        return "BESA{}{}{}{}{}{}{}{}{}{}{}{}{}".format(
            "(" if (b1 or b2 or b3 or b4 or b5 or b6) else "",
            "non-random subsample" if b1 else "",
            ", " if b1 and (b2 or b3 or b4 or b5 or b6) else "",
            "non-random tournament" if b2 else "",
            ", " if b2 and (b3 or b4 or b5 or b6) else "",
            r"$T_0={}$".format(self.minPullsOfEachArm) if b3 else "",
            ", " if b3 and (b4 or b5 or b6) else "",
            "anytime" if b4 else "",
            ", " if b4 and (b5 or b6) else "",
            "non-binary" if b5 else "",
            ", " if b5 and b6 else "",
            "non-recursive" if b6 else "",
            ")" if (b1 or b2 or b3 or b4 or b5 or b6) else "",
        )

[docs]    def getReward(self, arm, reward):
        """ Add the current reward in the global history.

        .. note:: There is no need to normalize the reward in [0,1], that's one of the strong point of the BESA algorithm."""
        # XXX find a solution to not need to horizon?
        if self._has_horizon:
            self.all_rewards[arm, self.pulls[arm]] = reward
        else:
            self.all_rewards[arm].append(reward)
        super(BESA, self).getReward(arm, reward)

    # --- Basic choice() and handleCollision() method

[docs]    def choice(self):
        """ Applies the BESA procedure with the current data history."""
        # if some arm has never been selected, force to explore it!
        if np.any(self.pulls < self.minPullsOfEachArm):
            return np.random.choice(np.where(self.pulls < self.minPullsOfEachArm)[0])
        else:
            if self.randomized_tournament:
                np.random.shuffle(self._actions)
            # print("Calling 'besa_K_actions' with actions list = {}...".format(self._actions))  # DEBUG
            return self._besa_function(self.all_rewards, self.pulls, self._actions, subsample_function=self._subsample_function, depth=0)

    # --- Others choice...() methods, partly implemented

[docs]    def choiceFromSubSet(self, availableArms='all'):
        """ Applies the BESA procedure with the current data history, to the restricted set of arm."""
        if availableArms == 'all':
            return self.choice()
        else:
            # if some arm has never been selected, force to explore it!
            if any(self.pulls[k] < self.minPullsOfEachArm for k in availableArms):
                return np.random.choice([k for k in availableArms if self.pulls[k] < self.minPullsOfEachArm])
            else:
                actions = list(availableArms)
                if self.randomized_tournament:
                    np.random.shuffle(actions)
                # print("Calling 'besa_K_actions' with actions list = {}...".format(actions))  # DEBUG
                return besa_K_actions(self.all_rewards, self.pulls, actions, subsample_function=self._subsample_function, depth=0)

[docs]    def choiceMultiple(self, nb=1):
        """ Applies the multiple-choice BESA procedure with the current data history:

        1. select a first arm with basic BESA procedure with full action set,
        2. remove it from the set of actions,
        3. restart step 1 with new smaller set of actions, until ``nb`` arm where chosen by basic BESA.

        .. note:: This was not studied or published before, and there is no theoretical results about it!

        .. warning:: This is very inefficient! The BESA procedure is already quite slow (with my current naive implementation), this is crazily slow!
        """
        if nb == 1:
            return np.array([self.choice()])
        else:
            actions = list(range(self.nbArms))
            choices = []
            for _ in range(nb):
                # if some arm has never been selected, force to explore it!
                if np.any(self.pulls[actions] < self.minPullsOfEachArm):
                    choice_n = actions[np.random.choice(np.where(self.pulls[actions] < self.minPullsOfEachArm)[0])]
                else:
                    if self.randomized_tournament:
                        np.random.shuffle(actions)
                    # print("Calling 'besa_K_actions' with actions list = {}...".format(actions))  # DEBUG
                    choice_n = besa_K_actions(self.all_rewards, self.pulls, actions, subsample_function=self._subsample_function, depth=0)
                # now, store it, remove it from action set
                choices.append(choice_n)
                actions.remove(choice_n)
            return np.array(choices)

[docs]    def choiceWithRank(self, rank=1):
        """ Applies the ranked BESA procedure with the current data history:

        1. use :meth:`choiceMultiplie` to select ``rank`` actions,
        2. then take the ``rank``-th chosen action (the last one).

        .. note:: This was not studied or published before, and there is no theoretical results about it!

        .. warning:: This is very inefficient! The BESA procedure is already quite slow (with my current naive implementation), this is crazily slow!
        """
        choices = self.choiceMultiple(nb=rank)
        return choices[-1]

    # XXX self.index is NOT used to choose arm, only to estimate their order

[docs]    def computeIndex(self, arm):
        """ Compute the current index of arm 'arm'.

        .. warning:: This index **is not** the one used for the choice of arm (which use sub sampling). It's just the empirical mean of the arm.
        """
        if self.pulls[arm] < 1:
            return float('+inf')
        else:
            return self.rewards[arm] / self.pulls[arm]

[docs]    def computeAllIndex(self):
        """ Compute the current index of arm 'arm' (vectorized).

        .. warning:: This index **is not** the one used for the choice of arm (which use sub sampling). It's just the empirical mean of the arm.
        """
        self.index = self.rewards / self.pulls
        self.index[self.pulls < 1] = float('+inf')

[docs]    def handleCollision(self, arm, reward=None):
        """ Nothing special to do."""
        pass


# --- Debugging

if __name__ == "__main__":
    # Code for debugging purposes.
    from doctest import testmod
    print("\nTesting automatically all the docstring written in each functions of this module :")
    testmod(verbose=True)