Source code for Policies.CPUCB

# -*- coding: utf-8 -*-
""" The Clopper-Pearson UCB policy for bounded bandits.
Reference: [Garivier & Cappé, COLT 2011](https://arxiv.org/pdf/1102.2490.pdf).
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson"
__version__ = "0.6"

import numpy as np
import scipy.stats
np.seterr(divide='ignore')  # XXX dangerous in general, controlled here!

try:
    from .UCB import UCB
except ImportError:
    from UCB import UCB


def binofit_scalar(x, n, alpha=0.05):
    r""" Parameter estimates and confidence intervals for binomial data.

    For example:

    >>> np.random.seed(1234)  # reproducible results
    >>> true_p = 0.6
    >>> N = 100
    >>> x = np.random.binomial(N, true_p)
    >>> (phat, pci) = binofit_scalar(x, N)
    >>> phat
    0.61
    >>> pci  # 0.6 of course lies in the 95% confidence interval  # doctest: +ELLIPSIS
    (0.507..., 0.705...)
    >>> (phat, pci) = binofit_scalar(x, N, 0.01)
    >>> pci  # 0.6 is also in the 99% confidence interval, but it is larger  # doctest: +ELLIPSIS
    (0.476..., 0.732...)

    Like binofit in MATLAB, see https://fr.mathworks.com/help/stats/binofit.html.

    - ``(phat, pci) = binofit_scalar(x, n)`` returns a maximum likelihood estimate of the probability of success in a given binomial trial, based on the number of successes, ``x``, observed in ``n`` independent trials.
    - ``(phat, pci) = binofit_scalar(x, n)`` returns the probability estimate, ``phat``, and the 95% confidence interval, ``pci``, computed using the `Clopper-Pearson method <https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper-Pearson_interval>`_.
    - ``(phat, pci) = binofit_scalar(x, n, alpha)`` returns the ``100(1 - alpha)%`` confidence interval. For example, ``alpha = 0.01`` yields a ``99%`` confidence interval.

    For the Clopper-Pearson UCB algorithm:

    - ``x`` is the cumulated reward of some arm k, :math:`x = X_k(t)`,
    - ``n`` is the number of samples of that arm k, :math:`n = N_k(t)`,
    - and ``alpha`` is a small positive number, :math:`\alpha = \frac{1}{t^c}` in this algorithm (for :math:`c > 1, \simeq 1`, for instance ``c = 1.01``).

    Returns: ``(phat, pci)``

    - ``phat``: the estimate of p,
    - ``pci``: the confidence interval.

    .. note:: My reference implementation was https://github.com/sjara/extracellpy/blob/master/extrastats.py#L35, but http://statsmodels.sourceforge.net/devel/generated/statsmodels.stats.proportion.proportion_confint.html can also be used (it implies an extra requirement for the project).
    """
    # - If ``x = [x[0], x[1], ..., x[k-1]]`` is a vector, binofit returns a vector of the same size as ``x`` whose ``i``-th entry is the parameter estimate for ``x[i]``.
    #   All ``k`` estimates are independent of each other.
    # - If ``n = [n[0], n[1], ..., n[k-1]]`` is a vector of the same size as ``x``, binofit returns a vector whose ``i``-th entry is the parameter estimate based on the number of successes ``x[i]`` in ``n[i]`` independent trials.
    #   A scalar value for ``x`` or ``n`` is expanded to the same size as the other input.
    if n < 1:  # Extreme case
        phat = np.nan
        pci = (np.nan, np.nan)
    else:
        assert 0 <= x <= n, "Error: binofit_scalar(x, n) invalid value for x, not in [0, n], invalid outcome of binomial trials."  # DEBUG
        # Empirical mean, for one parameter
        phat = float(x) / float(n)
        # See https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper-Pearson_interval for the computation
        # Lower bound
        nu1 = 2 * x
        nu2 = 2 * (n - x + 1)
        F = scipy.stats.f.ppf(alpha / 2., nu1, nu2)
        lowerbound = (nu1 * F) / (nu2 + nu1 * F)
        if x == 0:  # extreme left case, truncate lowerbound to 0
            lowerbound = 0
            # upperbound = 1 - (alpha / 2.)**(1. / float(n))  # closed-form is available!
        # Upper bound
        nu1 = 2 * (x + 1)
        nu2 = 2 * (n - x)
        F = scipy.stats.f.ppf(1 - (alpha / 2.), nu1, nu2)
        upperbound = (nu1 * F) / (nu2 + nu1 * F)
        if x == n:  # extreme right case, truncate upperbound to 1
            # lowerbound = (alpha / 2.)**(1. / float(n))  # closed-form is available!
            upperbound = 1
        pci = (lowerbound, upperbound)
    return phat, pci
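

# Optional cross-check (added sketch, not part of the original module): the
# docstring above mentions ``statsmodels.stats.proportion.proportion_confint``
# as an alternative; with ``method='beta'`` it computes the same
# Clopper-Pearson interval. This helper is hypothetical and assumes
# statsmodels is installed (it is NOT a dependency of this project).
def _binofit_vs_statsmodels(x=61, n=100, alpha=0.05):
    """ Compare binofit_scalar against statsmodels' Clopper-Pearson interval. """
    from statsmodels.stats.proportion import proportion_confint  # extra requirement!
    phat, (low, up) = binofit_scalar(x, n, alpha)
    low2, up2 = proportion_confint(x, n, alpha=alpha, method='beta')  # 'beta' = Clopper-Pearson
    assert np.isclose(low, low2) and np.isclose(up, up2), "The two intervals should agree."
    return phat, (low, up)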


def binofit(xArray, nArray, alpha=0.05):
    """ Parameter estimates and confidence intervals for binomial data, for vectorial inputs.

    For example:

    >>> np.random.seed(1234)  # reproducible results
    >>> true_p = 0.6
    >>> N = 100
    >>> xArray = np.random.binomial(N, true_p, 4)
    >>> xArray
    array([61, 54, 61, 52])
    >>> (phat, pci) = binofit(xArray, N)
    >>> phat
    array([0.61, 0.54, 0.61, 0.52])
    >>> pci  # 0.6 of course lies in the 95% confidence intervals  # doctest: +ELLIPSIS
    array([[0.507..., 0.705...],
           [0.437..., 0.640...],
           [0.507..., 0.705...],
           [0.417..., 0.620...]])
    >>> (phat, pci) = binofit(xArray, N, 0.01)
    >>> pci  # 0.6 is also in the 99% confidence intervals, but it is larger  # doctest: +ELLIPSIS
    array([[0.476..., 0.732...],
           [0.407..., 0.668...],
           [0.476..., 0.732...],
           [0.387..., 0.650...]])
    """
    # If inputs are lists or tuples
    if isinstance(xArray, (list, tuple)):
        xArray = np.asarray(xArray)
    if isinstance(nArray, (list, tuple)):
        nArray = np.asarray(nArray)
    # If x is vectorial
    if isinstance(xArray, np.ndarray):
        origShape = xArray.shape
        Psuccess = np.empty(xArray.size)
        ConfIntervals = np.empty((xArray.size, 2))
        if not isinstance(nArray, np.ndarray):
            nArray = nArray * np.ones_like(xArray)
        if not isinstance(alpha, np.ndarray):
            alpha = alpha * np.ones_like(xArray)
        for inde in range(xArray.size):
            Psuccess[inde], ConfIntervals[inde, :] = binofit_scalar(xArray.flat[inde], nArray.flat[inde], alpha.flat[inde])
        return Psuccess.reshape(origShape), ConfIntervals.reshape(origShape + (2, ))
    else:
        return binofit_scalar(xArray, nArray, alpha)


def ClopperPearsonUCB(x, N, alpha=0.05):
    """ Returns just the upper-confidence bound of the confidence interval. """
    phat, (lowerbound, upperbound) = binofit(x, N, alpha=alpha)
    return upperbound
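

# Quick illustration (added sketch, not part of the original module): at a
# fixed empirical mean of 0.5, the Clopper-Pearson upper bound tightens as the
# number of samples grows. This shrinkage is exactly what makes the CP-UCB
# index converge to the empirical mean, so exploration decreases over time.
def _demo_shrinking_ucb(alpha=0.05):
    """ Print the upper bound for a constant empirical mean 0.5 and growing n. """
    for n in (10, 100, 1000):
        # n // 2 successes out of n trials: empirical mean is always 0.5
        print(n, ClopperPearsonUCB(n // 2, n, alpha=alpha))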


# # Define a vectorized clopperPearsonUCB function, in ONE line!
# clopperPearsonUCB = np.vectorize(ClopperPearsonUCB)
# clopperPearsonUCB.__doc__ = ClopperPearsonUCB.__doc__


#: Default value for the parameter c for CP-UCB
C = 1.01


class CPUCB(UCB):
    """ The Clopper-Pearson UCB policy for bounded bandits.

    Reference: [Garivier & Cappé, COLT 2011].
    """

    def __init__(self, nbArms, c=C, lower=0., amplitude=1.):
        super(CPUCB, self).__init__(nbArms, lower=lower, amplitude=amplitude)
        self.c = c  #: Parameter c for the CP-UCB formula (see below)

    def computeIndex(self, arm):
        r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k:

        .. math:: I_k(t) = \mathrm{ClopperPearsonUCB}\left( X_k(t), N_k(t), \frac{1}{t^c} \right),

        where :math:`\mathrm{ClopperPearsonUCB}` is defined above.

        The index is the upper-confidence bound of the binomial trial of :math:`N_k(t)` samples from arm k, having mean :math:`\mu_k` and empirical outcome :math:`X_k(t)`. The confidence interval uses :math:`\alpha = 1 / t^c`, for a :math:`100(1 - \alpha)\%` confidence bound.
        """
        if self.pulls[arm] < 1:
            return float('+inf')
        else:
            return ClopperPearsonUCB(self.rewards[arm], self.pulls[arm], 1. / (self.t ** self.c))
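
    # Worked illustration (added; all values below are hypothetical): the
    # index computed above unrolls to three simple steps,
    #
    #     X, N, t, c = 30, 50, 100, C   # cumulated reward, pulls, time, parameter c
    #     alpha = 1. / (t ** c)         # risk level alpha = 1 / t^c, shrinking with t
    #     index = ClopperPearsonUCB(X, N, alpha)
    #
    # i.e., the upper end of the Clopper-Pearson interval at risk 1 / t^c.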

    # def computeAllIndex(self):
    #     """ Compute the current indexes for all arms, in a vectorized manner."""
    #     indexes = ClopperPearsonUCB(self.rewards, self.pulls, 1. / (self.t ** self.c))
    #     indexes[self.pulls < 1] = float('+inf')
    #     self.index[:] = indexes


# --- Debugging

if __name__ == "__main__":
    # Code for debugging purposes.
    from doctest import testmod
    print("\nTesting automatically all the docstrings written in each function of this module :")
    testmod(verbose=True)
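

# --- Hedged usage sketch (added; not part of the original module). It assumes
# the policy API inherited from UCB in SMPyBandits (``startGame()``,
# ``choice()``, ``getReward(arm, reward)``); adapt the names if your version
# differs. Left commented out so the module's behavior is unchanged.
#
# means = [0.1, 0.5, 0.9]                   # three hypothetical Bernoulli arms
# policy = CPUCB(nbArms=len(means))
# policy.startGame()
# rng = np.random.RandomState(42)
# for _ in range(1000):
#     arm = policy.choice()                 # arm with the largest CP-UCB index
#     reward = float(rng.random_sample() < means[arm])
#     policy.getReward(arm, reward)
# print("Pulls:", policy.pulls)             # the best arm (index 2) should dominate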