Source code for Policies.Posterior.Beta

# -*- coding: utf-8 -*-
r""" Manipulate posteriors of Bernoulli/Beta experiments.

Rewards not in :math:`\{0, 1\}` are handled with a "random binarization" trick, see :func:`bernoulliBinarization`, cf. [Agrawal12]_ (algorithm 2).
When a reward :math:`r_t \in [0, 1]` is observed, the player instead receives a Bernoulli sample of mean :math:`r_t`, i.e., :math:`\tilde{r}_t \sim \mathrm{Bernoulli}(r_t)`, which is guaranteed to be in :math:`\{0, 1\}`.

- See https://en.wikipedia.org/wiki/Bernoulli_distribution#Related_distributions
- And https://en.wikipedia.org/wiki/Conjugate_prior#Discrete_distributions

.. [Agrawal12] http://jmlr.org/proceedings/papers/v23/agrawal12/agrawal12.pdf
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson"
__version__ = "0.9"

from random import random
try:
    from numpy.random import beta as betavariate  # Faster! Yes!
except ImportError:
    from random import betavariate
from scipy.special import btdtri

# Local imports
try:
    from .Posterior import Posterior

    from .with_proba import with_proba
except (ImportError, SystemError):
    from Posterior import Posterior

    from with_proba import with_proba


# --- Utility functions


def bernoulliBinarization(r_t):
    r""" Turn a reward :math:`r_t` from the continuous interval :math:`[0, 1]` into a (random) binary observation in :math:`\{0, 1\}`.

    - Rewards exactly equal to 0 or 1 are returned unchanged (as ``int``), without any random draw.
    - Any other reward is passed to :func:`with_proba` and the result is converted to ``int``.
    - This lets a Beta posterior be used for non-Bernoulli experiments, so that :class:`Thompson` sampling can be used for any continuous-valued bounded rewards.

    :param r_t: Reward, expected to be in :math:`[0, 1]`.
    :return: ``0`` or ``1``, always an ``int``.
    :raises AssertionError: If ``r_t`` is outside of :math:`[0, 1]`.

    Examples (the exact values drawn depend on the random generator used by :func:`with_proba`):

    >>> import random
    >>> random.seed(0)
    >>> bernoulliBinarization(0.3)
    1
    >>> bernoulliBinarization(0.3)
    0
    >>> bernoulliBinarization(0.3)
    0
    >>> bernoulliBinarization(0.3)
    0
    >>> bernoulliBinarization(0.9)
    1
    >>> bernoulliBinarization(0.9)
    1
    >>> bernoulliBinarization(0.9)
    1
    >>> bernoulliBinarization(0.9)
    0
    """
    # Already-binary rewards short-circuit: nothing random is drawn for them.
    if r_t == 0:
        return 0  # Returns a int!
    if r_t == 1:
        return 1  # Returns a int!

    assert 0 <= r_t <= 1, "Error: only bounded rewards in [0, 1] are supported by this Beta posterior right now."
    drawn = with_proba(r_t)
    return int(drawn)
# --- Class
class Beta(Posterior):
    r""" Manipulate posteriors of Bernoulli/Beta experiments.

    The whole state is the two-element list :attr:`N`, laid out so that it can be indexed directly by a binary observation:

    - ``N[1]`` is :math:`\alpha`, incremented by observations equal to 1,
    - ``N[0]`` is :math:`\beta`, incremented by observations equal to 0.
    """

    def __init__(self, a=1, b=1):
        r""" Create a Beta posterior :math:`\mathrm{Beta}(\alpha, \beta)` with no observation, i.e., :math:`\alpha = 1` and :math:`\beta = 1` by default.

        :param a: Initial value of :math:`\alpha` (has to be >= 0).
        :param b: Initial value of :math:`\beta` (has to be >= 0).
        :raises AssertionError: If ``a`` or ``b`` is negative.
        """
        assert a >= 0, "Error: parameter 'a' for Beta posterior has to be >= 0."  # DEBUG
        self._a = a
        assert b >= 0, "Error: parameter 'b' for Beta posterior has to be >= 0."  # DEBUG
        self._b = b
        # Every other method reads N[1] as alpha and N[0] as beta (the list is indexed by
        # the binary observation), so 'a' goes in slot 1 and 'b' in slot 0.
        self.N = [b, a]  #: List of two parameters ``[beta, alpha]``, indexed by the binary observation (0 or 1)

    def __str__(self):
        r""" Short description of the posterior, of the form ``Beta(\alpha=..., \beta=...)``."""
        return r"Beta(\alpha={:.3g}, \beta={:.3g})".format(self.N[1], self.N[0])

    def reset(self, a=None, b=None):
        r""" Reset :math:`\alpha` and :math:`\beta`.

        :param a: New value of :math:`\alpha`; if ``None``, the value given at creation is used.
        :param b: New value of :math:`\beta`; if ``None``, the value given at creation is used.
        """
        if a is None:
            a = self._a
        if b is None:
            b = self._b
        # Same [beta, alpha] layout as in __init__.
        self.N = [b, a]

    def sample(self):
        """ Get a random sample from the Beta posterior (using :func:`numpy.random.beta`, or :func:`random.betavariate` if numpy cannot be imported).

        - Used only by :class:`Thompson` Sampling and :class:`AdBandits` so far.
        """
        return betavariate(self.N[1], self.N[0])

    def quantile(self, p):
        """ Return the p quantile of the Beta posterior (using :func:`scipy.special.btdtri`).

        - Used only by :class:`BayesUCB` and :class:`AdBandits` so far.

        .. note:: Recent SciPy releases deprecate ``btdtri`` in favor of ``betaincinv`` -- to be confirmed against the SciPy version in use.
        """
        # Bug: do not call btdtri with (0.5,0.5,0.5) in scipy version < 0.9 (old)
        return btdtri(self.N[1], self.N[0], p)

    def mean(self):
        r""" Compute the mean of the Beta posterior, :math:`\alpha / (\alpha + \beta)` (should be useless)."""
        return self.N[1] / float(sum(self.N))

    def forget(self, obs):
        r""" Forget the last observation, by decreasing the corresponding count by 1.

        - If obs is 1, :math:`\alpha` is decreased; if it is 0, :math:`\beta` is decreased.

        .. warning:: For any other value, ``obs`` is binarized again with :func:`bernoulliBinarization`, by a new random draw, so the count that is decreased is not guaranteed to be the one that :meth:`update` increased for the same ``obs``.
        """
        # print("Info: calling Beta.forget() with obs = {} ...".format(obs))  # DEBUG
        # Accepts obs that are FLOAT in [0, 1] and not just in {0, 1}, thanks to the binarization.
        self.N[bernoulliBinarization(obs)] -= 1

    def update(self, obs):
        r""" Add an observation.

        - If obs is 1, update :math:`\alpha` the count of positive observations,
        - If it is 0, update :math:`\beta` the count of negative observations.

        .. note:: Otherwise, a trick with :func:`bernoulliBinarization` has to be used.
        """
        # print("Info: calling Beta.update() with obs = {} ...".format(obs))  # DEBUG
        # Accepts obs that are FLOAT in [0, 1] and not just in {0, 1}, thanks to the binarization.
        self.N[bernoulliBinarization(obs)] += 1