# Source code for Policies.Thompson

# -*- coding: utf-8 -*-
""" The Thompson (Bayesian) index policy.

- By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm.
- Reference: [Thompson - Biometrika, 1933].
"""
from __future__ import division, print_function  # Python 2 compatibility

# Module metadata: authorship and version strings.
__author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson"
__version__ = "0.9"

# Import the base class. The package-relative import is tried first; if it
# raises ImportError or SystemError, fall back to a plain top-level import.
# NOTE(review): presumably this lets the file be used both as part of the
# package and when run/imported directly from its own directory -- confirm.
try:
    from .BayesianIndexPolicy import BayesianIndexPolicy
except (ImportError, SystemError):
    from BayesianIndexPolicy import BayesianIndexPolicy


class Thompson(BayesianIndexPolicy):
    r"""The Thompson (Bayesian) index policy.

    - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm.
    - Prior is initially flat, i.e., :math:`a=\alpha_0=1` and :math:`b=\beta_0=1`.

    - A non-flat prior for each arm can be given with parameters ``a`` and ``b``, for instance::

        nbArms = 2
        prior_failures  = a = 100
        prior_successes = b = 50
        policy = Thompson(nbArms, a=a, b=b)
        np.mean([policy.choice() for _ in range(1000)])  # 0.515 ~= 0.5: each arm has same prior!

    - A different prior for each arm can be given with parameters ``params_for_each_posterior``, for instance::

        nbArms = 2
        params0 = { 'a': 10, 'b': 5}  # mean 1/3
        params1 = { 'a': 5, 'b': 10}  # mean 2/3
        params = [params0, params1]
        policy = Thompson(nbArms, params_for_each_posterior=params)
        np.mean([policy.choice() for _ in range(1000)])  # 0.9719 ~= 1: arm 1 is better than arm 0 !

    - Reference: [Thompson - Biometrika, 1933].
    """

    def __str__(self):
        """Return the human-readable name of this policy."""
        return "Thompson Sampling"

    def computeIndex(self, arm):
        r""" Compute the current index, at time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, by sampling from the Beta posterior:

        .. math::

            A(t) &\sim U(\arg\max_{1 \leq k \leq K} I_k(t)),\\
            I_k(t) &\sim \mathrm{Beta}(1 + \tilde{S_k}(t), 1 + \tilde{N_k}(t) - \tilde{S_k}(t)).

        :param arm: key used to look up this arm's posterior in ``self.posterior``.
        :return: the value returned by that posterior's ``sample()`` method.
        """
        # The index is one random draw from the arm's posterior (no caching).
        return self.posterior[arm].sample()