# Source code for Policies.BayesUCB

# -*- coding: utf-8 -*-
""" The Bayes-UCB policy.

- By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one per arm.
- Reference: [Kaufmann, Cappé & Garivier - AISTATS, 2012]
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Olivier Cappé, Aurélien Garivier, Emilie Kaufmann, Lilian Besson"
__version__ = "0.5"

try:
  from .BayesianIndexPolicy import BayesianIndexPolicy
except ImportError:
  from BayesianIndexPolicy import BayesianIndexPolicy


class BayesUCB(BayesianIndexPolicy):
    """ The Bayes-UCB policy.

    - By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one per arm.
    - Reference: [Kaufmann, Cappé & Garivier - AISTATS, 2012].
    """

    def computeIndex(self, arm):
        r""" Compute the current index of an arm as a high quantile of its posterior.

        At time t and after :math:`N_k(t)` pulls of arm k, giving :math:`S_k(t)` rewards of 1, the index is the :math:`1 - \frac{1}{1 + t}` quantile of the arm's posterior (a Beta posterior by default):

        .. math:: I_k(t) = \mathrm{Quantile}\left(\mathrm{Beta}(1 + S_k(t), 1 + N_k(t) - S_k(t)), 1 - \frac{1}{1 + t}\right).

        :param arm: Index of the arm, used to look up ``self.posterior[arm]``.
        :return: The value returned by ``self.posterior[arm].quantile`` at that level.
        """
        # The level is built from 1 + t rather than t, so the division stays
        # well defined (and the level is 0) when self.t == 0.
        return self.posterior[arm].quantile(1. - 1. / (1 + self.t))