Source code for Policies.kullback

# -*- coding: utf-8 -*-
""" Kullback-Leibler divergence functions and klUCB utilities.

- Faster implementation can be found in a C file, in ``Policies/C``, and should be compiled to speedup computations.
- However, the version here have examples, doctests, and are jit compiled on the fly (with numba, cf. http://numba.pydata.org/).
- Cf. https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
- Reference: [Filippi, Cappé & Garivier - Allerton, 2011](https://arxiv.org/pdf/1004.5229.pdf) and [Garivier & Cappé, 2011](https://arxiv.org/pdf/1102.2490.pdf)


.. warning::

    All functions are *not* vectorized, and assume only one value for each argument.
    If you want vectorized function, use the wrapper :py:class:`numpy.vectorize`:

    >>> import numpy as np
    >>> klBern_vect = np.vectorize(klBern)
    >>> klBern_vect([0.1, 0.5, 0.9], 0.2)  # doctest: +ELLIPSIS
    array([0.036..., 0.223..., 1.145...])
    >>> klBern_vect(0.4, [0.2, 0.3, 0.4])  # doctest: +ELLIPSIS
    array([0.104..., 0.022..., 0...])
    >>> klBern_vect([0.1, 0.5, 0.9], [0.2, 0.3, 0.4])  # doctest: +ELLIPSIS
    array([0.036..., 0.087..., 0.550...])

    For some functions, you would be better off writing a vectorized version manually, for instance if you want to fix a value of some optional parameters:

    >>> # WARNING using np.vectorize gave weird result on klGauss
    >>> # klGauss_vect = np.vectorize(klGauss, excluded="y")
    >>> def klGauss_vect(xs, y, sig2x=0.25):  # vectorized for first input only
    ...    return np.array([klGauss(x, y, sig2x) for x in xs])
    >>> klGauss_vect([-1, 0, 1], 0.1)  # doctest: +ELLIPSIS
    array([2.42, 0.02, 1.62])
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Olivier Cappé, Aurélien Garivier, Lilian Besson"
__version__ = "0.9"

from math import log, sqrt, exp

import numpy as np

# https://www.docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html#scipy.optimize.fixed_point
from scipy import optimize

try:
    from .usenumba import jit  # Import numba.jit or a dummy jit(f)=f
except (ValueError, ImportError, SystemError):
    from usenumba import jit  # Import numba.jit or a dummy jit(f)=f


eps = 1e-15  #: Threshold value: everything in [0, 1] is truncated to [eps, 1 - eps]


# --- Simple Kullback-Leibler divergence for known distributions


[docs]@jit
def klBern(x, y):
    r""" Kullback-Leibler divergence for Bernoulli distributions. https://en.wikipedia.org/wiki/Bernoulli_distribution#Kullback.E2.80.93Leibler_divergence

    .. math:: \mathrm{KL}(\mathcal{B}(x), \mathcal{B}(y)) = x \log(\frac{x}{y}) + (1-x) \log(\frac{1-x}{1-y}).

    >>> klBern(0.5, 0.5)
    0.0
    >>> klBern(0.1, 0.9)  # doctest: +ELLIPSIS
    1.757779...
    >>> klBern(0.9, 0.1)  # And this KL is symmetric  # doctest: +ELLIPSIS
    1.757779...
    >>> klBern(0.4, 0.5)  # doctest: +ELLIPSIS
    0.020135...
    >>> klBern(0.01, 0.99)  # doctest: +ELLIPSIS
    4.503217...

    - Special values:

    >>> klBern(0, 1)  # Should be +inf, but 0 --> eps, 1 --> 1 - eps  # doctest: +ELLIPSIS
    34.539575...
    """
    x = min(max(x, eps), 1 - eps)
    y = min(max(y, eps), 1 - eps)
    return x * log(x / y) + (1 - x) * log((1 - x) / (1 - y))


[docs]@jit
def klBin(x, y, n):
    r""" Kullback-Leibler divergence for Binomial distributions. https://math.stackexchange.com/questions/320399/kullback-leibner-divergence-of-binomial-distributions

    - It is simply the n times :func:`klBern` on x and y.

    .. math:: \mathrm{KL}(\mathrm{Bin}(x, n), \mathrm{Bin}(y, n)) = n \times \left(x \log(\frac{x}{y}) + (1-x) \log(\frac{1-x}{1-y}) \right).

    .. warning:: The two distributions must have the same parameter n, and x, y are p, q in (0, 1).

    >>> klBin(0.5, 0.5, 10)
    0.0
    >>> klBin(0.1, 0.9, 10)  # doctest: +ELLIPSIS
    17.57779...
    >>> klBin(0.9, 0.1, 10)  # And this KL is symmetric  # doctest: +ELLIPSIS
    17.57779...
    >>> klBin(0.4, 0.5, 10)  # doctest: +ELLIPSIS
    0.20135...
    >>> klBin(0.01, 0.99, 10)  # doctest: +ELLIPSIS
    45.03217...

    - Special values:

    >>> klBin(0, 1, 10)  # Should be +inf, but 0 --> eps, 1 --> 1 - eps  # doctest: +ELLIPSIS
    345.39575...
    """
    x = min(max(x, eps), 1 - eps)
    y = min(max(y, eps), 1 - eps)
    return n * (x * log(x / y) + (1 - x) * log((1 - x) / (1 - y)))


[docs]@jit
def klPoisson(x, y):
    r""" Kullback-Leibler divergence for Poison distributions. https://en.wikipedia.org/wiki/Poisson_distribution#Kullback.E2.80.93Leibler_divergence

    .. math:: \mathrm{KL}(\mathrm{Poisson}(x), \mathrm{Poisson}(y)) = y - x + x \times \log(\frac{x}{y}).

    >>> klPoisson(3, 3)
    0.0
    >>> klPoisson(2, 1)  # doctest: +ELLIPSIS
    0.386294...
    >>> klPoisson(1, 2)  # And this KL is non-symmetric  # doctest: +ELLIPSIS
    0.306852...
    >>> klPoisson(3, 6)  # doctest: +ELLIPSIS
    0.920558...
    >>> klPoisson(6, 8)  # doctest: +ELLIPSIS
    0.273907...

    - Special values:

    >>> klPoisson(1, 0)  # Should be +inf, but 0 --> eps, 1 --> 1 - eps  # doctest: +ELLIPSIS
    33.538776...
    >>> klPoisson(0, 0)
    0.0
    """
    x = max(x, eps)
    y = max(y, eps)
    return y - x + x * log(x / y)


inf = float('+inf')

[docs]@jit
def klExp(x, y):
    r""" Kullback-Leibler divergence for exponential distributions. https://en.wikipedia.org/wiki/Exponential_distribution#Kullback.E2.80.93Leibler_divergence

    .. math::

        \mathrm{KL}(\mathrm{Exp}(x), \mathrm{Exp}(y)) = \begin{cases}
        \frac{x}{y} - 1 - \log(\frac{x}{y}) & \text{if} x > 0, y > 0\\
        +\infty & \text{otherwise}
        \end{cases}

    >>> klExp(3, 3)
    0.0
    >>> klExp(3, 6)  # doctest: +ELLIPSIS
    0.193147...
    >>> klExp(1, 2)  # Only the proportion between x and y is used  # doctest: +ELLIPSIS
    0.193147...
    >>> klExp(2, 1)  # And this KL is non-symmetric  # doctest: +ELLIPSIS
    0.306852...
    >>> klExp(4, 2)  # Only the proportion between x and y is used  # doctest: +ELLIPSIS
    0.306852...
    >>> klExp(6, 8)  # doctest: +ELLIPSIS
    0.037682...

    - x, y have to be positive:

    >>> klExp(-3, 2)
    inf
    >>> klExp(3, -2)
    inf
    >>> klExp(-3, -2)
    inf
    """
    if x <= 0 or y <= 0:
        return inf
    else:
        x = max(x, eps)
        y = max(y, eps)
        return x / y - 1 - log(x / y)


[docs]@jit
def klGamma(x, y, a=1):
    r""" Kullback-Leibler divergence for gamma distributions. https://en.wikipedia.org/wiki/Gamma_distribution#Kullback.E2.80.93Leibler_divergence

    - It is simply the a times :func:`klExp` on x and y.

    .. math::

        \mathrm{KL}(\Gamma(x, a), \Gamma(y, a)) = \begin{cases}
        a \times \left( \frac{x}{y} - 1 - \log(\frac{x}{y}) \right) & \text{if} x > 0, y > 0\\
        +\infty & \text{otherwise}
        \end{cases}

    .. warning:: The two distributions must have the same parameter a.

    >>> klGamma(3, 3)
    0.0
    >>> klGamma(3, 6)  # doctest: +ELLIPSIS
    0.193147...
    >>> klGamma(1, 2)  # Only the proportion between x and y is used  # doctest: +ELLIPSIS
    0.193147...
    >>> klGamma(2, 1)  # And this KL is non-symmetric  # doctest: +ELLIPSIS
    0.306852...
    >>> klGamma(4, 2)  # Only the proportion between x and y is used  # doctest: +ELLIPSIS
    0.306852...
    >>> klGamma(6, 8)  # doctest: +ELLIPSIS
    0.037682...

    - x, y have to be positive:

    >>> klGamma(-3, 2)
    inf
    >>> klGamma(3, -2)
    inf
    >>> klGamma(-3, -2)
    inf
    """
    if x <= 0 or y <= 0:
        return inf
    else:
        x = max(x, eps)
        y = max(y, eps)
        return a * (x / y - 1 - log(x / y))


[docs]@jit
def klNegBin(x, y, r=1):
    r""" Kullback-Leibler divergence for negative binomial distributions. https://en.wikipedia.org/wiki/Negative_binomial_distribution

    .. math:: \mathrm{KL}(\mathrm{NegBin}(x, r), \mathrm{NegBin}(y, r)) = r \times \log((r + x) / (r + y)) - x \times \log(y \times (r + x) / (x \times (r + y))).

    .. warning:: The two distributions must have the same parameter r.

    >>> klNegBin(0.5, 0.5)
    0.0
    >>> klNegBin(0.1, 0.9)  # doctest: +ELLIPSIS
    -0.711611...
    >>> klNegBin(0.9, 0.1)  # And this KL is non-symmetric  # doctest: +ELLIPSIS
    2.0321564...
    >>> klNegBin(0.4, 0.5)  # doctest: +ELLIPSIS
    -0.130653...
    >>> klNegBin(0.01, 0.99)  # doctest: +ELLIPSIS
    -0.717353...

    - Special values:

    >>> klBern(0, 1)  # Should be +inf, but 0 --> eps, 1 --> 1 - eps  # doctest: +ELLIPSIS
    34.539575...

    - With other values for `r`:

    >>> klNegBin(0.5, 0.5, r=2)
    0.0
    >>> klNegBin(0.1, 0.9, r=2)  # doctest: +ELLIPSIS
    -0.832991...
    >>> klNegBin(0.1, 0.9, r=4)  # doctest: +ELLIPSIS
    -0.914890...
    >>> klNegBin(0.9, 0.1, r=2)  # And this KL is non-symmetric  # doctest: +ELLIPSIS
    2.3325528...
    >>> klNegBin(0.4, 0.5, r=2)  # doctest: +ELLIPSIS
    -0.154572...
    >>> klNegBin(0.01, 0.99, r=2)  # doctest: +ELLIPSIS
    -0.836257...
    """
    x = max(x, eps)
    y = max(y, eps)
    return r * log((r + x) / (r + y)) - x * log(y * (r + x) / (x * (r + y)))


[docs]@jit
def klGauss(x, y, sig2x=0.25, sig2y=None):
    r""" Kullback-Leibler divergence for Gaussian distributions of means ``x`` and ``y`` and variances ``sig2x`` and ``sig2y``, :math:`\nu_1 = \mathcal{N}(x, \sigma_x^2)` and :math:`\nu_2 = \mathcal{N}(y, \sigma_x^2)`:

    .. math:: \mathrm{KL}(\nu_1, \nu_2) = \frac{(x - y)^2}{2 \sigma_y^2} + \frac{1}{2}\left( \frac{\sigma_x^2}{\sigma_y^2} - 1 \log\left(\frac{\sigma_x^2}{\sigma_y^2}\right) \right).

    See https://en.wikipedia.org/wiki/Normal_distribution#Other_properties

    - By default, sig2y is assumed to be sig2x (same variance).

    .. warning:: The C version does not support different variances.

    >>> klGauss(3, 3)
    0.0
    >>> klGauss(3, 6)
    18.0
    >>> klGauss(1, 2)
    2.0
    >>> klGauss(2, 1)  # And this KL is symmetric
    2.0
    >>> klGauss(4, 2)
    8.0
    >>> klGauss(6, 8)
    8.0

    - x, y can be negative:

    >>> klGauss(-3, 2)
    50.0
    >>> klGauss(3, -2)
    50.0
    >>> klGauss(-3, -2)
    2.0
    >>> klGauss(3, 2)
    2.0

    - With other values for `sig2x`:

    >>> klGauss(3, 3, sig2x=10)
    0.0
    >>> klGauss(3, 6, sig2x=10)
    0.45
    >>> klGauss(1, 2, sig2x=10)
    0.05
    >>> klGauss(2, 1, sig2x=10)  # And this KL is symmetric
    0.05
    >>> klGauss(4, 2, sig2x=10)
    0.2
    >>> klGauss(6, 8, sig2x=10)
    0.2

    - With different values for `sig2x` and `sig2y`:

    >>> klGauss(0, 0, sig2x=0.25, sig2y=0.5)  # doctest: +ELLIPSIS
    -0.0284...
    >>> klGauss(0, 0, sig2x=0.25, sig2y=1.0)  # doctest: +ELLIPSIS
    0.2243...
    >>> klGauss(0, 0, sig2x=0.5, sig2y=0.25)  # not symmetric here!  # doctest: +ELLIPSIS
    1.1534...

    >>> klGauss(0, 1, sig2x=0.25, sig2y=0.5)  # doctest: +ELLIPSIS
    0.9715...
    >>> klGauss(0, 1, sig2x=0.25, sig2y=1.0)  # doctest: +ELLIPSIS
    0.7243...
    >>> klGauss(0, 1, sig2x=0.5, sig2y=0.25)  # not symmetric here!  # doctest: +ELLIPSIS
    3.1534...

    >>> klGauss(1, 0, sig2x=0.25, sig2y=0.5)  # doctest: +ELLIPSIS
    0.9715...
    >>> klGauss(1, 0, sig2x=0.25, sig2y=1.0)  # doctest: +ELLIPSIS
    0.7243...
    >>> klGauss(1, 0, sig2x=0.5, sig2y=0.25)  # not symmetric here!  # doctest: +ELLIPSIS
    3.1534...

    .. warning:: Using :class:`Policies.klUCB` (and variants) with :func:`klGauss` is equivalent to use :class:`Policies.UCB`, so prefer the simpler version.
    """
    if sig2y is None or - eps < (sig2y - sig2x) < eps:
        return (x - y) ** 2 / (2. * sig2x)
    else:
        return (x - y) ** 2 / (2. * sig2y) + 0.5 * ((sig2x/sig2y)**2 - 1 - log(sig2x/sig2y))


# --- KL functions, for the KL-UCB policy

[docs]@jit
def klucb(x, d, kl, upperbound,
        precision=1e-6, lowerbound=float('-inf'), max_iterations=50,
    ):
    r""" The generic KL-UCB index computation.

    - ``x``: value of the cum reward,
    - ``d``: upper bound on the divergence,
    - ``kl``: the KL divergence to be used (:func:`klBern`, :func:`klGauss`, etc),
    - ``upperbound``, ``lowerbound=float('-inf')``: the known bound of the values ``x``,
    - ``precision=1e-6``: the threshold from where to stop the research,
    - ``max_iterations=50``: max number of iterations of the loop (safer to bound it to reduce time complexity).

    .. math::

        \mathrm{klucb}(x, d) \simeq \sup_{\mathrm{lowerbound} \leq y \leq \mathrm{upperbound}} \{ y : \mathrm{kl}(x, y) < d \}.

    .. note:: It uses a **bisection search**, and one call to ``kl`` for each step of the bisection search.

    For example, for :func:`klucbBern`, the two steps are to first compute an upperbound (as precise as possible) and the compute the kl-UCB index:

    >>> x, d = 0.9, 0.2   # mean x, exploration term d
    >>> upperbound = min(1., klucbGauss(x, d, sig2x=0.25))  # variance 1/4 for [0,1] bounded distributions
    >>> upperbound  # doctest: +ELLIPSIS
    1.0
    >>> klucb(x, d, klBern, upperbound, lowerbound=0, precision=1e-3, max_iterations=10)  # doctest: +ELLIPSIS
    0.9941...
    >>> klucb(x, d, klBern, upperbound, lowerbound=0, precision=1e-6, max_iterations=10)  # doctest: +ELLIPSIS
    0.9944...
    >>> klucb(x, d, klBern, upperbound, lowerbound=0, precision=1e-3, max_iterations=50)  # doctest: +ELLIPSIS
    0.9941...
    >>> klucb(x, d, klBern, upperbound, lowerbound=0, precision=1e-6, max_iterations=100)  # more and more precise!  # doctest: +ELLIPSIS
    0.994489...

    .. note:: See below for more examples for different KL divergence functions.
    """
    value = max(x, lowerbound)
    u = upperbound
    _count_iteration = 0
    while _count_iteration < max_iterations and u - value > precision:
        _count_iteration += 1
        m = (value + u) * 0.5
        if kl(x, m) > d:
            u = m
        else:
            value = m
    return (value + u) * 0.5


[docs]@jit
def klucbBern(x, d, precision=1e-6):
    """ KL-UCB index computation for Bernoulli distributions, using :func:`klucb`.

    - Influence of x:

    >>> klucbBern(0.1, 0.2)  # doctest: +ELLIPSIS
    0.378391...
    >>> klucbBern(0.5, 0.2)  # doctest: +ELLIPSIS
    0.787088...
    >>> klucbBern(0.9, 0.2)  # doctest: +ELLIPSIS
    0.994489...

    - Influence of d:

    >>> klucbBern(0.1, 0.4)  # doctest: +ELLIPSIS
    0.519475...
    >>> klucbBern(0.1, 0.9)  # doctest: +ELLIPSIS
    0.734714...

    >>> klucbBern(0.5, 0.4)  # doctest: +ELLIPSIS
    0.871035...
    >>> klucbBern(0.5, 0.9)  # doctest: +ELLIPSIS
    0.956809...

    >>> klucbBern(0.9, 0.4)  # doctest: +ELLIPSIS
    0.999285...
    >>> klucbBern(0.9, 0.9)  # doctest: +ELLIPSIS
    0.999995...
    """
    upperbound = min(1., klucbGauss(x, d, sig2x=0.25, precision=precision))  # variance 1/4 for [0,1] bounded distributions
    # upperbound = min(1., klucbPoisson(x, d))  # also safe, and better ?
    return klucb(x, d, klBern, upperbound, precision)


[docs]@jit
def klucbGauss(x, d, sig2x=0.25, precision=0.):
    """ KL-UCB index computation for Gaussian distributions.

    - Note that it does not require any search.

    .. warning:: it works only if the good variance constant is given.

    - Influence of x:

    >>> klucbGauss(0.1, 0.2)  # doctest: +ELLIPSIS
    0.416227...
    >>> klucbGauss(0.5, 0.2)  # doctest: +ELLIPSIS
    0.816227...
    >>> klucbGauss(0.9, 0.2)  # doctest: +ELLIPSIS
    1.216227...

    - Influence of d:

    >>> klucbGauss(0.1, 0.4)  # doctest: +ELLIPSIS
    0.547213...
    >>> klucbGauss(0.1, 0.9)  # doctest: +ELLIPSIS
    0.770820...

    >>> klucbGauss(0.5, 0.4)  # doctest: +ELLIPSIS
    0.947213...
    >>> klucbGauss(0.5, 0.9)  # doctest: +ELLIPSIS
    1.170820...

    >>> klucbGauss(0.9, 0.4)  # doctest: +ELLIPSIS
    1.347213...
    >>> klucbGauss(0.9, 0.9)  # doctest: +ELLIPSIS
    1.570820...

    .. warning:: Using :class:`Policies.klUCB` (and variants) with :func:`klucbGauss` is equivalent to use :class:`Policies.UCB`, so prefer the simpler version.
    """
    return x + sqrt(abs(2 * sig2x * d))


[docs]@jit
def klucbPoisson(x, d, precision=1e-6):
    """ KL-UCB index computation for Poisson distributions, using :func:`klucb`.

    - Influence of x:

    >>> klucbPoisson(0.1, 0.2)  # doctest: +ELLIPSIS
    0.450523...
    >>> klucbPoisson(0.5, 0.2)  # doctest: +ELLIPSIS
    1.089376...
    >>> klucbPoisson(0.9, 0.2)  # doctest: +ELLIPSIS
    1.640112...

    - Influence of d:

    >>> klucbPoisson(0.1, 0.4)  # doctest: +ELLIPSIS
    0.693684...
    >>> klucbPoisson(0.1, 0.9)  # doctest: +ELLIPSIS
    1.252796...

    >>> klucbPoisson(0.5, 0.4)  # doctest: +ELLIPSIS
    1.422933...
    >>> klucbPoisson(0.5, 0.9)  # doctest: +ELLIPSIS
    2.122985...

    >>> klucbPoisson(0.9, 0.4)  # doctest: +ELLIPSIS
    2.033691...
    >>> klucbPoisson(0.9, 0.9)  # doctest: +ELLIPSIS
    2.831573...
    """
    upperbound = x + d + sqrt(d * d + 2 * x * d)  # looks safe, to check: left (Gaussian) tail of Poisson dev
    return klucb(x, d, klPoisson, upperbound, precision)


[docs]@jit
def klucbExp(x, d, precision=1e-6):
    """ KL-UCB index computation for exponential distributions, using :func:`klucb`.

    - Influence of x:

    >>> klucbExp(0.1, 0.2)  # doctest: +ELLIPSIS
    0.202741...
    >>> klucbExp(0.5, 0.2)  # doctest: +ELLIPSIS
    1.013706...
    >>> klucbExp(0.9, 0.2)  # doctest: +ELLIPSIS
    1.824671...

    - Influence of d:

    >>> klucbExp(0.1, 0.4)  # doctest: +ELLIPSIS
    0.285792...
    >>> klucbExp(0.1, 0.9)  # doctest: +ELLIPSIS
    0.559088...

    >>> klucbExp(0.5, 0.4)  # doctest: +ELLIPSIS
    1.428962...
    >>> klucbExp(0.5, 0.9)  # doctest: +ELLIPSIS
    2.795442...

    >>> klucbExp(0.9, 0.4)  # doctest: +ELLIPSIS
    2.572132...
    >>> klucbExp(0.9, 0.9)  # doctest: +ELLIPSIS
    5.031795...
    """
    if d < 0.77:  # XXX where does this value come from?
        upperbound = x / (1 + 2. / 3 * d - sqrt(4. / 9 * d * d + 2 * d))
        # safe, klexp(x,y) >= e^2/(2*(1-2e/3)) if x=y(1-e)
    else:
        upperbound = x * exp(d + 1)
    if d > 1.61:  # XXX where does this value come from?
        lowerbound = x * exp(d)
    else:
        lowerbound = x / (1 + d - sqrt(d * d + 2 * d))
    return klucb(x, d, klGamma, upperbound, precision, lowerbound)


# FIXME this one is wrong!
[docs]@jit
def klucbGamma(x, d, precision=1e-6):
    """ KL-UCB index computation for Gamma distributions, using :func:`klucb`.

    - Influence of x:

    >>> klucbGamma(0.1, 0.2)  # doctest: +ELLIPSIS
    0.202...
    >>> klucbGamma(0.5, 0.2)  # doctest: +ELLIPSIS
    1.013...
    >>> klucbGamma(0.9, 0.2)  # doctest: +ELLIPSIS
    1.824...

    - Influence of d:

    >>> klucbGamma(0.1, 0.4)  # doctest: +ELLIPSIS
    0.285...
    >>> klucbGamma(0.1, 0.9)  # doctest: +ELLIPSIS
    0.559...

    >>> klucbGamma(0.5, 0.4)  # doctest: +ELLIPSIS
    1.428...
    >>> klucbGamma(0.5, 0.9)  # doctest: +ELLIPSIS
    2.795...

    >>> klucbGamma(0.9, 0.4)  # doctest: +ELLIPSIS
    2.572...
    >>> klucbGamma(0.9, 0.9)  # doctest: +ELLIPSIS
    5.031...
    """
    if d < 0.77:  # XXX where does this value come from?
        upperbound = x / (1 + 2. / 3 * d - sqrt(4. / 9 * d * d + 2 * d))
        # safe, klexp(x,y) >= e^2/(2*(1-2e/3)) if x=y(1-e)
    else:
        upperbound = x * exp(d + 1)
    if d > 1.61:  # XXX where does this value come from?
        lowerbound = x * exp(d)
    else:
        lowerbound = x / (1 + d - sqrt(d * d + 2 * d))
    # FIXME specify the value for a !
    return klucb(x, d, klGamma, max(upperbound, 1e2), min(-1e2, lowerbound), precision)


# --- KL functions, for the KL Lower Confidence Bound

[docs]@jit
def kllcb(x, d, kl, lowerbound,
        precision=1e-6, upperbound=float('+inf'), max_iterations=50,
    ):
    r""" The generic KL-LCB index computation.

    - ``x``: value of the cum reward,
    - ``d``: lower bound on the divergence,
    - ``kl``: the KL divergence to be used (:func:`klBern`, :func:`klGauss`, etc),
    - ``lowerbound``, ``upperbound=float('-inf')``: the known bound of the values ``x``,
    - ``precision=1e-6``: the threshold from where to stop the research,
    - ``max_iterations=50``: max number of iterations of the loop (safer to bound it to reduce time complexity).

    .. math::

        \mathrm{kllcb}(x, d) \simeq \inf_{\mathrm{lowerbound} \leq y \leq \mathrm{upperbound}} \{ y : \mathrm{kl}(x, y) > d \}.

    .. note:: It uses a **bisection search**, and one call to ``kl`` for each step of the bisection search.

    For example, for :func:`kllcbBern`, the two steps are to first compute an upperbound (as precise as possible) and the compute the kl-UCB index:

    >>> x, d = 0.9, 0.2   # mean x, exploration term d
    >>> lowerbound = max(0., kllcbGauss(x, d, sig2x=0.25))  # variance 1/4 for [0,1] bounded distributions
    >>> lowerbound  # doctest: +ELLIPSIS
    0.5837...
    >>> kllcb(x, d, klBern, lowerbound, upperbound=0, precision=1e-3, max_iterations=10)  # doctest: +ELLIPSIS
    0.29...
    >>> kllcb(x, d, klBern, lowerbound, upperbound=0, precision=1e-6, max_iterations=10)  # doctest: +ELLIPSIS
    0.29188...
    >>> kllcb(x, d, klBern, lowerbound, upperbound=0, precision=1e-3, max_iterations=50)  # doctest: +ELLIPSIS
    0.291886...
    >>> kllcb(x, d, klBern, lowerbound, upperbound=0, precision=1e-6, max_iterations=100)  # more and more precise!  # doctest: +ELLIPSIS
    0.29188611...

    .. note:: See below for more examples for different KL divergence functions.
    """
    value = min(x, upperbound)
    l = lowerbound
    _count_iteration = 0
    while _count_iteration < max_iterations and value - l > precision:
        _count_iteration += 1
        m = (value + l) * 0.5
        if kl(x, m) > d:
            l = m
        else:
            value = m
    return (value + l) * 0.5


[docs]@jit
def kllcbBern(x, d, precision=1e-6):
    """ KL-LCB index computation for Bernoulli distributions, using :func:`kllcb`.

    - Influence of x:

    >>> kllcbBern(0.1, 0.2)  # doctest: +ELLIPSIS
    0.09999...
    >>> kllcbBern(0.5, 0.2)  # doctest: +ELLIPSIS
    0.49999...
    >>> kllcbBern(0.9, 0.2)  # doctest: +ELLIPSIS
    0.89999...

    - Influence of d:

    >>> kllcbBern(0.1, 0.4)  # doctest: +ELLIPSIS
    0.09999...
    >>> kllcbBern(0.1, 0.9)  # doctest: +ELLIPSIS
    0.09999...

    >>> kllcbBern(0.5, 0.4)  # doctest: +ELLIPSIS
    0.4999...
    >>> kllcbBern(0.5, 0.9)  # doctest: +ELLIPSIS
    0.4999...

    >>> kllcbBern(0.9, 0.4)  # doctest: +ELLIPSIS
    0.8999...
    >>> kllcbBern(0.9, 0.9)  # doctest: +ELLIPSIS
    0.8999...
    """
    lowerbound = max(0., kllcbGauss(x, d, sig2x=0.25, precision=precision))  # variance 1/4 for [0,1] bounded distributions
    # lowerbound = max(0., kllcbPoisson(x, d))  # also safe, and better ?
    return kllcb(x, d, klBern, lowerbound, precision)


[docs]@jit
def kllcbGauss(x, d, sig2x=0.25, precision=0.):
    """ KL-LCB index computation for Gaussian distributions.

    - Note that it does not require any search.

    .. warning:: it works only if the good variance constant is given.

    - Influence of x:

    >>> kllcbGauss(0.1, 0.2)  # doctest: +ELLIPSIS
    -0.21622...
    >>> kllcbGauss(0.5, 0.2)  # doctest: +ELLIPSIS
    0.18377...
    >>> kllcbGauss(0.9, 0.2)  # doctest: +ELLIPSIS
    0.58377...

    - Influence of d:

    >>> kllcbGauss(0.1, 0.4)  # doctest: +ELLIPSIS
    -0.3472...
    >>> kllcbGauss(0.1, 0.9)  # doctest: +ELLIPSIS
    -0.5708...

    >>> kllcbGauss(0.5, 0.4)  # doctest: +ELLIPSIS
    0.0527...
    >>> kllcbGauss(0.5, 0.9)  # doctest: +ELLIPSIS
    -0.1708...

    >>> kllcbGauss(0.9, 0.4)  # doctest: +ELLIPSIS
    0.4527...
    >>> kllcbGauss(0.9, 0.9)  # doctest: +ELLIPSIS
    0.2291...

    .. warning:: Using :class:`Policies.kllCB` (and variants) with :func:`kllcbGauss` is equivalent to use :class:`Policies.UCB`, so prefer the simpler version.
    """
    return x - sqrt(abs(2 * sig2x * d))


[docs]@jit
def kllcbPoisson(x, d, precision=1e-6):
    """ KL-LCB index computation for Poisson distributions, using :func:`kllcb`.

    - Influence of x:

    >>> kllcbPoisson(0.1, 0.2)  # doctest: +ELLIPSIS
    0.09999...
    >>> kllcbPoisson(0.5, 0.2)  # doctest: +ELLIPSIS
    0.49999...
    >>> kllcbPoisson(0.9, 0.2)  # doctest: +ELLIPSIS
    0.89999...

    - Influence of d:

    >>> kllcbPoisson(0.1, 0.4)  # doctest: +ELLIPSIS
    0.09999...
    >>> kllcbPoisson(0.1, 0.9)  # doctest: +ELLIPSIS
    0.09999...

    >>> kllcbPoisson(0.5, 0.4)  # doctest: +ELLIPSIS
    0.49999...
    >>> kllcbPoisson(0.5, 0.9)  # doctest: +ELLIPSIS
    0.49999...

    >>> kllcbPoisson(0.9, 0.4)  # doctest: +ELLIPSIS
    0.89999...
    >>> kllcbPoisson(0.9, 0.9)  # doctest: +ELLIPSIS
    0.89999...
    """
    lowerbound = x + d - sqrt(d * d + 2 * x * d)  # looks safe, to check: left (Gaussian) tail of Poisson dev
    return kllcb(x, d, klPoisson, lowerbound, precision)


[docs]@jit
def kllcbExp(x, d, precision=1e-6):
    """ KL-LCB index computation for exponential distributions, using :func:`kllcb`.

    - Influence of x:

    >>> kllcbExp(0.1, 0.2)  # doctest: +ELLIPSIS
    0.15267...
    >>> kllcbExp(0.5, 0.2)  # doctest: +ELLIPSIS
    0.7633...
    >>> kllcbExp(0.9, 0.2)  # doctest: +ELLIPSIS
    1.3740...

    - Influence of d:

    >>> kllcbExp(0.1, 0.4)  # doctest: +ELLIPSIS
    0.2000...
    >>> kllcbExp(0.1, 0.9)  # doctest: +ELLIPSIS
    0.3842...

    >>> kllcbExp(0.5, 0.4)  # doctest: +ELLIPSIS
    1.0000...
    >>> kllcbExp(0.5, 0.9)  # doctest: +ELLIPSIS
    1.9214...

    >>> kllcbExp(0.9, 0.4)  # doctest: +ELLIPSIS
    1.8000...
    >>> kllcbExp(0.9, 0.9)  # doctest: +ELLIPSIS
    3.4586...
    """
    if d < 0.77:  # XXX where does this value come from?
        lowerbound = x / (1 + 2. / 3 * d - sqrt(4. / 9 * d * d + 2 * d))
        # safe, klexp(x,y) >= e^2/(2*(1-2e/3)) if x=y(1-e)
    else:
        lowerbound = x * exp(d + 1)
    if d > 1.61:  # XXX where does this value come from?
        upperbound = x * exp(d)
    else:
        upperbound = x / (1 + d - sqrt(d * d + 2 * d))
    return kllcb(x, d, klGamma, lowerbound, precision, upperbound)


# # FIXME this one is wrong!
# @jit
# def kllcbGamma(x, d, precision=1e-6):
#     """ KL-LCB index computation for Gamma distributions, using :func:`kllcb`.

#     - Influence of x:

#     >>> kllcbGamma(0.1, 0.2)  # doctest: +ELLIPSIS
#     0.202...
#     >>> kllcbGamma(0.5, 0.2)  # doctest: +ELLIPSIS
#     1.013...
#     >>> kllcbGamma(0.9, 0.2)  # doctest: +ELLIPSIS
#     1.824...

#     - Influence of d:

#     >>> kllcbGamma(0.1, 0.4)  # doctest: +ELLIPSIS
#     0.285...
#     >>> kllcbGamma(0.1, 0.9)  # doctest: +ELLIPSIS
#     0.559...

#     >>> kllcbGamma(0.5, 0.4)  # doctest: +ELLIPSIS
#     1.428...
#     >>> kllcbGamma(0.5, 0.9)  # doctest: +ELLIPSIS
#     2.795...

#     >>> kllcbGamma(0.9, 0.4)  # doctest: +ELLIPSIS
#     2.572...
#     >>> kllcbGamma(0.9, 0.9)  # doctest: +ELLIPSIS
#     5.031...
#     """
#     if d < 0.77:  # XXX where does this value come from?
#         lowerbound = x / (1 + 2. / 3 * d - sqrt(4. / 9 * d * d + 2 * d))
#         # safe, klexp(x,y) >= e^2/(2*(1-2e/3)) if x=y(1-e)
#     else:
#         lowerbound = x * exp(d + 1)
#     if d > 1.61:  # XXX where does this value come from?
#         upperbound = x * exp(d)
#     else:
#         upperbound = x / (1 + d - sqrt(d * d + 2 * d))
#     # FIXME specify the value for a !
#     return kllcb(x, d, klGamma, min(lowerbound, -1e2), precision, max(1e2, upperbound))


# --- max EV functions

#@jit
[docs]def maxEV(p, V, klMax):
    r""" Maximize expectation of :math:`V` with respect to :math:`q` st. :math:`\mathrm{KL}(p, q) < \text{klMax}`.

    - Input args.: p, V, klMax.
    - Reference: Section 3.2 of [Filippi, Cappé & Garivier - Allerton, 2011](https://arxiv.org/pdf/1004.5229.pdf).
    """
    Uq = np.zeros(len(p))
    Kb = p > 0.
    K = ~Kb
    if any(K):
        # Do we need to put some mass on a point where p is zero?
        # If yes, this has to be on one which maximizes V.
        eta = np.max(V[K])
        J = K & (V == eta)
        if eta > np.max(V[Kb]):
            y = np.dot(p[Kb], np.log(eta - V[Kb])) + log(np.dot(p[Kb], (1. / (eta - V[Kb]))))
            # print("eta = ", eta, ", y = ", y)
            if y < klMax:
                rb = exp(y - klMax)
                Uqtemp = p[Kb] / (eta - V[Kb])
                Uq[Kb] = rb * Uqtemp / np.sum(Uqtemp)
                Uq[J] = (1. - rb) / np.sum(J)
                # or j = min([j for j in range(k) if J[j]])
                # Uq[j] = r
                return Uq
    # Here, only points where p is strictly positive (in Kb) will receive non-zero mass.
    if any(np.abs(V[Kb] - V[Kb][0]) > 1e-8):
        eta = reseqp(p[Kb], V[Kb], klMax)  # (eta = nu in the article)
        Uq = p / (eta - V)
        Uq = Uq / np.sum(Uq)
    else:
        # Case where all values in V(Kb) are almost identical.
        Uq[Kb] = 1.0 / len(Kb)
    return Uq


#@jit
[docs]def reseqp(p, V, klMax, max_iterations=50):
    """ Solve ``f(reseqp(p, V, klMax)) = klMax``, using Newton method.

    .. note:: This is a subroutine of :func:`maxEV`.

    - Reference: Eq. (4) in Section 3.2 of [Filippi, Cappé & Garivier - Allerton, 2011](https://arxiv.org/pdf/1004.5229.pdf).

    .. warning:: `np.dot` is very slow!
    """
    MV = np.max(V)
    mV = np.min(V)
    value = MV + 0.1
    tol = 1e-4
    if MV < mV + tol:
        return float('inf')
    u = np.dot(p, (1 / (value - V)))
    y = np.dot(p, np.log(value - V)) + log(u) - klMax
    print("value =", value, ", y = ", y)  # DEBUG
    _count_iteration = 0
    while _count_iteration < max_iterations and np.abs(y) > tol:
        _count_iteration += 1
        yp = u - np.dot(p, (1 / (value - V)**2)) / u  # derivative
        value -= y / yp
        print("value = ", value)  # DEBUG  # newton iteration
        if value < MV:
            value = (value + y / yp + MV) / 2  # unlikely, but not impossible
        u = np.dot(p, (1 / (value - V)))
        y = np.dot(p, np.log(value - V)) + np.log(u) - klMax
        print("value = ", value, ", y = ", y)  # DEBUG  # function
    return value


[docs]def reseqp2(p, V, klMax):
    """ Solve f(reseqp(p, V, klMax)) = klMax, using a blackbox minimizer, from scipy.optimize.

    - FIXME it does not work well yet!

    .. note:: This is a subroutine of :func:`maxEV`.

    - Reference: Eq. (4) in Section 3.2 of [Filippi, Cappé & Garivier - Allerton, 2011].

    .. warning:: `np.dot` is very slow!
    """
    MV = np.max(V)
    mV = np.min(V)
    tol = 1e-4
    value0 = mV + 0.1

    #@jit  # TODO try numba.jit() on this function
    def f(value):
        """ Function fo to minimize."""
        if MV < mV + tol:
            y = float('inf')
        else:
            u = np.dot(p, (1 / (value - V)))
            y = np.dot(p, np.log(value - V)) + log(u)
        return np.abs(y - klMax)

    res = optimize.minimize(f, value0)
    print("scipy.optimize.minimize returned", res)
    return res.x if hasattr(res, 'x') else res


# --- Debugging

if __name__ == "__main__":
    # Code for debugging purposes.
    from doctest import testmod
    print("\nTesting automatically all the docstring written in each functions of this module :")
    testmod(verbose=True)

    # import matplotlib.pyplot as plt
    # t = np.linspace(0, 1)
    # plt.subplot(2, 1, 1)
    # plt.plot(t, kl(t, 0.6))
    # plt.subplot(2, 1, 2)
    # d = np.linspace(0, 1, 100)
    # plt.plot(d, [klucb(0.3, dd) for dd in d])
    # plt.show()

    print("\nklucbGauss(0.9, 0.2) =", klucbGauss(0.9, 0.2))
    print("klucbBern(0.9, 0.2) =", klucbBern(0.9, 0.2))
    print("klucbPoisson(0.9, 0.2) =", klucbPoisson(0.9, 0.2))

    p = np.array([0.5, 0.5])
    print("\np =", p)
    V = np.array([10, 3])
    print("V =", V)
    klMax = 0.1
    print("klMax =", klMax)
    print("eta = ", reseqp(p, V, klMax))
    # print("eta 2 = ", reseqp2(p, V, klMax))
    print("Uq = ", maxEV(p, V, klMax))

    print("\np =", p)
    p = np.array([0.11794872, 0.27948718, 0.31538462, 0.14102564, 0.0974359, 0.03076923, 0.00769231, 0.01025641, 0.])
    print("V =", V)
    V = np.array([0, 1, 2, 3, 4, 5, 6, 7, 10])
    klMax = 0.0168913409484
    print("klMax =", klMax)
    print("eta = ", reseqp(p, V, klMax))
    # print("eta 2 = ", reseqp2(p, V, klMax))
    print("Uq = ", maxEV(p, V, klMax))

    x = 2
    print("\nx =", x)
    d = 2.51
    print("d =", d)
    print("klucbExp(x, d) = ", klucbExp(x, d))

    ub = x / (1 + 2. / 3 * d - sqrt(4. / 9 * d * d + 2 * d))
    print("Upper bound = ", ub)
    print("Stupid upperbound = ", x * exp(d + 1))

    print("\nDone for tests of 'kullback.py' ...")