Source code for Policies.DoublingTrickWrapper

# -*- coding: utf-8 -*-
r""" A policy that acts as a wrapper on another policy `P`, assumed to be *horizon dependent* (has to known :math:`T`), by implementing a "doubling trick":

- starts to assume that :math:`T=T_0=1000`, and run the policy :math:`P(T_0)`, from :math:`t=1` to :math:`t=T_0`,
- if :math:`t > T_0`, then the "doubling trick" is performed, by either re-initializing or just changing the parameter `horizon` of the policy P, for instance with :math:`T_2 = 10 \times T_0`,
- and keep doing this until :math:`t = T`.

.. note::

   This is implemented in a very generic way, with simply a function `next_horizon(horizon)` that gives the next horizon to try when crossing the current guess.
   It can be a simple linear function (`next_horizon(horizon) = horizon + 100`), a geometric growth to have the "real" doubling trick (`next_horizon(horizon) = horizon * 10`), or even functions growing exponentially fast (`next_horizon(horizon) = horizon ** 1.1`, `next_horizon(horizon) = horizon ** 1.5`, `next_horizon(horizon) = horizon ** 2`).

.. note::

   My guess is that this "doubling trick" wrapping policy can only be efficient (for stochastic problems) if:

   - the underlying policy `P` is a very efficient horizon-dependent algorithm, e.g., the :class:`Policies.ApproximatedFHGittins`,
   - the growth function `next_horizon` is growing faster than any geometric rate, so that the number of refresh is :math:`o(\log T)` and not :math:`O(\log T)`.

.. seealso::

   Reference: [[What the Doubling Trick Can or Can't Do for Multi-Armed Bandits, Lilian Besson and Emilie Kaufmann, 2018]](https://hal.inria.fr/hal-01736357), to be presented soon.

.. warning::

   Interface: If `FULL_RESTART=False` (default), the underlying algorithm is recreated at every breakpoint,
   instead its attribute `horizon` or `_horizon` is updated. Be sure that this is enough to really
   change the internal value used by the policy. Some policy use T only once to compute others parameters,
   which should be updated as well. A manual implementation of the `__setattr__` method can help.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"


import numpy as np
try:
    from .BaseWrapperPolicy import BaseWrapperPolicy
    from .UCBH import UCBH
except ImportError:
    from BaseWrapperPolicy import BaseWrapperPolicy
    from UCBH import UCBH
try:
    from .usenumba import jit  # Import numba.jit or a dummy jit(f)=f
except (ValueError, ImportError, SystemError):
    from usenumba import jit  # Import numba.jit or a dummy jit(f)=f


#: Default horizon-dependent policy
default_horizonDependent_policy = UCBH

#: Default constant to know what to do when restarting the underlying policy with a new horizon parameter.
#:
#: - `True` means that a new policy, initialized from scratch, will be created at every breakpoint.
#: - `False` means that the same policy object is used but just its attribute `horizon` is updated (default).
FULL_RESTART = True
FULL_RESTART = False



#: Default horizon, used for the first step.
DEFAULT_FIRST_HORIZON = 200


#: Default stepsize for the arithmetic horizon progression.
ARITHMETIC_STEP = 10 * DEFAULT_FIRST_HORIZON
ARITHMETIC_STEP = 1 * DEFAULT_FIRST_HORIZON


[docs]@jit def next_horizon__arithmetic(i, horizon): r""" The arithmetic horizon progression function: .. math:: T &\mapsto T + 100,\\ T_i &:= T_0 + 100 \times i. """ return horizon + ARITHMETIC_STEP
next_horizon__arithmetic.__latex_name__ = "arithm" next_horizon__arithmetic.__latex_name__ = r"$T_i = {} + {} \times i$".format(DEFAULT_FIRST_HORIZON, ARITHMETIC_STEP) #: Default multiplicative constant for the geometric horizon progression. GEOMETRIC_STEP = 2
[docs]@jit def next_horizon__geometric(i, horizon): r""" The geometric horizon progression function: .. math:: T &\mapsto T \times 2,\\ T_i &:= T_0 2^i. """ return horizon * GEOMETRIC_STEP
next_horizon__geometric.__latex_name__ = "geom" next_horizon__geometric.__latex_name__ = r"$T_i = {} \times {}^i$".format(DEFAULT_FIRST_HORIZON, GEOMETRIC_STEP) #: Default exponential constant for the exponential horizon progression. EXPONENTIAL_STEP = 1.5
[docs]@jit def next_horizon__exponential(i, horizon): r""" The exponential horizon progression function: .. math:: T &\mapsto \left\lfloor T^{1.5} \right\rfloor,\\ T_i &:= \left\lfloor T_0^{1.5^i} \right\rfloor. """ return int(np.floor(horizon ** EXPONENTIAL_STEP))
next_horizon__exponential.__latex_name__ = "exp" next_horizon__exponential.__latex_name__ = r"$T_i = {}^{}$".format(DEFAULT_FIRST_HORIZON, r"{%.3g^i}" % EXPONENTIAL_STEP) #: Default exponential constant for the slow exponential horizon progression. SLOW_EXPONENTIAL_STEP = 1.1
[docs]@jit def next_horizon__exponential_slow(i, horizon): r""" The exponential horizon progression function: .. math:: T &\mapsto \left\lfloor T^{1.1} \right\rfloor,\\ T_i &:= \left\lfloor T_0^{1.1^i} \right\rfloor. """ return int(np.floor(horizon ** SLOW_EXPONENTIAL_STEP))
next_horizon__exponential_slow.__latex_name__ = "slow exp" next_horizon__exponential_slow.__latex_name__ = r"$T_i = {}^{}$".format(DEFAULT_FIRST_HORIZON, r"{%.3g^i}" % SLOW_EXPONENTIAL_STEP) #: Default exponential constant for the fast exponential horizon progression. FAST_EXPONENTIAL_STEP = 2
[docs]@jit def next_horizon__exponential_fast(i, horizon): r""" The exponential horizon progression function: .. math:: T &\mapsto \lfloor T^{2} \rfloor,\\ T_i &:= \lfloor T_0^{2^i} \rfloor. """ return int(np.floor(horizon ** 2))
next_horizon__exponential_fast.__latex_name__ = "fast exp" next_horizon__exponential_fast.__latex_name__ = r"$T_i = {}^{}$".format(DEFAULT_FIRST_HORIZON, r"{%.3g^i}" % FAST_EXPONENTIAL_STEP) #: Default constant :math:`\alpha` for the generic exponential sequence. ALPHA = 2 #: Default constant :math:`\beta` for the generic exponential sequence. BETA = 2
[docs]def next_horizon__exponential_generic(i, horizon): r""" The generic exponential horizon progression function: .. math:: T_i := \left\lfloor \frac{T_0}{a} a^{b^i} \right\rfloor. """ return int((DEFAULT_FIRST_HORIZON / ALPHA) * ALPHA ** (BETA ** i))
# return int(ALPHA * np.floor(horizon ** BETA)) next_horizon__exponential_generic.__latex_name__ = r"exp $a={:.3g}$, $b={:.3g}$".format(ALPHA, BETA) next_horizon__exponential_generic.__latex_name__ = r"$T_i = ({}/{}) {}^{}$".format(DEFAULT_FIRST_HORIZON, ALPHA, ALPHA, r"{%.3g^i}" % BETA) #: Chose the default horizon growth function. # default_next_horizon = next_horizon__arithmetic # default_next_horizon = next_horizon__geometric # default_next_horizon = next_horizon__geometric # default_next_horizon = next_horizon__exponential_fast default_next_horizon = next_horizon__exponential_slow # --- Utility function
[docs]def breakpoints(next_horizon, first_horizon, horizon, debug=False): r""" Return the list of restart point (breakpoints), if starting from ``first_horizon`` to ``horizon`` with growth function ``next_horizon``. - Also return the gap between the last guess for horizon and the true horizon. This gap should not be too large. - Nicely print all the values if ``debug=True``. - First examples: >>> first_horizon = 1000 >>> horizon = 30000 >>> breakpoints(next_horizon__arithmetic, first_horizon, horizon) # doctest: +ELLIPSIS ([1000, 1200, 1400, ..., 29800, 30000], 0) >>> breakpoints(next_horizon__geometric, first_horizon, horizon) ([1000, 2000, 4000, 8000, 16000, 32000], 2000) >>> breakpoints(next_horizon__exponential, first_horizon, horizon) ([1000, 31622], 1622) >>> breakpoints(next_horizon__exponential_slow, first_horizon, horizon) ([1000, 1995, 4265, 9838, 24671, 67827], 37827) >>> breakpoints(next_horizon__exponential_fast, first_horizon, horizon) ([1000, 1000000], 970000) - Second examples: >>> first_horizon = 5000 >>> horizon = 1000000 >>> breakpoints(next_horizon__arithmetic, first_horizon, horizon) # doctest: +ELLIPSIS ([5000, 5200, ..., 999600, 999800, 1000000], 0) >>> breakpoints(next_horizon__geometric, first_horizon, horizon) ([5000, 10000, 20000, 40000, 80000, 160000, 320000, 640000, 1280000], 280000) >>> breakpoints(next_horizon__exponential, first_horizon, horizon) ([5000, 353553, 210223755], 209223755) >>> breakpoints(next_horizon__exponential_slow, first_horizon, horizon) ([5000, 11718, 29904, 83811, 260394, 906137, 3572014], 2572014) >>> breakpoints(next_horizon__exponential_fast, first_horizon, horizon) ([5000, 25000000], 24000000) - Third examples: >>> first_horizon = 10 >>> horizon = 1123456 >>> breakpoints(next_horizon__arithmetic, first_horizon, horizon) # doctest: +ELLIPSIS ([10, 210, 410, ..., 1123210, 1123410, 1123610], 154) >>> breakpoints(next_horizon__geometric, first_horizon, horizon) ([10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920, 163840, 327680, 655360, 1310720], 187264) >>> breakpoints(next_horizon__exponential, first_horizon, horizon) ([10, 31, 172, 2255, 107082, 35040856], 33917400) >>> breakpoints(next_horizon__exponential_slow, first_horizon, horizon) ([10, 12, 15, 19, 25, 34, 48, 70, 107, 170, 284, 499, 928, 1837, 3895, 8903, 22104, 60106, 180638, 606024, 2294768], 1171312) >>> breakpoints(next_horizon__exponential_fast, first_horizon, horizon) ([10, 100, 10000, 100000000], 98876544) """ i = 0 t = max(first_horizon, 2) times = [t] if debug: print("\n\nFor the growth function {}, named '{}', first guess of the horizon = {} and true horizon = {} ...\n ==> The times will be:".format(next_horizon, getattr(next_horizon, '__latex_name__', '?'), first_horizon, horizon)) while t < horizon: t = next_horizon(i, t) i += 1 times.append(t) if debug: print(" The {}th breakpoint is {} ...".format(i, t)) # DEBUG assert horizon <= t, "Error: the last guess for horizon = {} was found smaller than the true horizon = {}...".format(t, horizon) # DEBUG gap = t - horizon if debug: print("This last guess for horizon = {} gives a gap = {} against the true horizon {}. Relative difference = {:.3%}...".format(t, gap, horizon, gap / float(horizon))) # DEBUG return times, gap
# --- Experimental code to plot some doubling sequences and # check numerically some inequalities : # like controlling a sum Sigma_i=0^n u_i by a constant times to last term u_n # and controlling the last term u_{L_T} as a function of T. #: The constant c in front of the function f. constant_c_for_the_functions_f = 1.0 constant_c_for_the_functions_f = 0.1 constant_c_for_the_functions_f = 0.5
[docs]def function_f__for_geometric_sequences(i, c=constant_c_for_the_functions_f): r""" For the *geometric* doubling sequences, :math:`f(i) = c \times \log(i)`.""" if i <= 0: return 0.0 return c * np.log(i)
[docs]def function_f__for_exponential_sequences(i, c=constant_c_for_the_functions_f): r""" For the *exponential* doubling sequences, :math:`f(i) = c \times i`.""" return c * i
[docs]def function_f__for_generic_sequences(i, c=constant_c_for_the_functions_f, d=0.5, e=0.0): r""" For a certain *generic* family of doubling sequences, :math:`f(i) = c \times i^{d} \times (\log(i))^{e}`. - ``d, e = 0, 1`` gives :func:`function_f__for_geometric_sequences`, - ``d, e = 1, 0`` gives :func:`function_f__for_geometric_sequences`, - ``d, e = 0.5, 0`` gives an intermediate sequence, growing faster than any geometric sequence and slower than any exponential sequence, - any other combination has not been studied yet. .. warning:: ``d`` should most probably be smaller than 1. """ i = float(i) if i <= 0: return 0.0 if e == 0: assert d > 0, "Error: invalid value of d = {} for function_f__for_generic_sequences.".format(d) # DEBUG return c * (i ** d) if d == 0: assert e > 0, "Error: invalid value of e = {} for function_f__for_generic_sequences.".format(e) # DEBUG return c * ((np.log(i)) ** e) return c * (i ** d) * ((np.log(i)) ** e)
[docs]def function_f__for_intermediate_sequences(i): return function_f__for_generic_sequences(i, c=constant_c_for_the_functions_f, d=0.5, e=0.0)
[docs]def function_f__for_intermediate2_sequences(i): return function_f__for_generic_sequences(i, c=constant_c_for_the_functions_f, d=0.3333, e=0.0)
[docs]def function_f__for_intermediate3_sequences(i): return function_f__for_generic_sequences(i, c=constant_c_for_the_functions_f, d=0.6667, e=0.0)
[docs]def function_f__for_intermediate4_sequences(i): return function_f__for_generic_sequences(i, c=constant_c_for_the_functions_f, d=0.5, e=0.5)
[docs]def function_f__for_intermediate5_sequences(i): return function_f__for_generic_sequences(i, c=constant_c_for_the_functions_f, d=1, e=-1)
#: Value of the parameter :math:`\alpha` for the :func:`Ti_from_f` function. alpha_for_Ti = 0.1 alpha_for_Ti = 1.0 alpha_for_Ti = 0.5
[docs]def Ti_from_f(f, alpha=alpha_for_Ti, *args, **kwargs): r""" For any non-negative and increasing function :math:`f: i \mapsto f(i)`, the corresponding sequence is defined by: .. math:: \forall i\in\mathbb{N},\; T_i := \lfloor \exp(\alpha \times \exp(f(i))) \rfloor. .. warning:: :math:`f(i)` can need other parameters, see the examples above. They can be given as ``*args`` or ``**kwargs`` to :func:`Ti_from_f`. .. warning:: it should be computed otherwise, I should give :math:`i \mapsto \exp(f(i))` instead of :math:`f: i \mapsto f(i)`. I need to try as much as possible to reduce the risk of overflow errors! """ # WARNING don't forget the floor! def Ti(i): this_Ti = np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) # print(" For f = {}, i = {} gives Ti = {}".format(f, i, this_Ti)) # DEBUG return this_Ti return Ti
[docs]def Ti_geometric(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_geometric_sequences`.""" f = function_f__for_geometric_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_geometric.__latex_name__ = r"$f(i)=\log(i)$"
[docs]def Ti_exponential(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_exponential_sequences`.""" f = function_f__for_exponential_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_exponential.__latex_name__ = r"$f(i)=i$"
[docs]def Ti_intermediate_sqrti(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_intermediate_sequences`.""" f = function_f__for_intermediate_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_intermediate_sqrti.__latex_name__ = r"$f(i)=\sqrt{i}$"
[docs]def Ti_intermediate_i13(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_intermediate2_sequences`.""" f = function_f__for_intermediate2_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_intermediate_i13.__latex_name__ = r"$f(i)=i^{1/3}$"
[docs]def Ti_intermediate_i23(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_intermediate3_sequences`.""" f = function_f__for_intermediate3_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_intermediate_i23.__latex_name__ = r"$f(i)=i^{2/3}$"
[docs]def Ti_intermediate_i12_logi12(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_intermediate4_sequences`.""" f = function_f__for_intermediate4_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_intermediate_i12_logi12.__latex_name__ = r"$f(i)=\sqrt{i \log(i)}$"
[docs]def Ti_intermediate_i_by_logi(i, horizon, alpha=alpha_for_Ti, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): """ Sequence :math:`T_i` generated from the function :math:`f` = :func:`function_f__for_intermediate5_sequences`.""" f = function_f__for_intermediate5_sequences this_Ti = first_horizon + np.floor(np.exp(alpha * np.exp(f(float(i + 1), *args, **kwargs)))) if not (np.isinf(this_Ti) or np.isnan(this_Ti)): this_Ti = int(this_Ti) return this_Ti
Ti_intermediate_i_by_logi.__latex_name__ = r"$f(i)=i / \log(i)$"
[docs]def last_term_operator_LT(Ti, max_i=10000): r""" For a certain function representing a doubling sequence, :math:`T: i \mapsto T_i`, this :func:`last_term_operator_LT` function returns the function :math:`L: T \mapsto L_T`, defined as: .. math:: \forall T\in\mathbb{N},\; L_T := \min\{ i \in\mathbb{N},\; T \leq T_i \}. :math:`L_T` is the only integer which satisfies :math:`T_{L_T - 1} < T \leq T_{L_T}`. """ def LT(T, max_i=max_i): i = 0 while Ti(i) < T: i += 1 if i >= max_i: raise ValueError("LT(T={T}) was unable to find a i <= {max_i} such that T_i >= T.".format(T, max_i)) # DEBUG assert Ti(i - 1) < T <= Ti(i), "Error: i = {} was computed as LT for T = {} and Ti = {} but does not satisfy T_(i-1) < T <= T(i)".format(i, T, Ti) # DEBUG # print(" For LT: i = {} was computed as LT for T = {} and Ti = {} and satisfies T(i-1) = {} < T <= T(i) = {}".format(i, T, Ti, Ti(i-1), Ti(i))) # DEBUG return i return LT
import matplotlib.pyplot as plt import seaborn as sns
[docs]def plot_doubling_sequences( i_min=1, i_max=30, list_of_f=( function_f__for_geometric_sequences, function_f__for_intermediate_sequences, function_f__for_intermediate2_sequences, function_f__for_intermediate3_sequences, function_f__for_intermediate4_sequences, function_f__for_exponential_sequences, ), label_of_f=( "Geometric doubling (d=0, e=1)", "Intermediate doubling (d=1/2, e=0)", "Intermediate doubling (d=1/3, e=0)", "Intermediate doubling (d=2/3, e=0)", "Intermediate doubling (d=1/2, e=1/2)", "Exponential doubling (d=1, e=0)", ), *args, **kwargs ): r""" Display a plot to illustrate the values of the :math:`T_i` as a function of :math:`i` for some i. - Can accept many functions f (and labels). """ # Make unique markers nb = len(list_of_f) allmarkers = ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>'] longlist = allmarkers * (1 + int(nb / float(len(allmarkers)))) # Cycle the good number of time markers = longlist[:nb] # Truncate # Make unique colors colors = sns.hls_palette(nb + 1)[:nb] fig = plt.figure() # plt.hold(True) i_s = np.arange(i_min, i_max) # now for each function f for num_f, (f, la) in enumerate(zip(list_of_f, label_of_f)): print("\n\nThe {}th function is referred to as {} and is {}".format(num_f, la, f)) # DEBUG Ti = Ti_from_f(f) values_of_Ti = np.array([ Ti(i) for i in i_s ]) plt.plot(i_s, values_of_Ti, label=la, lw=3, ms=3, color=colors[num_f], marker=markers[num_f]) plt.legend() plt.xlabel(r"Value of the time horizon $i = {},...,{}$".format(i_min, i_max)) plt.title(r"Comparison of the values of $T_i$") plt.show() return fig
[docs]def plot_quality_first_upper_bound( Tmin=10, Tmax=int(1e8), nbTs=100, gamma=0.0, delta=1.0, # XXX bound in RT <= log(T) # gamma=0.5, delta=0.0, # XXX bound in RT <= sqrt(T) # gamma=0.5, delta=0.5, # XXX bound in RT <= sqrt(T * log(T)) # gamma=0.66667, delta=1.0, # XXX another weird bound in RT <= T^2/3 * log(T) list_of_f=( function_f__for_geometric_sequences, function_f__for_intermediate_sequences, function_f__for_intermediate2_sequences, function_f__for_intermediate3_sequences, function_f__for_intermediate4_sequences, function_f__for_exponential_sequences, ), label_of_f=( "Geometric doubling (d=0, e=1)", "Intermediate doubling (d=1/2, e=0)", "Intermediate doubling (d=1/3, e=0)", "Intermediate doubling (d=2/3, e=0)", "Intermediate doubling (d=1/2, e=1/2)", "Exponential doubling (d=1, e=0)", ), show_Ti_m_Tim1=True, # show_Ti_m_Tim1=False, # DEBUG *args, **kwargs ): r""" Display a plot to compare numerically between the following sum :math:`S` and the upper-bound we hope to have, :math:`T^{\gamma} (\log T)^{\delta}`, as a function of :math:`T` for some values between :math:`T_{\min}` and :math:`T_{\max}`: .. math:: S := \sum_{i=0}^{L_T} (T_i - T_{i-1})^{\gamma} (\log (T_i - T_{i-1}))^{\delta}. - Can accept many functions f (and labels). - Can use :math:`T_i` instead of :math:`T_i - T_{i-1}` if ``show_Ti_m_Tim1=False`` (default is to use the smaller possible bound, with difference of sequence lengths, :math:`T_i - T_{i-1}`). .. warning:: This is still ON GOING WORK. """ # Make unique markers nb = len(list_of_f) allmarkers = ['o', 'D', 'v', 'p', '<', 's', '^', '*', 'h', '>'] longlist = allmarkers * (1 + int(nb / float(len(allmarkers)))) # Cycle the good number of time markers = longlist[:nb] # Truncate # Make unique colors colors = sns.hls_palette(nb + 1)[:nb] fig = plt.figure() # plt.hold(True) Ts = np.floor(np.linspace(Tmin, Tmax, num=nbTs)) the_bound_we_want = (Ts ** gamma) * (np.log(Ts) ** delta) # plt.plot(Ts, the_bound_we_want, label=r"$T^{\gamma} (\log T)^{\delta}$", lw=3, ms=3, color=colors[0], marker=markers[0]) # compute the sequence lengths to use, either T_i or T_i - T_{i-1} Ts_for_f = np.copy(Ts) if show_Ti_m_Tim1: Ts_for_f[1:] = np.diff(Ts) # now for each function f for num_f, (f, la) in enumerate(zip(list_of_f, label_of_f)): print("\n\nThe {}th function is referred to as {} and is {}".format(num_f, la, f)) # DEBUG Ti = Ti_from_f(f) LT = last_term_operator_LT(Ti) the_sum_we_have = np.zeros_like(Ts_for_f) for j, (Tj, dTj) in enumerate(zip(Ts, Ts_for_f)): LTj = LT(Tj) the_sum_we_have[j] = sum( (dTj ** gamma) * (np.log(dTj) ** delta) for i in range(0, LTj + 1) ) print("For j = {}, Tj = {}, dTj = {}, gives LTj = {}, and the value of the sum from i=0 to LTj is = {}.".format(j, Tj, dTj, LTj, the_sum_we_have[j])) # DEBUG print("the_sum_we_have =", the_sum_we_have) # DEBUG plt.plot(Ts, the_sum_we_have / the_bound_we_want, label=la, lw=3, ms=3, color=colors[num_f], marker=markers[num_f]) plt.legend() plt.xlabel(r"Value of the time horizon $T = {},...,{}$".format(Tmin, Tmax)) str_of_Tj_or_dTj = "T_i - T_{i-1}" if show_Ti_m_Tim1 else "T_i" plt.title(r"Ratio of the sum $\sum_{i=0}^{L_T} (%s)^{\gamma} (\log(%s))^{\delta}$ and the upper-bound $T^{\gamma} \log(T)^{\delta}$, for $\gamma=%.3g$, $\delta=%.3g$." % (str_of_Tj_or_dTj, str_of_Tj_or_dTj, gamma, delta)) # DEBUG plt.show() return fig
# --- The interesting class #: If the sequence Ti does not grow enough, artificially increase i until T_inext > T_i MAX_NB_OF_TRIALS = 500
[docs]class DoublingTrickWrapper(BaseWrapperPolicy): r""" A policy that acts as a wrapper on another policy `P`, assumed to be *horizon dependent* (has to known :math:`T`), by implementing a "doubling trick". - Reference: [[What the Doubling Trick Can or Can't Do for Multi-Armed Bandits, Lilian Besson and Emilie Kaufmann, 2018]](https://hal.inria.fr/hal-01736357), to be presented soon. """
[docs] def __init__(self, nbArms, full_restart=FULL_RESTART, policy=default_horizonDependent_policy, next_horizon=default_next_horizon, first_horizon=DEFAULT_FIRST_HORIZON, *args, **kwargs): super(DoublingTrickWrapper, self).__init__(nbArms, policy=policy, *args, **kwargs) self.full_restart = full_restart #: Constant to know how to refresh the underlying policy. # --- Horizon self._i = 0 self._next_horizon = next_horizon # Function for the growing horizon self.next_horizon_name = getattr(next_horizon, '__latex_name__', '?') #: Pretty string of the name of this growing function self._first_horizon = max(2, first_horizon) # First guess for the horizon self.horizon = max(2, first_horizon) #: Last guess for the horizon # XXX Force it, just for pretty printing... self.startGame()
# --- pretty printing
[docs] def __str__(self): # remove the T0 part from string representation of the policy str_policy = str(self.policy) str_policy = str_policy.replace(r"($T={}$)".format(self._first_horizon), "") str_policy = str_policy.replace(r"$T={}$, ".format(self._first_horizon), "") return r"{}({})[{}]".format("DT" if self.full_restart else "DTnr", self.next_horizon_name, str_policy)
# --- Start game by creating new underlying policy
[docs] def startGame(self): """ Initialize the policy for a new game.""" super(BaseWrapperPolicy, self).startGame() # super(DoublingTrickWrapper, self).startGame() # WARNING no self._i = 0 # reinitialize this self.horizon = self._first_horizon #: Last guess for the horizon try: self.policy = self._policy(self.nbArms, horizon=self.horizon, lower=self.lower, amplitude=self.amplitude, *self._args, **self._kwargs) except Exception as e: print("WARNING: Received exception {} when trying to create the underlying policy... maybe the 'horizon={}' keyword argument was not understood correctly? Retrying without it...".format(e, self.horizon)) # DEBUG self.policy = self._policy(self.nbArms, lower=self.lower, amplitude=self.amplitude, *self._args, **self._kwargs) # now also start game for the underlying policy self.policy.startGame()
# --- Pass the call to the subpolicy
[docs] def getReward(self, arm, reward): """ Pass the reward, as usual, update t and sometimes restart the underlying policy.""" # print(" - At time t = {}, got a reward = {} from arm {} ...".format(self.t, arm, reward)) # DEBUG # super(DoublingTrickWrapper, self).getReward(arm, reward) self.t += 1 self.policy.getReward(arm, reward) # Maybe we have to update the horizon? if self.t > self.horizon: self._i += 1 new_horizon = self._next_horizon(self._i, self.horizon) # XXX <!-- small hack if the sequence is not growing fast enough nb_of_trials = 1 while nb_of_trials < MAX_NB_OF_TRIALS and new_horizon <= self.horizon: self._i += 1 nb_of_trials += 1 new_horizon = self._next_horizon(self._i, self.horizon) # XXX end of small hack --> assert new_horizon > self.horizon, "Error: the new_horizon = {} is not > the current horizon = {} ...".format(new_horizon, self.horizon) # DEBUG # print(" - At time t = {}, a DoublingTrickWrapper class was running with current horizon T_i = {} and decided to use {} as a new horizon...".format(self.t, self.horizon, new_horizon)) # DEBUG self.horizon = new_horizon # now we have to update or restart the underlying policy if self.full_restart: try: self.policy = self._policy(self.nbArms, horizon=self.horizon, lower=self.lower, amplitude=self.amplitude, *self._args, **self._kwargs) except Exception as e: # print("Received exception {} when trying to create the underlying policy... maybe the 'horizon={}' keyword argument was not understood correctly? Retrying without it...".format(e, self.horizon)) # DEBUG self.policy = self._policy(self.nbArms, lower=self.lower, amplitude=self.amplitude, *self._args, **self._kwargs) # now also start game for the underlying policy self.policy.startGame() # print(" ==> Fully restarting the underlying policy by creating a new object... Now it is = {} ...".format(self.policy)) # DEBUG else: if hasattr(self.policy, 'horizon'): try: self.policy.horizon = self.horizon except AttributeError: pass
# print("Warning: unable to update the parameter 'horizon' of the underlying policy {}... Trying '_horizon' ...".format(self.policy)) # DEBUG # print(" ==> Just updating the horizon parameter of the underlying policy... Now it is = {} ...".format(self.policy)) # DEBUG # else: # print(" ==> Nothing to do, as the underlying policy DOES NOT have a 'horizon' or '_horizon' parameter that could have been updated... Maybe you are not using a good policy? I suggest UCBH or ApproximatedFHGittins.") # DEBUG # # --- Debugging if __name__ == "__main__": import sys if "plot" in sys.argv[1:]: plt.ion() # plot_doubling_sequences() for gamma, delta in [ (0.0, 1.0), # XXX bound in RT <= log(T) (0.5, 0.0), # XXX bound in RT <= sqrt(T) (0.5, 0.5), # XXX bound in RT <= sqrt(T * log(T)) (0.66667, 1.0), # XXX another weird bound in RT <= T^2/3 * log(T) ]: plot_quality_first_upper_bound(gamma=gamma, delta=delta, show_Ti_m_Tim1=True) plot_quality_first_upper_bound(gamma=gamma, delta=delta, show_Ti_m_Tim1=False) sys.exit(0) # Code for debugging purposes. from doctest import testmod print("\nTesting automatically all the docstring written in each functions of this module :") testmod(verbose=True)