Source code for Environment.Result

# -*- coding: utf-8 -*-
""" Result.Result class to wrap the simulation results."""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

import numpy as np


class Result(object):
    """ Result accumulators."""

    # , delta_t_save=1):
    def __init__(self, nbArms, horizon, indexes_bestarm=-1, means=None):
        """ Create a Result object to store the simulation results."""
        # self._means = means  # Keep the means for ChangingAtEachRepMAB cases
        # self.delta_t_save = delta_t_save  #: Sample rate for saving.
        self.choices = np.zeros(horizon, dtype=int)  #: Store all the choices.
        self.rewards = np.zeros(horizon)  #: Store all the rewards, to compute the mean.
        self.pulls = np.zeros(nbArms, dtype=int)  #: Store the pulls.
        if means is not None:
            indexes_bestarm = np.nonzero(np.isclose(means, np.max(means)))[0]
        indexes_bestarm = np.asarray(indexes_bestarm)
        if np.size(indexes_bestarm) == 1:
            indexes_bestarm = np.asarray([indexes_bestarm])
        self.indexes_bestarm = [indexes_bestarm for _ in range(horizon)]  #: Store also the position(s) of the best arm(s), in case of a dynamically switching environment.
        self.running_time = -1  #: Store the running time of the experiment.
        self.memory_consumption = -1  #: Store the memory consumption of the experiment.
        self.number_of_cp_detections = 0  #: Store the number of change points detected during the experiment.
    def store(self, time, choice, reward):
        """ Store the choice made and the reward observed at one time step."""
        self.choices[time] = choice
        self.rewards[time] = reward
        self.pulls[choice] += 1
    def change_in_arms(self, time, indexes_bestarm):
        """ Store the position of the best arm from this list of arms.

        - From that time t **and after**, the index of the best arm is stored as ``indexes_bestarm``.

        .. warning:: FIXME This is still experimental!
        """
        for t in range(time, len(self.indexes_bestarm)):
            self.indexes_bestarm[t] = indexes_bestarm
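
A minimal usage sketch (illustrative only, not part of this module), assuming the class above is available and using hypothetical Bernoulli arms with a uniformly random policy, to show how the accumulators are filled and read back:

# Illustrative assumptions: arm means, horizon, and a purely random policy.
means = [0.1, 0.5, 0.9]
horizon = 1000
np.random.seed(42)

result = Result(nbArms=len(means), horizon=horizon, means=means)
for t in range(horizon):
    choice = np.random.randint(len(means))              # pick an arm uniformly at random
    reward = float(np.random.random() < means[choice])  # Bernoulli reward for that arm
    result.store(t, choice, reward)

print(result.pulls)               # how many times each arm was pulled
print(result.rewards.sum())       # total accumulated reward
print(result.indexes_bestarm[0])  # position(s) of the best arm (arm 2 here)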