Source code for Environment.Evaluator

# -*- coding: utf-8 -*-
""" Evaluator class to wrap and run the simulations.
Lots of plotting methods, to have various visualizations.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

# Generic imports
import sys
import pickle
USE_PICKLE = False   #: Should we save the figure objects to a .pickle file at the end of the simulation?
import random
import time
from copy import deepcopy
# Scientific imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import inspect
def _nbOfArgs(function):
    # Count the number of arguments of a function, using the newest inspect API available.
    try:
        return len(inspect.signature(function).parameters)
    except NameError:
        return len(inspect.getargspec(function).args)
try:
    # Local imports, libraries
    from .usejoblib import USE_JOBLIB, Parallel, delayed
    from .usetqdm import USE_TQDM, tqdm
    # Local imports, tools and config
    from .plotsettings import BBOX_INCHES, signature, maximizeWindow, palette, makemarkers, add_percent_formatter, legend, show_and_save, nrows_ncols, violin_or_box_plot, adjust_xticks_subplots, table_to_latex
    from .sortedDistance import weightedDistance, manhattan, kendalltau, spearmanr, gestalt, meanDistance, sortedDistance
    # Local imports, objects and functions
    from .MAB import MAB, MarkovianMAB, ChangingAtEachRepMAB, NonStationaryMAB, PieceWiseStationaryMAB, IncreasingMAB
    from .Result import Result
    from .memory_consumption import getCurrentMemory, sizeof_fmt
except ImportError:
    # Local imports, libraries
    from usejoblib import USE_JOBLIB, Parallel, delayed
    from usetqdm import USE_TQDM, tqdm
    # Local imports, tools and config
    from plotsettings import BBOX_INCHES, signature, maximizeWindow, palette, makemarkers, add_percent_formatter, legend, show_and_save, nrows_ncols, violin_or_box_plot, adjust_xticks_subplots, table_to_latex
    from sortedDistance import weightedDistance, manhattan, kendalltau, spearmanr, gestalt, meanDistance, sortedDistance
    # Local imports, objects and functions
    from MAB import MAB, MarkovianMAB, ChangingAtEachRepMAB, NonStationaryMAB, PieceWiseStationaryMAB, IncreasingMAB
    from Result import Result
    from memory_consumption import getCurrentMemory, sizeof_fmt

REPETITIONS = 1  #: Default number of repetitions
DELTA_T_PLOT = 50  #: Default sampling rate for plotting

plot_lowerbound = True  #: Default is to plot the lower-bound

USE_BOX_PLOT = True  #: True to use a box plot, False to use a violin plot.

# Parameters for the random events
random_shuffle = False  #: Use basic random events of shuffling the arms?
random_invert = False  #: Use basic random events of inverting the arms?
nb_break_points = 0  #: Default number of random events

# Flags for experimental aspects
STORE_ALL_REWARDS = False  #: Store all rewards?
STORE_REWARDS_SQUARED = False  #: Store rewards squared?
MORE_ACCURATE = True  #: Use the count of selections instead of rewards for a more accurate mean/var reward measure.
FINAL_RANKS_ON_AVERAGE = True  #: Final ranks are printed based on the average of the last 1% of rewards, not only the very last rewards
USE_JOBLIB_FOR_POLICIES = False  #: Don't use joblib to parallelize the simulations on the various policies (we parallelize the random Monte Carlo repetitions instead)
class Evaluator(object):
    """ Evaluator class to run the simulations."""
    def __init__(self, configuration,
                 finalRanksOnAverage=FINAL_RANKS_ON_AVERAGE, averageOn=5e-3,
                 useJoblibForPolicies=USE_JOBLIB_FOR_POLICIES,
                 moreAccurate=MORE_ACCURATE):
        self.cfg = configuration  #: Configuration dictionary
        # Attributes
        self.nbPolicies = len(self.cfg['policies'])  #: Number of policies
        print("Number of policies in this comparison:", self.nbPolicies)
        self.horizon = self.cfg['horizon']  #: Horizon (number of time steps)
        print("Time horizon:", self.horizon)
        self.repetitions = self.cfg.get('repetitions', REPETITIONS)  #: Number of repetitions
        print("Number of repetitions:", self.repetitions)
        self.delta_t_plot = 1 if self.horizon <= 10000 else self.cfg.get('delta_t_plot', DELTA_T_PLOT)  #: Sampling rate for plotting
        print("Sampling rate for plotting, delta_t_plot:", self.delta_t_plot)
        print("Number of jobs for parallelization:", self.cfg['n_jobs'])
        # Parameters for the random events
        self.random_shuffle = self.cfg.get('random_shuffle', random_shuffle)  #: Random shuffling of arms?
        self.random_invert = self.cfg.get('random_invert', random_invert)  #: Random inversion of arms?
        self.nb_break_points = self.cfg.get('nb_break_points', nb_break_points)  #: How many random events?
        self.plot_lowerbound = self.cfg.get('plot_lowerbound', plot_lowerbound)  #: Should we plot the lower-bound?
        self.signature = signature
        # Flags
        self.moreAccurate = moreAccurate  #: Use the count of selections instead of rewards for a more accurate mean/var reward measure.
        self.finalRanksOnAverage = finalRanksOnAverage  #: Are the final ranks computed on average rewards?
        self.averageOn = averageOn  #: How many of the last steps are used for the final rank average rewards
        self.useJoblibForPolicies = useJoblibForPolicies  #: Use joblib to parallelize the for loop on policies (useless)
        self.useJoblib = USE_JOBLIB and self.cfg['n_jobs'] != 1  #: Use joblib to parallelize the for loop on repetitions (useful)
        self.cache_rewards = self.cfg.get('cache_rewards', False)  #: Should we cache and precompute the rewards?
        self.environment_bayesian = self.cfg.get('environment_bayesian', False)  #: Is the environment Bayesian?
        self.showplot = self.cfg.get('showplot', True)  #: Show the plot (interactive display or not)
        self.use_box_plot = USE_BOX_PLOT or (self.repetitions == 1)  #: Use a box plot (or a violin plot if False). Forced to a box plot if repetitions == 1.
        self.change_labels = self.cfg.get('change_labels', {})  #: Possibly empty dictionary to map 'policyId' to new labels (overwriting their names).
        self.append_labels = self.cfg.get('append_labels', {})  #: Possibly empty dictionary to map 'policyId' to labels appended to their names.
        # Internal object memory
        self.envs = []  #: List of environments
        self.policies = []  #: List of policies
        self.__initEnvironments__()
        # Update the signature for non-stationary problems
        if self.nb_break_points > 1:
            changePoints = getattr(self.envs[0], 'changePoints', [])
            changePoints = sorted([tau for tau in changePoints if tau > 0])
            if self.random_shuffle:
                self.signature = (r", $\Upsilon_T={}$ random arms shuffling".format(len(changePoints))) + self.signature
            elif self.random_invert:
                self.signature = (r", $\Upsilon_T={}$ arms inversion".format(len(changePoints))) + self.signature
            # else:
            #     # self.signature = (r", $\Upsilon_T={}$ change point{}{}".format(len(changePoints), "s" if len(changePoints) > 1 else "", " ${}$".format(list(changePoints)) if len(changePoints) > 0 else "") + self.signature)
            #     self.signature = (r", $\Upsilon_T={}$".format(len(changePoints)) + self.signature)
        # Internal vectorial memory
        self.rewards = np.zeros((self.nbPolicies, len(self.envs), self.horizon))  #: For each env, history of rewards, i.e. accumulated rewards
        self.lastCumRewards = np.zeros((self.nbPolicies, len(self.envs), self.repetitions))  #: For each env, last accumulated rewards, to compute the variance and histogram of the whole regret R_T
        self.minCumRewards = np.full((self.nbPolicies, len(self.envs), self.horizon), +np.inf)  #: For each env, history of minimum of rewards, to compute amplitude (+- STD)
        self.maxCumRewards = np.full((self.nbPolicies, len(self.envs), self.horizon), -np.inf)  #: For each env, history of maximum of rewards, to compute amplitude (+- STD)
        if STORE_REWARDS_SQUARED:
            self.rewardsSquared = np.zeros((self.nbPolicies, len(self.envs), self.horizon))  #: For each env, history of squared rewards
        if STORE_ALL_REWARDS:
            self.allRewards = np.zeros((self.nbPolicies, len(self.envs), self.horizon, self.repetitions))  #: For each env, full history of rewards
        self.bestArmPulls = dict()  #: For each env, keep the history of best arm pulls
        self.pulls = dict()  #: For each env, keep cumulative counts of all arm pulls
        if self.moreAccurate:
            self.allPulls = dict()  #: For each env, keep cumulative counts of all arm pulls, for each time step
        self.lastPulls = dict()  #: For each env, keep the arm pulls of the last repetition
        self.runningTimes = dict()  #: For each env, keep the history of running times
        self.memoryConsumption = dict()  #: For each env, keep the history of memory consumption
        self.numberOfCPDetections = dict()  #: For each env, store the number of change-point detections by each algorithm, to print its average at the end (to check if a certain change-point detector detects too few or too many changes).
        # XXX: WARNING no memorized vector should have dimension horizon * repetitions, that explodes the RAM consumption!
        for envId in range(len(self.envs)):
            self.bestArmPulls[envId] = np.zeros((self.nbPolicies, self.horizon), dtype=np.int32)
            self.pulls[envId] = np.zeros((self.nbPolicies, self.envs[envId].nbArms), dtype=np.int32)
            if self.moreAccurate:
                self.allPulls[envId] = np.zeros((self.nbPolicies, self.envs[envId].nbArms, self.horizon), dtype=np.int32)
            self.lastPulls[envId] = np.zeros((self.nbPolicies, self.envs[envId].nbArms, self.repetitions), dtype=np.int32)
            self.runningTimes[envId] = np.zeros((self.nbPolicies, self.repetitions))
            self.memoryConsumption[envId] = np.zeros((self.nbPolicies, self.repetitions))
            self.numberOfCPDetections[envId] = np.zeros((self.nbPolicies, self.repetitions), dtype=np.int32)
        print("Number of environments to try:", len(self.envs))
        # To speed up plotting
        self._times = np.arange(1, 1 + self.horizon)
    # --- Init methods
    def __initEnvironments__(self):
        """ Create environments."""
        for configuration_arms in self.cfg['environment']:
            print("Using this dictionary to create a new environment:\n", configuration_arms)  # DEBUG
            new_mab_problem = None
            if isinstance(configuration_arms, dict) \
               and "arm_type" in configuration_arms \
               and "params" in configuration_arms:
                # PieceWiseStationaryMAB or NonStationaryMAB or ChangingAtEachRepMAB
                if "listOfMeans" in configuration_arms["params"] \
                   and "changePoints" in configuration_arms["params"]:
                    new_mab_problem = PieceWiseStationaryMAB(configuration_arms)
                elif "newMeans" in configuration_arms["params"] \
                     and "args" in configuration_arms["params"]:
                    if "changePoints" in configuration_arms["params"]:
                        new_mab_problem = NonStationaryMAB(configuration_arms)
                    else:
                        new_mab_problem = ChangingAtEachRepMAB(configuration_arms)
                # MarkovianMAB
                elif configuration_arms["arm_type"] == "Markovian" \
                     and "transitions" in configuration_arms["params"]:
                    new_mab_problem = MarkovianMAB(configuration_arms)
                # IncreasingMAB
                elif "change_lower_amplitude" in configuration_arms:
                    new_mab_problem = IncreasingMAB(configuration_arms)
            if new_mab_problem is None:
                new_mab_problem = MAB(configuration_arms)
            self.envs.append(new_mab_problem)
    def __initPolicies__(self, env):
        """ Create or initialize policies."""
        for policyId, policy in enumerate(self.cfg['policies']):
            print("- Adding policy #{} = {} ...".format(policyId + 1, policy))  # DEBUG
            if isinstance(policy, dict):
                print("  Creating this policy from a dictionary 'self.cfg['policies'][{}]' = {} ...".format(policyId, policy))  # DEBUG
                self.policies.append(policy['archtype'](env.nbArms, **policy['params']))
            else:
                print("  Using this already created policy 'self.cfg['policies'][{}]' = {} ...".format(policyId, policy))  # DEBUG
                self.policies.append(policy)
        for policyId in range(self.nbPolicies):
            self.policies[policyId].__cachedstr__ = str(self.policies[policyId])
            if policyId in self.append_labels:
                self.policies[policyId].__cachedstr__ += self.append_labels[policyId]
            if policyId in self.change_labels:
                self.policies[policyId].__cachedstr__ = self.change_labels[policyId]
    # --- Start computation
    def compute_cache_rewards(self, arms):
        """ Compute the rewards only once, then launch the experiments with the same matrix (r_{k,t})."""
        rewards = np.zeros((len(arms), self.repetitions, self.horizon))
        print("\n===> Pre-computing the rewards ... Of shape {} ...\n    In order for all simulated algorithms to face the same random rewards (robust comparison of A1,..,An vs Aggr(A1,..,An)) ...\n".format(np.shape(rewards)))  # DEBUG
        for armId, arm in tqdm(enumerate(arms), desc="Arms"):
            if hasattr(arm, 'draw_nparray'):  # XXX Use this method to speed up computation
                rewards[armId] = arm.draw_nparray((self.repetitions, self.horizon))
            else:  # Slower
                for repeatId in tqdm(range(self.repetitions), desc="Repetitions"):
                    for t in tqdm(range(self.horizon), desc="Time steps"):
                        rewards[armId, repeatId, t] = arm.draw(t)
        return rewards
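    # Note (not part of the original module): the cached tensor is indexed as
    # rewards[armId, repeatId, t], and delayed_play() below replays it with
    # reward = allrewards[choice, repeatId, t], so on a given repetition every policy
    # faces exactly the same random draws. A tiny hypothetical stand-in with plain NumPy:
    #
    # >>> rng = np.random.RandomState(0)
    # >>> cached = rng.binomial(1, 0.7, size=(1, 2, 5))   # 1 Bernoulli(0.7) arm, 2 repetitions, horizon 5
    # >>> reward = cached[0, 1, 3]                        # arm 0, repetition 1, time step t=3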
    def startAllEnv(self):
        """Simulate all envs."""
        for envId, env in enumerate(self.envs):
            self.startOneEnv(envId, env)
    def startOneEnv(self, envId, env):
        """Simulate that env."""
        plt.close('all')
        print("\n\nEvaluating environment:", repr(env))
        self.policies = []
        self.__initPolicies__(env)
        # Precompute rewards
        if self.cache_rewards:
            allrewards = self.compute_cache_rewards(env.arms)
        else:
            allrewards = None

        def store(r, policyId, repeatId):
            """ Store the result of the #repeatId experiment, for the #policyId policy."""
            self.rewards[policyId, envId, :] += r.rewards
            self.lastCumRewards[policyId, envId, repeatId] = np.sum(r.rewards)
            if hasattr(self, 'rewardsSquared'):
                self.rewardsSquared[policyId, envId, :] += (r.rewards ** 2)
            if hasattr(self, 'allRewards'):
                self.allRewards[policyId, envId, :, repeatId] = r.rewards
            if hasattr(self, 'minCumRewards'):
                self.minCumRewards[policyId, envId, :] = np.minimum(self.minCumRewards[policyId, envId, :], np.cumsum(r.rewards)) if repeatId > 1 else np.cumsum(r.rewards)
            if hasattr(self, 'maxCumRewards'):
                self.maxCumRewards[policyId, envId, :] = np.maximum(self.maxCumRewards[policyId, envId, :], np.cumsum(r.rewards)) if repeatId > 1 else np.cumsum(r.rewards)
            self.bestArmPulls[envId][policyId, :] += np.cumsum(np.in1d(r.choices, r.indexes_bestarm))
            self.pulls[envId][policyId, :] += r.pulls
            if self.moreAccurate:
                self.allPulls[envId][policyId, :, :] += np.array([1 * (r.choices == armId) for armId in range(env.nbArms)])  # XXX stores a lot of zeros but it is not so costly
            self.memoryConsumption[envId][policyId, repeatId] = r.memory_consumption
            self.lastPulls[envId][policyId, :, repeatId] = r.pulls
            self.runningTimes[envId][policyId, repeatId] = r.running_time
            self.numberOfCPDetections[envId][policyId, repeatId] = r.number_of_cp_detections

        # Start for all policies
        for policyId, policy in enumerate(self.policies):
            print("\n\n\n- Evaluating policy #{}/{}: {} ...".format(policyId + 1, self.nbPolicies, policy))
            if self.useJoblib:
                seeds = np.random.randint(low=0, high=100 * self.repetitions, size=self.repetitions)
                repeatIdout = 0
                for r in Parallel(n_jobs=self.cfg['n_jobs'], pre_dispatch='3*n_jobs', verbose=self.cfg['verbosity'])(
                        delayed(delayed_play)(env, policy, self.horizon, random_shuffle=self.random_shuffle, random_invert=self.random_invert, nb_break_points=self.nb_break_points, allrewards=allrewards, seed=seeds[repeatId], repeatId=repeatId, useJoblib=self.useJoblib)
                        for repeatId in tqdm(range(self.repetitions), desc="Repeat||")):
                    store(r, policyId, repeatIdout)
                    repeatIdout += 1
            else:
                for repeatId in tqdm(range(self.repetitions), desc="Repeat"):
                    r = delayed_play(env, policy, self.horizon, random_shuffle=self.random_shuffle, random_invert=self.random_invert, nb_break_points=self.nb_break_points, allrewards=allrewards, repeatId=repeatId, useJoblib=self.useJoblib)
                    store(r, policyId, repeatId)
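    # Example (not part of the original module): a minimal sketch of how an Evaluator is
    # typically driven from a configuration dictionary. The concrete values below are
    # hypothetical, and Bernoulli, UCB and Thompson are assumed to be importable from the
    # Arms and Policies packages of SMPyBandits.
    #
    # >>> from SMPyBandits.Arms import Bernoulli
    # >>> from SMPyBandits.Policies import UCB, Thompson
    # >>> configuration = {
    # ...     'horizon': 1000, 'repetitions': 4, 'n_jobs': 1, 'verbosity': 0,
    # ...     'environment': [{'arm_type': Bernoulli, 'params': [0.1, 0.5, 0.9]}],
    # ...     'policies': [{'archtype': UCB, 'params': {}}, {'archtype': Thompson, 'params': {}}],
    # ... }
    # >>> evaluation = Evaluator(configuration)
    # >>> evaluation.startAllEnv()                    # run every policy on every environment
    # >>> _ = evaluation.plotRegrets(envId=0)         # then use any of the plotting methods below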
    # --- Save to disk methods
    def saveondisk(self, filepath="saveondisk_Evaluator.hdf5"):
        """ Save the content of the internal data into a HDF5 file on the disk.

        - See http://docs.h5py.org/en/stable/quick.html if needed.
        """
        import h5py
        # 1. create the h5py file
        h5file = h5py.File(filepath, "w")
        # 2. store the main attributes and all other attributes, if they exist
        for name_of_attr in [
                "horizon", "repetitions", "nbPolicies", "delta_t_plot",
                "random_shuffle", "random_invert", "nb_break_points",
                "plot_lowerbound", "signature", "moreAccurate", "finalRanksOnAverage",
                "averageOn", "useJoblibForPolicies", "useJoblib", "cache_rewards",
                "environment_bayesian", "showplot", "change_labels", "append_labels"
            ]:
            if not hasattr(self, name_of_attr):
                continue
            value = getattr(self, name_of_attr)
            if isinstance(value, str):
                value = np.string_(value)
            try:
                h5file.attrs[name_of_attr] = value
            except (ValueError, TypeError):
                print("Error: when saving the Evaluator object to a HDF5 file, the attribute named {} (value {} of type {}) couldn't be saved. Skipping...".format(name_of_attr, value, type(value)))  # DEBUG
        # 2.bis. store the list of names of the policies
        labels = [np.string_(policy.__cachedstr__) for policy in self.policies]
        h5file.attrs["labels"] = labels
        # 3. store some arrays that are shared between envs?
        for name_of_dataset in ["rewards", "rewardsSquared", "allRewards"]:
            if not hasattr(self, name_of_dataset):
                continue
            data = getattr(self, name_of_dataset)
            try:
                h5file.create_dataset(name_of_dataset, data=data)
            except (ValueError, TypeError) as e:
                print("Error: when saving the Evaluator object to a HDF5 file, the dataset named {} (value of type {} and shape {} and dtype {}) couldn't be saved. Skipping...".format(name_of_dataset, type(data), data.shape, data.dtype))  # DEBUG
                print("Exception:\n", e)  # DEBUG
        # 4. for each environment
        h5file.attrs["number_of_envs"] = len(self.envs)
        for envId in range(len(self.envs)):
            # 4.a. create a subgroup for this env
            sbgrp = h5file.create_group("env_{}".format(envId))
            # 4.b. store the attributes of the MAB problem
            mab = self.envs[envId]
            for name_of_attr in ["isChangingAtEachRepetition", "isMarkovian", "_sparsity", "means", "nbArms", "maxArm", "minArm"]:
                if not hasattr(mab, name_of_attr):
                    continue
                value = getattr(mab, name_of_attr)
                if isinstance(value, str):
                    value = np.string_(value)
                try:
                    sbgrp.attrs[name_of_attr] = value
                except (ValueError, TypeError):
                    print("Error: when saving the Evaluator object to a HDF5 file, the attribute named {} (value {} of type {}) couldn't be saved. Skipping...".format(name_of_attr, value, type(value)))  # DEBUG
            # 4.c. store the data for that env
            for name_of_dataset in ["allPulls", "lastPulls", "runningTimes", "memoryConsumption", "numberOfCPDetections"]:
                if not (hasattr(self, name_of_dataset) and envId in getattr(self, name_of_dataset)):
                    continue
                data = getattr(self, name_of_dataset)[envId]
                try:
                    sbgrp.create_dataset(name_of_dataset, data=data)
                except (ValueError, TypeError) as e:
                    print("Error: when saving the Evaluator object to a HDF5 file, the dataset named {} (value of type {} and shape {} and dtype {}) couldn't be saved. Skipping...".format(name_of_dataset, type(data), data.shape, data.dtype))  # DEBUG
                    print("Exception:\n", e)  # DEBUG
            # 4.d. compute and store more data for that env
            for methodName in ["getRunningTimes", "getMemoryConsumption", "getNumberOfCPDetections", "getBestArmPulls", "getPulls", "getRewards", "getCumulatedRegret", "getLastRegrets", "getAverageRewards"]:
                if not hasattr(self, methodName):
                    continue
                name_of_dataset = methodName.replace("get", "")
                name_of_dataset = name_of_dataset[0].lower() + name_of_dataset[1:]
                if name_of_dataset in sbgrp:
                    name_of_dataset = methodName  # XXX be sure to not use twice the same name, e.g., for getRunningTimes and runningTimes
                method = getattr(self, methodName)
                if _nbOfArgs(method) > 2:
                    if isinstance(method(0, envId=envId), tuple):
                        data = np.array([method(policyId, envId=envId)[0] for policyId in range(len(self.policies))])
                    else:
                        data = np.array([method(policyId, envId=envId) for policyId in range(len(self.policies))])
                else:
                    if isinstance(method(envId), tuple):
                        data = method(envId)[0]
                    else:
                        data = method(envId)
                try:
                    sbgrp.create_dataset(name_of_dataset, data=data)
                except (ValueError, TypeError) as e:
                    print("Error: when saving the Evaluator object to a HDF5 file, the dataset named {} (value of type {} and shape {} and dtype {}) couldn't be saved. Skipping...".format(name_of_dataset, type(data), data.shape, data.dtype))  # DEBUG
                    print("Exception:\n", e)  # DEBUG
        # 5. when done, close the file
        h5file.close()
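    # Example (not part of the original module): a minimal sketch of reading back the file
    # written by saveondisk(), using only the attribute, group and dataset names created above
    # (e.g. "cumulatedRegret" comes from step 4.d applied to getCumulatedRegret).
    #
    # >>> import h5py
    # >>> with h5py.File("saveondisk_Evaluator.hdf5", "r") as h5file:
    # ...     print(dict(h5file.attrs))                    # horizon, repetitions, labels, ...
    # ...     env0 = h5file["env_0"]
    # ...     regrets = env0["cumulatedRegret"][:]         # shape (nbPolicies, horizon)
    # ...     running_times = env0["runningTimes"][:]      # shape (nbPolicies, repetitions)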
    # def loadfromdisk(self, filepath):
    #     """ Update the internal memory of the Evaluator object by loading data from the opened HDF5 file.
    #
    #     .. warning:: FIXME this is not YET implemented!
    #     """
    #     # FIXME I just have to fill all the internal matrices from the HDF5 file?
    #     raise NotImplementedError

    # --- Get data
    def getPulls(self, policyId, envId=0):
        """Extract mean pulls."""
        return self.pulls[envId][policyId, :] / float(self.repetitions)
    def getBestArmPulls(self, policyId, envId=0):
        """Extract mean best arm pulls."""
        # We have to divide by an arange() = cumsum(ones) to get a frequency
        return self.bestArmPulls[envId][policyId, :] / (float(self.repetitions) * self._times)
    def getRewards(self, policyId, envId=0):
        """Extract mean rewards."""
        return self.rewards[policyId, envId, :] / float(self.repetitions)
    def getAverageWeightedSelections(self, policyId, envId=0):
        """Extract the weighted count of selections."""
        weighted_selections = np.zeros(self.horizon)
        for armId in range(self.envs[envId].nbArms):
            mean_selections = self.allPulls[envId][policyId, armId, :] / float(self.repetitions)
            # DONE this is now fixed for non-stationary bandits
            if hasattr(self.envs[envId], 'get_allMeans'):
                meanOfThisArm = self.envs[envId].get_allMeans(horizon=self.horizon)[armId, :]
            else:
                meanOfThisArm = self.envs[envId].means[armId]
            weighted_selections += meanOfThisArm * mean_selections
        return weighted_selections
    def getMaxRewards(self, envId=0):
        """Extract max mean rewards."""
        return np.max(self.rewards[:, envId, :] / float(self.repetitions))
    def getCumulatedRegret_LessAccurate(self, policyId, envId=0):
        """Compute the cumulative regret, based on the accumulated rewards."""
        return np.cumsum(self.envs[envId].get_maxArm(self.horizon) - self.getRewards(policyId, envId))
    def getCumulatedRegret_MoreAccurate(self, policyId, envId=0):
        """Compute the cumulative regret, based on counts of selections and not on the actual rewards."""
        assert self.moreAccurate, "Error: getCumulatedRegret_MoreAccurate() is only available when using the 'moreAccurate' option (it consumes more memory!)."  # DEBUG
        instant_oracle_performance = self.envs[envId].get_maxArm(self.horizon)
        instant_performance = self.getAverageWeightedSelections(policyId, envId)
        instant_loss = instant_oracle_performance - instant_performance
        return np.cumsum(instant_loss)
        # return np.cumsum(self.envs[envId].get_maxArm(self.horizon) - self.getAverageWeightedSelections(policyId, envId))
    def getCumulatedRegret(self, policyId, envId=0, moreAccurate=None):
        """Use either the more accurate or the less accurate regret count."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        # print("Computing the vector of mean cumulated regret with '{}' accurate method...".format("more" if moreAccurate else "less"))  # DEBUG
        return self.getCumulatedRegret_MoreAccurate(policyId, envId=envId) if moreAccurate else self.getCumulatedRegret_LessAccurate(policyId, envId=envId)
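    # Illustration (sketch, not in the original module) of the two estimators on toy arrays,
    # for a stationary problem whose best mean is mu_star: the "less accurate" estimate uses
    # the empirical mean rewards, the "more accurate" one uses the means mu_k weighted by the
    # average number of selections of each arm k.
    #
    # >>> mu_star = 0.9
    # >>> mean_rewards = np.array([0.5, 0.7, 0.8, 0.85])      # getRewards(): mean reward at each t, over repetitions
    # >>> less_accurate = np.cumsum(mu_star - mean_rewards)   # as in getCumulatedRegret_LessAccurate()
    # >>> weighted = np.array([0.55, 0.72, 0.81, 0.86])       # getAverageWeightedSelections(): sum_k mu_k * mean selections of k
    # >>> more_accurate = np.cumsum(mu_star - weighted)       # as in getCumulatedRegret_MoreAccurate()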
    def getLastRegrets_LessAccurate(self, policyId, envId=0):
        """Extract the last regrets, based on the accumulated rewards."""
        return np.sum(self.envs[envId].get_maxArm(self.horizon)) - self.lastCumRewards[policyId, envId, :]
    def getAllLastWeightedSelections(self, policyId, envId=0):
        """Extract the weighted count of selections."""
        all_last_weighted_selections = np.zeros(self.repetitions)
        for armId in range(self.envs[envId].nbArms):
            if hasattr(self.envs[envId], 'get_allMeans'):
                meanOfThisArm = self.envs[envId].get_allMeans(horizon=self.horizon)[armId, :]  # DONE this is now fixed for non-stationary bandits
            else:
                meanOfThisArm = self.envs[envId].means[armId]
            if hasattr(self, 'allPulls'):
                all_selections = self.allPulls[envId][policyId, armId, :] / float(self.repetitions)
                if np.size(meanOfThisArm) == 1:  # the problem was stationary!
                    last_selections = np.sum(all_selections)  # no variance, but we don't care!
                    all_last_weighted_selections += meanOfThisArm * last_selections
                else:  # the problem was non-stationary!
                    last_selections = all_selections
                    all_last_weighted_selections += np.sum(meanOfThisArm * last_selections)
            else:
                last_selections = self.lastPulls[envId][policyId, armId, :]
                all_last_weighted_selections += meanOfThisArm * last_selections
        return all_last_weighted_selections
    def getLastRegrets_MoreAccurate(self, policyId, envId=0):
        """Extract the last regrets, based on counts of selections and not on the actual rewards."""
        return np.sum(self.envs[envId].get_maxArm(self.horizon)) - self.getAllLastWeightedSelections(policyId, envId=envId)
    def getLastRegrets(self, policyId, envId=0, moreAccurate=None):
        """Use either the more accurate or the less accurate regret count."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        # print("Computing the vector of last cumulated regrets (on repetitions) with '{}' accurate method...".format("more" if moreAccurate else "less"))  # DEBUG
        return self.getLastRegrets_MoreAccurate(policyId, envId=envId) if moreAccurate else self.getLastRegrets_LessAccurate(policyId, envId=envId)
    def getAverageRewards(self, policyId, envId=0):
        """Extract mean rewards (not `rewards` but `cumsum(rewards)/cumsum(1)`)."""
        return np.cumsum(self.getRewards(policyId, envId)) / self._times
    def getRewardsSquared(self, policyId, envId=0):
        """Extract rewards squared."""
        return self.rewardsSquared[policyId, envId, :] / float(self.repetitions)
    def getSTDRegret(self, policyId, envId=0, meanReward=False):
        """Extract the standard deviation of rewards.

        .. warning:: FIXME experimental!
        """
        # X = self._times
        # YMAX = self.getMaxRewards(envId=envId)
        # Y = self.getRewards(policyId, envId)
        # Y2 = self.getRewardsSquared(policyId, envId)
        # if meanReward:  # Cumulated expectation on time
        #     Ycum2 = (np.cumsum(Y) / X)**2
        #     Y2cum = np.cumsum(Y2) / X
        #     assert np.all(Y2cum >= Ycum2), "Error: getSTDRegret found a nan value in the standard deviation (ie a point where Y2cum < Ycum2)."  # DEBUG
        #     stdY = np.sqrt(Y2cum - Ycum2)
        #     YMAX *= 20  # XXX make it look smaller, for the plots
        # else:  # Expectation on nb of repetitions
        #     # https://en.wikipedia.org/wiki/Algebraic_formula_for_the_variance#In_terms_of_raw_moments
        #     # std(Y) = sqrt( E[Y**2] - E[Y]**2 )
        #     # stdY = np.cumsum(np.sqrt(Y2 - Y**2))
        #     stdY = np.sqrt(Y2 - Y**2)
        #     YMAX *= np.log(2 + self.horizon)  # Normalize the std variation
        #     YMAX *= 50  # XXX make it look larger, for the plots
        # # Renormalize this standard deviation
        # # stdY /= YMAX
        allRewards = self.allRewards[policyId, envId, :, :]
        return np.std(np.cumsum(allRewards, axis=0), axis=1)
    def getMaxMinReward(self, policyId, envId=0):
        """Extract the amplitude of rewards, as maxCumRewards - minCumRewards."""
        return (self.maxCumRewards[policyId, envId, :] - self.minCumRewards[policyId, envId, :]) / (float(self.repetitions) ** 0.5)
        # return self.maxCumRewards[policyId, envId, :] - self.minCumRewards[policyId, envId, :]
    def getRunningTimes(self, envId=0):
        """Get the means, stds and lists of running times of the different policies."""
        all_times = [self.runningTimes[envId][policyId, :] for policyId in range(self.nbPolicies)]
        means = [np.mean(times) for times in all_times]
        stds = [np.std(times) for times in all_times]
        return means, stds, all_times
    def getMemoryConsumption(self, envId=0):
        """Get the means, stds and lists of memory consumptions of the different policies."""
        all_memories = [self.memoryConsumption[envId][policyId, :] for policyId in range(self.nbPolicies)]
        for policyId in range(self.nbPolicies):
            all_memories[policyId] = [m for m in all_memories[policyId] if m > 0]
        means = [np.mean(memories) if len(memories) > 0 else 0 for memories in all_memories]
        stds = [np.std(memories) if len(memories) > 0 else 0 for memories in all_memories]
        return means, stds, all_memories
    def getNumberOfCPDetections(self, envId=0):
        """Get the means, stds and lists of numbers of change-point detections of the different policies."""
        all_number_of_cp_detections = [self.numberOfCPDetections[envId][policyId, :] for policyId in range(self.nbPolicies)]
        means = [np.mean(number_of_cp_detections) for number_of_cp_detections in all_number_of_cp_detections]
        stds = [np.std(number_of_cp_detections) for number_of_cp_detections in all_number_of_cp_detections]
        return means, stds, all_number_of_cp_detections
    # --- Plotting methods
    def printFinalRanking(self, envId=0, moreAccurate=None):
        """Print the final ranking of the different policies."""
        print("\nGiving the final ranks ...")
        assert 0 < self.averageOn < 1, "Error, the parameter averageOn of an Evaluator class has to be in (0, 1) strictly, but is = {} here ...".format(self.averageOn)  # DEBUG
        print("\nFinal ranking for this environment #{} : (using the {} accurate estimate of the regret)".format(envId, "more" if moreAccurate else "less"))
        nbPolicies = self.nbPolicies
        lastRegret = np.zeros(nbPolicies)
        totalRegret = np.zeros(nbPolicies)
        totalRewards = np.zeros(nbPolicies)
        totalWeightedSelections = np.zeros(nbPolicies)
        for i, policy in enumerate(self.policies):
            Y = self.getCumulatedRegret(i, envId, moreAccurate=moreAccurate)
            if self.finalRanksOnAverage:
                lastRegret[i] = np.mean(Y[-int(self.averageOn * self.horizon):])  # get the average value during the last 0.5% of the iterations
            else:
                lastRegret[i] = Y[-1]  # get the last value
            totalRegret[i] = Y[-1]
            totalRewards[i] = np.sum(self.getRewards(i, envId))
            totalWeightedSelections[i] = np.sum(self.getAverageWeightedSelections(i, envId))
        # Sort lastRegret and give the ranking
        index_of_sorting = np.argsort(lastRegret)
        for i, k in enumerate(index_of_sorting):
            policy = self.policies[k]
            print("- Policy '{}'\twas ranked\t{} / {} for this simulation\n\t(last regret = {:.5g},\ttotal regret = {:.5g},\ttotal reward = {:.5g},\ttotal weighted selection = {:.5g}).".format(policy.__cachedstr__, i + 1, nbPolicies, lastRegret[k], totalRegret[k], totalRewards[k], totalWeightedSelections[k]))
        return lastRegret, index_of_sorting
    def _xlabel(self, envId, *args, **kwargs):
        """Add the xlabel to the plot, and if the environment has change-points, draw vertical lines to clearly identify their locations."""
        env = self.envs[envId]
        if hasattr(env, 'changePoints'):
            ymin, ymax = plt.ylim()
            taus = self.envs[envId].changePoints
            if len(taus) > 25:
                print("WARNING: Adding vlines for the change points with more than 25 change points will be ugly on the plots...")  # DEBUG
            if len(taus) > 50:  # Force to NOT add the vlines
                return plt.xlabel(*args, **kwargs)
            for tau in taus:
                if tau > 0 and tau < self.horizon:
                    plt.vlines(tau, ymin, ymax, linestyles='dotted', alpha=0.5)
        return plt.xlabel(*args, **kwargs)
    def plotRegrets(self, envId=0,
                    savefig=None, meanReward=False,
                    plotSTD=False, plotMaxMin=False,
                    semilogx=False, semilogy=False, loglog=False,
                    normalizedRegret=False, drawUpperBound=False,
                    moreAccurate=None):
        """Plot the centralized cumulated regret; supports more than one environment (use evaluators to give a list of other environments)."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        fig = plt.figure()
        ymin = 0
        colors = palette(self.nbPolicies)
        markers = makemarkers(self.nbPolicies)
        X = self._times - 1
        plot_method = plt.loglog if loglog else plt.plot
        plot_method = plt.semilogy if semilogy else plot_method
        plot_method = plt.semilogx if semilogx else plot_method
        for i, policy in enumerate(self.policies):
            if meanReward:
                Y = self.getAverageRewards(i, envId)
            else:
                Y = self.getCumulatedRegret(i, envId, moreAccurate=moreAccurate)
                if normalizedRegret:
                    Y /= np.log(X + 2)  # XXX prevent /0
            ymin = min(ymin, np.min(Y))
            lw = 10 if ('$N=' in policy.__cachedstr__ or 'Aggr' in policy.__cachedstr__ or 'CORRAL' in policy.__cachedstr__ or 'LearnExp' in policy.__cachedstr__ or 'Exp4' in policy.__cachedstr__) else 8
            if len(self.policies) > 8:
                lw -= 1
            if semilogx or loglog:
                # FIXED for semilogx plots, truncate to only show t >= 100
                X_to_plot_here = X[X >= 100]
                Y_to_plot_here = Y[X >= 100]
                plot_method(X_to_plot_here[::self.delta_t_plot], Y_to_plot_here[::self.delta_t_plot], label=policy.__cachedstr__, color=colors[i], marker=markers[i], markevery=(i / 50., 0.1), lw=lw, ms=int(1.5*lw))
            else:
                plot_method(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=policy.__cachedstr__, color=colors[i], marker=markers[i], markevery=(i / 50., 0.1), lw=lw, ms=int(1.5*lw))
            if semilogx or loglog:  # Manual fix for issue https://github.com/SMPyBandits/SMPyBandits/issues/38
                plt.xscale('log')
            if semilogy or loglog:  # Manual fix for issue https://github.com/SMPyBandits/SMPyBandits/issues/38
                plt.yscale('log')
            # Plot the standard deviation of the regret
            if plotSTD and self.repetitions > 1:
                stdY = self.getSTDRegret(i, envId, meanReward=meanReward)
                if normalizedRegret:
                    stdY /= np.log(2 + X)
                plt.fill_between(X[::self.delta_t_plot], Y[::self.delta_t_plot] - stdY[::self.delta_t_plot], Y[::self.delta_t_plot] + stdY[::self.delta_t_plot], facecolor=colors[i], alpha=0.2)
            # Plot the amplitude of the regret
            if plotMaxMin and self.repetitions > 1:
                MaxMinY = self.getMaxMinReward(i, envId) / 2.
                if normalizedRegret:
                    MaxMinY /= np.log(2 + X)
                plt.fill_between(X[::self.delta_t_plot], Y[::self.delta_t_plot] - MaxMinY[::self.delta_t_plot], Y[::self.delta_t_plot] + MaxMinY[::self.delta_t_plot], facecolor=colors[i], alpha=0.2)
        self._xlabel(envId, r"Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        lowerbound = self.envs[envId].lowerbound()
        lowerbound_sparse = self.envs[envId].lowerbound_sparse()
        if not (semilogx or semilogy or loglog):
            print("\nThis MAB problem has: \n - a [Lai & Robbins] complexity constant C(mu) = {:.3g} for the 1-player problem...\n - an Optimal Arm Identification factor H_OI(mu) = {:.2%} ...".format(lowerbound, self.envs[envId].hoifactor()))  # DEBUG
            if self.envs[envId]._sparsity is not None and not np.isnan(lowerbound_sparse):
                print("\n - a [Kwon et al.] sparse lower-bound with s = {} non-negative arms, C'(mu) = {:.3g}...".format(self.envs[envId]._sparsity, lowerbound_sparse))  # DEBUG
        if not meanReward:
            if semilogy or loglog:
                ymin = max(0, ymin)
            plt.ylim(ymin, plt.ylim()[1])
        # Get a small string to add to the ylabel
        ylabel2 = r"%s%s" % (r", $\pm 1$ standard deviation" if (plotSTD and not plotMaxMin) else "", r", $\pm 1$ amplitude" if (plotMaxMin and not plotSTD) else "")
        if meanReward:
            if hasattr(self.envs[envId], 'get_allMeans'):  # DONE this is now fixed for non-stationary bandits
                means = self.envs[envId].get_allMeans(horizon=self.horizon)
                minArm, maxArm = np.min(means), np.max(means)
            else:
                minArm, maxArm = self.envs[envId].minArm, self.envs[envId].maxArm
            # We plot a horizontal dashed line at the best arm mean
            plt.plot(X[::self.delta_t_plot], self.envs[envId].maxArm * np.ones_like(X)[::self.delta_t_plot], 'k--', label="Largest mean = ${:.3g}$".format(maxArm))
            legend()
            plt.ylabel(r"Mean reward, average on time $\tilde{r}_t = \frac{1}{t} \sum_{s=1}^{t}$ %s%s" % (r"$\sum_{k=1}^{%d} \mu_k\mathbb{E}_{%d}[T_k(t)]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[r_s]$" % (self.repetitions), ylabel2))
            if not self.envs[envId].isChangingAtEachRepetition and not self.nb_break_points > 0:
                plt.ylim(0.80 * minArm, 1.10 * maxArm)
            # if self.nb_break_points > 0:
            #     plt.ylim(0, 1)  # FIXME do better!
            plt.title("Mean rewards for different bandit algorithms, averaged ${}$ times\n${}$ arms{}: {}".format(self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        elif normalizedRegret:
            if self.plot_lowerbound:
                # We also plot the Lai & Robbins lower bound
                plt.plot(X[::self.delta_t_plot], lowerbound * np.ones_like(X)[::self.delta_t_plot], 'k-', label="[Lai & Robbins] lower bound = ${:.3g}$".format(lowerbound), lw=3)
                # We also plot the Kwon et al. lower bound
                if self.envs[envId]._sparsity is not None and not np.isnan(lowerbound_sparse):
                    plt.plot(X[::self.delta_t_plot], lowerbound_sparse * np.ones_like(X)[::self.delta_t_plot], 'k--', label="[Kwon et al.] lower bound, $s = {}$, $= {:.3g}$".format(self.envs[envId]._sparsity, lowerbound_sparse), lw=3)
            legend()
            if self.nb_break_points > 0:  # DONE fix the math formula in case of non-stationary bandits
                plt.ylabel("Normalized non-stationary regret\n" + r"$\frac{R_t}{\log(t)} = \frac{1}{\log(t)}\sum_{s=1}^{t} \max_k \mu_k(t) - \frac{1}{\log(t)}$ %s%s" % (r"$\sum_{s=1}^{t} \sum_{k=1}^{%d} \mu_k(t) \mathbb{E}_{%d}[1(I(t)=k)]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\sum_{s=1}^{t} \mathbb{E}_{%d}[r_s]$" % (self.repetitions), ylabel2))
            else:
                plt.ylabel(r"Normalized regret%s$\frac{R_t}{\log(t)} = \frac{t}{\log(t)} \mu^* - \frac{1}{\log(t)}\sum_{s=1}^{t}$ %s%s" % ("\n", r"$\sum_{k=1}^{%d} \mu_k\mathbb{E}_{%d}[T_k(t)]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[r_s]$" % (self.repetitions), ylabel2))
            plt.title("Normalized cumulated regrets for different bandit algorithms, averaged ${}$ times\n${}$ arms{}: {}".format(self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        else:
            if drawUpperBound and not (semilogx or loglog):
                # Experiment to also print an upper bound: it is CRAZILY huge!!
                lower_amplitudes = np.asarray([arm.lower_amplitude for arm in self.envs[envId].arms])
                amplitude = np.max(lower_amplitudes[:, 1])
                maxVariance = max([p * (1 - p) for p in self.envs[envId].means])
                K = self.envs[envId].nbArms
                upperbound = 76 * np.sqrt(maxVariance * K * X) + amplitude * K
                plt.plot(X[::self.delta_t_plot], upperbound[::self.delta_t_plot], 'r-', label=r"Minimax upper-bound for kl-UCB++", lw=3)
            # FIXED for semilogx plots, truncate to only show t >= 100
            if semilogx or loglog:
                X = X[X >= 100]
            else:
                X = X[X >= 1]
            if self.plot_lowerbound:
                # We also plot the Lai & Robbins lower bound
                plt.plot(X[::self.delta_t_plot], lowerbound * np.log(X)[::self.delta_t_plot], 'k-', label=r"[Lai & Robbins] lower bound = ${:.3g}\; \log(t)$".format(lowerbound), lw=3)
                # We also plot the Kwon et al. lower bound
                if self.envs[envId]._sparsity is not None and not np.isnan(lowerbound_sparse):
                    plt.plot(X[::self.delta_t_plot], lowerbound_sparse * np.ones_like(X)[::self.delta_t_plot], 'k--', label=r"[Kwon et al.] lower bound, $s = {}$, $= {:.3g} \; \log(t)$".format(self.envs[envId]._sparsity, lowerbound_sparse), lw=3)
            legend()
            if self.nb_break_points > 0:  # DONE fix the math formula in case of non-stationary bandits
                plt.ylabel("Non-stationary regret\n" + r"$R_t = \sum_{s=1}^{t} \max_k \mu_k(s) - \sum_{s=1}^{t}$ %s%s" % (r"$\sum_{k=1}^{%d} \mu_k\mathbb{P}_{%d}[A(t)=k]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[r_s]$" % (self.repetitions), ylabel2))
            else:
                plt.ylabel(r"Regret $R_t = t \mu^* - \sum_{s=1}^{t}$ %s%s" % (r"$\sum_{k=1}^{%d} \mu_k\mathbb{E}_{%d}[T_k(t)]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[r_s]$ (from actual rewards)" % (self.repetitions), ylabel2))
            plt.title("Cumulated regrets for different bandit algorithms, averaged ${}$ times\n${}$ arms{}: {}".format(self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    def plotBestArmPulls(self, envId, savefig=None):
        """Plot the frequency of pulls of the best arm.

        - Warning: this does not adapt to dynamic settings!
        """
        fig = plt.figure()
        colors = palette(self.nbPolicies)
        markers = makemarkers(self.nbPolicies)
        X = self._times[2:]
        for i, policy in enumerate(self.policies):
            Y = self.getBestArmPulls(i, envId)[2:]
            lw = 10 if ('$N=' in policy.__cachedstr__ or 'Aggr' in policy.__cachedstr__ or 'CORRAL' in policy.__cachedstr__ or 'LearnExp' in policy.__cachedstr__ or 'Exp4' in policy.__cachedstr__) else 8
            if len(self.policies) > 8:
                lw -= 1
            plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=policy.__cachedstr__, color=colors[i], marker=markers[i], markevery=(i / 50., 0.1), lw=lw, ms=int(1.5*lw))
        legend()
        self._xlabel(envId, r"Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        add_percent_formatter("yaxis", 1.0)
        plt.ylabel("Frequency of pulls of the optimal arm")
        plt.title("Best arm pulls frequency for different bandit algorithms, averaged ${}$ times\n${}$ arms{}: {}".format(self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    def printRunningTimes(self, envId=0, precision=3):
        """Print the average +- std running time of the different policies."""
        print("\nGiving the mean and std running times ...")
        try:
            from IPython.core.magics.execution import _format_time
        except ImportError:
            _format_time = str
        means, stds, _ = self.getRunningTimes(envId)
        for policyId in np.argsort(means):
            policy = self.policies[policyId]
            print("\nFor policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            mean_time, var_time = means[policyId], stds[policyId]
            if self.repetitions <= 1:
                print(u"    {} (mean of 1 run)".format(_format_time(mean_time, precision)))
            else:
                print(u"    {} ± {} per loop (mean ± std. dev. of {} runs)".format(_format_time(mean_time, precision), _format_time(var_time, precision), self.repetitions))
        for policyId, policy in enumerate(self.policies):
            print("For policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            mean_time, var_time = means[policyId], stds[policyId]
            print(r"T^{%i}_{T=%i,K=%i} = " % (policyId + 1, self.horizon, self.envs[envId].nbArms) + r"{} pm {}".format(int(round(1000 * mean_time)), int(round(1000 * var_time))))  # XXX in milliseconds
        # table_to_latex(mean_data=means, std_data=stds, labels=[policy.__cachedstr__ for policy in self.policies], fmt_function=_format_time)
    def plotRunningTimes(self, envId=0, savefig=None, base=1, unit="seconds"):
        """Plot the running times of the different policies, as a box plot for each."""
        means, _, all_times = self.getRunningTimes(envId=envId)
        # order by increasing mean time
        index_of_sorting = np.argsort(means)
        labels = [policy.__cachedstr__ for policy in self.policies]
        labels = [labels[i] for i in index_of_sorting]
        all_times = [np.asarray(all_times[i]) / float(base) for i in index_of_sorting]
        fig = plt.figure()
        violin_or_box_plot(data=all_times, labels=labels, boxplot=self.use_box_plot)
        plt.xlabel("Bandit algorithms{}".format(self.signature))
        ylabel = "Running times (in {}), for {} repetitions".format(unit, self.repetitions)
        plt.ylabel(ylabel)
        adjust_xticks_subplots(ylabel=ylabel, labels=labels)
        plt.title("Running times for different bandit algorithms, horizon $T={}$, averaged ${}$ times\n${}$ arms{}: {}".format(self.horizon, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    def printMemoryConsumption(self, envId=0):
        """Print the average +- std memory consumption of the different policies."""
        print("\nGiving the mean and std memory consumption ...")
        means, stds, _ = self.getMemoryConsumption(envId)
        for policyId in np.argsort(means):
            policy = self.policies[policyId]
            print("\nFor policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            mean_memory, var_memory = means[policyId], stds[policyId]
            if self.repetitions <= 1:
                print(u"    {} (mean of 1 run)".format(sizeof_fmt(mean_memory)))
            else:
                print(u"    {} ± {} (mean ± std. dev. of {} runs)".format(sizeof_fmt(mean_memory), sizeof_fmt(var_memory), self.repetitions))
        for policyId, policy in enumerate(self.policies):
            print("For policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            mean_memory, var_memory = means[policyId], stds[policyId]
            print(r"M^{%i}_{T=%i,K=%i} = " % (policyId + 1, self.horizon, self.envs[envId].nbArms) + r"{} pm {}".format(int(round(mean_memory)), int(round(var_memory))))  # XXX in B
        # table_to_latex(mean_data=means, std_data=stds, labels=[policy.__cachedstr__ for policy in self.policies], fmt_function=sizeof_fmt)
    def plotMemoryConsumption(self, envId=0, savefig=None, base=1024, unit="KiB"):
        """Plot the memory consumption of the different policies, as a box plot for each."""
        means, _, all_memories = self.getMemoryConsumption(envId=envId)
        # order by increasing mean memory consumption
        index_of_sorting = np.argsort(means)
        labels = [policy.__cachedstr__ for policy in self.policies]
        labels = [labels[i] for i in index_of_sorting]
        all_memories = [np.asarray(all_memories[i]) / float(base) for i in index_of_sorting]
        fig = plt.figure()
        violin_or_box_plot(data=all_memories, labels=labels, boxplot=self.use_box_plot)
        plt.xlabel("Bandit algorithms{}".format(self.signature))
        ylabel = "Memory consumption (in {}), for {} repetitions".format(unit, self.repetitions)
        plt.ylabel(ylabel)
        adjust_xticks_subplots(ylabel=ylabel, labels=labels)
        plt.title("Memory consumption for different bandit algorithms, horizon $T={}$, averaged ${}$ times\n${}$ arms{}: {}".format(self.horizon, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    def printNumberOfCPDetections(self, envId=0):
        """Print the average +- std number of change-point detections of the different policies."""
        means, stds, _ = self.getNumberOfCPDetections(envId)
        if np.max(means) == 0:
            return None
        print("\nGiving the mean and std number of CP detections ...")
        for policyId in np.argsort(means):
            policy = self.policies[policyId]
            print("\nFor policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            mean_number_of_cp_detections, var_number_of_cp_detections = means[policyId], stds[policyId]
            if self.repetitions <= 1:
                print(u"    {:.3g} (mean of 1 run)".format(mean_number_of_cp_detections))
            else:
                print(u"    {:.3g} ± {:.3g} (mean ± std. dev. of {} runs)".format(mean_number_of_cp_detections, var_number_of_cp_detections, self.repetitions))
        # table_to_latex(mean_data=means, std_data=stds, labels=[policy.__cachedstr__ for policy in self.policies])
    def plotNumberOfCPDetections(self, envId=0, savefig=None):
        """Plot the number of change-point detections of the different policies, as a box plot for each."""
        means, _, all_number_of_cp_detections = self.getNumberOfCPDetections(envId=envId)
        if np.max(means) == 0:
            return None
        # order by increasing mean number of change-point detections
        index_of_sorting = np.argsort(means)
        labels = [policy.__cachedstr__ for policy in self.policies]
        labels = [labels[i] for i in index_of_sorting]
        all_number_of_cp_detections = [np.asarray(all_number_of_cp_detections[i]) for i in index_of_sorting]
        fig = plt.figure()
        violin_or_box_plot(data=all_number_of_cp_detections, labels=labels, boxplot=self.use_box_plot)
        plt.xlabel("Bandit algorithms{}".format(self.signature))
        ylabel = "Number of detected change-points, for {} repetitions".format(self.repetitions)
        plt.ylabel(ylabel)
        adjust_xticks_subplots(ylabel=ylabel, labels=labels)
        plt.title("Detected change-points for different bandit algorithms, horizon $T={}$, averaged ${}$ times\n${}$ arms{}: {}".format(self.horizon, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    def printLastRegrets(self, envId=0, moreAccurate=False):
        """Print the last regrets of the different policies."""
        print("\nGiving the vector of final regrets ...")
        for policyId, policy in enumerate(self.policies):
            print("\n  For policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            last_regrets = self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)
            print("  Last regrets (for all repetitions) have:")
            print("Min of last regrets R_T = {:.3g}".format(np.min(last_regrets)))
            print("Mean of last regrets R_T = {:.3g}".format(np.mean(last_regrets)))
            print("Median of last regrets R_T = {:.3g}".format(np.median(last_regrets)))
            print("Max of last regrets R_T = {:.3g}".format(np.max(last_regrets)))
            print("Standard deviation R_T = {:.3g}".format(np.std(last_regrets)))
        for policyId, policy in enumerate(self.policies):
            print("For policy #{} called '{}' ...".format(policyId, policy.__cachedstr__))
            last_regrets = self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)
            print(r"R^{%i}_{T=%i,K=%i} = " % (policyId + 1, self.horizon, self.envs[envId].nbArms) + r"{} pm {}".format(int(round(np.mean(last_regrets))), int(round(np.std(last_regrets)))))
        means = [np.mean(self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)) for policyId in range(self.nbPolicies)]
        stds = [np.std(self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)) for policyId in range(self.nbPolicies)]
        # table_to_latex(mean_data=means, std_data=stds, labels=[policy.__cachedstr__ for policy in self.policies])
    def plotLastRegrets(self, envId=0,
                        normed=False, subplots=True, nbbins=15, log=False,
                        all_on_separate_figures=False, sharex=False, sharey=False,
                        boxplot=False, normalized_boxplot=True,
                        savefig=None, moreAccurate=False):
        """Plot histograms of the regrets R_T for all policies."""
        N = self.nbPolicies
        if N == 1:
            subplots = False  # no need for a subplot
        colors = palette(N)
        markers = makemarkers(N)
        if self.repetitions == 1:
            boxplot = True
        if boxplot:
            all_last_regrets = []
            for policyId, policy in enumerate(self.policies):
                last_regret = self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)
                if normalized_boxplot:
                    last_regret /= np.log(self.horizon)
                all_last_regrets.append(last_regret)
            means = [np.mean(last_regrets) for last_regrets in all_last_regrets]
            # order by increasing mean regret
            index_of_sorting = np.argsort(means)
            labels = [policy.__cachedstr__ for policy in self.policies]
            labels = [labels[i] for i in index_of_sorting]
            all_last_regrets = [np.asarray(all_last_regrets[i]) for i in index_of_sorting]
            fig = plt.figure()
            plt.xlabel("Bandit algorithms{}".format(self.signature))
            ylabel = "{}egret value $R_T{}$,\nfor $T = {}$, for {} repetitions".format("Normalized r" if normalized_boxplot else "R", r"/\log(T)" if normalized_boxplot else "", self.horizon, self.repetitions)
            plt.ylabel(ylabel, fontsize="x-small")
            violin_or_box_plot(data=all_last_regrets, labels=labels, boxplot=self.use_box_plot)
            adjust_xticks_subplots(ylabel=ylabel, labels=labels)
            plt.title("Regret for different bandit algorithms, horizon $T={}$, averaged ${}$ times\n${}$ arms{}: {}".format(self.horizon, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
        elif all_on_separate_figures:
            figs = []
            for policyId, policy in enumerate(self.policies):
                fig = plt.figure()
                plt.title("Histogram of regrets for {}\n${}$ arms{}: {}".format(policy.__cachedstr__, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
                plt.xlabel("Regret value $R_T$, horizon $T = {}${}".format(self.horizon, self.signature))
                plt.ylabel("Density of observations, ${}$ repetitions".format(self.repetitions))
                last_regrets = self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)
                try:
                    sns.distplot(last_regrets, hist=True, bins=nbbins, color=colors[policyId], kde_kws={'cut': 0, 'marker': markers[policyId], 'markevery': (policyId / 50., 0.1)})
                except np.linalg.linalg.LinAlgError:
                    print("WARNING: a call to sns.distplot() failed because of a numpy.linalg.linalg.LinAlgError exception... See https://api.travis-ci.org/v3/job/528931259/log.txt")  # WARNING
                legend()
                show_and_save(self.showplot, None if savefig is None else "{}__Algo_{}_{}".format(savefig, 1 + policyId, 1 + N), fig=fig, pickleit=USE_PICKLE)
                figs.append(fig)
            return figs
        elif subplots:
            nrows, ncols = nrows_ncols(N)
            fig, axes = plt.subplots(nrows, ncols, sharex=sharex, sharey=sharey)
            fig.suptitle("Histogram of regrets for different bandit algorithms\n${}$ arms{}: {}".format(self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
            # XXX See https://stackoverflow.com/a/36542971/
            ax0 = fig.add_subplot(111, frame_on=False)  # add a big axes, hide frame
            ax0.grid(False)  # hide grid
            ax0.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)  # hide ticks and tick labels of the big axes
            # Add the ylabel and xlabel only once, in the middle
            ax0.set_ylabel("{} of observations, ${}$ repetitions".format("Frequency" if normed else "Histogram and density", self.repetitions))
            ax0.set_xlabel("Regret value $R_T$, horizon $T = {}${}".format(self.horizon, self.signature))
            for policyId, policy in enumerate(self.policies):
                i, j = policyId % nrows, policyId // nrows
                ax = axes[i, j] if ncols > 1 else axes[i]
                last_regrets = self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate)
                try:
                    sns.distplot(last_regrets, ax=ax, hist=True, bins=nbbins, color=colors[policyId], kde_kws={'cut': 0, 'marker': markers[policyId], 'markevery': (policyId / 50., 0.1)})  # XXX
                except np.linalg.linalg.LinAlgError:
                    print("WARNING: a call to sns.distplot() failed because of a numpy.linalg.linalg.LinAlgError exception... See https://api.travis-ci.org/v3/job/528931259/log.txt")  # WARNING
                ax.set_title(policy.__cachedstr__, fontdict={'fontsize': 'xx-small'})  # XXX one of x-large, medium, small, None, xx-large, x-small, xx-small, smaller, larger, large
                ax.tick_params(axis='both', labelsize=8)  # XXX https://stackoverflow.com/a/11386056/
        else:
            fig = plt.figure()
            plt.title("Histogram of regrets for different bandit algorithms\n${}$ arms{}: {}".format(self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(1, latex=True)))
            plt.xlabel("Regret value $R_T$, horizon $T = {}${}".format(self.horizon, self.signature))
            plt.ylabel("{} of observations, ${}$ repetitions".format("Frequency" if normed else "Number", self.repetitions))
            all_last_regrets = []
            labels = []
            for policyId, policy in enumerate(self.policies):
                all_last_regrets.append(self.getLastRegrets(policyId, envId=envId, moreAccurate=moreAccurate))
                labels.append(policy.__cachedstr__)
            if self.nbPolicies > 6:
                nbbins = int(nbbins * self.nbPolicies / 6)
            for policyId in range(self.nbPolicies):
                try:
                    sns.distplot(all_last_regrets[policyId], label=labels[policyId], hist=False, color=colors[policyId], kde_kws={'cut': 0, 'marker': markers[policyId], 'markevery': (policyId / 50., 0.1)})  # , bins=nbbins)  # XXX
                except np.linalg.linalg.LinAlgError:
                    print("WARNING: a call to sns.distplot() failed because of a numpy.linalg.linalg.LinAlgError exception... See https://api.travis-ci.org/v3/job/528931259/log.txt")  # WARNING
            legend()
        # Common part
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    def plotHistoryOfMeans(self, envId=0, horizon=None, savefig=None):
        """ Plot the history of means, as a plot with the x axis being the time, the y axis the mean rewards, and K curves, one for each arm."""
        if horizon is None:
            horizon = self.horizon
        env = self.envs[envId]
        if hasattr(env, 'plotHistoryOfMeans'):
            fig = env.plotHistoryOfMeans(horizon=horizon, savefig=savefig, showplot=self.showplot)
            # FIXME https://github.com/SMPyBandits/SMPyBandits/issues/175#issuecomment-455637453
            # For one trajectory, we could ask Evaluator.Evaluator to store not only the number of detections, but more: the times of detections, for each arm (as a list of lists).
            # If we had these data (for each repetition), we could plot the detection times (for each arm) on a plot like this one.
            return fig
        else:
            print("Warning: environment {} does not have a method plotHistoryOfMeans...".format(env))  # DEBUG
# Helper function for the parallelization
def delayed_play(env, policy, horizon,
                 random_shuffle=random_shuffle, random_invert=random_invert, nb_break_points=nb_break_points,
                 seed=None, allrewards=None, repeatId=0,
                 useJoblib=False):
    """Helper function for the parallelization."""
    start_time = time.time()
    start_memory = getCurrentMemory(thread=useJoblib)
    # Give a unique seed to random & numpy.random for each call of this function
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)
    # We have to deepcopy because this function is Parallel-ized
    env = deepcopy(env)
    policy = deepcopy(policy)
    means = env.means
    if env.isChangingAtEachRepetition:
        means = env.newRandomArms()
    indexes_bestarm = np.nonzero(np.isclose(means, max(means)))[0]
    # Start the game
    policy.startGame()
    result = Result(env.nbArms, horizon, indexes_bestarm=indexes_bestarm, means=means)  # One Result object, for every policy
    # FIXME Monkey-patch policy.detect_change() to store the number of detections, see https://stackoverflow.com/a/42657312/
    if hasattr(policy, 'detect_change'):
        from types import MethodType
        old_detect_change = policy.detect_change

        def new_detect_change(self, *args, **kwargs):
            response_of_detect_change = old_detect_change(*args, **kwargs)
            if (isinstance(response_of_detect_change, bool) and response_of_detect_change) or (isinstance(response_of_detect_change, tuple) and response_of_detect_change[0]):
                result.number_of_cp_detections += 1
            return response_of_detect_change

        policy.detect_change = MethodType(new_detect_change, policy)
    # XXX Experimental support for random events: shuffling or inverting the list of arms, at these time steps
    if nb_break_points is None or nb_break_points <= 0:
        random_shuffle = False
        random_invert = False
    if nb_break_points > 0:
        t_events = [i * int(horizon / float(nb_break_points)) for i in range(nb_break_points)]

    prettyRange = tqdm(range(horizon), desc="Time t") if repeatId == 0 else range(horizon)
    for t in prettyRange:
        # 1. The player's policy chooses an arm
        choice = policy.choice()
        # 2. A random reward is drawn, from this arm at this time
        if allrewards is None:
            reward = env.draw(choice, t)
        else:
            reward = allrewards[choice, repeatId, t]
        # 3. The policy sees the reward
        policy.getReward(choice, reward)
        # 4. Finally we store the results
        result.store(t, choice, reward)

        if env.isDynamic:
            if t in env.changePoints:
                means = env.newRandomArms(t)
                indexes_bestarm = np.nonzero(np.isclose(means, np.max(means)))[0]
                result.change_in_arms(t, indexes_bestarm)
                if repeatId == 0:
                    print("\nNew means vector = {}, best arm(s) = {}, at time t = {} ...".format(means, indexes_bestarm, t))  # DEBUG

        # XXX remove these two special cases when the NonStationaryMAB is ready?
        # XXX the regret is not correct when displayed for these two cases…
        # XXX Experimental: shuffle the arms at the middle of the simulation
        if random_shuffle and t > 0 and t in t_events:
            indexes_bestarm = env.new_order_of_arm(shuffled(env.arms))
            result.change_in_arms(t, indexes_bestarm)
            if repeatId == 0:
                print("\nShuffling the arms, best arm(s) = {}, at time t = {} ...".format(indexes_bestarm, t))  # DEBUG
        # XXX Experimental: invert the order of the arms at the middle of the simulation
        if random_invert and t > 0 and t in t_events:
            indexes_bestarm = env.new_order_of_arm(env.arms[::-1])
            result.change_in_arms(t, indexes_bestarm)
            if repeatId == 0:
                print("\nInverting the order of the arms, best arm(s) = {}, at time t = {} ...".format(indexes_bestarm, t))  # DEBUG

    # Print the quality of estimation of the arm ranking for this policy, just for the 1st repetition
    if repeatId == 0 and hasattr(policy, 'estimatedOrder'):
        order = policy.estimatedOrder()
        print("\nEstimated order by the policy {} after {} steps: {} ...".format(policy, horizon, order))
        print("  ==> Optimal arm identification: {:.2%} (relative success)...".format(weightedDistance(order, env.means, n=1)))
        # print("  ==> Manhattan distance from optimal ordering: {:.2%} (relative success)...".format(manhattan(order)))
        # # print("  ==> Kendall tau distance from optimal ordering: {:.2%} (relative success)...".format(kendalltau(order)))
        # # print("  ==> Spearman distance from optimal ordering: {:.2%} (relative success)...".format(spearmanr(order)))
        # print("  ==> Gestalt distance from optimal ordering: {:.2%} (relative success)...".format(gestalt(order)))
        print("  ==> Mean distance from optimal ordering: {:.2%} (relative success)...".format(meanDistance(order)))

    # Finally, store the running time and consumed memory
    result.running_time = time.time() - start_time
    memory_consumption = getCurrentMemory(thread=useJoblib) - start_memory
    if memory_consumption == 0:
        # XXX https://stackoverflow.com/a/565382/
        memory_consumption = sys.getsizeof(pickle.dumps(policy))
        # if repeatId == 0: print("Warning: unable to get the memory consumption for policy {}, so we used a trick to measure {} bytes.".format(policy, memory_consumption))  # DEBUG
    result.memory_consumption = memory_consumption
    return result
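# Example (not part of the original module): delayed_play() can also be called directly for a
# single repetition, with any policy object exposing startGame(), choice() and getReward(); the
# toy policy below is purely illustrative, and Bernoulli is assumed to come from SMPyBandits.Arms.
#
# >>> from SMPyBandits.Arms import Bernoulli
# >>> class RandomPolicy(object):
# ...     def __init__(self, nbArms): self.nbArms = nbArms
# ...     def startGame(self): pass
# ...     def choice(self): return np.random.randint(self.nbArms)
# ...     def getReward(self, arm, reward): pass
# >>> env = MAB({'arm_type': Bernoulli, 'params': [0.1, 0.5, 0.9]})
# >>> result = delayed_play(env, RandomPolicy(env.nbArms), horizon=100, seed=42)
# >>> result.rewards.shape, result.pulls                # one Result object, as stored by startOneEnv()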
# --- Helper for loading a previous Evaluator object
def EvaluatorFromDisk(filepath='/tmp/saveondiskEvaluator.hdf5'):
    """ Create a new Evaluator object from the HDF5 file given in argument."""
    with open(filepath, 'r') as hdf:
        configuration = hdf.configuration
        evaluator = Evaluator(configuration)
        evaluator.loadfromdisk(hdf)
    return evaluator
# --- Utility function

from random import shuffle
from copy import copy
def shuffled(mylist):
    """Returns a shuffled version of the input 1D list.

    sorted() exists instead of list.sort(), but shuffled() does not exist instead of random.shuffle()...

    >>> from random import seed; seed(1234)  # reproducible results
    >>> mylist = [ 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    >>> shuffled(mylist)
    [0.9, 0.4, 0.3, 0.6, 0.5, 0.7, 0.1, 0.2, 0.8]
    >>> shuffled(mylist)
    [0.4, 0.3, 0.7, 0.5, 0.8, 0.1, 0.9, 0.6, 0.2]
    >>> shuffled(mylist)
    [0.4, 0.6, 0.9, 0.5, 0.7, 0.2, 0.1, 0.3, 0.8]
    >>> shuffled(mylist)
    [0.8, 0.7, 0.3, 0.1, 0.9, 0.5, 0.6, 0.2, 0.4]
    """
    copiedlist = copy(mylist)
    shuffle(copiedlist)
    return copiedlist
# --- Debugging

if __name__ == "__main__":
    # Code for debugging purposes.
    from doctest import testmod
    print("\nTesting automatically all the docstrings written in each function of this module:")
    testmod(verbose=True)