Source code for Environment.EvaluatorMultiPlayers

# -*- coding: utf-8 -*-
""" EvaluatorMultiPlayers class to wrap and run the simulations, for the multi-players case.
Lots of plotting methods, to have various visualizations. See documentation.
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.9"

# Generic imports
import sys
import pickle
USE_PICKLE = False   #: Should we save the figure objects to a .pickle file at the end of the simulation?
from copy import deepcopy
from re import search
import random
import time
# Scientific imports
import numpy as np
import matplotlib.pyplot as plt

import inspect
[docs]def _nbOfArgs(function):
    """ Count the number of arguments of a function, using :func:`inspect.signature` (Python 3) and falling back to :func:`inspect.getargspec` (Python 2)."""
    try:
        return len(inspect.signature(function).parameters)  # was signature(functions), a typo that always raised NameError
    except AttributeError:  # inspect.signature() does not exist in Python 2
        return len(inspect.getargspec(function).args)
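# A quick illustrative check of _nbOfArgs (a hedged sketch, assuming a standard
# Python 3 interpreter; this doctest-style example is not part of the library):
#
#     >>> def f(x, y, z=1): return x + y + z
#     >>> _nbOfArgs(f)
#     3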
# Local imports, libraries
try:
    from .usejoblib import USE_JOBLIB, Parallel, delayed
    from .usetqdm import USE_TQDM, tqdm
    # Local imports, tools and config
    from .plotsettings import BBOX_INCHES, signature, maximizeWindow, palette, makemarkers, add_percent_formatter, wraptext, wraplatex, legend, show_and_save, nrows_ncols, addTextForWorstCases, violin_or_box_plot, adjust_xticks_subplots
    from .sortedDistance import weightedDistance, manhattan, kendalltau, spearmanr, gestalt, meanDistance, sortedDistance
    from .fairnessMeasures import amplitude_fairness, std_fairness, rajjain_fairness, mean_fairness, fairnessMeasure, fairness_mapping
    # Local imports, objects and functions
    from .CollisionModels import onlyUniqUserGetsReward, noCollision, closerUserGetsReward, rewardIsSharedUniformly, defaultCollisionModel, full_lost_if_collision
    from .MAB import MAB, MarkovianMAB, ChangingAtEachRepMAB, NonStationaryMAB, PieceWiseStationaryMAB, IncreasingMAB
    from .ResultMultiPlayers import ResultMultiPlayers
    from .memory_consumption import getCurrentMemory, sizeof_fmt
except ImportError:
    from usejoblib import USE_JOBLIB, Parallel, delayed
    from usetqdm import USE_TQDM, tqdm
    # Local imports, tools and config
    from plotsettings import BBOX_INCHES, signature, maximizeWindow, palette, makemarkers, add_percent_formatter, wraptext, wraplatex, legend, show_and_save, nrows_ncols, addTextForWorstCases, violin_or_box_plot, adjust_xticks_subplots
    from sortedDistance import weightedDistance, manhattan, kendalltau, spearmanr, gestalt, meanDistance, sortedDistance
    from fairnessMeasures import amplitude_fairness, std_fairness, rajjain_fairness, mean_fairness, fairnessMeasure, fairness_mapping
    # Local imports, objects and functions
    from CollisionModels import onlyUniqUserGetsReward, noCollision, closerUserGetsReward, rewardIsSharedUniformly, defaultCollisionModel, full_lost_if_collision
    from MAB import MAB, MarkovianMAB, ChangingAtEachRepMAB, NonStationaryMAB, PieceWiseStationaryMAB, IncreasingMAB
    from ResultMultiPlayers import ResultMultiPlayers
    from memory_consumption import getCurrentMemory, sizeof_fmt

REPETITIONS = 1  #: Default nb of repetitions
DELTA_T_PLOT = 50  #: Default sampling rate for plotting
COUNT_RANKS_MARKOV_CHAIN = False  #: If true, count and then print a lot of statistics for the Markov Chain of the underlying configurations on ranks

MORE_ACCURATE = True  #: Use the count of selections instead of rewards for a more accurate mean/var reward measure.

plot_lowerbounds = True  #: Default is to plot the lower-bounds
USE_BOX_PLOT = True  #: True to use a boxplot, False to use a violinplot.
nb_break_points = 0  #: Default nb of random events
FINAL_RANKS_ON_AVERAGE = True  #: Default value for ``finalRanksOnAverage``
USE_JOBLIB_FOR_POLICIES = False  #: Default value for ``useJoblibForPolicies``. Using it does not speed things up (too much overhead from using too many threads), so it should really stay disabled.

# --- Class EvaluatorMultiPlayers
[docs]class EvaluatorMultiPlayers(object):
    """ Evaluator class to run the simulations, for the multi-players case. """
[docs]    def __init__(self, configuration, moreAccurate=MORE_ACCURATE):
        # Configuration
        self.cfg = configuration  #: Configuration dictionary
        # Attributes
        self.nbPlayers = len(self.cfg['players'])  #: Number of players
        print("Number of players in the multi-players game:", self.nbPlayers)
        self.horizon = self.cfg['horizon']  #: Horizon (number of time steps)
        print("Time horizon:", self.horizon)
        self.repetitions = self.cfg.get('repetitions', REPETITIONS)  #: Number of repetitions
        print("Number of repetitions:", self.repetitions)
        self.delta_t_plot = 1 if self.horizon <= 10000 else self.cfg.get('delta_t_plot', DELTA_T_PLOT)  #: Sampling rate for plotting
        print("Sampling rate for plotting, delta_t_plot:", self.delta_t_plot)
        self.horizon = int(self.horizon)
        print("Number of jobs for parallelization:", self.cfg['n_jobs'])
        self.collisionModel = self.cfg.get('collisionModel', defaultCollisionModel)  #: Which collision model should be used
        self.full_lost_if_collision = full_lost_if_collision.get(self.collisionModel.__name__, True)  #: Is there a full loss of rewards in case of collision? Needed to compute the correct decomposition of regret
        print("Using collision model {} (function {}).\nMore details:\n{}".format(self.collisionModel.__name__, self.collisionModel, self.collisionModel.__doc__))
        self.signature = signature
        # Flags
        self.moreAccurate = moreAccurate  #: Use the count of selections instead of rewards for a more accurate mean/var reward measure.
        print("Using accurate regrets and last regrets? {}".format(moreAccurate))
        self.finalRanksOnAverage = self.cfg.get('finalRanksOnAverage', FINAL_RANKS_ON_AVERAGE)  #: Is the final display of ranks done on average rewards?
        self.averageOn = self.cfg.get('averageOn', 5e-3)  #: How many last steps to use for the final rank average rewards
        self.nb_break_points = self.cfg.get('nb_break_points', nb_break_points)  #: How many random events?
        self.plot_lowerbounds = self.cfg.get('plot_lowerbounds', plot_lowerbounds)  #: Should we plot the lower-bounds?
        self.useJoblib = USE_JOBLIB and self.cfg['n_jobs'] != 1  #: Use joblib to parallelize the for loop on repetitions (useful)
        self.showplot = self.cfg.get('showplot', True)  #: Show the plot (interactive display or not)
        self.use_box_plot = USE_BOX_PLOT or (self.repetitions == 1)  #: To use a box plot (or a violin plot if False). Forced to box plot if repetitions=1.
        self.count_ranks_markov_chain = self.cfg.get('count_ranks_markov_chain', COUNT_RANKS_MARKOV_CHAIN)  #: If true, count and then print a lot of statistics for the Markov Chain of the underlying configurations on ranks
        self.change_labels = self.cfg.get('change_labels', {})  #: Possibly empty dictionary to map 'playerId' to new labels (overwrite their name).
        self.append_labels = self.cfg.get('append_labels', {})  #: Possibly empty dictionary to map 'playerId' to new labels (by appending the result from 'append_labels').
        # Internal object memory
        self.envs = []  #: List of environments
        self.players = []  #: List of players
        self.__initEnvironments__()
        # Internal vectorial memory
        self.rewards = dict()  #: For each env, history of rewards
        self.pulls = dict()  #: For each env, keep the history of arm pulls (mean)
        self.lastPulls = dict()  #: For each env, keep the distribution of arm pulls
        self.allPulls = dict()  #: For each env, keep the full history of arm pulls
        self.collisions = dict()  #: For each env, keep the history of collisions on all arms
        self.lastCumCollisions = dict()  #: For each env, last count of collisions on all arms
        self.nbSwitchs = dict()  #: For each env, keep the history of switches (changes of configuration of players)
        self.bestArmPulls = dict()  #: For each env, keep the history of best arm pulls
        self.freeTransmissions = dict()  #: For each env, keep the history of successful transmissions (1 - collisions, basically)
        self.lastCumRewards = dict()  #: For each env, last accumulated rewards, to compute variance and histogram of the whole regret R_T
        self.runningTimes = dict()  #: For each env, keep the history of running times
        self.memoryConsumption = dict()  #: For each env, keep the history of memory consumption
        print("Number of environments to try:", len(self.envs))  # DEBUG
        # XXX: WARNING no memorized vector should have dimension horizon * repetitions, that explodes the RAM consumption!
        for envId in range(len(self.envs)):  # Zeros everywhere
            self.rewards[envId] = np.zeros((self.nbPlayers, self.horizon))
            self.lastCumRewards[envId] = np.zeros(self.repetitions)
            self.pulls[envId] = np.zeros((self.nbPlayers, self.envs[envId].nbArms), dtype=np.int32)
            self.lastPulls[envId] = np.zeros((self.nbPlayers, self.envs[envId].nbArms, self.repetitions), dtype=np.int32)
            self.allPulls[envId] = np.zeros((self.nbPlayers, self.envs[envId].nbArms, self.horizon), dtype=np.int32)
            self.collisions[envId] = np.zeros((self.envs[envId].nbArms, self.horizon))
            self.lastCumCollisions[envId] = np.zeros((self.envs[envId].nbArms, self.repetitions), dtype=np.int32)
            self.nbSwitchs[envId] = np.zeros((self.nbPlayers, self.horizon), dtype=np.int32)
            self.bestArmPulls[envId] = np.zeros((self.nbPlayers, self.horizon), dtype=np.int32)
            self.freeTransmissions[envId] = np.zeros((self.nbPlayers, self.horizon), dtype=np.int32)
            self.runningTimes[envId] = np.zeros((self.nbPlayers, self.repetitions))
            self.memoryConsumption[envId] = np.zeros((self.nbPlayers, self.repetitions))
        # To speed up plotting
        self._times = np.arange(1, 1 + self.horizon)
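    # Example (an illustrative sketch only; the exact constructor signatures of
    # the arm and policy classes below are assumptions, see the configuration_*.py
    # scripts of SMPyBandits for authoritative examples):
    #
    #     from SMPyBandits.Arms import Bernoulli
    #     from SMPyBandits.Policies import UCB
    #     from SMPyBandits.PoliciesMultiPlayers import rhoRand
    #     nbArms, M, horizon = 3, 2, 10000
    #     configuration = {
    #         'horizon': horizon, 'repetitions': 10, 'n_jobs': 1, 'verbosity': 6,
    #         'environment': [{'arm_type': Bernoulli, 'params': [0.3, 0.5, 0.9]}],
    #         'players': rhoRand(M, nbArms, UCB).children,  # M decentralized children
    #     }
    #     evaluation = EvaluatorMultiPlayers(configuration)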
# --- Init methods
[docs]    def __initEnvironments__(self):
        """ Create environments."""
        nbArms = []
        for configuration_arms in self.cfg['environment']:
            print("Using this dictionary to create a new environment:\n", configuration_arms)  # DEBUG
            new_mab_problem = None
            if isinstance(configuration_arms, dict) \
               and "arm_type" in configuration_arms \
               and "params" in configuration_arms:
                # PieceWiseStationaryMAB or NonStationaryMAB or ChangingAtEachRepMAB
                if "listOfMeans" in configuration_arms["params"] \
                   and "changePoints" in configuration_arms["params"]:
                    new_mab_problem = PieceWiseStationaryMAB(configuration_arms)
                elif "newMeans" in configuration_arms["params"] \
                     and "args" in configuration_arms["params"]:
                    if "changePoints" in configuration_arms["params"]:
                        new_mab_problem = NonStationaryMAB(configuration_arms)
                    else:
                        new_mab_problem = ChangingAtEachRepMAB(configuration_arms)
                # MarkovianMAB
                elif configuration_arms["arm_type"] == "Markovian" \
                     and "transitions" in configuration_arms["params"]:
                    new_mab_problem = MarkovianMAB(configuration_arms)
                # IncreasingMAB
                elif "change_lower_amplitude" in configuration_arms:
                    new_mab_problem = IncreasingMAB(configuration_arms)
            if new_mab_problem is None:
                new_mab_problem = MAB(configuration_arms)
            self.envs.append(new_mab_problem)
            nbArms.append(new_mab_problem.nbArms)
        if len(set(nbArms)) != 1:
            # FIXME add support of the multi-environments evaluator for MP policies with different numbers of arms in the scenarios.
            raise ValueError("ERROR: right now, the multi-environments evaluator does not work well for MP policies, if the scenarios have different numbers of arms!")
[docs]    def __initPlayers__(self, env):
        """ Create or initialize players."""
        playersId = self.cfg.get('playersId', '0')
        for playerId, player in enumerate(self.cfg['players']):
            print("- Adding player #{:>2} = {} ...".format(playerId + 1, player))  # DEBUG
            if isinstance(player, dict):  # Either the 'player' is a config dict
                print("  Creating this player from a dictionary 'player' = {} ...".format(player))  # DEBUG
                self.players.append(player['archtype'](env.nbArms, **player['params']))
            else:  # Or it is already a player object
                print("  Using this already created player 'player' = {} ...".format(player))  # DEBUG
                self.players.append(player)
        for playerId in range(len(self.players)):
            self.players[playerId].__cachedstr__ = str(self.players[playerId])
            if playersId in self.append_labels:
                self.players[playerId].__cachedstr__ += self.append_labels[playersId]
            if playersId in self.change_labels:
                self.players[playerId].__cachedstr__ = self.change_labels[playersId]
# --- Start computation
[docs]    def startAllEnv(self):
        """Simulate all envs."""
        for envId, env in enumerate(self.envs):
            self.startOneEnv(envId, env)
[docs]    def startOneEnv(self, envId, env):
        """Simulate that env."""
        print("\n\nEvaluating environment:", repr(env))  # DEBUG
        self.players = []
        self.__initPlayers__(env)
        # Get the position of the best arms
        means = env.means
        bestarm = env.maxArm  # FIXME for > 1 player, this has no meaning
        indexes_bestarm = np.nonzero(np.isclose(means, bestarm))[0]

        def store(r, repeatId):
            """Store the result of the experiment r."""
            self.rewards[envId] += np.cumsum(r.rewards, axis=1)  # cumsum on time
            self.lastCumRewards[envId][repeatId] = np.sum(r.rewards)  # sum on time and sum on players
            self.pulls[envId] += r.pulls
            self.lastPulls[envId][:, :, repeatId] = r.pulls
            self.allPulls[envId] += r.allPulls
            self.collisions[envId] += r.collisions
            self.lastCumCollisions[envId][:, repeatId] = np.sum(r.collisions, axis=1)  # sum on time
            for playerId in range(self.nbPlayers):
                self.nbSwitchs[envId][playerId, 1:] += (np.diff(r.choices[playerId, :]) != 0)
                self.bestArmPulls[envId][playerId, :] += np.cumsum(np.in1d(r.choices[playerId, :], indexes_bestarm))  # FIXME there is probably a bug in this computation
                self.freeTransmissions[envId][playerId, :] += np.array([r.choices[playerId, t] not in r.collisions[:, t] for t in range(self.horizon)])
                self.runningTimes[envId][playerId, repeatId] = r.running_time
                self.memoryConsumption[envId][playerId, repeatId] = r.memory_consumption

        # Start now
        if self.useJoblib:
            seeds = np.random.randint(low=0, high=100 * self.repetitions, size=self.repetitions)
            repeatIdout = 0
            for r in Parallel(n_jobs=self.cfg['n_jobs'], verbose=self.cfg['verbosity'])(
                delayed(delayed_play)(env, self.players, self.horizon, self.collisionModel, seed=seeds[repeatId], repeatId=repeatId, count_ranks_markov_chain=self.count_ranks_markov_chain, useJoblib=self.useJoblib)
                for repeatId in tqdm(range(self.repetitions), desc="Repeat||")
            ):
                store(r, repeatIdout)
                repeatIdout += 1
            if env.isChangingAtEachRepetition:
                env._t += self.repetitions  # new self.repetitions draws!
        else:
            for repeatId in tqdm(range(self.repetitions), desc="Repeat"):
                r = delayed_play(env, self.players, self.horizon, self.collisionModel, repeatId=repeatId, count_ranks_markov_chain=self.count_ranks_markov_chain, useJoblib=self.useJoblib)
                store(r, repeatId)
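    # Example (a minimal sketch): once the evaluator is built, running every
    # configured environment is a single call; startOneEnv can also be called
    # directly to re-run just one environment:
    #
    #     evaluation.startAllEnv()
    #     # or, equivalently:
    #     for envId, env in enumerate(evaluation.envs):
    #         evaluation.startOneEnv(envId, env)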
# --- Save to disk methods
[docs]    def saveondisk(self, filepath="saveondisk_EvaluatorMultiPlayers.hdf5"):
        """ Save the content of the internal data to a HDF5 file on the disk.

        - See http://docs.h5py.org/en/stable/quick.html if needed.
        """
        # 1. create the h5py file
        import h5py
        h5file = h5py.File(filepath, "w")
        # 2. store main attributes and all other attributes, if they exist
        for name_of_attr in [
            "nbPlayers", "horizon", "repetitions", "delta_t_plot", "collisionModel", "full_lost_if_collision", "signature", "nb_break_points", "plot_lowerbounds", "moreAccurate", "finalRanksOnAverage", "useJoblib", "showplot", "use_box_plot", "count_ranks_markov_chain", "cache_rewards", "change_labels", "append_labels"
        ]:
            if not hasattr(self, name_of_attr):
                continue
            value = getattr(self, name_of_attr)
            if inspect.isfunction(value):
                value = value.__name__
            if isinstance(value, str):
                value = np.string_(value)
            try:
                h5file.attrs[name_of_attr] = value
            except (ValueError, TypeError):
                print("Error: when saving the Evaluator object to a HDF5 file, the attribute named {} (value {} of type {}) couldn't be saved. Skipping...".format(name_of_attr, value, type(value)))  # DEBUG
        # 3. for each environment
        h5file.attrs["number_of_envs"] = len(self.envs)
        for envId in range(len(self.envs)):
            # 3.a. create a subgroup for this env
            sbgrp = h5file.create_group("env_{}".format(envId))
            # 3.b. store attributes of the MAB problem
            mab = self.envs[envId]
            for name_of_attr in ["isChangingAtEachRepetition", "isMarkovian", "_sparsity", "means", "nbArms", "maxArm", "minArm"]:
                if not hasattr(mab, name_of_attr):
                    continue
                value = getattr(mab, name_of_attr)
                if isinstance(value, str):
                    value = np.string_(value)
                try:
                    sbgrp.attrs[name_of_attr] = value
                except (ValueError, TypeError):
                    print("Error: when saving the Evaluator object to a HDF5 file, the attribute named {} (value {} of type {}) couldn't be saved. Skipping...".format(name_of_attr, value, type(value)))  # DEBUG
            # 3.c. store data for that env
            for name_of_dataset in [
                "rewards", "lastCumRewards", "pulls", "lastPulls", "allPulls", "collisions", "lastCumCollisions", "nbSwitchs", "bestArmPulls", "freeTransmissions", "runningTimes", "memoryConsumption"
            ]:
                if not (hasattr(self, name_of_dataset) and envId in getattr(self, name_of_dataset)):
                    continue
                data = getattr(self, name_of_dataset)[envId]
                try:
                    sbgrp.create_dataset(name_of_dataset, data=data)
                except (ValueError, TypeError) as e:
                    print("Error: when saving the Evaluator object to a HDF5 file, the dataset named {} (value of type {} and shape {} and dtype {}) couldn't be saved. Skipping...".format(name_of_dataset, type(data), data.shape, data.dtype))  # DEBUG
                    print("Exception:\n", e)  # DEBUG
            # 3.d. compute and store data for that env
            for methodName in ["getRunningTimes", "getMemoryConsumption", "getPulls", "getNbSwitchs", "getBestArmPulls", "getfreeTransmissions", "getCollisions", "getRewards", "getFirstRegretTerm", "getSecondRegretTerm", "getThirdRegretTerm", "getCentralizedRegret", "getLastRegrets"]:
                if not hasattr(self, methodName):
                    continue
                name_of_dataset = methodName.replace("get", "")
                name_of_dataset = name_of_dataset[0].lower() + name_of_dataset[1:]
                if name_of_dataset in sbgrp:
                    name_of_dataset = methodName  # XXX be sure to not use the same name twice, e.g., for getRunningTimes and runningTimes
                method = getattr(self, methodName)
                try:
                    if _nbOfArgs(method) > 2:
                        if isinstance(method(0, envId=envId), tuple):
                            data = np.array([method(playerId, envId=envId)[0] for playerId in range(len(self.players))])
                        else:
                            data = np.array([method(playerId, envId=envId) for playerId in range(len(self.players))])
                    else:
                        if isinstance(method(envId), tuple):
                            data = method(envId)[0]
                        else:
                            data = method(envId)
                except TypeError:
                    if isinstance(method(envId), tuple):
                        data = method(envId)[0]
                    else:
                        data = method(envId)
                try:
                    sbgrp.create_dataset(name_of_dataset, data=data)
                except (ValueError, TypeError) as e:
                    print("Error: when saving the Evaluator object to a HDF5 file, the dataset named {} (value of type {} and shape {} and dtype {}) couldn't be saved. Skipping...".format(name_of_dataset, type(data), data.shape, data.dtype))  # DEBUG
                    print("Exception:\n", e)  # DEBUG
        # 4. when done, close the file
        h5file.close()
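    # Example (a minimal sketch, assuming the h5py package is installed and a
    # file was previously written by saveondisk): re-opening the file and
    # listing what was stored; only standard h5py calls are used here.
    #
    #     import h5py
    #     with h5py.File("saveondisk_EvaluatorMultiPlayers.hdf5", "r") as f:
    #         print(dict(f.attrs))             # main attributes (horizon, repetitions, ...)
    #         print(list(f["env_0"].keys()))   # datasets stored for the first env
    #         pulls = f["env_0"]["pulls"][()]  # load one dataset as a numpy array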
[docs]    def loadfromdisk(self, filepath):
        """ Update the internal memory of the Evaluator object by loading data from the opened HDF5 file.

        .. warning:: FIXME this is not YET implemented!
        """
        # FIXME I just have to fill all the internal matrices from the HDF5 file?
        raise NotImplementedError
# --- Getter methods
[docs]    def getPulls(self, playerId, envId=0):
        """Extract mean pulls."""
        return self.pulls[envId][playerId, :] / float(self.repetitions)

[docs]    def getAllPulls(self, playerId, armId, envId=0):
        """Extract mean of all pulls."""
        return self.allPulls[envId][playerId, armId, :] / float(self.repetitions)

[docs]    def getNbSwitchs(self, playerId, envId=0):
        """Extract mean nb of switches."""
        return self.nbSwitchs[envId][playerId, :] / float(self.repetitions)

[docs]    def getCentralizedNbSwitchs(self, envId=0):
        """Extract average of mean nb of switches."""
        return np.sum(self.nbSwitchs[envId], axis=0) / (float(self.repetitions) * self.nbPlayers)

[docs]    def getBestArmPulls(self, playerId, envId=0):
        """Extract mean of best arm pulls."""
        # We have to divide by an arange() = cumsum(ones) to get a frequency
        return self.bestArmPulls[envId][playerId, :] / (float(self.repetitions) * self._times)

[docs]    def getfreeTransmissions(self, playerId, envId=0):
        """Extract mean of successful transmissions."""
        return self.freeTransmissions[envId][playerId, :] / float(self.repetitions)

[docs]    def getCollisions(self, armId, envId=0):
        """Extract mean number of collisions."""
        return self.collisions[envId][armId, :] / float(self.repetitions)

[docs]    def getRewards(self, playerId, envId=0):
        """Extract mean of rewards."""
        return self.rewards[envId][playerId, :] / float(self.repetitions)
[docs]    def getRegretMean(self, playerId, envId=0):
        """Extract mean of regret, for one arm for one player (no meaning).

        .. warning:: This is the centralized regret, *for one arm*, it does not make much sense in the multi-players setting!
        """
        return np.cumsum(self.envs[envId].get_maxArm(self.horizon) - self.getRewards(playerId, envId))
[docs]    def getCentralizedRegret_LessAccurate(self, envId=0):
        """Compute the empirical centralized regret: cumsum on time of the mean rewards of the M best arms, minus cumsum on time of the empirical rewards obtained by the players, based on accumulated rewards."""
        assert self.nbPlayers <= self.envs[envId].nbArms, "WARNING getCentralizedRegret_LessAccurate is not yet implemented in the case when there are more players than arms!"  # DEBUG
        # FIXED use self.envs[envId].get_maxArms(M=self.nbPlayers, horizon=self.horizon)
        averageBestRewards = np.cumsum(self.envs[envId].get_maxArms(M=self.nbPlayers, horizon=self.horizon))
        # And for the actual rewards, the collisions are counted in the rewards logged in self.getRewards
        actualRewards = np.sum([self.getRewards(playerId, envId=envId) for playerId in range(self.nbPlayers)], axis=0)  # was hardcoded envId=0, which ignored the requested environment
        return averageBestRewards - actualRewards
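    # In formulas (a reading of the code above, not an extra definition): if
    # mu_(1) >= ... >= mu_(M) denote the M largest arm means, this estimator is
    #
    #     R_T = sum_{t=1}^{T} sum_{j=1}^{M} mu_(j)  -  sum_{t=1}^{T} sum_{j=1}^{M} E[r^j_t],
    #
    # i.e., the best possible cumulated sum of means minus the cumulated
    # empirical rewards actually obtained (collisions already lower the rewards).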
# --- Three terms in the regret
[docs]    def getFirstRegretTerm(self, envId=0):
        """Extract and compute the first term :math:`(a)` in the centralized regret: losses due to pulling suboptimal arms."""
        losses = np.zeros(self.horizon)
        allMeans = self.envs[envId].get_allMeans(self.horizon)  # Shape: (nbArms, horizon)
        allPulls = self.allPulls[envId] / float(self.repetitions)  # Shape: (nbPlayers, nbArms, horizon)
        # it's hard to write this with vector operations, so let's just use a loop...
        for t in range(self.horizon):
            means = allMeans[:, t]
            sortingIndex = np.argsort(means)
            means = np.sort(means)
            deltaMeansWorstArms = means[-self.nbPlayers] - means[:-self.nbPlayers]
            allWorstPulls = allPulls[:, sortingIndex[:-self.nbPlayers], t]
            worstPulls = np.sum(allWorstPulls, axis=0)  # sum over all players
            losses[t] = np.dot(deltaMeansWorstArms, worstPulls)  # Count and sum over k in M-worst
        # Conclusion
        firstRegretTerm = np.cumsum(losses)  # Accumulate losses
        return firstRegretTerm
[docs]    def getSecondRegretTerm(self, envId=0):
        """Extract and compute the second term :math:`(b)` in the centralized regret: losses due to not pulling optimal arms."""
        losses = np.zeros(self.horizon)
        allMeans = self.envs[envId].get_allMeans(self.horizon)  # Shape: (nbArms, horizon)
        allPulls = self.allPulls[envId] / float(self.repetitions)  # Shape: (nbPlayers, nbArms, horizon)
        # it's hard to write this with vector operations, so let's just use a loop...
        for t in range(self.horizon):
            means = allMeans[:, t]
            sortingIndex = np.argsort(means)
            means = np.sort(means)
            deltaMeansBestArms = means[-self.nbPlayers:] - means[-self.nbPlayers]
            allBestPulls = allPulls[:, sortingIndex[-self.nbPlayers:], t]
            bestMisses = 1 - np.sum(allBestPulls, axis=0)  # sum over all players
            losses[t] = np.dot(deltaMeansBestArms, bestMisses)  # Count and sum over k in M-best
        # Conclusion
        secondRegretTerm = np.cumsum(losses)  # Accumulate losses
        return secondRegretTerm
[docs]    def getThirdRegretTerm(self, envId=0):
        """Extract and compute the third term :math:`(c)` in the centralized regret: losses due to collisions."""
        allMeans = self.envs[envId].get_allMeans(self.horizon)  # Shape: (nbArms, horizon)
        countCollisions = self.collisions[envId]  # Shape: (nbArms, horizon)
        if not self.full_lost_if_collision:
            print("Warning: the collision model ({}) does *not* yield a loss in communication when colliding (one user can communicate, or on average one user can communicate), so countCollisions -= 1 for the 3rd regret term ...".format(self.collisionModel.__name__))  # DEBUG
            countCollisions = np.maximum(0, countCollisions - 1)
        losses = np.sum(allMeans * countCollisions, axis=0) / float(self.repetitions)  # Count and sum over k in 1...K
        thirdRegretTerm = losses
        return thirdRegretTerm
[docs]    def getCentralizedRegret_MoreAccurate(self, envId=0):
        """Compute the empirical centralized regret, based on counts of selections and not actual rewards."""
        return self.getFirstRegretTerm(envId=envId) + self.getSecondRegretTerm(envId=envId) + self.getThirdRegretTerm(envId=envId)
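    # The identity used above: the centralized regret decomposes exactly as
    #
    #     R_T = (a) losses from pulling the K - M suboptimal arms
    #         + (b) losses from not pulling the M optimal arms
    #         + (c) losses from collisions (weighted by the arm means),
    #
    # so the regret can be reconstructed from selection counts only, without
    # using the (noisier) empirical rewards.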
[docs]    def getCentralizedRegret(self, envId=0, moreAccurate=None):
        """Use either the more accurate or the less accurate regret count."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        if moreAccurate:
            return self.getCentralizedRegret_MoreAccurate(envId=envId)
        else:
            return self.getCentralizedRegret_LessAccurate(envId=envId)
# --- Last regrets
[docs]    def getLastRegrets_LessAccurate(self, envId=0):
        """Extract last regrets, based on accumulated rewards."""
        # FIXME it depends on the collision model!
        assert self.nbPlayers <= self.envs[envId].nbArms, "WARNING getLastRegrets_LessAccurate is not yet implemented in the case when there are more players than arms!"  # DEBUG
        sumBestMeans = np.sum(self.envs[envId].get_maxArms(M=self.nbPlayers, horizon=self.horizon))
        # if self.envs[envId].nbArms < self.nbPlayers:
        #     # collisions are certain, so the best strategy is to put all the collisions in the worst arm
        #     worstArm = np.min(meansArms)
        #     sumBestMeans -= worstArm  # This counts the collisions
        return sumBestMeans - self.lastCumRewards[envId]
[docs]    def getAllLastWeightedSelections(self, envId=0):
        """Extract weighted count of selections."""
        all_last_weighted_selections = np.zeros(self.repetitions)
        lastCumCollisions = self.lastCumCollisions[envId]
        means = self.envs[envId].means  # Shape: (nbArms)
        for armId, mean in enumerate(means):
            last_selections = np.sum(self.lastPulls[envId][:, armId, :], axis=0)  # sum over players
            all_last_weighted_selections += mean * (last_selections - lastCumCollisions[armId, :])
        return all_last_weighted_selections
[docs]    def getLastRegrets_MoreAccurate(self, envId=0):
        """Extract last regrets, based on counts of selections and not actual rewards."""
        # FIXME it depends on the collision model!
        assert self.nbPlayers <= self.envs[envId].nbArms, "WARNING getLastRegrets_MoreAccurate is not yet implemented in the case when there are more players than arms!"  # DEBUG
        sumBestMeans = np.sum(self.envs[envId].get_maxArms(M=self.nbPlayers, horizon=self.horizon))
        # if self.envs[envId].nbArms < self.nbPlayers:
        #     # collisions are certain, so the best strategy is to put all the collisions in the worst arm
        #     worstArm = np.min(meansArms)
        #     sumBestMeans -= worstArm  # This counts the collisions
        return sumBestMeans - self.getAllLastWeightedSelections(envId=envId)
[docs]    def getLastRegrets(self, envId=0, moreAccurate=None):
        """Use either the more accurate or the less accurate regret count."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        if moreAccurate:
            return self.getLastRegrets_MoreAccurate(envId=envId)
        else:
            return self.getLastRegrets_LessAccurate(envId=envId)
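    # Example (a hedged sketch): comparing the two estimators of the final
    # regret after a finished simulation; both return one value per repetition:
    #
    #     more = evaluation.getLastRegrets(envId=0, moreAccurate=True)
    #     less = evaluation.getLastRegrets(envId=0, moreAccurate=False)
    #     print(np.mean(more), np.std(more), np.mean(less), np.std(less))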
[docs]    def getRunningTimes(self, envId=0):
        """Get the means and stds and list of running times of the different players."""
        all_times = [self.runningTimes[envId][playerId, :] for playerId in range(self.nbPlayers)]
        means = [np.mean(times) for times in all_times]
        stds = [np.std(times) for times in all_times]
        return means, stds, all_times
[docs]    def getMemoryConsumption(self, envId=0):
        """Get the means and stds and list of memory consumptions of the different players."""
        all_memories = [self.memoryConsumption[envId][playerId, :] for playerId in range(self.nbPlayers)]
        for playerId in range(self.nbPlayers):
            all_memories[playerId] = [m for m in all_memories[playerId] if m > 0]
        means = [np.mean(memories) for memories in all_memories]
        stds = [np.std(memories) for memories in all_memories]
        return means, stds, all_memories
# --- Plotting methods
[docs]    def plotRewards(self, envId=0, savefig=None, semilogx=False, moreAccurate=None):
        """Plot the decentralized (vectorial) rewards, for each player."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        fig = plt.figure()
        ymin = 0
        colors = palette(self.nbPlayers)
        markers = makemarkers(self.nbPlayers)
        X = self._times - 1
        cumRewards = np.zeros((self.nbPlayers, self.horizon))
        for playerId, player in enumerate(self.players):
            label = 'Player #{:>2}: {}'.format(playerId + 1, _extract(player.__cachedstr__))
            Y = self.getRewards(playerId, envId)
            cumRewards[playerId, :] = Y
            ymin = min(ymin, np.min(Y))
            if semilogx:
                plt.semilogx(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[playerId], marker=markers[playerId], markevery=(playerId / 50., 0.1), lw=2)
            else:
                plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[playerId], marker=markers[playerId], markevery=(playerId / 50., 0.1), lw=2)
        legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        if self.nb_break_points > 0:  # DONE fix math formula in case of non-stationary bandits
            plt.ylabel("Cumulative personal reward {}".format(r"$\sum_{s=1}^{t} \sum_{k=1}^{%d} \mu_k(t) \mathbb{E}_{%d}[1(I(t)=k)]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[r_t]$" % self.repetitions))
        else:
            plt.ylabel("Cumulative personal reward {}".format(r"$\sum_{k=1}^{%d} \mu_k\mathbb{E}_{%d}[T_k(t)]$" % (self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[r_t]$" % self.repetitions))
        plt.title("Multi-players $M = {}$ : Personal reward for each player, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def plotFairness(self, envId=0, savefig=None, semilogx=False, fairness="default", evaluators=()):
        """Plot a certain measure of "fairness", from these personal rewards, supporting more than one environment (use evaluators to give a list of other environments)."""
        fig = plt.figure()
        X = self._times - 1
        evaluators = [self] + list(evaluators)  # Default to only [self]
        colors = palette(len(evaluators))
        markers = makemarkers(len(evaluators))
        plot_method = plt.semilogx if semilogx else plt.plot
        # Decide which fairness function to use
        fairnessFunction = fairness_mapping[fairness] if isinstance(fairness, str) else fairness
        fairnessName = fairness if isinstance(fairness, str) else getattr(fairness, '__name__', "std_fairness")
        for evaId, eva in enumerate(evaluators):
            label = eva.strPlayers(short=True)
            cumRewards = np.zeros((eva.nbPlayers, eva.horizon))
            for playerId, _ in enumerate(eva.players):
                cumRewards[playerId, :] = eva.getRewards(playerId, envId)
            # Plot only one fairness term
            fairness = fairnessFunction(cumRewards)
            plot_method(X[::self.delta_t_plot][2:], fairness[::self.delta_t_plot][2:], markers[evaId] + '-', label=label, markevery=(evaId / 50., 0.1), color=colors[evaId], lw=2)
        if len(evaluators) > 1:
            legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}$, {}{}".format(self.horizon, self.strPlayers() if len(evaluators) == 1 else "", self.signature))
        add_percent_formatter("yaxis", 1.0)
        plt.ylabel("Centralized measure of fairness for cumulative rewards ({})".format(fairnessName.title()))
        plt.title("Multi-players $M = {}$ : Centralized measure of fairness, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def plotRegretCentralized(self, envId=0, savefig=None, semilogx=False, semilogy=False, loglog=False, normalized=False, evaluators=(), subTerms=False, sumofthreeterms=False, moreAccurate=None):
        """Plot the centralized cumulated regret, supporting more than one environment (use evaluators to give a list of other environments).

        - The lower bounds are also plotted (Besson & Kaufmann, and Anandkumar et al).
        - The three terms of the regret are also plotted if evaluators = () (that's the default).
        """
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        X0 = X = self._times - 1
        fig = plt.figure()
        evaluators = [self] + list(evaluators)  # Default to only [self]
        colors = palette(5 if len(evaluators) == 1 and subTerms else len(evaluators))
        markers = makemarkers(5 if len(evaluators) == 1 and subTerms else len(evaluators))
        plot_method = plt.loglog if loglog else plt.plot
        plot_method = plt.semilogy if semilogy else plot_method
        plot_method = plt.semilogx if semilogx else plot_method
        # Loop
        for evaId, eva in enumerate(evaluators):
            if subTerms:
                Ys = [None] * 3
                labels = [""] * 3
                Ys[0] = eva.getFirstRegretTerm(envId)
                labels[0] = "$(a)$ term: Pulls of {} suboptimal arms (lower-bounded)".format(max(0, self.envs[envId].nbArms - self.nbPlayers))
                Ys[1] = eva.getSecondRegretTerm(envId)
                labels[1] = "$(b)$ term: Non-pulls of {} optimal arms".format(min(self.nbPlayers, self.envs[envId].nbArms))
                Ys[2] = eva.getThirdRegretTerm(envId)
                labels[2] = "$(c)$ term: Weighted count of collisions"
            Y = eva.getCentralizedRegret(envId, moreAccurate=moreAccurate)
            label = "{}umulated centralized regret".format("Normalized c" if normalized else "C") if len(evaluators) == 1 else eva.strPlayers(short=True)
            if semilogx or loglog:  # FIXED for semilogx plots, truncate to only show t >= 100
                X, Y = X0[X0 >= 100], Y[X0 >= 100]
                if subTerms:
                    for i in range(len(Ys)):
                        Ys[i] = Ys[i][X0 >= 100]
            if normalized:
                Y = Y[X >= 1] / np.log(X[X >= 1])  # XXX prevent /0
                if subTerms:
                    for i in range(len(Ys)):
                        Ys[i] = Ys[i][X >= 1] / np.log(X[X >= 1])  # XXX prevent /0
            meanY = np.mean(Y)
            # Now plot
            plot_method(X[::self.delta_t_plot], Y[::self.delta_t_plot], (markers[evaId] + '-'), markevery=(evaId / 50., 0.1), label=label, color=colors[evaId], lw=2)
            if len(evaluators) == 1:
                # if not semilogx and not loglog and not semilogy:
                #     # We plot a horizontal line ----- at the mean regret
                #     plot_method(X[::self.delta_t_plot], meanY * np.ones_like(X)[::self.delta_t_plot], '--', label="Mean cumulated centralized regret", color=colors[evaId], lw=2)  # " = ${:.3g}$".format(meanY)
                if subTerms:
                    if sumofthreeterms:
                        Ys.append(Ys[0] + Ys[1] + Ys[2])
                        labels.append("Sum of 3 terms (= regret)")
                        # print("Difference between regret and sum of three terms:", Y - np.array(Ys[-1]))  # DEBUG
                    for i, (Y, label) in enumerate(zip(Ys, labels)):
                        plot_method(X[::self.delta_t_plot], Y[::self.delta_t_plot], (markers[i + 1] + '-'), markevery=((i + 1) / 50., 0.1), label=label, color=colors[i + 1], lw=2)
        if semilogx or loglog:  # Manual fix for issue https://github.com/SMPyBandits/SMPyBandits/issues/38
            plt.xscale('log')
        if semilogy or loglog:  # Manual fix for issue https://github.com/SMPyBandits/SMPyBandits/issues/38
            plt.yscale('log')
        # We also plot our lower bound
        if not self.envs[envId].isDynamic:
            try:
                # XXX In fact, the lower-bound is also true for Bayesian policies! Finite means ARE ALWAYS linear!
                # I should write the proof, but I convinced myself that the lower-bound is still correct (in a certain sense), and at least it gives an overview of the (average) complexity of the problem (randomly drawn and) used for the experiments.
                lowerbound, anandkumar_lowerbound, centralized_lowerbound = self.envs[envId].lowerbound_multiplayers(self.nbPlayers)
                if not (semilogx or semilogy or loglog):
                    print("\nThis MAB problem has: \n - a [Lai & Robbins] complexity constant C(mu) = {:.3g} for the 1-player problem ... \n - an Optimal Arm Identification factor H_OI(mu) = {:.2%} ...".format(self.envs[envId].lowerbound(), self.envs[envId].hoifactor()))  # DEBUG
                if self.envs[envId].isDynamic:
                    print("WARNING this env is in fact dynamic, this complexity term and H_OI factor do not have much sense... (they are computed from the average of the complexity for all mean vectors drawn in the repeated experiments...)")  # DEBUG
                print(" - [Anandtharam et al] centralized lower-bound = {:.3g},\n - [Anandkumar et al] decentralized lower-bound = {:.3g}\n - Our better (larger) decentralized lower-bound = {:.3g},".format(centralized_lowerbound, anandkumar_lowerbound, lowerbound))  # DEBUG
                if normalized:
                    T = np.ones_like(X)
                else:
                    X = X[X >= 1]
                    T = np.log(X)
                if self.plot_lowerbounds:
                    plot_method(X[::self.delta_t_plot], lowerbound * T[::self.delta_t_plot], 'k-', label="Besson & Kaufmann L-B = ${:.3g} \; \log(t)$".format(lowerbound), lw=3)
                    plot_method(X[::self.delta_t_plot], anandkumar_lowerbound * T[::self.delta_t_plot], 'k--', label="Anandkumar L-B = ${:.3g} \; \log(t)$".format(anandkumar_lowerbound), lw=2)
                    plot_method(X[::self.delta_t_plot], centralized_lowerbound * T[::self.delta_t_plot], 'k:', label="Centralized L-B = ${:.3g} \; \log(t)$".format(centralized_lowerbound), lw=2)
            except AssertionError:
                print("Error: Unable to compute and display the lower-bound...")  # DEBUG
        # Labels and legends
        legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}$, {}{}".format(self.horizon, self.strPlayers() if len(evaluators) == 1 else "", self.signature))
        if self.nb_break_points > 0:
            plt.ylabel("{}umulative non-stationary centralized regret\n{}".format("Normalized c" if normalized else "C", r"$\sum_{s=1}^{t} \sum_{k=1}^{%d} \mu_k^*(s) - \sum_{s=1}^{t} \sum_{k=1}^{%d} \mu_k(s) \mathbb{P}_{%d}[A^j(t)=k,\overline{C}^j(t)]$" % (self.nbPlayers, self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[R_t]$" % self.repetitions))
        else:
            plt.ylabel("{}umulative centralized regret {}".format("Normalized c" if normalized else "C", r"$t \sum_{k=1}^{%d} \mu_k^* - \sum_{s=1}^{t} \sum_{k=1}^{%d} \mu_k(s) \mathbb{P}_{%d}[A^j(t)=k,\overline{C}^j(t)]$" % (self.nbPlayers, self.envs[envId].nbArms, self.repetitions) if moreAccurate else r"$\mathbb{E}_{%d}[R_t]$" % self.repetitions))
        plt.title("Multi-players $M = {}$ : {}umulated centralized regret, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, "Normalized c" if normalized else "C", self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
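    # Example (a hedged sketch): typical calls after a simulation, in the
    # spirit of the example scripts shipped with SMPyBandits:
    #
    #     evaluation.plotRegretCentralized(envId=0, subTerms=True)  # regret and its 3 terms
    #     evaluation.plotRegretCentralized(envId=0, semilogx=True)  # log scale in time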
[docs]    def plotNbSwitchs(self, envId=0, savefig=None, semilogx=False, cumulated=False):
        """Plot cumulated number of switches (to evaluate the switching costs), comparing each player."""
        X = self._times - 1
        fig = plt.figure()
        ymin = 0
        colors = palette(self.nbPlayers)
        markers = makemarkers(self.nbPlayers)
        plot_method = plt.semilogx if semilogx else plt.plot
        for playerId, player in enumerate(self.players):
            label = 'Player #{:>2}: {}'.format(playerId + 1, _extract(player.__cachedstr__))
            Y = self.getNbSwitchs(playerId, envId)
            if cumulated:
                Y = np.cumsum(Y)
            ymin = min(ymin, np.min(Y))
            plot_method(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[playerId], marker=markers[playerId], markevery=(playerId / 50., 0.1), linestyle='-' if cumulated else '', lw=2)
        legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        plt.ylim(ymin, max(plt.ylim()[1], 1))
        if not cumulated:
            add_percent_formatter("yaxis", 1.0)
        plt.ylabel("{} of switches by player".format("Cumulated number" if cumulated else "Frequency"))
        plt.title("Multi-players $M = {}$ : {}umber of switches for each player, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, "Cumulated n" if cumulated else "N", self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def plotNbSwitchsCentralized(self, envId=0, savefig=None, semilogx=False, cumulated=False, evaluators=()):
        """Plot the centralized cumulated number of switches (to evaluate the switching costs), supporting more than one environment (use evaluators to give a list of other environments)."""
        X = self._times - 1
        fig = plt.figure()
        ymin = 0
        evaluators = [self] + list(evaluators)  # Default to only [self]
        colors = palette(len(evaluators))
        markers = makemarkers(len(evaluators))
        plot_method = plt.semilogx if semilogx else plt.plot
        for evaId, eva in enumerate(evaluators):
            label = "" if len(evaluators) == 1 else eva.strPlayers(short=True)
            Y = eva.getCentralizedNbSwitchs(envId)
            if cumulated:
                Y = np.cumsum(Y)
            ymin = min(ymin, np.min(Y))
            plot_method(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[evaId], marker=markers[evaId], markevery=(evaId / 50., 0.1), linestyle='-' if cumulated else '', lw=2)
        if len(evaluators) > 1:
            legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}$, {}{}".format(self.horizon, self.strPlayers() if len(evaluators) == 1 else "", self.signature))
        if not cumulated:
            add_percent_formatter("yaxis", 1.0)
        plt.ylabel("{} of switches (changes of arms)".format("Cumulated number" if cumulated else "Frequency"))
        plt.title("Multi-players $M = {}$ : Total {}number of switches, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, "cumulated " if cumulated else "", self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def plotBestArmPulls(self, envId=0, savefig=None):
        """Plot the frequency of pulls of the best channel.

        - Warning: does not adapt to dynamic settings!
        """
        X = self._times - 1
        fig = plt.figure()
        colors = palette(self.nbPlayers)
        markers = makemarkers(self.nbPlayers)
        for playerId, player in enumerate(self.players):
            label = 'Player #{:>2}: {}'.format(playerId + 1, _extract(player.__cachedstr__))
            Y = self.getBestArmPulls(playerId, envId)
            plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=label, color=colors[playerId], marker=markers[playerId], markevery=(playerId / 50., 0.1), lw=2)
        legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        add_percent_formatter("yaxis", 1.0)
        # FIXME fix the computation in case of non-stationary bandits
        if self.nb_break_points > 0:
            print("WARNING the computation of the frequency of pulls of the optimal arm is wrong for non-stationary bandits...")  # DEBUG
        plt.ylabel("Frequency of pulls of the optimal arm")
        plt.title("Multi-players $M = {}$ : Best arm pulls frequency for each player, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, self.cfg['repetitions'], self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def plotAllPulls(self, envId=0, savefig=None, cumulated=True, normalized=False):
        """Plot the frequency of use of every channel, one figure for each channel. Not so useful."""
        X = self._times - 1
        mainfig = savefig
        colors = palette(self.nbPlayers)
        markers = makemarkers(self.nbPlayers)
        figs = []
        for armId in range(self.envs[envId].nbArms):
            figs.append(plt.figure())
            for playerId, player in enumerate(self.players):
                Y = self.getAllPulls(playerId, armId, envId)
                if cumulated:
                    Y = np.cumsum(Y)
                if normalized:
                    Y /= 1 + X
                plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], label=player.__cachedstr__, color=colors[playerId], linestyle='', marker=markers[playerId], markevery=(playerId / 50., 0.1), lw=2)
            legend()
            plt.xlabel("Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
            s = ("Normalized " if normalized else "") + ("Cumulated number" if cumulated else "Frequency")
            plt.ylabel("{} of pulls of the arm #{}".format(s, armId + 1))
            plt.title("Multi-players $M = {}$ : {} of pulls of the arm #{} for each player, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, s.lower(), armId + 1, self.cfg['repetitions'], self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
            maximizeWindow()
            if savefig is not None:
                savefig = mainfig.replace("allPulls", "allPulls_Arm{}".format(armId + 1))
                print("Saving to", savefig, "...")  # DEBUG
                plt.savefig(savefig, bbox_inches=BBOX_INCHES)
            plt.show() if self.showplot else plt.close()
        return figs
[docs]    def plotFreeTransmissions(self, envId=0, savefig=None, cumulated=False):
        """Plot the frequency of free transmissions."""
        X = self._times - 1
        fig = plt.figure()
        colors = palette(self.nbPlayers)
        for playerId, player in enumerate(self.players):
            Y = self.getfreeTransmissions(playerId, envId)
            if cumulated:
                Y = np.cumsum(Y)
            plt.plot(X[::self.delta_t_plot], Y[::self.delta_t_plot], '.', label=player.__cachedstr__, color=colors[playerId], markersize=1, lw=2)  # should only plot with markers
        legend()
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        add_percent_formatter("yaxis", 1.0)
        plt.ylabel("{}ransmission on a free channel".format("Cumulated T" if cumulated else "T"))
        plt.title("Multi-players $M = {}$ : {}free transmission for each player, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, "Cumulated " if cumulated else "", self.cfg['repetitions'], self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
    # TODO I should plot the evolution of the occupation ratio of each channel, as a function of time.
    # Starting from the average occupation (by primary users), as given by [1 - arm.mean], it should increase occupation[arm] when users choose it.
    # The reason/idea is that good arms (low occupation ratio) are pulled a lot, thus becoming not as available as they seemed.
[docs]    def plotNbCollisions(self, envId=0, savefig=None, semilogx=False, semilogy=False, loglog=False, cumulated=False, upperbound=False, evaluators=()):
        """Plot the frequency or cumulated number of collisions, supporting more than one environment (use evaluators to give a list of other environments)."""
        X = self._times - 1
        fig = plt.figure()
        evaluators = [self] + list(evaluators)  # Default to only [self]
        colors = palette(len(evaluators))
        markers = makemarkers(len(evaluators))
        plot_method = plt.loglog if loglog else plt.plot
        plot_method = plt.semilogy if semilogy else plot_method
        plot_method = plt.semilogx if semilogx else plot_method
        for evaId, eva in enumerate(evaluators):
            Y = np.zeros(eva.horizon)
            for armId in range(eva.envs[envId].nbArms):
                Y += eva.getCollisions(armId, envId)
            if cumulated:
                Y = np.cumsum(Y)
            Y /= eva.nbPlayers  # To normalize the count?
            plot_method(X[::self.delta_t_plot], Y[::self.delta_t_plot], (markers[evaId] + '-') if cumulated else '.', markevery=((evaId / 50., 0.1) if cumulated else None), label=eva.strPlayers(short=True), color=colors[evaId], alpha=1. if cumulated else 0.7, lw=2)
        if not cumulated:
            add_percent_formatter("yaxis", 1.0)
        # We also plot the upper bound
        if upperbound and cumulated:
            upperboundLog = self.envs[envId].upperbound_collisions(self.nbPlayers, X)
            print("Anandkumar et al. upper bound for the non-cumulated number of collisions is {:.3g} * log(t) here ...".format(upperboundLog[-1]))  # DEBUG
            plot_method(X, upperboundLog, 'k-', label="Anandkumar et al. upper bound", lw=3)
        else:
            print("No upper bound for the non-cumulated number of collisions...")  # DEBUG
        # Labels and legend
        plt.xlabel("Time steps $t = 1...T$, horizon $T = {}${}".format(self.horizon, self.signature))
        plt.ylabel("{} of collisions on all arms".format("Cumulated number" if cumulated else "Frequency"))
        legend()
        plt.title("Multi-players $M = {}$ : {}of collisions, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, "Cumulated number " if cumulated else "Frequency ", self.cfg['repetitions'], self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def plotFrequencyCollisions(self, envId=0, savefig=None, piechart=True, semilogy=False):
        """Plot the frequency of collisions, in a pie chart (histogram not supported yet)."""
        nbArms = self.envs[envId].nbArms
        Y = np.zeros(1 + nbArms)  # One extra "arm" for "no collision"
        labels = [''] * (1 + nbArms)  # Empty labels
        colors = palette(1 + nbArms)  # Get colors
        # All the other arms
        for armId, arm in enumerate(self.envs[envId].arms):
            # XXX we should not count just the fact that there were collisions, but instead count all collisions
            Y[armId] = np.sum(self.getCollisions(armId, envId))
        Y /= (self.horizon * self.nbPlayers)
        assert 0 <= np.sum(Y) <= 1, "Error: the sum of collisions = {}, averaged by horizon and nbPlayers, cannot be outside of [0, 1] ...".format(np.sum(Y))  # DEBUG
        for armId, arm in enumerate(self.envs[envId].arms):
            labels[armId] = "#${}$: ${}$ (${:.1%}$$\%$)".format(armId, repr(arm), Y[armId])
            print("  - For {},\tfrequency of collisions is {:.5g} ...".format(labels[armId], Y[armId]))  # DEBUG
            if Y[armId] < 1e-4:  # Do not display small slices
                labels[armId] = ''
        if np.isclose(np.sum(Y), 0):
            print("==> No collisions to plot ... Stopping now ...")  # DEBUG
            return
        # Special arm: no collision
        Y[-1] = 1 - np.sum(Y) if np.sum(Y) < 1 else 0
        labels[-1] = "No collision (${:.1%}$$\%$)".format(Y[-1]) if Y[-1] > 1e-4 else ''
        colors[-1] = 'lightgrey'
        # Start the figure
        fig = plt.figure()
        plt.xlabel("{}{}".format(self.strPlayers(), self.signature))
        if piechart:
            plt.axis('equal')
            plt.pie(Y, labels=labels, colors=colors, explode=[0.07] * len(Y), startangle=45)
        else:
            if semilogy:
                Y = np.log10(Y)  # use semilogy scale!
                Y -= np.min(Y)  # project back to [0, oo)
                Y /= np.sum(Y)  # project back to [0, 1)
            for i in range(len(Y)):
                plt.axvspan(i - 0.25, i + 0.25, 0, Y[i], label=labels[i], color=colors[i])
            plt.xticks(np.arange(len(Y)), ["Arm #$%i$" % i for i in range(nbArms)] + ["No collision"])
            plt.ylabel("Frequency of collision, in logarithmic scale" if semilogy else "Frequency of collision")
            if not semilogy:
                add_percent_formatter("yaxis", 1.0)
        legend()
        plt.title("Multi-players $M = {}$ : Frequency of collision for each arm, averaged ${}$ times\n${}$ arms{}: {}".format(self.nbPlayers, self.cfg['repetitions'], self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
[docs]    def printRunningTimes(self, envId=0, precision=3, evaluators=()):
        """Print the average ± std running time of the different players."""
        print("\nGiving the mean and std running times ...")
        try:
            from IPython.core.magics.execution import _format_time
        except ImportError:
            _format_time = str
        evaluators = [self] + list(evaluators)  # Default to only [self]
        for eva in evaluators:
            means, stds, _ = eva.getRunningTimes(envId)
            mean_time, std_time = np.sum(means), np.mean(stds)
            print("\nFor players called '{}' ...".format(eva.strPlayers(latex=False, short=True)))
            if eva.repetitions <= 1:
                print(u"    {} (mean of 1 run)".format(_format_time(mean_time, precision)))
            else:
                print(u"    {} ± {} per loop (mean ± std. dev. of {} runs)".format(_format_time(mean_time, precision), _format_time(std_time, precision), eva.repetitions))
[docs]    def printMemoryConsumption(self, envId=0, evaluators=()):
        """Print the average ± std memory consumption of the different players."""
        print("\nGiving the mean and std memory consumption ...")
        evaluators = [self] + list(evaluators)  # Default to only [self]
        for eva in evaluators:
            means, stds, _ = eva.getMemoryConsumption(envId)
            print("\nFor players called '{}' ...".format(eva.strPlayers(latex=False, short=True)))
            mean_memory, std_memory = np.sum(means), np.mean(stds)
            if eva.repetitions <= 1:
                print(u"    {} (mean of 1 run)".format(sizeof_fmt(mean_memory)))
            else:
                print(u"    {} ± {} (mean ± std. dev. of {} runs)".format(sizeof_fmt(mean_memory), sizeof_fmt(std_memory), eva.repetitions))
[docs]    def plotRunningTimes(self, envId=0, savefig=None, base=1, unit="seconds", evaluators=()):
        """Plot the running times of the different players, as a box plot for each evaluator."""
        means, all_times, labels = [], [], []
        evaluators = [self] + list(evaluators)  # Default to only [self]
        for eva in evaluators:
            _means, _, _all_times = eva.getRunningTimes(envId=envId)
            means.append(np.sum(_means))
            all_times.append(np.sum(_all_times, axis=0))
            labels.append(eva.strPlayers(latex=False, short=True))
        # order by increasing mean time
        index_of_sorting = np.argsort(means)
        labels = [labels[i] for i in index_of_sorting]
        all_times = [np.asarray(all_times[i]) / float(base) for i in index_of_sorting]
        fig = plt.figure()
        violin_or_box_plot(all_times, labels=labels, boxplot=self.use_box_plot)
        plt.xlabel("Policies{}".format(self.signature))
        ylabel = "Running times (in {}), for {} repetitions".format(unit, self.repetitions)
        plt.ylabel(ylabel)
        adjust_xticks_subplots(ylabel=ylabel, labels=labels)
        plt.title("Running times for different MP bandit algorithms, horizon $T={}$, averaged ${}$ times\n${}$ arms{}: {}".format(self.horizon, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=True)
        return fig
[docs]    def plotMemoryConsumption(self, envId=0, savefig=None, base=1024, unit="KiB", evaluators=()):
        """Plot the memory consumption of the different players, as a box plot for each."""
        means, all_memories, labels = [], [], []
        evaluators = [self] + list(evaluators)  # Default to only [self]
        for eva in evaluators:
            _means, _, _all_memories = eva.getMemoryConsumption(envId=envId)
            means.append(np.sum(_means))
            all_memories.append(np.sum(_all_memories, axis=0))
            labels.append(eva.strPlayers(latex=False, short=True))
        # order by increasing mean memory consumption
        index_of_sorting = np.argsort(means)
        labels = [labels[i] for i in index_of_sorting]
        all_memories = [np.asarray(all_memories[i]) / float(base) for i in index_of_sorting]
        fig = plt.figure()
        violin_or_box_plot(all_memories, labels=labels, boxplot=self.use_box_plot)
        plt.xlabel("Policies{}".format(self.signature))
        ylabel = "Memory consumption (in {}), for {} repetitions".format(unit, self.repetitions)
        plt.ylabel(ylabel)
        adjust_xticks_subplots(ylabel=ylabel, labels=labels)
        plt.title("Memory consumption for different MP bandit algorithms, horizon $T={}$, averaged ${}$ times\n${}$ arms{}: {}".format(self.horizon, self.repetitions, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
        show_and_save(self.showplot, savefig, fig=fig, pickleit=True)
        return fig
[docs]    def printFinalRanking(self, envId=0, verb=True):
        """Compute and print the ranking of the different players."""
        if verb:
            print("\nGiving the final ranks ...")
        assert 0 < self.averageOn < 1, "Error, the parameter averageOn of an EvaluatorMultiPlayers class has to be in (0, 1) strictly, but is = {} here ...".format(self.averageOn)  # DEBUG
        if verb:
            print("\nFinal ranking for this environment #{:>2} : {} ...".format(envId, self.strPlayers(latex=False, short=True)))  # DEBUG
        lastY = np.zeros(self.nbPlayers)
        for playerId, player in enumerate(self.players):
            Y = self.getRewards(playerId, envId)
            if self.finalRanksOnAverage:
                lastY[playerId] = np.mean(Y[-int(self.averageOn * self.horizon):])  # average value during the last averageOn% of the iterations (the missing ':' was a bug: it picked a single value instead of averaging)
            else:
                lastY[playerId] = Y[-1]  # get the last value
        # Sort lastY and give ranking
        index_of_sorting = np.argsort(-lastY)  # Get them by DECREASING rewards, i.e., increasing regrets
        if verb:
            for i, k in enumerate(index_of_sorting):
                player = self.players[k]
                print("- Player #{:>2} / {}, {}\twas ranked\t{} / {} for this simulation (last rewards = {:.5g}).".format(k + 1, self.nbPlayers, _extract(player.__cachedstr__), i + 1, self.nbPlayers, lastY[k]))  # DEBUG
        return lastY, index_of_sorting
[docs]    def printFinalRankingAll(self, envId=0, evaluators=()):
        """Compute and print the ranking of the different groups of players (one group per evaluator)."""
        evaluators = [self] + list(evaluators)  # Default to only [self]
        allLastY = np.zeros(len(evaluators))
        for evaId, eva in enumerate(evaluators):
            lastY, _ = eva.printFinalRanking(envId=envId, verb=False)
            allLastY[evaId] = np.sum(lastY)
        # Sort allLastY and give the ranking
        index_of_sorting = np.argsort(-allLastY)  # Sort by DECREASING final rewards: the best group comes first
        for i, k in enumerate(index_of_sorting):
            print("- Group of players #{:>2} / {}, {}\twas ranked\t{} / {} for this simulation (last rewards = {:.5g}).".format(k + 1, len(evaluators), evaluators[k].strPlayers(latex=False, short=True), i + 1, len(evaluators), allLastY[k]))  # DEBUG
        return allLastY, index_of_sorting
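# Standalone illustration of the argsort-based ranking used by the two methods
# above (added for clarity; runnable as a doctest):
#
# >>> import numpy as np
# >>> lastY = np.array([12.0, 45.0, 3.0])   # final cumulated rewards of 3 players
# >>> np.argsort(-lastY)                    # best (largest reward) first
# array([1, 0, 2])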
[docs]    def printLastRegrets(self, envId=0, evaluators=(), moreAccurate=None):
        """Print the last regrets of the different evaluators."""
        print("\nGiving the vector of final regrets ...")
        evaluators = [self] + list(evaluators)  # Default to only [self]
        for evaId, eva in enumerate(evaluators):
            print("\nFor evaluator #{:>2}/{} : {} (players {}) ...".format(1 + evaId, len(evaluators), eva, eva.strPlayers(latex=False, short=True)))
            last_regrets = eva.getLastRegrets(envId=envId, moreAccurate=moreAccurate)
            print("  Summary of the last regrets R_T (over all repetitions):")
            print("Min of last regrets R_T =", np.min(last_regrets))
            print("Mean of last regrets R_T =", np.mean(last_regrets))
            print("Median of last regrets R_T =", np.median(last_regrets))
            print("Max of last regrets R_T =", np.max(last_regrets))
            print("Std dev of last regrets R_T =", np.std(last_regrets))
[docs]    def printLastRegretsPM(self, envId=0, evaluators=(), moreAccurate=None):
        """Print the average ± std last regret of the different players."""
        print("\nGiving the mean and std last regret ...")
        evaluators = [self] + list(evaluators)  # Default to only [self]
        for eva in evaluators:
            last_regrets = eva.getLastRegrets(envId=envId, moreAccurate=moreAccurate)
            print("\nFor players called '{}' ...".format(eva.strPlayers(latex=False, short=True)))
            mean_regret, std_regret = np.mean(last_regrets), np.std(last_regrets)
            # FIXME
            mean_regret, std_regret = np.round(mean_regret), np.round(std_regret)
            if eva.repetitions <= 1:
                print(u"  {:g} (mean of 1 run)".format(mean_regret))
            else:
                print(u"  {:g} ± {:g} (mean ± std. dev. of {} runs)".format(mean_regret, std_regret, eva.repetitions))
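# Standalone illustration of the "mean ± std" display used above (runnable as a doctest):
#
# >>> import numpy as np
# >>> r = np.array([100.5, 98.2, 110.7])    # three last regrets, one per repetition
# >>> print(u"{:g} ± {:g}".format(np.round(np.mean(r)), np.round(np.std(r))))
# 103 ± 5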
[docs]    def plotLastRegrets(self, envId=0,
                            normed=False, subplots=True, nbbins=15, log=False,
                            all_on_separate_figures=False, sharex=False, sharey=False,
                            boxplot=False, normalized_boxplot=True,
                            savefig=None, moreAccurate=None,
                            evaluators=()):
        """Plot histogram of the regrets R_T for all evaluators."""
        moreAccurate = moreAccurate if moreAccurate is not None else self.moreAccurate
        if len(evaluators) == 0:  # no need for a subplot
            subplots = False
        evaluators = [self] + list(evaluators)  # Default to only [self]
        N = len(evaluators)
        colors = palette(N)
        if self.repetitions == 1:
            boxplot = True
        if boxplot:
            all_last_regrets = []
            labels = []
            for evaId, eva in enumerate(evaluators):
                last_regret = eva.getLastRegrets(envId=envId, moreAccurate=moreAccurate)
                if normalized_boxplot:
                    last_regret /= np.log(self.horizon)
                all_last_regrets.append(last_regret)
                labels.append(eva.strPlayers(short=True))
            means = [np.mean(last_regrets) for last_regrets in all_last_regrets]
            # Order by increasing mean regret
            index_of_sorting = np.argsort(means)
            labels = [labels[i] for i in index_of_sorting]
            all_last_regrets = [np.asarray(all_last_regrets[i]) for i in index_of_sorting]
            fig = plt.figure()
            plt.xlabel("Bandit algorithms{}".format(self.signature))
            ylabel = "{}egret value $R_T{}$,\nfor $T = {}$, for {} repetitions".format("Normalized r" if normalized_boxplot else "R", r"/\log(T)" if normalized_boxplot else "", self.horizon, self.repetitions)
            plt.ylabel(ylabel, fontsize="x-small")
            plt.title("Multi-players $M = {}$ : regrets for different bandit algorithms\n${}$ arms{}: {}".format(self.nbPlayers, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
            violin_or_box_plot(data=all_last_regrets, labels=labels, boxplot=self.use_box_plot)
            adjust_xticks_subplots(ylabel=ylabel, labels=labels)
            legend()
        elif all_on_separate_figures:
            figs = []
            for evaId, eva in enumerate(evaluators):
                fig = plt.figure()
                plt.title("Multi-players $M = {}$ : Histogram of regrets for {}\n${}$ arms{}: {}".format(self.nbPlayers, eva.strPlayers(short=True), self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
                plt.xlabel("Regret value $R_T$ at the end of simulation, for $T = {}${}".format(self.horizon, self.signature))
                plt.ylabel("{} of observations, ${}$ repetitions".format("Frequency" if normed else "Number", self.repetitions))
                last_regrets = eva.getLastRegrets(envId=envId, moreAccurate=moreAccurate)
                n, returned_bins, patches = plt.hist(last_regrets, density=normed, color=colors[evaId], bins=nbbins)
                addTextForWorstCases(plt, n, returned_bins, patches, normed=normed)
                legend()
                show_and_save(self.showplot, None if savefig is None else "{}__Algo_{}_{}".format(savefig, 1 + evaId, 1 + N), fig=fig, pickleit=USE_PICKLE)
                figs.append(fig)
            return figs
        elif subplots:
            nrows, ncols = nrows_ncols(N)
            fig, axes = plt.subplots(nrows, ncols, sharex=sharex, sharey=sharey)
            # Now for the figure
            fig.suptitle("Histogram of regrets for different multi-players bandit algorithms\n${}$ arms{}: {}".format(self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(nbPlayers=self.nbPlayers, latex=True)))
            # XXX See https://stackoverflow.com/a/36542971/
            ax0 = fig.add_subplot(111, frame_on=False)  # add a big axes, hide frame
            ax0.grid(False)  # hide grid
            ax0.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)  # hide ticks and tick labels of the big axes
            # Add the ylabel and xlabel only once, in the middle
            ax0.set_ylabel("{} of observations, ${}$ repetitions".format("Frequency" if normed else "Number", self.repetitions))
            ax0.set_xlabel("Regret value $R_T$ at the end of simulation, for $T = {}${}".format(self.horizon, self.signature))
            # Now for the subplots
            for evaId, eva in enumerate(evaluators):
                i, j = evaId % nrows, evaId // nrows
                ax = axes[i, j] if ncols > 1 else axes[i]
                # print("evaId = {}, i = {}, j = {}, nrows = {}, ncols = {}, ax = {} ...".format(evaId, i, j, nrows, ncols, ax))  # DEBUG
                last_regrets = eva.getLastRegrets(envId=envId, moreAccurate=moreAccurate)
                n, returned_bins, patches = ax.hist(last_regrets, density=normed, color=colors[evaId], bins=nbbins, log=log)
                addTextForWorstCases(ax, n, returned_bins, patches, normed=normed)
                ax.vlines(np.mean(last_regrets), 0, min(np.max(n), self.repetitions))  # display the mean regret as a vertical line
                ax.set_title(eva.strPlayers(short=True), fontdict={'fontsize': 'small'})  # XXX one of x-large, medium, small, None, xx-large, x-small, xx-small, smaller, larger, large
                ax.tick_params(axis='both', labelsize=10)  # XXX https://stackoverflow.com/a/11386056/
        else:
            fig = plt.figure()
            plt.title("Multi-players $M = {}$ : Histogram of regrets for different bandit algorithms\n${}$ arms{}: {}".format(self.nbPlayers, self.envs[envId].nbArms, self.envs[envId].str_sparsity(), self.envs[envId].reprarms(self.nbPlayers, latex=True)))
            plt.xlabel("Regret value $R_T$ at the end of simulation, for $T = {}${}".format(self.horizon, self.signature))
            plt.ylabel("{} of observations, ${}$ repetitions".format("Frequency" if normed else "Number", self.repetitions))
            all_last_regrets = []
            labels = []
            for evaId, eva in enumerate(evaluators):
                all_last_regrets.append(eva.getLastRegrets(envId=envId, moreAccurate=moreAccurate))
                labels.append(eva.strPlayers(short=True))
            ns, returned_bins, patchess = plt.hist(all_last_regrets, label=labels, density=normed, color=colors, bins=nbbins)
            for n, patches in zip(ns, patchess):
                addTextForWorstCases(plt, n, returned_bins, patches, normed=normed)
            legend()
        # Common part
        show_and_save(self.showplot, savefig, fig=fig, pickleit=USE_PICKLE)
        return fig
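# Hedged usage sketch (not in the original module), for an already-run
# EvaluatorMultiPlayers instance `evaluation`:
#
# >>> evaluation.plotLastRegrets(envId=0, boxplot=True)                         # doctest: +SKIP
# >>> evaluation.plotLastRegrets(envId=0, subplots=True, nbbins=25, log=True)   # doctest: +SKIP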
[docs]    def plotHistoryOfMeans(self, envId=0, horizon=None, savefig=None):
        """Plot the history of the means, with time on the x axis, mean rewards on the y axis, and one curve for each of the K arms."""
        if horizon is None:
            horizon = self.horizon
        env = self.envs[envId]
        if hasattr(env, 'plotHistoryOfMeans'):
            fig = env.plotHistoryOfMeans(horizon=horizon, savefig=savefig, showplot=self.showplot)
            # FIXME https://github.com/SMPyBandits/SMPyBandits/issues/175#issuecomment-455637453
            # For one trajectory, we could ask Evaluator.Evaluator to store not only the number of detections, but more! We could store the times of detections, for each arm (as a list of lists).
            # If we had these data (for each repetition), we could plot the detection times (for each arm) on a plot like this one.
            return fig
        else:
            print("Warning: environment {} does not have a plotHistoryOfMeans method ...".format(env))  # DEBUG
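# Hedged sketch (not in the original module) of the duck-typing contract that
# plotHistoryOfMeans relies on: any environment exposing a method with the
# signature used at the call site above gets delegated to; all others trigger
# the warning. `SomePieceWiseStationaryEnv` is a hypothetical name.
#
#   class SomePieceWiseStationaryEnv(object):
#       def plotHistoryOfMeans(self, horizon=None, savefig=None, showplot=True):
#           """Draw one curve per arm showing its (possibly changing) mean over time."""
#           ...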
[docs]    def strPlayers(self, short=False, latex=True):
        """Get a string of the players for this environment."""
        listStrPlayers = [_extract(player.__cachedstr__) for player in self.players]
        if len(set(listStrPlayers)) == 1:  # Unique user
            # if latex:
            #     text = r'${} \times$ {}'.format(self.nbPlayers, listStrPlayers[0])
            # else:
            #     text = r'{} x {}'.format(self.nbPlayers, listStrPlayers[0])
            text = listStrPlayers[0]
        else:
            text = ', '.join(listStrPlayers)
        text = wraptext(text)
        if not short:
            text = '{} players: {}'.format(self.nbPlayers, text)
        return text
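# Illustration (hedged; the exact strings depend on each player's __cachedstr__):
# with M = 3 identical players the repeated name is collapsed, so strPlayers()
# would give something like "3 players: rhoRand(UCB)", and strPlayers(short=True)
# simply "rhoRand(UCB)".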
[docs]def delayed_play(env, players, horizon, collisionModel,
                 seed=None, repeatId=0,
                 count_ranks_markov_chain=False,
                 useJoblib=False):
    """Helper function for the parallelization."""
    start_time = time.time()
    start_memory = getCurrentMemory(thread=useJoblib)
    # Give a unique seed to random & numpy.random for each call of this function
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)
    means = env.means
    if hasattr(env, "currentInterval"):
        env.currentInterval = 0
    if env.isChangingAtEachRepetition:
        means = env.newRandomArms()
    players = deepcopy(players)
    nbArms = env.nbArms
    nbPlayers = len(players)
    # random_arm_orders = [np.random.permutation(nbArms) for i in range(nbPlayers)]
    # Start the game
    for player in players:
        player.startGame()
    # Store the results
    result = ResultMultiPlayers(env.nbArms, horizon, nbPlayers, means=means)
    rewards = np.zeros(nbPlayers)
    choices = np.zeros(nbPlayers, dtype=np.int32)
    pulls = np.zeros((nbPlayers, nbArms), dtype=np.int32)
    collisions = np.zeros(nbArms, dtype=np.int32)
    # Print the ranks if possible  # DEBUG
    all_players_have_ranks = count_ranks_markov_chain and (repeatId == 0) and all(hasattr(p, 'rank') for p in players)  # DEBUG
    # This will count all the transitions of the Markov chain, to estimate their empirical probability at the end  # DEBUG
    if all_players_have_ranks:
        markov_chain_transitions = dict()  # DEBUG
        ranks = [p.rank for p in players]
        binranks = tuple(np.bincount(ranks, minlength=nbPlayers + 1)[1:])
        state = binranks

    prettyRange = tqdm(range(horizon), desc="Time t") if repeatId == 0 else range(horizon)
    for t in prettyRange:
        # Reset the arrays, faster than reallocating them!
        rewards.fill(0)
        pulls.fill(0)
        collisions.fill(0)
        # Every player decides which arm to pull
        for playerId, player in enumerate(players):
            # XXX here, the environment should apply ONCE a random permutation to each player, in order for the non-modified UCB-like algorithms to work fine in case of collisions (their initial exploration phase is non-random, hence leading to only collisions in the first steps, and ruining the performance)
            # choices[playerId] = random_arm_orders[playerId][player.choice()]
            choices[playerId] = player.choice()
            # print("  Round t = \t{}, player \t#{:>2}/{} ({}) \tchose : {} ...".format(t, playerId + 1, len(players), player, choices[playerId]))  # DEBUG
        # Then we decide whether there are collisions and what to do with them
        # XXX It is here that the players may receive a reward, if there is no collision
        collisionModel(t, env.arms, players, choices, rewards, pulls, collisions)
        # Finally we store the results
        result.store(t, choices, rewards, pulls, collisions)
        if env.isDynamic and t in env.changePoints:
            means = env.newRandomArms(t)
            if repeatId == 0:
                print("\nNew means vector = {}, at time t = {} ...".format(means, t))  # DEBUG
        # XXX During the simulation, if using rhoRand or another rank-based policy
        if all_players_have_ranks and t > 1:
            ranks = [p.rank for p in players]
            binranks = tuple(np.bincount(ranks, minlength=nbPlayers + 1)[1:])
            # print("  Round t = \t{}, the list of ranks is \t{}\n  and from the point of view of ranks it is \t{} ...".format(t, ranks, binranks))  # DEBUG
            previous_state, state = state, binranks
            markov_chain_transitions[(previous_state, state)] = markov_chain_transitions.get((previous_state, state), 0) + 1
            # print("  One more transition from {} to {} ... Currently it was seen {} times ...".format(previous_state, state, markov_chain_transitions[(previous_state, state)]))  # DEBUG

    # Print the quality of estimation of the arm ranking for this policy, just for the 1st repetition
    if repeatId == 0:
        if all_players_have_ranks:
            # At the end, print the information about the Markov chain states and transitions
            print("==> Information about the Markov chain states:")  # DEBUG
            states = {s1 for (s1, _) in markov_chain_transitions} | {s2 for (_, s2) in markov_chain_transitions}  # union of source and target states
            states = sorted(list(states))  # sort it, once and for all
            print("    The Markov chain has {:>4} = (2M-1 choose M) different states ...".format(len(states)))  # DEBUG
            for s in states:
                print("        ", s)
            print("==> Information about the Markov chain transitions:")  # DEBUG
            count_states = {}
            for (sum_count_out, s1) in sorted(zip([sum(markov_chain_transitions.get((s11, s3), 0) for s3 in states) for s11 in states], states)):
                print("\nState s1 = {} was seen {:>6} times ...".format(s1, sum_count_out))  # DEBUG
                count_states[tuple(sorted(s1))] = count_states.get(tuple(sorted(s1)), 0) + sum_count_out
                for (count, s2) in sorted(zip([markov_chain_transitions.get((s1, s3), 0) for s3 in states], states)):
                    if count > 0:
                        print("    The transition {} --> {} was seen {:>7} times ({:.2%}) ...".format(s1, s2, count, count / float(horizon)))  # DEBUG
                        if sum_count_out > 0:
                            print("        So the estimated proba is {:.3g} ...".format(count / sum_count_out))
            # Now from the set point of view
            print("\n\nNow with states just counting the strong partitions of M = {} ...".format(nbPlayers))  # DEBUG
            suniques = list({tuple(sorted(s1)) for s1 in states})
            for (seen, sunique) in sorted(zip([count_states[s] for s in suniques], suniques)):
                print("    The state {} was seen {:>7} times ({:.2%}) ...".format(sunique, seen, seen / float(horizon)))  # DEBUG
            # DONE for this visualization
        for playerId, player in enumerate(players):
            try:
                order = player.estimatedOrder()
                print("\nEstimated order by the policy {} after {} steps: {} ...".format(player, horizon, order))
                print("  ==> Optimal arm identification: {:.2%} (relative success) ...".format(weightedDistance(order, env.means, n=nbPlayers)))
                # print("  ==> Manhattan distance from optimal ordering: {:.2%} (relative success) ...".format(manhattan(order)))
                # # print("  ==> Kendall tau distance from optimal ordering: {:.2%} (relative success) ...".format(kendalltau(order)))
                # # print("  ==> Spearman distance from optimal ordering: {:.2%} (relative success) ...".format(spearmanr(order)))
                # print("  ==> Gestalt distance from optimal ordering: {:.2%} (relative success) ...".format(gestalt(order)))
                print("  ==> Mean distance from optimal ordering: {:.2%} (relative success) ...".format(meanDistance(order)))
            except AttributeError:
                print("Unable to print the estimated ordering, no method estimatedOrder was found!")

    # Finally, store the running time and the consumed memory
    result.running_time = time.time() - start_time
    memory_consumption = getCurrentMemory(thread=useJoblib) - start_memory
    if memory_consumption == 0:
        # XXX https://stackoverflow.com/a/565382/
        memory_consumption = sys.getsizeof(pickle.dumps(players))
        # if repeatId == 0: print("Warning: unable to get the memory consumption for players {}, so we used a trick to measure {} bytes.".format(players, memory_consumption))  # DEBUG
    result.memory_consumption = memory_consumption
    return result
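# Hedged sketch of how delayed_play can be dispatched with the joblib helpers
# imported above; the actual wiring inside EvaluatorMultiPlayers.run() may
# differ, and `env`, `players`, `horizon`, `collisionModel`, `repetitions`
# are assumed to be already defined:
#
# >>> results = Parallel(n_jobs=-1, verbose=5)(                    # doctest: +SKIP
# ...     delayed(delayed_play)(env, players, horizon, collisionModel,
# ...                           seed=repeatId, repeatId=repeatId, useJoblib=USE_JOBLIB)
# ...     for repeatId in range(repetitions))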
[docs]def _extract(text):
    """ Extract the str of a player, if it is a child, printed as '#[0-9]+<...>' --> '...'. """
    try:
        m = search("<[^>]+>", text).group(0)
        if m[0] == '<' and m[-1] == '>':
            return m[1:-1]  # Extract the text between < ... >
        else:
            return text
    except AttributeError:
        # search() returned None: no '<...>' pattern, so return the text unchanged
        return text
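# Doctest-style illustration of _extract (added for clarity; the player strings
# are hypothetical examples):
#
# >>> _extract("#1<rhoRand(UCB)>")
# 'rhoRand(UCB)'
# >>> _extract("UCB")   # no '<...>' pattern, the text is returned unchanged
# 'UCB'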