{ "cells": [ { "cell_type": "markdown", "metadata": { "toc": "true" }, "source": [ "# Table of Contents\n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "# An example of a small Multi-Player simulation, with rhoRand and Selfish, for different algorithms\n", "\n", "First, be sure to be in the main folder, or to have [SMPyBandits](https://github.com/SMPyBandits/SMPyBandits) installed, and import `EvaluatorMultiPlayers` from `Environment` package:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: SMPyBandits in ./venv3/lib/python3.6/site-packages (0.9.4)\n", "Requirement already satisfied: watermark in ./venv3/lib/python3.6/site-packages (1.7.0)\n", "Requirement already satisfied: joblib in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (0.13.0)\n", "Requirement already satisfied: scikit-optimize in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (0.5.2)\n", "Requirement already satisfied: seaborn in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (0.9.0)\n", "Requirement already satisfied: numpy in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (1.15.4)\n", "Requirement already satisfied: scikit-learn in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (0.20.0)\n", "Requirement already satisfied: matplotlib>=2 in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (3.0.2)\n", "Requirement already satisfied: scipy>0.9 in ./venv3/lib/python3.6/site-packages (from SMPyBandits) (1.1.0)\n", "Requirement already satisfied: ipython in ./venv3/lib/python3.6/site-packages (from watermark) (7.1.1)\n", "Requirement already satisfied: pandas>=0.15.2 in ./venv3/lib/python3.6/site-packages (from seaborn->SMPyBandits) (0.23.4)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in ./venv3/lib/python3.6/site-packages (from matplotlib>=2->SMPyBandits) (1.0.1)\n", "Requirement already satisfied: python-dateutil>=2.1 in ./venv3/lib/python3.6/site-packages (from matplotlib>=2->SMPyBandits) (2.7.5)\n", "Requirement already satisfied: cycler>=0.10 in ./venv3/lib/python3.6/site-packages (from matplotlib>=2->SMPyBandits) (0.10.0)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in ./venv3/lib/python3.6/site-packages (from matplotlib>=2->SMPyBandits) (2.3.0)\n", "Requirement already satisfied: pexpect; sys_platform != \"win32\" in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (4.6.0)\n", "Requirement already satisfied: pygments in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (2.2.0)\n", "Requirement already satisfied: backcall in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (0.1.0)\n", "Requirement already satisfied: jedi>=0.10 in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (0.13.1)\n", "Requirement already satisfied: decorator in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (4.3.0)\n", "Requirement already satisfied: pickleshare in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (0.7.5)\n", "Requirement already satisfied: traitlets>=4.2 in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (4.3.2)\n", "Requirement already satisfied: prompt-toolkit<2.1.0,>=2.0.0 in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (2.0.7)\n", "Requirement already satisfied: setuptools>=18.5 in ./venv3/lib/python3.6/site-packages (from ipython->watermark) (40.6.2)\n", "Requirement already satisfied: pytz>=2011k in ./venv3/lib/python3.6/site-packages (from pandas>=0.15.2->seaborn->SMPyBandits) (2018.7)\n", 
"Requirement already satisfied: six>=1.5 in ./venv3/lib/python3.6/site-packages (from python-dateutil>=2.1->matplotlib>=2->SMPyBandits) (1.11.0)\n", "Requirement already satisfied: ptyprocess>=0.5 in ./venv3/lib/python3.6/site-packages (from pexpect; sys_platform != \"win32\"->ipython->watermark) (0.6.0)\n", "Requirement already satisfied: parso>=0.3.0 in ./venv3/lib/python3.6/site-packages (from jedi>=0.10->ipython->watermark) (0.3.1)\n", "Requirement already satisfied: ipython-genutils in ./venv3/lib/python3.6/site-packages (from traitlets>=4.2->ipython->watermark) (0.2.0)\n", "Requirement already satisfied: wcwidth in ./venv3/lib/python3.6/site-packages (from prompt-toolkit<2.1.0,>=2.0.0->ipython->watermark) (0.1.7)\n", "Info: Using the Jupyter notebook version of the tqdm() decorator, tqdm_notebook() ...\n", "Lilian Besson \n", "\n", "CPython 3.6.6\n", "IPython 7.1.1\n", "\n", "SMPyBandits 0.9.4\n", "\n", "compiler : GCC 8.0.1 20180414 (experimental) [trunk revision 259383\n", "system : Linux\n", "release : 4.15.0-38-generic\n", "machine : x86_64\n", "processor : x86_64\n", "CPU cores : 4\n", "interpreter: 64bit\n" ] } ], "source": [ "!pip install SMPyBandits watermark\n", "%load_ext watermark\n", "%watermark -v -m -p SMPyBandits -a \"Lilian Besson\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Local imports\n", "from SMPyBandits.Environment import EvaluatorMultiPlayers, tqdm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We also need arms, for instance `Bernoulli`-distributed arm:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Import arms\n", "from SMPyBandits.Arms import Bernoulli" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And finally we need some single-player and multi-player Reinforcement Learning algorithms:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Import algorithms\n", "from SMPyBandits.Policies import *\n", "from SMPyBandits.PoliciesMultiPlayers import *" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "code_folding": [ 0 ] }, "outputs": [], "source": [ "# Just improving the ?? in Jupyter. 
, { "cell_type": "code", "execution_count": 5, "metadata": { "code_folding": [ 0 ] }, "outputs": [], "source": [ "# Just improving the ?? in Jupyter. Thanks to https://nbviewer.jupyter.org/gist/minrk/7715212\n", "from __future__ import print_function\n", "from IPython.core import page\n", "def myprint(s):\n", "    try:\n", "        print(s['text/plain'])\n", "    except (KeyError, TypeError):\n", "        print(s)\n", "page.page = myprint" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For instance, this imported the `Thompson` algorithm:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0;31mInit signature:\u001b[0m \u001b[0mThompson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbArms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposterior\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0;32mclass\u001b[0m \u001b[0;34m'SMPyBandits.Policies.Posterior.Beta.Beta'\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlower\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mamplitude\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m \n", "The Thompson (Bayesian) index policy.\n", "\n", "- By default, it uses a Beta posterior (:class:`Policies.Posterior.Beta`), one by arm.\n", "- Prior is initially flat, i.e., :math:`a=\\alpha_0=1` and :math:`b=\\beta_0=1`.\n", "\n", "- A non-flat prior for each arm can be given with parameters ``a`` and ``b``, for instance::\n", "\n", "    nbArms = 2\n", "    prior_failures = a = 100\n", "    prior_successes = b = 50\n", "    policy = Thompson(nbArms, a=a, b=b)\n", "    np.mean([policy.choice() for _ in range(1000)]) # 0.515 ~= 0.5: each arm has same prior!\n", "\n", "- A different prior for each arm can be given with parameters ``params_for_each_posterior``, for instance::\n", "\n", "    nbArms = 2\n", "    params0 = { 'a': 10, 'b': 5} # mean 1/3\n", "    params1 = { 'a': 5, 'b': 10} # mean 2/3\n", "    params = [params0, params1]\n", "    policy = Thompson(nbArms, params_for_each_posterior=params)\n", "    np.mean([policy.choice() for _ in range(1000)]) # 0.9719 ~= 1: arm 1 is better than arm 0 !\n", "\n", "- Reference: [Thompson - Biometrika, 1933].\n", "\u001b[0;31mInit docstring:\u001b[0m Create a new Bayesian policy, by creating a default posterior on each arm.\n", "\u001b[0;31mFile:\u001b[0m /tmp/SMPyBandits/notebooks/venv3/lib/python3.6/site-packages/SMPyBandits/Policies/Thompson.py\n", "\u001b[0;31mType:\u001b[0m type\n", "\n" ] } ], "source": [ "Thompson?" ] }
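, { "cell_type": "markdown", "metadata": {}, "source": [ "As a quick illustration (a minimal sketch: the three arm means and the horizon of 1000 steps are arbitrary choices, not from the original experiment), `Thompson` can be used directly as a single-player policy, with the usual `startGame()` / `choice()` / `getReward()` interface:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "np.random.seed(0)\n", "means = [0.1, 0.5, 0.9]  # hypothetical Bernoulli means, only for this sketch\n", "policy = Thompson(nbArms=len(means))\n", "policy.startGame()  # reset the internal state of the policy\n", "for t in range(1000):\n", "    arm = policy.choice()  # index of the arm played at time t\n", "    reward = float(np.random.random() < means[arm])  # simulated Bernoulli reward\n", "    policy.getReward(arm, reward)  # update the Beta posterior of the played arm\n", "policy.choice()  # after 1000 steps, the best arm (index 2) should be chosen most often" ] }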
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As well as the `rhoRand` and `Selfish` multi-player policy:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0;31mInit signature:\u001b[0m \u001b[0mrhoRand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbPlayers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbArms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplayerAlgo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxRank\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morthogonalRanks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlower\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mamplitude\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m \n", "rhoRand: implementation of the multi-player policy from [Distributed Algorithms for Learning..., Anandkumar et al., 2010](http://ieeexplore.ieee.org/document/5462144/).\n", " \n", "\u001b[0;31mInit docstring:\u001b[0m\n", "- nbPlayers: number of players to create (in self._players).\n", "- playerAlgo: class to use for every players.\n", "- nbArms: number of arms, given as first argument to playerAlgo.\n", "- maxRank: maximum rank allowed by the rhoRand child (default to nbPlayers, but for instance if there is 2 × rhoRand[UCB] + 2 × rhoRand[klUCB], maxRank should be 4 not 2).\n", "- `*args`, `**kwargs`: arguments, named arguments, given to playerAlgo.\n", "\n", "Example:\n", "\n", ">>> import sys; sys.path.insert(0, '..'); from Policies import *\n", ">>> import random; random.seed(0); import numpy as np; np.random.seed(0)\n", ">>> nbArms = 17\n", ">>> nbPlayers = 6\n", ">>> s = rhoRand(nbPlayers, nbArms, UCB)\n", ">>> [ child.choice() for child in s.children ]\n", "[12, 15, 0, 3, 3, 7]\n", ">>> [ child.choice() for child in s.children ]\n", "[9, 4, 6, 12, 1, 6]\n", "\n", "- To get a list of usable players, use ``s.children``.\n", "- Warning: ``s._players`` is for internal use ONLY!\n", "\u001b[0;31mFile:\u001b[0m /tmp/SMPyBandits/notebooks/venv3/lib/python3.6/site-packages/SMPyBandits/PoliciesMultiPlayers/rhoRand.py\n", "\u001b[0;31mType:\u001b[0m type\n", "\n" ] } ], "source": [ "rhoRand?" 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0;31mInit signature:\u001b[0m \u001b[0mSelfish\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbPlayers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbArms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplayerAlgo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpenalty\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlower\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mamplitude\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m \n", "Selfish: a multi-player policy where every player is selfish, playing on their side.\n", "\n", "- without nowing how many players there is, and\n", "- not even knowing that they should try to avoid collisions. When a collision happens, the algorithm simply receives a 0 reward for the chosen arm (can be changed with penalty= argument).\n", "\u001b[0;31mInit docstring:\u001b[0m\n", "- nbPlayers: number of players to create (in self._players).\n", "- playerAlgo: class to use for every players.\n", "- nbArms: number of arms, given as first argument to playerAlgo.\n", "- `*args`, `**kwargs`: arguments, named arguments, given to playerAlgo.\n", "\n", "Examples:\n", "\n", ">>> import sys; sys.path.insert(0, '..'); from Policies import *\n", ">>> import random; random.seed(0); import numpy as np; np.random.seed(0)\n", ">>> nbArms = 17\n", ">>> nbPlayers = 6\n", ">>> s = Selfish(nbPlayers, nbArms, Uniform)\n", ">>> [ child.choice() for child in s.children ]\n", "[12, 13, 1, 8, 16, 15]\n", ">>> [ child.choice() for child in s.children ]\n", "[12, 9, 15, 11, 6, 16]\n", "\n", "- To get a list of usable players, use ``s.children``.\n", "- Warning: ``s._players`` is for internal use ONLY!\n", "\n", ".. warning:: I want my code to stay compatible with Python 2, so I cannot use the `new syntax of keyword-only argument