notebook.community

Edit and run



In [1]:

    
import pmlib
import numpy as np



In [2]:

    
# uncomment to list all pmlib functionalities
# dir(pmlib)



In [3]:

    
# We create a PM (partial monitoring) instance for a stochastic 3-armed
# Bernoulli Multi-Armed Bandit (MAB).
# The parameters are the independent expected rewards of the arms.
bandit = pmlib.BernoulliBandit([0.75,0.5,0.25])



In [4]:

    
# Show the PM game matrices and the outcome distribution.
bandit.dump(plot=True, nice=True)

# Note that for this example we have 3 actions (one for each arm)
# and 2^3 = 8 outcomes (one for each possible reward vector).









    



***** 3-armed bandit *****
Actions: N=3 Outcomes: M=8

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      000
      001
      010
      011
      100
      101
      110
      111
    
  
  
    
      arm 0
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      arm 1
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
    
    
      arm 2
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      000
      001
      010
      011
      100
      101
      110
      111
    
  
  
    
      arm 0
      loss
      loss
      loss
      loss
      win
      win
      win
      win
    
    
      arm 1
      loss
      loss
      win
      win
      loss
      loss
      win
      win
    
    
      arm 2
      loss
      win
      loss
      win
      loss
      win
      loss
      win
    
  








    



Outcomes distribution (for stochastic games):
P(000)=0.09375 P(001)=0.03125 P(010)=0.09375 P(011)=0.03125 P(100)=0.28125 P(101)=0.09375 P(110)=0.28125 P(111)=0.09375



In [ ]:

    
# We can also consider bandits with strongly correlated arms
# by specifying, for instance, an outcome distribution
# where exactly two arms win at the same time: the probability mass is split
# evenly between the 4th, 6th and 7th outcomes (011, 101 and 110, assuming the
# same outcome ordering as in the dump above).
# NOTE: this overwrites the outcome distribution of the existing `bandit` object.
bandit.OutcomeDist = np.array([0,0,0,1/3.,0,1/3.,1/3.,0])
bandit.dump(plot=True, nice=True)



In [5]:

    
# Other well-known instances of PM are dynamic pricing and apple tasting.

# NOTE(review): the second argument (2.) is presumably the constant "c" that
# appears as "c=2" in the dynamic pricing benchmark names -- confirm against pmlib.
dp = pmlib.DynamicPricingPM([0.1,0.1,0.7,0.1], 2.)
at = pmlib.AppleTasting([0.05,0.95])

# Only the apple tasting game is displayed here; `dp` is created but not dumped.
at.dump(nice=True)









    



***** Apple tasting game *****
Actions: N=2 Outcomes: M=2

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      rotten
      good
    
  
  
    
      sell apple
      1.0
      0.0
    
    
      taste apple
      0.0
      1.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      rotten
      good
    
  
  
    
      sell apple
      blind
      blind
    
    
      taste apple
      rotten
      good
    
  








    



Outcomes distribution (for stochastic games):
P(rotten)=0.05 P(good)=0.95



In [ ]:

    
# We can also create a dueling bandit instance where the action is a pair of arms
# and the feedback is the relative reward.
# print(...) with a single string argument prints the same text on Python 2 and 3.
print("** Dueling bandit problem")
dueling = pmlib.BinaryUtilityDuelingBanditPM([0.75,0.5,0.5,0.25])
dueling.dump(plot=False,nice=True)



In [6]:

    
# We provide a list of benchmark game settings:
# pmlib.benchmark_games


# The pmlib.ProblemClass() function computes the complexity class of any finite game in the PM hierarchy.
#
# It can be either:
# * trivial     gives \Theta(1) minmax regret
# * easy        gives \Theta(\sqrt{T}) minmax regret
# * hard        gives \Theta(T^{2/3}) minmax regret
# * or intractable with a linear minmax regret
#
# (see Bartok et al. "Partial monitoring – classification, regret bounds, and algorithms" 2013)
#
# This function and many others are based on the python wrapper to the Parma Polyhedra Library (ppl)
# see http://bugseng.com/products/ppl/
# and https://pypi.python.org/pypi/pplpy/0.6
import ppl

# We can analyze all the games of the benchmark list:

for i in range(len(pmlib.benchmark_games)):
    # Each print below takes exactly one string argument so that the output is
    # identical on Python 2 and Python 3 (a bare `print` is a no-op on Python 3
    # and multi-argument print statements are a SyntaxError there).
    print("")
    print("")
    print("***** %s *****" % (pmlib.benchmark_names[i],))
    game = pmlib.benchmark_games[i]
    game.dump(plot=False, nice = True) # set plot=True to plot the outcome distributions
    # ProblemClass returns the class name of the game and a textual justification
    hierarchy, why = pmlib.ProblemClass(game)
    print("")
    print("")
    print("======> This game is %s, because %s" % (hierarchy.upper(), why))









    




***** Easy Bandit *****
***** 3-armed bandit *****
Actions: N=3 Outcomes: M=8

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      000
      001
      010
      011
      100
      101
      110
      111
    
  
  
    
      arm 0
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      arm 1
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
    
    
      arm 2
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      000
      001
      010
      011
      100
      101
      110
      111
    
  
  
    
      arm 0
      loss
      loss
      loss
      loss
      win
      win
      win
      win
    
    
      arm 1
      loss
      loss
      win
      win
      loss
      loss
      win
      win
    
    
      arm 2
      loss
      win
      loss
      win
      loss
      win
      loss
      win
    
  








    



Outcomes distribution (for stochastic games):
P(000)=0.045 P(001)=0.005 P(010)=0.045 P(011)=0.005 P(100)=0.405 P(101)=0.045 P(110)=0.405 P(111)=0.045

======> This game is EASY, because all neighbouring pairs are observable.


***** Hard Bandit *****
***** 4-armed bandit *****
Actions: N=4 Outcomes: M=16

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      0000
      0001
      0010
      0011
      0100
      0101
      0110
      0111
      1000
      1001
      1010
      1011
      1100
      1101
      1110
      1111
    
  
  
    
      arm 0
      1.0
      1.0
      1.0
      1.0
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      arm 1
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      arm 2
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
    
    
      arm 3
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      0000
      0001
      0010
      0011
      0100
      0101
      0110
      0111
      1000
      1001
      1010
      1011
      1100
      1101
      1110
      1111
    
  
  
    
      arm 0
      loss
      loss
      loss
      loss
      loss
      loss
      loss
      loss
      win
      win
      win
      win
      win
      win
      win
      win
    
    
      arm 1
      loss
      loss
      loss
      loss
      win
      win
      win
      win
      loss
      loss
      loss
      loss
      win
      win
      win
      win
    
    
      arm 2
      loss
      loss
      win
      win
      loss
      loss
      win
      win
      loss
      loss
      win
      win
      loss
      loss
      win
      win
    
    
      arm 3
      loss
      win
      loss
      win
      loss
      win
      loss
      win
      loss
      win
      loss
      win
      loss
      win
      loss
      win
    
  








    



Outcomes distribution (for stochastic games):
P(0000)=0.05 P(0001)=0.05 P(0010)=0.05 P(0011)=0.05 P(0100)=0.05 P(0101)=0.05 P(0110)=0.05 P(0111)=0.05 P(1000)=0.075 P(1001)=0.075 P(1010)=0.075 P(1011)=0.075 P(1100)=0.075 P(1101)=0.075 P(1110)=0.075 P(1111)=0.075

======> This game is EASY, because all neighbouring pairs are observable.


***** Four levels easy Dynamic Pricing (c=2) *****
***** 4-levels dynamic pricing *****
Actions: N=4 Outcomes: M=4

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      0$
      1$
      2$
      3$
    
  
  
    
      0$
      0.0
      1.0
      2.0
      3.0
    
    
      1$
      2.0
      0.0
      1.0
      2.0
    
    
      2$
      2.0
      2.0
      0.0
      1.0
    
    
      3$
      2.0
      2.0
      2.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      0$
      1$
      2$
      3$
    
  
  
    
      0$
      sold
      sold
      sold
      sold
    
    
      1$
      not-sold
      sold
      sold
      sold
    
    
      2$
      not-sold
      not-sold
      sold
      sold
    
    
      3$
      not-sold
      not-sold
      not-sold
      sold
    
  








    



Outcomes distribution (for stochastic games):
P(0$)=0.1 P(1$)=0.1 P(2$)=0.7 P(3$)=0.1

======> This game is HARD, because [0$,2$] pair is not locally observable.


***** Five levels hard Dynamic Pricing (c=2) *****
***** 6-levels dynamic pricing *****
Actions: N=6 Outcomes: M=6

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      0$
      1$
      2$
      3$
      4$
      5$
    
  
  
    
      0$
      0.0
      1.0
      2.0
      3.0
      4.0
      5.0
    
    
      1$
      2.0
      0.0
      1.0
      2.0
      3.0
      4.0
    
    
      2$
      2.0
      2.0
      0.0
      1.0
      2.0
      3.0
    
    
      3$
      2.0
      2.0
      2.0
      0.0
      1.0
      2.0
    
    
      4$
      2.0
      2.0
      2.0
      2.0
      0.0
      1.0
    
    
      5$
      2.0
      2.0
      2.0
      2.0
      2.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      0$
      1$
      2$
      3$
      4$
      5$
    
  
  
    
      0$
      sold
      sold
      sold
      sold
      sold
      sold
    
    
      1$
      not-sold
      sold
      sold
      sold
      sold
      sold
    
    
      2$
      not-sold
      not-sold
      sold
      sold
      sold
      sold
    
    
      3$
      not-sold
      not-sold
      not-sold
      sold
      sold
      sold
    
    
      4$
      not-sold
      not-sold
      not-sold
      not-sold
      sold
      sold
    
    
      5$
      not-sold
      not-sold
      not-sold
      not-sold
      not-sold
      sold
    
  








    



Outcomes distribution (for stochastic games):
P(0$)=0.3 P(1$)=0.1 P(2$)=0.1 P(3$)=0.1 P(4$)=0.1 P(5$)=0.3

======> This game is HARD, because [0$,2$] pair is not locally observable.


***** G. Bartok's thesis game *****
***** Bartok game *****
Actions: N=3 Outcomes: M=3

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      0
      1
      2
    
  
  
    
      0
      1.0
      1.0
      0.0
    
    
      1
      0.0
      1.0
      1.0
    
    
      2
      1.0
      0.0
      1.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      0
      1
      2
    
  
  
    
      0
      a
      b
      b
    
    
      1
      b
      a
      b
    
    
      2
      b
      b
      a
    
  








    



Outcomes distribution (for stochastic games):
P(0)=0.333333333333 P(1)=0.333333333333 P(2)=0.333333333333

======> This game is EASY, because all neighbouring pairs are observable.


***** Apple tasting (organic food) *****
***** Apple tasting game *****
Actions: N=2 Outcomes: M=2

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      rotten
      good
    
  
  
    
      sell apple
      1.0
      0.0
    
    
      taste apple
      0.0
      1.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      rotten
      good
    
  
  
    
      sell apple
      blind
      blind
    
    
      taste apple
      rotten
      good
    
  








    



Outcomes distribution (for stochastic games):
P(rotten)=0.05 P(good)=0.95

======> This game is EASY, because all neighbouring pairs are observable.


***** Apple tasting (supermarket) *****
***** Apple tasting game *****
Actions: N=2 Outcomes: M=2

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      rotten
      good
    
  
  
    
      sell apple
      1.0
      0.0
    
    
      taste apple
      0.0
      1.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      rotten
      good
    
  
  
    
      sell apple
      blind
      blind
    
    
      taste apple
      rotten
      good
    
  








    



Outcomes distribution (for stochastic games):
P(rotten)=0.5 P(good)=0.5

======> This game is EASY, because all neighbouring pairs are observable.


***** Horse race *****
***** Full-information (horse race) *****
Actions: N=4 Outcomes: M=4

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      0
      1
      2
      3
    
  
  
    
      bet on horse 0
      0.0
      1.0
      1.0
      1.0
    
    
      bet on horse 1
      1.0
      0.0
      1.0
      1.0
    
    
      bet on horse 2
      1.0
      1.0
      0.0
      1.0
    
    
      bet on horse 3
      1.0
      1.0
      1.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      0
      1
      2
      3
    
  
  
    
      bet on horse 0
      0
      1
      2
      3
    
    
      bet on horse 1
      0
      1
      2
      3
    
    
      bet on horse 2
      0
      1
      2
      3
    
    
      bet on horse 3
      0
      1
      2
      3
    
  








    



Outcomes distribution (for stochastic games):
P(0)=0.1 P(1)=0.6 P(2)=0.1 P(3)=0.2

======> This game is EASY, because all neighbouring pairs are observable.


***** Intractable *****
***** Intractable *****
Actions: N=2 Outcomes: M=2

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      no
      yes
    
  
  
    
      ask
      1.0
      0.0
    
    
      not-ask
      0.0
      1.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      no
      yes
    
  
  
    
      ask
      maybe
      maybe
    
    
      not-ask
      who-knows
      who-knows
    
  








    



Outcomes distribution (for stochastic games):
P(no)=0.75 P(yes)=0.25

======> This game is INTRACTABLE, because [ask,not-ask] pair is not globally observable.


***** Label efficient prediction *****
***** Label-efficient prediction *****
Actions: N=3 Outcomes: M=2

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      ham
      spam
    
  
  
    
      ask user
      1.0
      1.0
    
    
      transfer email
      0.0
      1.0
    
    
      drop email
      2.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      ham
      spam
    
  
  
    
      ask user
      ham
      spam
    
    
      transfer email
      blind
      blind
    
    
      drop email
      blind
      blind
    
  








    



Outcomes distribution (for stochastic games):
P(ham)=0.75 P(spam)=0.25

======> This game is HARD, because [transfer email,drop email] pair is not locally observable.


***** Easy Dueling Bandit *****
***** 3-armed utility-based dueling bandit *****
Actions: N=6 Outcomes: M=8

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      000
      001
      010
      011
      100
      101
      110
      111
    
  
  
    
      (0,0)
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      (0,1)
      1.0
      1.0
      0.5
      0.5
      0.5
      0.5
      0.0
      0.0
    
    
      (0,2)
      1.0
      0.5
      1.0
      0.5
      0.5
      0.0
      0.5
      0.0
    
    
      (1,1)
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
    
    
      (1,2)
      1.0
      0.5
      0.5
      0.0
      1.0
      0.5
      0.5
      0.0
    
    
      (2,2)
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      000
      001
      010
      011
      100
      101
      110
      111
    
  
  
    
      (0,0)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
    
      (0,1)
      tie
      tie
      loss
      loss
      win
      win
      tie
      tie
    
    
      (0,2)
      tie
      loss
      tie
      loss
      win
      tie
      win
      tie
    
    
      (1,1)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
    
      (1,2)
      tie
      loss
      win
      tie
      tie
      loss
      win
      tie
    
    
      (2,2)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
  








    



Outcomes distribution (for stochastic games):
P(000)=0.045 P(001)=0.005 P(010)=0.045 P(011)=0.005 P(100)=0.405 P(101)=0.045 P(110)=0.405 P(111)=0.045

======> This game is EASY, because all neighbouring pairs are observable.


***** Hard Dueling Bandit *****
***** 4-armed utility-based dueling bandit *****
Actions: N=10 Outcomes: M=16

Loss Matrix (with actions as row indices and outcomes as column indices):






    







  
    
      
      0000
      0001
      0010
      0011
      0100
      0101
      0110
      0111
      1000
      1001
      1010
      1011
      1100
      1101
      1110
      1111
    
  
  
    
      (0,0)
      1.0
      1.0
      1.0
      1.0
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      (0,1)
      1.0
      1.0
      1.0
      1.0
      0.5
      0.5
      0.5
      0.5
      0.5
      0.5
      0.5
      0.5
      0.0
      0.0
      0.0
      0.0
    
    
      (0,2)
      1.0
      1.0
      0.5
      0.5
      1.0
      1.0
      0.5
      0.5
      0.5
      0.5
      0.0
      0.0
      0.5
      0.5
      0.0
      0.0
    
    
      (0,3)
      1.0
      0.5
      1.0
      0.5
      1.0
      0.5
      1.0
      0.5
      0.5
      0.0
      0.5
      0.0
      0.5
      0.0
      0.5
      0.0
    
    
      (1,1)
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      1.0
      1.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      (1,2)
      1.0
      1.0
      0.5
      0.5
      0.5
      0.5
      0.0
      0.0
      1.0
      1.0
      0.5
      0.5
      0.5
      0.5
      0.0
      0.0
    
    
      (1,3)
      1.0
      0.5
      1.0
      0.5
      0.5
      0.0
      0.5
      0.0
      1.0
      0.5
      1.0
      0.5
      0.5
      0.0
      0.5
      0.0
    
    
      (2,2)
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
    
    
      (2,3)
      1.0
      0.5
      0.5
      0.0
      1.0
      0.5
      0.5
      0.0
      1.0
      0.5
      0.5
      0.0
      1.0
      0.5
      0.5
      0.0
    
    
      (3,3)
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
      1.0
      0.0
    
  








    



Feedback Matrix (symbolic form):






    







  
    
      
      0000
      0001
      0010
      0011
      0100
      0101
      0110
      0111
      1000
      1001
      1010
      1011
      1100
      1101
      1110
      1111
    
  
  
    
      (0,0)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
    
      (0,1)
      tie
      tie
      tie
      tie
      loss
      loss
      loss
      loss
      win
      win
      win
      win
      tie
      tie
      tie
      tie
    
    
      (0,2)
      tie
      tie
      loss
      loss
      tie
      tie
      loss
      loss
      win
      win
      tie
      tie
      win
      win
      tie
      tie
    
    
      (0,3)
      tie
      loss
      tie
      loss
      tie
      loss
      tie
      loss
      win
      tie
      win
      tie
      win
      tie
      win
      tie
    
    
      (1,1)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
    
      (1,2)
      tie
      tie
      loss
      loss
      win
      win
      tie
      tie
      tie
      tie
      loss
      loss
      win
      win
      tie
      tie
    
    
      (1,3)
      tie
      loss
      tie
      loss
      win
      tie
      win
      tie
      tie
      loss
      tie
      loss
      win
      tie
      win
      tie
    
    
      (2,2)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
    
      (2,3)
      tie
      loss
      win
      tie
      tie
      loss
      win
      tie
      tie
      loss
      win
      tie
      tie
      loss
      win
      tie
    
    
      (3,3)
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
      tie
    
  








    



Outcomes distribution (for stochastic games):
P(0000)=0.05 P(0001)=0.05 P(0010)=0.05 P(0011)=0.05 P(0100)=0.05 P(0101)=0.05 P(0110)=0.05 P(0111)=0.05 P(1000)=0.075 P(1001)=0.075 P(1010)=0.075 P(1011)=0.075 P(1100)=0.075 P(1101)=0.075 P(1110)=0.075 P(1111)=0.075

======> This game is EASY, because all neighbouring pairs are observable.



In [9]:

    
# The present version only includes 4 variants of the FeedExp3 algorithm.
# The BasicFeedexp3 class only works with some forms of numeric feedback matrices where
# there exists an NxN matrix K such that L=KF (see Piccolboni & Schindelhauer 2000).
# The GeneralFeedexp3 class uses cell decomposition to handle more general forms of feedback.
# The eta and gamma parameters can be optimized for a known horizon.
# When these parameters are set to zero we use an anytime version with dynamic eta and gamma parameters
# as specified in (Cesa-Bianchi et al. 2006).

from multiprocessing import cpu_count
# Keep two cores free, but always use at least one.
nbCores = max(1,cpu_count() - 2)
# Ten repetitions per core used.
nbReps = nbCores*10
# These three names are module-level settings: plot_game() below reads
# horizon, nbCores and nbReps as globals.
horizon = 10000

pm_game = pmlib.AppleTasting([0.05,0.95])

import matplotlib.pyplot as plt
# Default figure size (width, height) for all the plots that follow.
plt.rcParams["figure.figsize"] = [10.,7.]


# Known-horizon parameters for this game, then the regret of General FeedExp3.
# NOTE(review): eval_policy_parallel presumably runs nbReps independent
# simulations of `horizon` rounds spread over nbCores processes -- confirm in pmlib.
eta, gamma = pmlib.optimal_Feedexp3_parameters(pm_game, horizon)
gfx3 = pmlib.GeneralFeedexp3(pm_game, eta , gamma)
cumRegrets1 = pmlib.eval_policy_parallel(nbCores, nbReps, horizon, pm_game, gfx3)
pmlib.init_plot("Average regret curve")
pmlib.plot_regret(cumRegrets1, mylabel= "General FeedExp3 (known horizon)", mycolor = 'green')
pmlib.show_plot()









    



kstar= 2.0 eta0= 0.347065959622 gamma0= 1.99716267569
2 x 2 Link matrix
Loss estimation error: 3.14018491737e-16
nbCores: 38 nbReps: 380 Horizon: 10000



In [10]:

    
# Here is a generic plot function

def plot_game(pm_game):
    """Evaluate several policies on one PM game and plot their regret curves.

    The policies compared are General FeedExp3 (parameters tuned for the known
    horizon), a baseline built from ``pmlib.BasicPolicy`` (labelled "Random"
    in the plot) and, only when ``pm_game.game_type == "dueling"``, the
    dueling-bandit specific Rex3 algorithm.

    The simulation settings are not parameters: the function reads the
    notebook-level globals ``horizon``, ``nbCores`` and ``nbReps``, so these
    must be defined before calling it.

    Args:
        pm_game: a pmlib game instance exposing a ``game_type`` attribute.

    Returns:
        None. Progress messages are printed and the figure is displayed
        through ``pmlib.show_plot()``.
    """
    # Evaluated once and reused for both the simulation and the plotting step.
    is_dueling = pm_game.game_type == "dueling"

    # Feedexp3
    # Single-argument print(...) calls print the same text on Python 2 and 3.
    print("** FeedExp3")
    eta, gamma = pmlib.optimal_Feedexp3_parameters(pm_game, horizon)
    gfx3 = pmlib.GeneralFeedexp3(pm_game, eta, gamma)
    cumRegrets1 = pmlib.eval_policy_parallel(nbCores, nbReps, horizon, pm_game, gfx3)

    # Rex3 (only defined for dueling bandit games)
    cumRegrets2 = None
    if is_dueling:
        print("** Rex3")
        rex3 = pmlib.Rex3(pm_game, pmlib.optimal_gamma(pm_game, horizon))
        cumRegrets2 = pmlib.eval_policy_parallel(nbCores, nbReps, horizon, pm_game, rex3)

    # Random
    print("** Random")
    baseline = pmlib.BasicPolicy(pm_game)
    cumRegrets3 = pmlib.eval_policy_parallel(nbCores, nbReps, horizon, pm_game, baseline)

    pmlib.init_plot("Generic PM versus adhoc DB")
    pmlib.plot_regret(cumRegrets1, mylabel= "General FeedExp3 (known horizon)", mycolor = 'black')
    if is_dueling:
        # It is an open question whether a general PM algorithm can be as tight as an
        # adhoc dueling bandits algorithm.
        pmlib.plot_regret(cumRegrets2, mylabel= "Rex3 (known horizon)", mycolor = 'orange')
    # The baseline curve is drawn with autoscale disabled.
    pmlib.plot_regret(cumRegrets3, mylabel= "Random", mycolor = 'red', autoscale = False)
    pmlib.show_plot()



In [11]:

    
# We can also plot all these games (change horizon and nbReps at will).
# plot_game() reads these two module-level settings, so they must be set first.
horizon = 2000
nbReps = nbCores*10

for i in range(len(pmlib.benchmark_games)):
    # Single-string print(...) calls give identical output on Python 2 and 3
    # (a bare `print` would silently print nothing on Python 3).
    print("")
    print("")
    print("***** %s *****" % (pmlib.benchmark_names[i],))
    game = pmlib.benchmark_games[i]
    print("")
    plot_game(game)









    




***** Easy Bandit *****

** FeedExp3
kstar= 1.0 eta0= 0.550932869406 gamma0= 2.51626729517
4 x 4 Link matrix
Loss estimation error: 2.21419223791e-15
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Hard Bandit *****

** FeedExp3
kstar= 1.0 eta0= 0.524450522302 gamma0= 3.06880791227
5 x 5 Link matrix
Loss estimation error: 2.44375194559e-15
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Four levels easy Dynamic Pricing (c=2) *****

** FeedExp3
kstar= 3.0 eta0= 0.264860898057 gamma0= 5.23404689515
4 x 4 Link matrix
Loss estimation error: 3.54750569113e-15
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Five levels hard Dynamic Pricing (c=2) *****

** FeedExp3
kstar= 5.0 eta0= 0.170611123381 gamma0= 10.5020085075
6 x 6 Link matrix
Loss estimation error: 1.16737192607e-14
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** G. Bartok's thesis game *****

** FeedExp3
kstar= 2.0 eta0= 0.347065959622 gamma0= 3.99432535138
4 x 4 Link matrix
Loss estimation error: 9.93013661299e-16
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Apple tasting (organic food) *****

** FeedExp3
kstar= 2.0 eta0= 0.347065959622 gamma0= 1.99716267569
2 x 2 Link matrix
Loss estimation error: 3.14018491737e-16
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Apple tasting (supermarket) *****

** FeedExp3
kstar= 2.0 eta0= 0.347065959622 gamma0= 1.99716267569
2 x 2 Link matrix
Loss estimation error: 3.14018491737e-16
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Horse race *****

** FeedExp3
kstar= 1.0 eta0= 0.475610611348 gamma0= 4.091393469
7 x 7 Link matrix
Loss estimation error: 0.0
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Intractable *****

** FeedExp3
kstar= 1.0 eta0= 0.550932869406 gamma0= 1.25813364758
2 x 2 Link matrix
Loss estimation error: 1.41421356237
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Label efficient prediction *****

** FeedExp3
kstar= 2.0 eta0= 0.347065959622 gamma0= 3.99432535138
4 x 4 Link matrix
Loss estimation error: 0.0
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Easy Dueling Bandit *****

** FeedExp3
kstar= 2.0 eta0= 0.28649663731 gamma0= 7.25817085047
8 x 8 Link matrix
Loss estimation error: 3.30839010093e-15
nbCores: 38 nbReps: 380 Horizon: 2000
** Rex3
Number of arms: K=3
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000






    












    




***** Hard Dueling Bandit *****

** FeedExp3
kstar= 2.0 eta0= 0.238399498807 gamma0= 10.7590383801
13 x 13 Link matrix
Loss estimation error: 1.48330545706e-14
nbCores: 38 nbReps: 380 Horizon: 2000
** Rex3
Number of arms: K=4
nbCores: 38 nbReps: 380 Horizon: 2000
** Random
nbCores: 38 nbReps: 380 Horizon: 2000



In [ ]:

	000	001	010	011	100	101	110	111
arm 0	1.0	1.0	1.0	1.0	0.0	0.0	0.0	0.0
arm 1	1.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0
arm 2	1.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0

	000	001	010	011	100	101	110	111
arm 0	loss	loss	loss	loss	win	win	win	win
arm 1	loss	loss	win	win	loss	loss	win	win
arm 2	loss	win	loss	win	loss	win	loss	win

	0$	1$	2$	3$
0$	sold	sold	sold	sold
1$	not-sold	sold	sold	sold
2$	not-sold	not-sold	sold	sold
3$	not-sold	not-sold	not-sold	sold

	0$	1$	2$	3$	4$	5$
0$	0.0	1.0	2.0	3.0	4.0	5.0
1$	2.0	0.0	1.0	2.0	3.0	4.0
2$	2.0	2.0	0.0	1.0	2.0	3.0
3$	2.0	2.0	2.0	0.0	1.0	2.0
4$	2.0	2.0	2.0	2.0	0.0	1.0
5$	2.0	2.0	2.0	2.0	2.0	0.0

	0	1	2	3
bet on horse 0	0.0	1.0	1.0	1.0
bet on horse 1	1.0	0.0	1.0	1.0
bet on horse 2	1.0	1.0	0.0	1.0
bet on horse 3	1.0	1.0	1.0	0.0

	000	001	010	011	100	101	110	111
(0,0)	1.0	1.0	1.0	1.0	0.0	0.0	0.0	0.0
(0,1)	1.0	1.0	0.5	0.5	0.5	0.5	0.0	0.0
(0,2)	1.0	0.5	1.0	0.5	0.5	0.0	0.5	0.0
(1,1)	1.0	1.0	0.0	0.0	1.0	1.0	0.0	0.0
(1,2)	1.0	0.5	0.5	0.0	1.0	0.5	0.5	0.0
(2,2)	1.0	0.0	1.0	0.0	1.0	0.0	1.0	0.0

	0$	1$	2$	3$	4$	5$
0$	0.0	1.0	2.0	3.0	4.0	5.0
1$	2.0	0.0	1.0	2.0	3.0	4.0
2$	2.0	2.0	0.0	1.0	2.0	3.0
3$	2.0	2.0	2.0	0.0	1.0	2.0
4$	2.0	2.0	2.0	2.0	0.0	1.0
5$	2.0	2.0	2.0	2.0	2.0	0.0

	0$	1$	2$	3$	4$	5$
0$	0.0	1.0	2.0	3.0	4.0	5.0
1$	2.0	0.0	1.0	2.0	3.0	4.0
2$	2.0	2.0	0.0	1.0	2.0	3.0
3$	2.0	2.0	2.0	0.0	1.0	2.0
4$	2.0	2.0	2.0	2.0	0.0	1.0
5$	2.0	2.0	2.0	2.0	2.0	0.0