In [419]:
import numpy as np
import sys
if "../" not in sys.path:
  sys.path.append("../") 
from lib.envs.blackjack import BlackjackEnv

In [420]:
# Instantiate the Blackjack environment (imported from lib.envs.blackjack).
# This single instance is reused by the episode loop further down.
env = BlackjackEnv()

In [422]:
def print_observation(observation):
    """Print a one-line, human-readable summary of an observation.

    ``observation`` is unpacked as exactly three items, in the order
    (player score, dealer score, usable-ace flag). The printed line lists
    the player score first, then the usable-ace flag, then the dealer score.
    """
    player_total, dealer_total, has_usable_ace = observation
    template = "Player Score: {} (Usable Ace: {}), Dealer Score: {}"
    print(template.format(player_total, has_usable_ace, dealer_total))

def strategy(observation):
    """Return the action of a fixed threshold policy.

    ``observation`` is unpacked as (player score, dealer score, usable-ace
    flag); only the player score influences the decision.

    Returns 0 (stick) when the player score is 20 or more, and 1 (hit)
    otherwise.
    """
    player_total, _dealer_total, _has_usable_ace = observation
    # Stick on 20 or 21 (and anything above); keep drawing below that.
    if player_total >= 20:
        return 0
    return 1

# Play 20 episodes with the fixed `strategy`, logging every observation,
# the chosen action, and the final reward of each episode.
for i_episode in range(20):
    # reset() supplies the first observation of a new episode.
    observation = env.reset()
    # Upper bound of 100 steps per episode; the loop leaves via `break`
    # as soon as step() reports `done`.
    for t in range(100):
        print_observation(observation)
        action = strategy(observation)
        # Action 0 is displayed as "Stick", action 1 as "Hit".
        print("Taking action: {}".format( ["Stick", "Hit"][action]))
        # step() returns a 4-tuple; the last element is unused here.
        observation, reward, done, _ = env.step(action)
        if done:
            # Show the final observation and the reward (printed as a float).
            print_observation(observation)
            print("Game end. Reward: {}\n".format(float(reward)))
            break


Player Score: 17 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 18 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 28 (Usable Ace: False), Dealer Score: 10
Game end. Reward: -1.0

Player Score: 6 (Usable Ace: False), Dealer Score: 9
Taking action: Hit
Player Score: 16 (Usable Ace: False), Dealer Score: 9
Taking action: Hit
Player Score: 26 (Usable Ace: False), Dealer Score: 9
Game end. Reward: -1.0

Player Score: 12 (Usable Ace: False), Dealer Score: 6
Taking action: Hit
Player Score: 21 (Usable Ace: False), Dealer Score: 6
Taking action: Stick
Player Score: 21 (Usable Ace: False), Dealer Score: 6
Game end. Reward: 1.0

Player Score: 17 (Usable Ace: True), Dealer Score: 8
Taking action: Hit
Player Score: 17 (Usable Ace: False), Dealer Score: 8
Taking action: Hit
Player Score: 22 (Usable Ace: False), Dealer Score: 8
Game end. Reward: -1.0

Player Score: 17 (Usable Ace: False), Dealer Score: 8
Taking action: Hit
Player Score: 27 (Usable Ace: False), Dealer Score: 8
Game end. Reward: -1.0

Player Score: 16 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 19 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 28 (Usable Ace: False), Dealer Score: 10
Game end. Reward: -1.0

Player Score: 13 (Usable Ace: False), Dealer Score: 7
Taking action: Hit
Player Score: 14 (Usable Ace: False), Dealer Score: 7
Taking action: Hit
Player Score: 24 (Usable Ace: False), Dealer Score: 7
Game end. Reward: -1.0

Player Score: 17 (Usable Ace: False), Dealer Score: 5
Taking action: Hit
Player Score: 25 (Usable Ace: False), Dealer Score: 5
Game end. Reward: -1.0

Player Score: 20 (Usable Ace: False), Dealer Score: 5
Taking action: Stick
Player Score: 20 (Usable Ace: False), Dealer Score: 5
Game end. Reward: 1.0

Player Score: 12 (Usable Ace: True), Dealer Score: 10
Taking action: Hit
Player Score: 20 (Usable Ace: True), Dealer Score: 10
Taking action: Stick
Player Score: 20 (Usable Ace: True), Dealer Score: 10
Game end. Reward: 0.0

Player Score: 12 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 19 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 24 (Usable Ace: False), Dealer Score: 10
Game end. Reward: -1.0

Player Score: 19 (Usable Ace: False), Dealer Score: 4
Taking action: Hit
Player Score: 22 (Usable Ace: False), Dealer Score: 4
Game end. Reward: -1.0

Player Score: 16 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 20 (Usable Ace: False), Dealer Score: 10
Taking action: Stick
Player Score: 20 (Usable Ace: False), Dealer Score: 10
Game end. Reward: 0.0

Player Score: 4 (Usable Ace: False), Dealer Score: 3
Taking action: Hit
Player Score: 14 (Usable Ace: False), Dealer Score: 3
Taking action: Hit
Player Score: 24 (Usable Ace: False), Dealer Score: 3
Game end. Reward: -1.0

Player Score: 21 (Usable Ace: True), Dealer Score: 10
Taking action: Stick
Player Score: 21 (Usable Ace: True), Dealer Score: 10
Game end. Reward: 1.0

Player Score: 16 (Usable Ace: True), Dealer Score: 10
Taking action: Hit
Player Score: 12 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 20 (Usable Ace: False), Dealer Score: 10
Taking action: Stick
Player Score: 20 (Usable Ace: False), Dealer Score: 10
Game end. Reward: 1.0

Player Score: 9 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 19 (Usable Ace: False), Dealer Score: 10
Taking action: Hit
Player Score: 26 (Usable Ace: False), Dealer Score: 10
Game end. Reward: -1.0

Player Score: 12 (Usable Ace: False), Dealer Score: 5
Taking action: Hit
Player Score: 15 (Usable Ace: False), Dealer Score: 5
Taking action: Hit
Player Score: 21 (Usable Ace: False), Dealer Score: 5
Taking action: Stick
Player Score: 21 (Usable Ace: False), Dealer Score: 5
Game end. Reward: 1.0

Player Score: 11 (Usable Ace: False), Dealer Score: 9
Taking action: Hit
Player Score: 13 (Usable Ace: False), Dealer Score: 9
Taking action: Hit
Player Score: 17 (Usable Ace: False), Dealer Score: 9
Taking action: Hit
Player Score: 19 (Usable Ace: False), Dealer Score: 9
Taking action: Hit
Player Score: 29 (Usable Ace: False), Dealer Score: 9
Game end. Reward: -1.0

Player Score: 14 (Usable Ace: False), Dealer Score: 7
Taking action: Hit
Player Score: 19 (Usable Ace: False), Dealer Score: 7
Taking action: Hit
Player Score: 29 (Usable Ace: False), Dealer Score: 7
Game end. Reward: -1.0