In [1]:
import os
import numpy as np
import shutil, requests
import pickle
In [2]:
models_dir = './models/game'
rcl_base_dir = 'http://redcatlabs.com/downloads/deep-learning-workshop/notebooks/models/game/'

# Make sure the local models directory exists before downloading into it
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

# Fetch each game-mechanics file from the workshop server, skipping any
# that are already present locally
for py in ['__init__.py', 'crush.py', 'crush_ui.py']:
    py_path = os.path.join(models_dir, py)
    if os.path.isfile( py_path ):
        continue
    print("Downloading %s" % (py_path,))
    response = requests.get(rcl_base_dir+py, stream=True)
    with open(py_path, 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

print("Model mechanics code available locally")
In [3]:
# Game mechanics and browser-UI helpers downloaded by the cell above
from models.game import crush
from models.game import crush_ui
# These are pure numpy (no fancy code - just the game mechanics/UI)
import importlib
# Reload so edits to the local .py files are picked up without a kernel restart
importlib.reload(crush)
importlib.reload(crush_ui);
In [4]:
crush.new_board(10, 14, n_colours=5)
The key trick in this notebook is Jupyter's ability to go 'round-trip' from the Python back-end to Javascript in the browser, and back again. There's a block of helper javascript in the (Python) file crush_ui:
In [5]:
from IPython.display import HTML
#HTML(crush_ui.javascript_test)
# Inject the helper javascript into the page, then show a confirmation message
HTML(crush_ui.javascript_base+'<code>Javascript Loaded</code>')
Having imported that base code, we can now create UI elements for the javascript to manipulate:
In [6]:
# Create an empty <div> and ask the helper javascript to build a 10x14,
# 5-colour board UI inside it, bound to the Python variable name 'board_playme'
javascript = """
<div id="board_10_14_playme"></div>
<script type="text/Javascript">create_board("#board_10_14_playme",10,14,5,'board_playme');</script>
"""
HTML(javascript)
And, now initialise a board and display it:
In [7]:
# Make a fresh random board and push it into the UI element created above
board_playme = crush.new_board(10, 14, n_colours=5)
HTML(crush_ui.display_via_javascript_script("#board_10_14_playme", board_playme))
But - because of the Python-javascript-Python round-tripping - you can now play the game (click on linked cells)!
Once you run out of moves, the game is over. You can restart it by re-running the board-generation cell above.
In [8]:
class board_param_set:
    """Simple value object bundling the dimensions of a game board."""
    def __init__(self, width, height, n_colours):
        # Store the three board parameters verbatim
        self.width = width
        self.height = height
        self.n_colours = n_colours
In [11]:
# A smaller board (5x8, 4 colours) trains much faster than the full-sized one
p_small = board_param_set(5,8,4)
javascript = """
<div id="board_small_playme"></div>
<script type="text/Javascript">create_board("#board_small_playme",%d,%d,%d, 'board_small');</script>
""" % (p_small.width, p_small.height, p_small.n_colours)
HTML(javascript)
In [12]:
# Initialise the small board and render it into the UI element above
board_small = crush.new_board(p_small.width, p_small.height, p_small.n_colours)
HTML(crush_ui.display_via_javascript_script("#board_small_playme", board_small))
Now, there's quite a lot of machinery required to do Q() learning. So we'll take it one step at a time.
In [13]:
def make_features_in_layers(board, down_shifts=(1, 2, 3, 4, 5), right_shifts=(1, 2, 3)):
    """Convert a board (2-d integer array, 0 == empty cell) into a stack of
    binary feature planes for the CNN.

    Planes produced, in order:
      1. occupancy mask (cell > 0)
      2. one "same colour as the cell N positions along axis 1" plane
         for each N in ``down_shifts``
      3. one "same colour as the cell N positions along axis 0" plane
         for each N in ``right_shifts``

    Args:
        board: 2-d numpy array of colour indices; 0 marks an empty cell.
        down_shifts: offsets along axis 1 to compare against (defaults
            reproduce the original hard-coded behaviour).
        right_shifts: offsets along axis 0 to compare against.

    Returns:
        float32 array of shape (n_planes, board.shape[0], board.shape[1]),
        i.e. channels-first / Theano ordering.
    """
    feature_layers = []  # These are effectively 'colours' for the CNN

    # Occupancy: 1.0 where the cell is filled, 0.0 where empty
    mask = np.greater( board[:, :], 0 )*1.
    feature_layers.append( mask.astype('float32') )

    # Whether each cell matches the cell `shift_down` positions along axis 1
    # (cells shifted past the edge compare as 0.)
    for shift_down in down_shifts:
        sameness = np.zeros_like(board, dtype='float32')
        sameness[:,:-shift_down] = np.equal( board[:, :-shift_down], board[:, shift_down:] )*1.
        feature_layers.append( sameness )

    # Whether each cell matches the cell `shift_right` positions along axis 0
    for shift_right in right_shifts:
        sameness = np.zeros_like(board, dtype='float32')
        sameness[:-shift_right,:] = np.equal( board[:-shift_right, :], board[shift_right:, :] )*1.
        feature_layers.append( sameness )

    stacked = np.dstack( feature_layers )
    # Return a single feature map stack ~ (input channels, input rows, input columns)
    # == Theano order (need to cope with this ordering in TensorFlow, since it is 'unnatural' there)
    return np.rollaxis( stacked, 2, 0 )
In [14]:
# (n_feature_layers, width, height) of the small board - fixes the CNN input shape
features_shape_small = make_features_in_layers(board_small).shape
print("('feature layers', width, height) : %s" % (features_shape_small, ))
In [ ]:
# Execute for Theano / Lasagne version
# NOTE: this cell is deliberately disabled - remove the raise to use the Theano backend
raise Exception('Use the TensorFlow version!')
import theano
import lasagne
def build_cnn_theano_lasagne(input_var, features_shape):
    """Build the Lasagne network: two conv layers then two dense layers,
    ending in a single linear unit that outputs Q(features(board)).

    Args:
        input_var: symbolic theano tensor4 fed into the input layer.
        features_shape: (channels, width, height) of the feature stack.

    Returns:
        The final lasagne layer (the whole network is reachable from it).
    """
    # Create a CNN of two convolution layers and
    # a fully-connected hidden layer in front of the output 'action score' layer
    lasagne.random.set_rng( np.random )  # so np.random.seed() controls weight init

    # input layer
    network = lasagne.layers.InputLayer(
        shape=(None, features_shape[0], features_shape[1], features_shape[2]),
        input_var=input_var)

    # Two convolutional layers (no dropout, no pooling)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3,3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
    )
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=16, filter_size=(3,3),
        nonlinearity=lasagne.nonlinearities.rectify,
    )

    # Two fully-connected layers - leading to ONE output value : the Q(features(board))
    network = lasagne.layers.DenseLayer(
        network, num_units=32,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    network = lasagne.layers.DenseLayer(
        network, num_units=1,
        nonlinearity=lasagne.nonlinearities.linear,
    )
    return network
class rl_model_theano_lasagne:
    """Q-value model wrapper (Theano/Lasagne backend).

    Exposes the same interface as rl_model_tf_keras: evaluate_features,
    evaluate_features_deterministic, train, explain, get/set_param_values.
    """
    is_tensorflow=False

    def __init__(self, features_shape):
        board_input = theano.tensor.tensor4('inputs')
        board_score = theano.tensor.vector('targets')

        np.random.seed(0) # This is for the initialisation inside the CNN
        cnn = build_cnn_theano_lasagne(board_input, features_shape)
        self.network = cnn

        # This is for running the model (training, etc)
        estimate_q_value = lasagne.layers.get_output(cnn) # 'running'
        self.evaluate_features = theano.function([board_input], estimate_q_value)

        # This is for repeatedly testing the model (deterministic)
        predict_q_value = lasagne.layers.get_output(cnn, deterministic=True)
        self.evaluate_features_deterministic = theano.function([board_input], predict_q_value)

        # This is used for training
        mse = lasagne.objectives.squared_error( estimate_q_value.reshape( (-1,) ), board_score).mean()
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        #updates = lasagne.updates.nesterov_momentum( mse, params, learning_rate=0.01, momentum=0.9 )
        updates = lasagne.updates.adam( mse, params )
        #updates = lasagne.updates.rmsprop( mse, params )
        self.train = theano.function([board_input, board_score], mse, updates=updates)

    def explain(self):
        """Return a list of strings describing per-layer parameter counts."""
        res, params=[], 0
        for i, l in enumerate( lasagne.layers.get_all_layers(self.network) ):
            # count_params is cumulative, so diff against the previous total
            params, params_before = lasagne.layers.count_params(l), params
            res.append( "Layer %2d : %6d params in %s" % (i, params-params_before, type(l),) )
        return res

    def get_param_values(self):
        # Snapshot all weights (used for pickling a trained model)
        return lasagne.layers.get_all_param_values(self.network)

    def set_param_values(self, params):
        # Restore weights saved by get_param_values()
        lasagne.layers.set_all_param_values(self.network, params)

rl_model = rl_model_theano_lasagne
In [15]:
# Execute for Tensorflow / Keras version
#raise Exception('Use the Theano/Lasagne version!')
import tensorflow as tf
import tensorflow.contrib.keras as keras
from tensorflow.contrib.keras.api.keras import backend as K
from tensorflow.contrib.keras.api.keras.layers import Input, Permute, Conv2D, Flatten, Dense
from tensorflow.contrib.keras.api.keras.models import Model
def build_cnn_tf_keras(input_var): #, features_shape
    """Build the Keras graph: permute channels-first input to channels-last,
    two conv layers, then two dense layers ending in one linear Q-value output.

    Args:
        input_var: Keras Input tensor in (channels, width, height) ordering.

    Returns:
        The output tensor (a single Q value per example).
    """
    # Create a CNN of two convolution layers and
    # a fully-connected hidden layer in front of the output 'action score' layer

    # Fix up the board_features being created in Theano ordering...
    #K.set_image_data_format('channels_first')
    x = Permute( (2,3,1) )(input_var) # Probably more efficient to fix up instead

    # Two convolutional layers (no dropout, no pooling, getting smaller)
    x = Conv2D(32, (3,3), strides=(1, 1), padding='valid', use_bias=True, activation=K.relu)(x)
    x = Conv2D(16, (3,3), strides=(1, 1), padding='valid', use_bias=True, activation=K.relu)(x)
    x = Flatten()(x)

    # Two fully-connected layers - leading to ONE output value : the Q(features(board))
    x = Dense(32, activation=K.relu)(x)
    x = Dense(1)(x) # , activation=K.linear
    return x
class rl_model_tf_keras:
    """Q-value model wrapper (TensorFlow/Keras backend).

    Same interface as rl_model_theano_lasagne: evaluate_features,
    evaluate_features_deterministic, train, explain, get/set_param_values.
    """
    is_tensorflow=True

    def __init__(self, features_shape):
        board_input = Input(shape=(features_shape[0], features_shape[1], features_shape[2]),
                            name="board_features")

        np.random.seed(0) # This is for the initialisation inside the CNN
        board_score = build_cnn_tf_keras(board_input)

        # https://keras.io/getting-started/faq/
        #   #how-can-i-obtain-the-output-of-an-intermediate-layer
        self.get_q_value = K.function([board_input, K.learning_phase()], [board_score])

        # This is used for training
        model = Model(inputs=board_input, outputs=board_score)
        #opt = keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        #opt = keras.optimizers.Adam()
        opt = keras.optimizers.RMSprop()
        model.compile(opt,'mean_squared_error')
        self.model = model

    def evaluate_features(self, board_features_batch): # run the model (during training, etc)
        # This would be helpful :: https://github.com/fchollet/keras/issues/3072
        return self.get_q_value([board_features_batch, 1])[0] # training-mode=1

    def evaluate_features_deterministic(self, board_features_batch):
        # testing the model (deterministic)
        return self.model.predict(x=board_features_batch)

    def train(self, boards, targets):
        """Fit the given window as a single batch; return the loss (MSE) as a scalar."""
        hist = self.model.fit( x=np.array(boards), y=np.array(targets),
                               batch_size=len(boards), verbose=0)
        #print(hist.history.keys())
        #print(hist.history['loss'])
        return hist.history['loss'][0] # want MSE as a scalar

    def explain(self): return self.model.summary()

    def get_param_values(self):
        # Snapshot all weights (used for pickling a trained model)
        return [ layer.get_weights() for layer in self.model.layers ]

    def set_param_values(self, params):
        # Restore weights saved by get_param_values()
        for i, layer in enumerate(self.model.layers):
            layer.set_weights(params[i])

rl_model = rl_model_tf_keras
In [16]:
# Build the (untrained) Q-model sized for the small board, and summarise it
model_small = rl_model(features_shape_small)
model_small.explain()
In [17]:
b = crush.new_board(p_small.width, p_small.height, p_small.n_colours)
In [18]:
# Batch of three feature-stacks (middle one scaled x10 to check the model reacts to input)
f = make_features_in_layers(b)
f_arr = np.array( [f,10*f,f] )
#print(model_small.evaluate_features_deterministic( f_arr ))
print(model_small.evaluate_features( f_arr ))
In [28]:
#model_small.model.fit( x=np.array([f]), y=np.array([5.]), batch_size=1)
# Single training step: push Q(f) towards 5.0; returns the MSE
model_small.train( [f], [5.] )
In [30]:
def play_game(game_id, model, board_params,
              per_step_discount_factor=0.95, prob_exploration=0.1, capture_game=None):
    """Play one game to the end with an epsilon-greedy policy, collecting training data.

    Args:
        game_id: seeds np.random, so the board layout and exploration choices
            are reproducible per game.
        model: object exposing evaluate_features / evaluate_features_deterministic.
        board_params: board_param_set giving width/height/n_colours.
        per_step_discount_factor: gamma applied to the next-state Q() value.
        prob_exploration: chance of picking a random move; a negative value
            means deterministic test mode (best move only, no data collected).
        capture_game: optional callback(board, score_total) invoked every step.

    Returns:
        (stats, training_data): stats is a dict of per-game aggregates;
        training_data holds parallel 'board' (feature stacks) and 'target' lists.
    """
    training_data = dict( board=[], target=[] )

    np.random.seed(game_id)
    board = crush.new_board(board_params.width, board_params.height, board_params.n_colours)

    score_total, new_cols_total, moves_total, game_step = 0,0,0,0
    while True:
        if capture_game:
            capture_game(board, score_total)

        moves = crush.potential_moves(board)
        moves_total += len(moves)

        if len(moves)==0:
            # Need to add a training example : This is a zero-score outcome
            training_data['board'].append( make_features_in_layers(board) )
            training_data['target'].append( 0. )
            break

        # Let's find the highest-scoring of those moves: First, get all the features
        next_step_features = []
        next_step_target = []
        for (h,v) in moves: # [0:2]
            b, score, n_cols = crush.after_move(board, h,v, -1) # Added columns are unknown
            next_step_features.append( make_features_in_layers(b) )
            #next_step_target.append( score )
            next_step_target.append( n_cols )

        # Now evaluate the Q() values of the resulting position for each possible move in one go
        all_features = np.array(next_step_features) # , dtype='float32'
        #print("all_features.shape", all_features.shape )

        remember_training, next_mv = False, -1
        if prob_exploration<0: # This is testing only - just need to pick the best move
            next_step_q = model.evaluate_features_deterministic( all_features )
        else:
            if np.random.uniform(0.0, 1.0)<prob_exploration:
                ## Choose a random move, and do it
                next_mv = np.random.randint( len(moves) )
            else:
                next_step_q = model.evaluate_features( all_features )
                remember_training=True

        if next_mv<0:
            # Greedy choice: immediate reward proxy (columns removed) plus discounted Q
            next_step_aggregate = ( np.array( next_step_target, dtype='float32') +
                                    per_step_discount_factor * next_step_q.flatten() )
            next_mv = np.argmax( next_step_aggregate )

        (h,v) = moves[next_mv]
        #print("Move : (%2d,%2d)" % (h,v))
        #crush.show_board(board, highlight=(h,v))

        if remember_training: # Only collect training data if not testing
            training_data['board'].append( make_features_in_layers(board) )
            # This value includes a Q() that looks at the 'blank cols', rather than the actuals
            training_data['target'].append( next_step_aggregate[next_mv] )

        board, score, new_cols = crush.after_move(board, h,v, board_params.n_colours) # Now we do the move 'for real'

        score_total += score
        new_cols_total += new_cols
        #print("Move[%2d]=(%2d,%2d) -> Score : %3d, new_cols=%1d" % (i, h,v, score,new_cols))
        #crush.show_board(board, highlight=(0,0))
        game_step += 1

    # NOTE(review): if the very first board had no moves, game_step==0 and the
    # division below would raise ZeroDivisionError - confirm new boards always
    # have at least one move
    stats=dict(
        steps=game_step, av_potential_moves=float(moves_total) / game_step,
        score=score_total, new_cols=new_cols_total
    )

    return stats, training_data
def stats_aggregates(log, prefix, last=None):
    """Print min/max/mean over the tracked per-game statistics.

    Args:
        log: list of dicts, each with keys steps, av_potential_moves,
             new_cols, score, model_err.
        prefix: label printed at the start of the header row.
        last: if given, only aggregate over the final `last` entries of log.
    """
    stats_cols = "steps av_potential_moves new_cols score model_err".split()
    window = log[-last:] if last else log
    stats_overall = np.array([ [game[col] for col in stats_cols] for game in window ])

    print()
    print(prefix + " #steps #moves_av new_cols score model_err")
    for label, aggregate in ((" Min : ", np.min), (" Max : ", np.max), (" Mean : ", np.mean)):
        print(label, ["%6.1f" % (v,) for v in aggregate(stats_overall, axis=0).tolist()])
    print()
def run_test(i, model, board_params):
    """Play 10 deterministic test games (fixed seeds, no exploration),
    print their aggregate statistics, and return the per-game stats list.

    Args:
        i: index used only to label the printed aggregate line.
        model: Q-value model passed through to play_game.
        board_params: board_param_set passed through to play_game.
    """
    stats_test_log=[]
    for j in range(0,10):
        # Seeds start at 1e9 so test games never overlap training-game seeds
        game_stats, _ = play_game(1000*1000*1000+j, model, board_params, prob_exploration=-1.0)
        game_stats['model_err'] = -999.  # placeholder: no training error in test mode
        stats_test_log.append( game_stats )
    stats_aggregates(stats_test_log, "=Test[%5d]" % (i,))
    return stats_test_log
In [31]:
# Initial run, testing the score of an untrained SMALL network
# (a baseline to compare against after training)
_ = run_test(0, model_small, p_small)
In [32]:
# Select which model/board the training loop below operates on
model, board_params = model_small, p_small
#model, board_params = model_trained, p_trained
In [33]:
import datetime
# Timers and accumulators for the training loop (t_start is used in the save filename)
t0,i0 = datetime.datetime.now(),0
t_start=t0
stats_log=[]
training_data=dict( board=[], target=[])
In [34]:
stats_log_test = run_test(0, model, board_params)
In [35]:
#n_games, test_every = 20*1000, 1000
n_games, test_every = 1*1000, 100

batchsize=512  # training window size (examples kept and fitted per game)

for i in range(0, n_games):
    # PLAY the game with seed==i
    stats, training_data_new = play_game(i, model, board_params)

    if False:  # flip to True for per-game debug output
        print("game[%d]" % (i,))
        print(" steps = %d" % (stats['steps'],))
        print(" average moves = %5.1f" % (stats['av_potential_moves'], ) )
        print(" new_cols = %d" % (stats['new_cols'],))
        print(" score_total = %d" % (stats['score'],))

    # Append this game's experience to the sliding training window
    training_data['board'] += training_data_new['board']
    training_data['target'] += training_data_new['target']

    # This keeps the window from growing too big
    if len(training_data['target'])>batchsize*2:
        training_data['board'] = training_data['board'][-batchsize:]
        training_data['target'] = training_data['target'][-batchsize:]

    # One gradient step on the most recent batchsize examples
    for iter in range(0,1):
        err = model.train( training_data['board' ][-batchsize:],
                           training_data['target'][-batchsize:] )

    stats['model_err'] = err
    stats_log.append( stats )

    # Periodic progress / ETA report (every test_every/5 games)
    if ((i+1) % (test_every/5))==0:
        t_now = datetime.datetime.now()
        t_elapsed = (t_now - t0).total_seconds()
        t_end_projected = t0 + datetime.timedelta( seconds=(n_games-i0) * (t_elapsed/(i-i0)) )
        print((" Time(1000 games)~%.0fsec. "+
               "Will end in ~%.0f sec (~ %s), stored_data.length=%d") %
              (1000.*t_elapsed/(i-i0), ((n_games-i))*t_elapsed/(i-i0),
               t_end_projected.strftime("%H:%M"), len(training_data['target']), ))
        t0, i0 = datetime.datetime.now(), i

    #if ((i+1) % test_every)==0:
    #    stats_aggregates(stats_log, "Train[%5d]" % (i,), last=1000)

    # Periodic held-out test games
    if ((i+1) % test_every)==0:
        stats_log_test.extend( run_test(i, model, board_params) )

stats_aggregates(stats_log, "FINAL[%5d]" % (n_games,) )
In [36]:
%matplotlib inline
import matplotlib.pyplot as plt

# Plot log(stat+1) for the test runs over the course of training, with a
# least-squares trend line per stat
for c in ['new_cols', 'score']: # "steps av_potential_moves new_cols score model_err"
    plt.figure(figsize=(6, 4))
    for log in [ stats_log_test ]: #[stats_log, stats_log_test]:
        # Spread the (sparser) test points evenly across the training timeline
        t = np.arange(0.0, len(stats_log), len(stats_log) / len(log))
        v = np.log( [ l[c]+1. for l in log ] )
        plt.ylabel('log( '+c+' )', fontsize=16)
        plt.plot(t, v)
        plt.plot(t, np.poly1d( np.polyfit(t, v, 1) )(t))
    # NOTE(review): grid()'s `b=` parameter was renamed `visible=` in matplotlib 3.6 -
    # update this call if the environment's matplotlib is upgraded
    plt.grid(b=True, which='major', color='b', axis='y', linestyle='-')
    plt.show()
In [37]:
# UI element in which to watch the (small) trained model play a game below
javascript = """
<div id="board_small_watch"></div>
<script type="text/Javascript">create_board("#board_small_watch",%d,%d,%d,'board_small');</script>
""" % (p_small.width, p_small.height, p_small.n_colours)
HTML(javascript)
In [39]:
# Replay a full game with the small model, capturing every board state
# and running score so the UI above can animate the playback
seed = 10
board = crush.new_board(p_small.width, p_small.height, p_small.n_colours)
boards_for_game, scores_for_game=[],[]
def capture_game(b,s): boards_for_game.append(b);scores_for_game.append(s)
stats, _ = play_game(seed, model_small, p_small, capture_game=capture_game)
HTML( crush_ui.display_gameplay("#board_small_watch", boards_for_game, scores_for_game, 0.1) )
In [ ]:
# Select which model/params the save cell below would snapshot
model_save, p_save = model_small, p_small
#model_save, p_save = model_trained, p_trained
In [ ]:
# Disabled by default: flip to True to snapshot the current weights to disk
if False:
    param_dictionary = dict(
        param_values = model_save.get_param_values(),
        width=p_save.width, height=p_save.height, n_colours=p_save.n_colours,
        batchsize=batchsize,
        i=i, )  # i = number of games trained so far (from the training loop)
    # Filename encodes board size, run start time and game count
    with open('./data/game/crush/rl_%dx%dx%d_%s.%06d.pkl' % (
              p_save.width, p_save.height, p_save.n_colours,
              t_start.strftime("%Y-%m-%d_%H-%M"), i,), 'wb') as f:
        pickle.dump(param_dictionary, f)
The GPU speed-up factor isn't very large here (2-3x), since a lot of the training time is spent in the Python game-play and feature generation code. Moreover, the network isn't very large, so the GPU speed is dominated by PCI transfer times.
In [40]:
param_dictionary=dict(width=10, height=14, n_colours=5)
if rl_model.is_tensorflow:
print("Loading a TensorFlow/Keras Model")
#with open('./data/game/crush/rl_10x14x5_2017-05-17_16-05.019999.pkl', 'rb') as f:
#with open('./data/game/crush/rl_10x14x5_2017-05-17_18-23.039999.pkl', 'rb') as f:
#with open('./data/game/crush/rl_10x14x5_2017-05-17_18-23.229999.pkl', 'rb') as f:
#with open('./data/game/crush/rl_10x14x5_2017-05-17_18-23.389999.pkl', 'rb') as f:
#with open('./data/game/crush/rl_10x14x5_2017-05-17_18-23.989999.pkl', 'rb') as f:
with open('./data/game/crush/rl_10x14x5_2017-05-17_18-23.999999.pkl', 'rb') as f:
param_dictionary=pickle.load(f, encoding='iso-8859-1')
else:
print("Loading a Theano/Lasagne Model")
with open('./data/game/crush/rl_10x14x5_2016-06-21_03-27.049999.pkl', 'rb') as f:
param_dictionary=pickle.load(f, encoding='iso-8859-1')
width, height, n_colours = ( param_dictionary[k] for k in 'width height n_colours'.split() )
p_trained = board_param_set( width, height, n_colours )
board_trained = crush.new_board(p_trained.width,
p_trained.height,
n_colours=p_trained.n_colours)
features_shape_trained = make_features_in_layers(board_trained).shape
print("('feature layers', width, height) : %s" %(features_shape_trained, ))
model_trained = rl_model( features_shape_trained )
In [ ]:
model_trained.explain()
In [ ]:
model_trained.set_param_values( param_dictionary['param_values'] )
In [ ]:
# UI element in which to watch the trained full-size model play
javascript = """
<div id="board_10_14_trained"></div>
<script type="text/Javascript">
create_board("#board_10_14_trained",%d,%d,%d, 'board_trained');
</script>
""" % (p_trained.width, p_trained.height, p_trained.n_colours)
HTML(javascript)
In [ ]:
# Play one full game with the trained model, capture every board state and
# score, then animate the playback in the UI element above
seed = 1000
board_trained = crush.new_board(width, height, n_colours=n_colours)
boards_for_game, scores_for_game=[],[]
def capture_game(b,s): boards_for_game.append(b);scores_for_game.append(s)
stats, _ = play_game(seed, model_trained, p_trained, capture_game=capture_game)
print(stats)
HTML(crush_ui.display_gameplay("#board_10_14_trained", boards_for_game, scores_for_game, 0.1) )
In [ ]: