In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
print(tf.__version__)
from keras.layers import Dense
from keras.models import Model, Sequential
from keras import initializers
In [57]:
from keras.callbacks import *
from keras import backend as K

class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with
    some constant frequency, as detailed in this paper (https://arxiv.org/abs/1506.01186).
    The amplitude of the cycle can be scaled on a per-iteration or
    per-cycle basis.
    This class has three built-in policies, as put forth in the paper.
    "triangular":
        A basic triangular cycle with no amplitude scaling.
    "triangular2":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exp_range":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each
        cycle iteration.
    For more detail, please see the paper.
    # Example
    ```python
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                   step_size=2000., mode='triangular')
    model.fit(X_train, Y_train, callbacks=[clr])
    ```
    The class also supports custom scaling functions:
    ```python
    clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                   step_size=2000., scale_fn=clr_fn,
                   scale_mode='cycle')
    model.fit(X_train, Y_train, callbacks=[clr])
    ```
    # Arguments
        base_lr: initial learning rate, which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore
            max_lr may not actually be reached depending on the
            scaling function.
        step_size: number of training iterations per
            half cycle. The authors suggest setting step_size to
            2-8 x the training iterations in an epoch.
        mode: one of {'triangular', 'triangular2', 'exp_range'}.
            Default 'triangular'.
            Values correspond to the policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in the 'exp_range' scaling function:
            gamma**(cycle iterations)
        scale_fn: custom scaling policy defined by a single-
            argument lambda function, where
            0 <= scale_fn(x) <= 1 for all x >= 0.
            The mode parameter is ignored.
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs=None):
        logs = logs or {}
        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        K.set_value(self.model.optimizer.lr, self.clr())
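As a quick sanity check (a minimal sketch, not part of the original notebook run), the triangular schedule can be visualized without any training by stepping the callback's internal iteration counter and calling its clr() method directly:

```python
# Hypothetical sketch: visualize the triangular CLR schedule in isolation.
import numpy as np
import matplotlib.pyplot as plt

demo = CyclicLR(base_lr=0.001, max_lr=0.006, step_size=50., mode='triangular')
lrs = []
for i in range(400):
    demo.clr_iterations = i   # manually advance the iteration counter
    lrs.append(demo.clr())
plt.plot(lrs)
plt.xlabel('iteration')
plt.ylabel('learning rate')
plt.show()
```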
In [2]:
## ---------- Create our linear dataset ---------------
## Set the mean, standard deviation, and size of the dataset, respectively
mu, sigma, size = 0, 4, 100
## Set the slope (m) and y-intercept (b), respectively
m, b = 2, 100
## Create a uniformly distributed set of x values between 0 and 10 and store them in a pandas DataFrame
x = np.random.uniform(0,10, size)
df = pd.DataFrame({'x':x})
## Find the "perfect" y value corresponding to each x value given
df['y_perfect'] = df['x'].apply(lambda x: m*x+b)
## Create some noise and add it to each "perfect" y value to create a realistic y dataset
df['noise'] = np.random.normal(mu, sigma, size=(size,))
df['y'] = df['y_perfect']+df['noise']
## Plot our noisy dataset with a standard linear regression
## (note that seaborn's regplot fits and overlays a linear regression by default)
ax1 = sns.regplot(x='x', y='y', data=df)
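Before fitting a neural network to this data, it helps to know what answer to expect. Here is a minimal sketch (not in the original notebook) of a closed-form least-squares baseline on the same noisy data, which should recover roughly m=2 and b=100:

```python
# Hypothetical baseline: ordinary least squares on the noisy data.
ols_m, ols_b = np.polyfit(df['x'], df['y'], 1)
print("OLS baseline: m=%.2f b=%.2f (true values: m=%d b=%d)" % (ols_m, ols_b, m, b))
```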
In [147]:
from keras.callbacks import *
es = EarlyStopping(min_delta=0.00001, patience=50, verbose=2)
cp = ModelCheckpoint("bst_model_wts.hdf5", save_best_only=True)
rlop = ReduceLROnPlateau(patience=15, factor=0.999, verbose=1, min_delta=10)
clr = CyclicLR(base_lr=0.0001, max_lr=0.03,
               step_size=20., mode='triangular')
In [65]:
class SGDLearningRateTracker(Callback):
    """Prints the effective (time-decayed) SGD learning rate at the end of each epoch."""
    def on_epoch_end(self, epoch, logs=None):
        optimizer = self.model.optimizer
        # Reproduce Keras' time-based decay: lr_t = lr / (1 + decay * iterations)
        lr = K.eval(tf.cast(optimizer.lr, tf.float32)
                    * (1. / (1. +
                             tf.cast(optimizer.decay, tf.float32) *
                             tf.cast(optimizer.iterations, tf.float32)
                             )))
        print('\nLR: {:.6f}\n'.format(lr))

lr_track = SGDLearningRateTracker()
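The tracker above mirrors Keras' time-based decay for SGD, lr_t = lr_0 / (1 + decay * iterations). A small hedged example (not in the original notebook) of what that works out to for the settings used below, assuming lr=0.03, decay=0.0001, and 80 iterations per epoch (80 training samples at batch_size=1):

```python
# Hypothetical worked example of the time-based decay formula used by the tracker.
lr0, decay = 0.03, 0.0001
for it in [0, 80, 8000, 80000]:   # iteration counts after roughly 0, 1, 100, 1000 epochs
    print("iterations=%6d  lr=%.6f" % (it, lr0 / (1. + decay * it)))
```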
In [148]:
##---------- Create our Keras Model -----------------
## Create our model with a single dense layer, with a linear activation function and Glorot (Xavier) weight initialization
model = Sequential([
    Dense(1, activation='linear', input_shape=(1,), kernel_initializer='glorot_uniform')
])
from keras.optimizers import SGD
sgd = SGD(0.03, decay=0.0001)
## Compile our model using the method of least squares (mse) loss function
## and a stochastic gradient descent (sgd) optimizer
model.compile(loss='mse', optimizer=sgd)  ## To try our model with an Adam optimizer simply replace 'sgd' with 'Adam'
## Optionally set the learning rate by hand and print it (left commented out)
# model.optimizer.lr.set_value(.001)
# print (model.optimizer.lr.get_value())
## Fit our model to the noisy data we created above. Notes:
## The validation_split parameter reserves 20% of our data for validation (i.e. 80% will be used for training)
## I don't really know if using a batch size of 1 makes sense
history = model.fit(x=df['x'], y=df['y'], validation_split=0.2,
                    batch_size=1, epochs=1000,  # bs=100
                    callbacks=[es, rlop, cp])
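The fit above uses EarlyStopping, ReduceLROnPlateau, and ModelCheckpoint. As an alternative (a sketch only, not what was actually run here), the CyclicLR callback defined earlier could be dropped into the callbacks list in place of ReduceLROnPlateau, and the learning-rate history it records plotted afterwards:

```python
# Hypothetical variant: cycle the learning rate instead of reducing it on plateau.
import matplotlib.pyplot as plt

history_clr = model.fit(x=df['x'], y=df['y'], validation_split=0.2,
                        batch_size=1, epochs=1000,
                        callbacks=[es, clr, cp])
# The callback records the lr it set after every batch.
plt.plot(clr.history['iterations'], clr.history['lr'])
plt.xlabel('training iterations')
plt.ylabel('learning rate')
plt.show()
```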
In [149]:
# list all data in history
print(history.history.keys())
import matplotlib.pyplot as plt
# # summarize history for accuracy
# plt.plot(history.history['acc'])
# plt.plot(history.history['val_acc'])
# plt.title('model accuracy')
# plt.ylabel('accuracy')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# # axes = plt.gca()
# # axes.set_xlim([0,120])
# # axes.set_ylim([90,100])
# plt.savefig('acc.png') # save the figure to file
# plt.show()
# plt.close()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.savefig('loss.png')
plt.show()
In [151]:
model.load_weights("bst_model_wts.hdf5")
# Re-compile only if you want to keep training or call evaluate; predict works without it
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
In [152]:
## ---------- Review our weights -------------------
## Save and print our final weights
predicted_m = model.get_weights()[0][0][0]
predicted_b = model.get_weights()[1][0]
print ("\nm=%.2f b=%.2f\n" % (predicted_m, predicted_b))
## Create our predicted y's based on the model
df['y_predicted'] = df['x'].apply(lambda x: predicted_m*x + predicted_b)
## Plot the original data with a standard linear regression
ax1 = sns.regplot(x='x', y='y', data=df, label='real')
## Plot our predicted line based on our Keras model's slope and y-intercept
ax2 = sns.regplot(x='x', y='y_predicted', data=df, scatter=False, label='predicted')
ax2.legend(loc="upper left")
Out[152]:
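As a final sanity check (a sketch, assuming the line is well fit), the mean squared error of the predicted line on the full dataset should approach the variance of the injected noise, sigma**2 = 16:

```python
# Hypothetical check: compare residual MSE against the known noise variance.
mse = np.mean((df['y'] - df['y_predicted'])**2)
print("MSE of fitted line: %.2f  (noise variance sigma**2 = %.2f)" % (mse, sigma**2))
```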
In [ ]: