In [1]:
FN = '160306-train'
In [2]:
seed = 1000
In [3]:
isplit = 0 # -1 = all
In [4]:
ch2 = False
if ch2:
FN += '.2ch'
In [5]:
with_fc2 = True
if not with_fc2:
FN += '.fc1'
In [6]:
with_std = True # predict logsigma and compute NLL as score
if with_std:
FN += '.std'
In [7]:
FN += '.seed%d'%seed
In [8]:
FN
Out[8]:
In [9]:
FN0 = '160306-crop'
if ch2:
FN0 += '.2ch'
FN1 = '160306-patient'
In [10]:
pre_load = False
start = 0 # calibrate start to match the number of steps already done in pre_load
If the run of this notebook did not complete (e.g., your spot price on AWS EC2 was too low) you can continue from the last stored weights on S3 by modifying pre_load and by setting start to the step after the one in which the weights were saved. For example
pre_load = '160306-train.std.seed1000.weights.iter-531.0.hdf5'
start = 532
In [11]:
from utils import Dataset, params, Nt, Nv, Ns, temp_dir, out_dir, awscp
Na = Nt+Nv+Ns
Nt, Nv, Ns
Out[11]:
In [12]:
awscp(pre_load)
In [13]:
C=10 # number of examples in a batch from same study, sampled from all slices of the study
CC=1 # number of slices in each example
zoom_range = 0.2 # log uniform from 1/(1+...) to (1+...)
rotation_range = 15. # degrees
shift_range = 0.1 # .15 # part of image
shear_range = 10 # in degrees
scale_range = .3 # log uniform from 1/(1+...) to (1+...)
label_range = 0.02 # multiply labels by a factor log uniform from 1/(1+...) to (1+...)
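All of these ranges are used below by drawing a multiplicative factor log-uniformly between 1/(1+range) and (1+range); a minimal sketch of that sampling, assuming numpy:
import numpy as np
def log_uniform_factor(r):
    # draw a multiplicative factor log-uniformly between 1/(1+r) and (1+r)
    return np.exp(np.random.uniform(np.log(1./(1.+r)), np.log(1.+r)))
print(log_uniform_factor(zoom_range))  # e.g. roughly 0.83..1.2 for zoom_range = 0.2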
In [14]:
S=200 # image size we want to read
S0 = 128 # size of image we are going to train on
TT=1/3. # Number of time frames before/after peak. If < 0 compute gradient. If < 1 compute DFT
In [60]:
DENSE = 1024
L2 = 1e-6
LR = 3e-4
NB_EPOCH_0 = 500 # Number of epochs with LR
NB_EPOCH_1 = 200 # Number of epochs with LR/3
In [16]:
import warnings ; warnings.filterwarnings('ignore')
import re
import os
import dicom
import numpy as np
from collections import Counter
from itertools import chain, izip
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from joblib import Parallel, delayed
import itertools
import random
In [17]:
awscp(FN1+'.pkl')
patient = pd.read_pickle(os.path.join(temp_dir,FN1+'.pkl'))
In [18]:
Ntraining = len(patient[['Systole','Diastole']].dropna())
assert Ntraining == Nt or Ntraining == Nt+Nv
Ntraining
Out[18]:
In [19]:
Y = patient[['Systole','Diastole']].values.astype(float)
Y = Y[:Ntraining]
Y.shape
Out[19]:
In [20]:
if TT < 1.:
W = int(1./TT)
T = W - 1
WW = np.ones((30,2*W-1))
for w in range(W):
if w == 0:
WW[:,w] = np.ones(30)/30.
else:
# maximal amp is actually 30/2 and not sqrt(30/2)
WW[:,2*w-1] = np.cos(np.linspace(0.,2*np.pi*w,31)[:30])/np.sqrt(30/2.)
WW[:,2*w] = np.sin(np.linspace(0.,2*np.pi*w,31)[:30])/np.sqrt(30/2.)
else:
T = TT
WW = None
WW.shape,T
Out[20]:
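With TT = 1/3 this gives W = 3, so WW is a (30, 5) basis over the 30 time frames: a constant column plus one cos/sin pair per harmonic. A toy sketch (illustration only, under the assumption that the basis is applied by projecting each pixel's 30-frame time series onto its columns; any such projection is done upstream, not in this notebook):
t = np.arange(30)
pixel_series = 100. + 10.*np.cos(2*np.pi*t/30.)  # synthetic intensity over one cardiac cycle
coeffs = pixel_series.dot(WW)                    # -> mean, then cos/sin amplitude per harmonic
print(coeffs.round(2))                           # ~ [100, 38.73, 0, 0, 0]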
In [21]:
import cPickle as pickle
fn = FN0 + '.xresults.pkl'
awscp(fn)
with open(os.path.join(temp_dir,fn),'rb') as fp:
Xresults = pickle.load(fp)
len(Xresults)
Out[21]:
In [22]:
def crop(img, S=S, S0=S0):
s = (S-S0)//2
if len(img.shape) == 2:
return img[s:s+S0,s:s+S0]
else:
return img[:,s:s+S0,s:s+S0]
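A quick sanity check of crop (a sketch, assuming the default S=200 and S0=128):
x = np.zeros((30, 200, 200))
print(crop(x).shape)   # -> (30, 128, 128), the central S0 x S0 region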
In [23]:
from scipy import ndimage
import math
def transform(x,angle=0,shear=0,zoom=1,out_shape=None,shift_x=0,shift_y=0):
fill_mode="nearest"
cval=0.
shape = x.shape
shape2 = shape[-2:]
if out_shape is None:
out_shape = shape2
x = x.reshape((-1,)+shape2)
xout = np.empty(x.shape)
# shear matrix preserves area: [[1,s1],[0,1]]
shear = np.pi / 180 * shear
s1 = -math.sin(shear)
#rotation matrix [[m11, m12], [m21, m22]]
angle = np.pi / 180 * angle
m11 = math.cos(angle)
m12 = math.sin(angle)
m21 = -math.sin(angle)
m22 = math.cos(angle)
# zoom matrix is [[z1,0],[0,z1]]
z1 = 1./zoom
# multiply zoom*rotation*shear
matrix = np.array([[m11, m12+s1*m11], [m21, m22+s1*m21]], dtype=np.float64)
matrix *= z1
offset = np.zeros((2,), dtype=np.float64)
offset[0] = float(200) / 2.0 - 0.5
offset[1] = float(200) / 2.0 - 0.5
offset = np.dot(matrix, offset)
tmp = np.zeros((2,), dtype=np.float64)
tmp[0] = float(200) / 2.0 - 0.5
tmp[1] = float(200) / 2.0 - 0.5
offset = tmp - offset
order=3
prefilter=True
for ia,oa in itertools.izip(x,xout):
ndimage.affine_transform(ia, matrix, offset, shape2, oa, order, fill_mode,
cval, prefilter=prefilter)
s0 = (shape2[0] - out_shape[0])//2 + shift_x
s1 = (shape2[1] - out_shape[1])//2 + shift_y
xout = xout[:, s0:s0+out_shape[0], s1:s1+out_shape[1]]
return xout.reshape(shape[:-2]+out_shape)
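With the default parameters (angle=0, shear=0, zoom=1, no shift) the affine transform should reduce to the same centre crop as crop above; a small sanity-check sketch (expected to hold up to spline interpolation error):
x = np.random.rand(4, 200, 200)
x_id = transform(x, out_shape=(S0, S0))
print(np.allclose(x_id, crop(x)))   # expected True: identity warp followed by a centre crop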
In [24]:
def color_show(x):
x = x.astype(float)
intensity = x[0,:,:]
intensity = (intensity - intensity.min())/(intensity.max() - intensity.min())
saturation = 0.1
colors = np.array([[1,0.,0.],[0.,0.5,0.5]])
xx = saturation*np.ones((x.shape[1],x.shape[2],3))
for w in range(1,min(W,4)):
hue = np.sqrt(x[2*w-1,:,:]**2 + x[2*w,:,:]**2)
hue = (hue - hue.min())/(hue.max()-hue.min())
for c in range(3):
xx[:,:,c] += (1-saturation)*colors[w-1][c]*hue
xx *= intensity[:,:,None]
xx = np.log(xx+1/1000.)
xx = (xx - xx.min())/(xx.max() - xx.min())
plt.imshow(xx)
return xx
In [25]:
from keras.preprocessing.image import random_shift, random_rotation
study=268
(xs, _), ys, angle = Xresults[study]
x0 = xs[:,:,:,:].copy()
sft=int(S*shift_range)
zm = (1+zoom_range)
x0 = transform(x0, angle=rotation_range, shear=shear_range,zoom=zm,out_shape=(S0,S0),shift_x=sft, shift_y=sft)
print x0.shape
# x0 = crop(x0)
color_show(x0[3])
x0.shape
Out[25]:
In [26]:
if ch2:
(_, xs), ys, angle = Xresults[12]
x0 = xs[:,:,:,:].copy()
sft=int(S*shift_range)
# sft=0
zm = (1+zoom_range)/2.
x0 = transform(x0, angle=rotation_range, shear=shear_range,zoom=zm,out_shape=(S0,S0),shift_x=sft, shift_y=sft)
print x0.shape
# x0 = crop(x0)
color_show(x0[0])
print x0.shape
In [27]:
ymean, ystd = Y.mean(axis=0), Y.std(axis=0)
ymean, ystd
Out[27]:
In [28]:
ymean, ystd = (np.array([ 71.96 , 165.8668]), np.array([ 43.24805568, 59.27725076]))
In [29]:
T, WW is not None
Out[29]:
In [30]:
from keras.preprocessing.image import ImageDataGenerator
class GraphImageDataGenerator2(ImageDataGenerator):
def __init__(self,label_range=0.,zoom_range=0.,scale_range=0.,random_slice=False,random_time=False,with_std=False,**kwargs):
self.label_range = label_range
self.zoom_range = zoom_range
self.scale_range = scale_range
self.random_slice = random_slice
self.with_std=with_std
self.random_time = random_time
super(GraphImageDataGenerator2, self).__init__(**kwargs)
def flow(self, X, C=C, CC=CC, **kwargs):
"""
C=0 # generate all slices from a single example in a batch
C=8 # number of examples in a batch from same study (use power of 2)
CC=1 # number of slices in each example
"""
self.C = C
self.CC = CC
if C:
batch_size = kwargs['batch_size']
assert batch_size % C == 0
kwargs['batch_size'] = batch_size // C
else:
assert kwargs['batch_size'] == 1
return super(GraphImageDataGenerator2,self).flow(X, X, **kwargs)
def next(self):
# Keep under lock only the mechanism which advances the indexing of each batch
# see # http://anandology.com/blog/using-iterators-and-generators/
with self.lock:
index_array, current_index, current_batch_size = next(self.flow_generator)
bX = []
if ch2:
bX_2ch = []
bY = []
bYscale = []
C = self.C
CC = self.CC
assert C > 0 or current_batch_size == 1
for idx in index_array:
study = self.X[idx]
(x, x_2ch), yscale, angle = Xresults[study]
nx = len(x)-CC+1 # number of slices we can use
Cx = nx if C == 0 else C # number of slices we want
if nx == Cx or not self.random_slice:
slices = np.linspace(0,nx-1,Cx).astype(int)
elif nx > Cx:
# we have more slices than we want, take without repetition
slices = np.sort(random.sample(xrange(nx),Cx))
else:
# We have less than we want.
# first make sure we take all we have
slices = range(nx)
# add more slices without repetition
slices += random.sample(xrange(nx),min(Cx-nx,nx))
if len(slices) < Cx:
# if we still need more, add with repetition
slices += list(np.random.choice(nx,Cx-len(slices)))
slices = np.sort(slices)
# make sure the first/last slice are always selected
slices[0] = 0
slices[-1] = nx-1
for j,slc in enumerate(slices):
# process one example
slc_range = range(slc,slc+CC)
if T > 0:
if WW is not None:
time_range = range(2*T+1)
elif self.random_time:
time_range = [np.random.randint(2*T+1), 2*T+1+np.random.randint(2*T+1)]
else:
time_range = [T,3*T+1]
else:
time_range = range(x.shape[1])
x0 = np.array([x[s,t]/256. for s in slc_range for t in time_range])
if ch2:
x0_2ch = np.array([x_2ch[0,t]/256. for t in time_range])
angle0 = angle + np.random.uniform(-self.rotation_range, self.rotation_range)
shear = np.random.uniform(-self.shear_range, self.shear_range)
shift_x = int(S*np.random.uniform(-self.width_shift_range, self.width_shift_range))
shift_y = int(S*np.random.uniform(-self.height_shift_range, self.height_shift_range))
zoom = np.exp(np.random.uniform(np.log(1./(1.+self.zoom_range)), np.log(1.+self.zoom_range)))
x0 = transform(x0, angle=angle0, shear=shear, zoom=zoom,
out_shape=(S0,S0),
shift_x=shift_x, shift_y=shift_y)
if ch2:
# use different augmentation for 2ch than for sax, except for zoom, because we want them to have the same scale
# ignore the 2D sax angle
angle0 = np.random.uniform(-self.rotation_range, self.rotation_range)
shear = np.random.uniform(-self.shear_range, self.shear_range)
shift_x = int(S*np.random.uniform(-self.width_shift_range, self.width_shift_range))
shift_y = int(S*np.random.uniform(-self.height_shift_range, self.height_shift_range))
# keep the same proportion of zoom as sax
if np.random.randint(2):
for i in range(x0_2ch.shape[0]):
x0_2ch[i] = np.fliplr(x0_2ch[i])
x0_2ch = transform(x0_2ch, angle=0, shear=shear, zoom=zoom/2.,
out_shape=(S0,S0),
shift_x=shift_x, shift_y=shift_y)
if self.scale_range:
s = np.exp(np.random.uniform(np.log(1./(1.+self.scale_range)), np.log(1.+self.scale_range)))
x0 *= s
if ch2:
s = np.exp(np.random.uniform(np.log(1./(1.+self.scale_range)), np.log(1.+self.scale_range)))
x0_2ch *= s
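# magnifying the image by a factor `zoom` makes structures cover zoom**2 more pixels, so the per-pixel volume scale shrinks by the same factor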
ys = yscale / (zoom*zoom)
bX.append(x0)
if ch2:
bX_2ch.append(x0_2ch)
if study < Ntraining:
y0 = Y[study, :]
y0 = y0/ystd # don't use ymean because we can't make predictions using the same CNN for different yscale
if self.label_range:
r = np.exp(np.random.uniform(np.log(1./(1.+self.label_range)), np.log(1.+self.label_range)))
y0 *= r
else:
y0 = np.zeros(2)
# distance in slices between the slice of the current example and the next example
s = slices[j+1]-slc if j < Cx-1 else len(x)-CC-slc
if self.with_std:
# dummy space for the logstd prediction
y0 = np.concatenate((y0,np.ones(len(y0)),np.array([ys,s])))
else:
y0 = np.concatenate((y0,np.array([ys,s])))
bY.append(y0)
bYscale.append([ys,s])
bX = np.array(bX)
if ch2:
bX_2ch = np.array(bX_2ch)
bY = np.array(bY)
bYscale = np.array(bYscale)
if ch2:
return {'input':bX, 'input_2ch':bX_2ch, 'output':bY, 'yscale':bYscale}
else:
return {'input':bX, 'output':bY, 'yscale':bYscale}
traingen = GraphImageDataGenerator2(with_std=with_std,
featurewise_center=False, featurewise_std_normalization=False,
rotation_range=rotation_range, label_range=label_range,
width_shift_range=shift_range, height_shift_range=shift_range,
zoom_range=zoom_range, scale_range=scale_range, shear_range=shear_range,
random_slice=True,random_time=True)
valgen = GraphImageDataGenerator2(with_std=with_std,
featurewise_center=False, featurewise_std_normalization=False)
In [31]:
f = traingen.flow(np.arange(Ntraining), batch_size=8*C, shuffle=False)
In [32]:
data = next(f) # get a single batch
In [33]:
data.keys()
Out[33]:
In [34]:
data['output'][0]
Out[34]:
In [35]:
data['yscale'][0]
Out[35]:
In [36]:
print data['input'].shape
if ch2:
print data['input_2ch'].shape
In [37]:
nC = len(data['input'])
plt.figure(figsize=(15,12))
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
for c in range(nC):
plt.subplot(nC//C,C,c+1)
color_show(data['input'][c])
plt.axis('off')
In [38]:
if ch2:
nC = len(data['input_2ch'])
plt.figure(figsize=(15,12))
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
for c in range(nC):
plt.subplot(nC//C,C,c+1)
color_show(data['input_2ch'][c])
plt.axis('off')
In [39]:
f = valgen.flow(np.arange(Ntraining), batch_size=8*C, shuffle=False)
data = next(f) # get a single batch
nC = len(data['input'])
plt.figure(figsize=(15,12))
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
for c in range(nC):
plt.subplot(nC//C,C,c+1)
color_show(data['input'][c])
plt.axis('off')
In [40]:
input_shape = data['input'].shape[1:]
print input_shape
if ch2:
input_2ch_shape = data['input_2ch'].shape[1:]
print input_2ch_shape
In [41]:
output_dim = data['output'].shape[1]
output_dim
Out[41]:
In [42]:
yscale_dim = data['yscale'].shape[1]
yscale_dim
Out[42]:
In [43]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten, Merge
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import ELU, LeakyReLU
from keras.regularizers import l2
def gen_model(input_shape):
model = Sequential()
# model.add(Activation(activation=center_normalize, input_shape=input_shape))
model.add(Convolution2D(64, 1, 1, border_mode='same', input_shape=input_shape))
# model.add(BatchNormalization())
# model.add(ELU())
model.add(Activation('relu')) # Activation('relu')
model.add(Convolution2D(64, 3, 3, border_mode='valid',subsample=(2,2)))
# model.add(ELU()) # Activation('relu')
model.add(BatchNormalization())
model.add(Activation('relu')) # Activation('relu')
# model.add(ZeroPadding2D(padding=(1, 1)))
# model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.1))
model.add(Convolution2D(96, 3, 3, border_mode='same'))
# model.add(BatchNormalization())
# model.add(ELU())
model.add(Activation('relu')) # Activation('relu')
model.add(Convolution2D(96, 3, 3, border_mode='valid',subsample=(2,2)))
model.add(BatchNormalization())
model.add(Activation('relu')) # Activation('relu')
# model.add(ELU())
# model.add(ZeroPadding2D(padding=(1, 1)))
# model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))
model.add(Convolution2D(128, 2, 2, border_mode='same'))
# model.add(BatchNormalization())
model.add(Activation('relu')) # Activation('relu')
# model.add(ELU())
model.add(Convolution2D(128, 2, 2, border_mode='valid',subsample=(2,2)))
model.add(BatchNormalization())
model.add(Activation('relu')) # Activation('relu')
# model.add(ELU())
# model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.3))
model.add(Convolution2D(128, 2, 2, border_mode='same'))
# model.add(BatchNormalization())
model.add(Activation('relu')) # Activation('relu')
# model.add(ELU())
model.add(Convolution2D(128, 2, 2, border_mode='valid',subsample=(2,2)))
model.add(BatchNormalization())
model.add(Activation('relu')) # Activation('relu')
# model.add(ELU())
# model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.4))
model.add(Flatten())
return model
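As a rough sanity check of the architecture above, the spatial sizes can be estimated (an estimate only, assuming the usual floor((n-k)/stride)+1 arithmetic for the strided 'valid' convolutions; the 'same' convolutions keep the size):
n = S0
for k in (3, 3, 2, 2):
    n = (n - k)//2 + 1
print(n, 128*n*n)   # ~7x7 feature maps and ~6272 features out of Flatten, feeding fc1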
In [44]:
# seed the weight initializations and dropout internal seed
random.seed(seed)
np.random.seed(seed)
from keras.models import Graph
graph = Graph()
graph.add_input(name='input', input_shape=input_shape)
graph.add_node(gen_model(input_shape), name='model', input='input')
if ch2:
graph.add_input(name='input_2ch', input_shape=input_2ch_shape)
graph.add_node(gen_model(input_2ch_shape), name='model_2ch', input='input_2ch')
if ch2:
graph.add_node(Dense(DENSE, W_regularizer=l2(L2)), name='fc1', inputs=['model','model_2ch'])
else:
graph.add_node(Dense(DENSE, W_regularizer=l2(L2)), name='fc1', input='model')
graph.add_node(BatchNormalization(),name='fc1_bn',input='fc1')
graph.add_node(Activation('relu'),name='fc1_act',input='fc1_bn')
graph.add_node(Dropout(0.5), name='fc1_dropout',input='fc1_act')
if with_fc2:
graph.add_node(Dense(DENSE, W_regularizer=l2(L2)), name='fc2', input='fc1_dropout')
graph.add_node(BatchNormalization(),name='fc2_bn',input='fc2')
graph.add_node(Activation('relu'),name='fc2_act',input='fc2_bn')
graph.add_node(Dropout(0.5), name='fc2_dropout',input='fc2_act')
last_fc_layer = 'fc2_dropout'
else:
last_fc_layer = 'fc1_dropout'
# predict sys and dia together
graph.add_node(Dense(output_dim-yscale_dim),name='volume', input=last_fc_layer)
graph.add_input(name='yscale', input_shape=(yscale_dim,))
graph.add_node(Activation('linear'), inputs=['volume','yscale'], name='output', merge_mode='concat', create_output=True)
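The 'output' node simply concatenates the 4-dimensional 'volume' prediction with the 2-dimensional 'yscale' input, so y_pred carries [sys, dia, logstd-sys, logstd-dia, yscale, yslice] and the loss below can read the per-slice volume scale and slice spacing directly from it.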
In [45]:
from keras.objectives import mse
import keras.backend as K
# Square of the minimal error divided by the std normalization (~50ml)
min_var=0.001 # ~1.6ml
CLIP=0.5 # don't allow negative values
def nll(y,mean,var):
return K.mean(K.log(var) + K.square(y - mean + K.minimum(mean,0))/var + CLIP*K.square(K.minimum(mean,0)), axis=-1)
def np_nll(y,mean,var):
return np.mean(np.log(var) + np.square(y - mean)/var + CLIP*np.square(np.minimum(mean,0)), axis=-1)
def nll_study(y_true, y_pred,C=C, output_dim=output_dim, yscale_dim=yscale_dim):
"""
y - [sys, dia, logstd-sys, logstd-dia, yscale, yslice]
"""
assert yscale_dim == 2 and output_dim == 6
# collapse the true values to one value per example.
y_true1 = K.mean(K.reshape(y_true, (y_true.shape[0]/C, C, output_dim)), axis=1)
y_pred = K.reshape(y_pred, (y_pred.shape[0]/C, C, output_dim))
yscale = y_pred[:,:,output_dim-2].dimshuffle([0,1,'x'])
yslice = y_pred[:,:-1,output_dim-1].dimshuffle([0,1,'x'])
# yslice_mask = K.clip(yslice,0,1)
# the sys and dia of every slice have different yscale
y_mean = y_pred[:,:,:2] * yscale
# average area between each pair of slices
y_mean = (y_mean[:,:-1,:] + y_mean[:,1:,:])/2. # + K.sqrt(y_pred[:,:-1,:2]*y_pred[:,1:,:2]))/3.
# multiply by the height of each pair of slices
y_mean = y_mean * yslice
# volume along slices
y_mean = K.sum(y_mean, axis=1) # /K.sum(yslice, axis=1)
# convert from logsigma to sigma and scale
y_sigma = K.exp(y_pred[:,:,2:4]) * yscale
# convert to variance
y_var = K.square(y_sigma)
# variance of pair average
y_var = (y_var[:,:-1,:] + y_var[:,1:,:])/2. # + K.sqrt(y_pred[:,:-1,:2]*y_pred[:,1:,:2]))/3.
# correct for height of pair
# Below should be yslice*yslice but we are simulating a missing slice which is uncorrelated
# empirical comparison between using large C and small C shows that using just yslice is better
y_var = y_var * yslice
# variance along slices
y_var = K.sum(y_var, axis=1) # /K.sum(yslice_mask, axis=1)
y_var += min_var
# compute negative log likelihood (NLL) of true values given mean and variance
loss0 = nll(y_true1[:,0], y_mean[:,0], y_var[:,0])
loss1 = nll(y_true1[:,1], y_mean[:,1], y_var[:,1])
return (loss0 + loss1)/2.
def np_study(y_pred,C=C, output_dim=output_dim, yscale_dim=yscale_dim):
# compute mean and std in the same way as nll_study with numpy instead of Theano
assert yscale_dim == 2 and output_dim == 6
y_pred = np.reshape(y_pred, (y_pred.shape[0]/C, C, output_dim))
yscale = y_pred[:,:,output_dim-2][:,:,None]
yslice = y_pred[:,:-1,output_dim-1][:,:,None]
# yslice_mask = np.clip(yslice,0,1)
y_mean = y_pred[:,:,:2] * yscale
y_mean = (y_mean[:,:-1,:] + y_mean[:,1:,:])/2. # + K.sqrt(y_pred[:,:-1,:2]*y_pred[:,1:,:2]))/3.
y_mean = y_mean * yslice
y_mean = np.sum(y_mean, axis=1) # /np.sum(yslice, axis=1)
y_sigma = np.exp(y_pred[:,:,2:4]) * yscale
y_var = np.square(y_sigma)
y_var = (y_var[:,:-1,:] + y_var[:,1:,:])/2. # + K.sqrt(y_pred[:,:-1,:2]*y_pred[:,1:,:2]))/3.
y_var = y_var * yslice # * yslice
y_var = np.sum(y_var, axis=1) # /np.sum(yslice_mask, axis=1)
y_var += min_var
return np.hstack((y_mean, np.sqrt(y_var)))
def mseC1(y_true, y_pred,C=C, output_dim=output_dim):
y_true1 = K.mean(K.reshape(y_true, (y_true.shape[0]/C, C, output_dim)), axis=1)
y_pred_mean = y_pred[:,:2]*y_pred[:,output_dim-1].dimshuffle([0,'x'])
y_pred_mean = K.mean(K.reshape(y_pred_mean, (y_pred.shape[0]/C, C, 2)), axis=1)
loss0 = mse(y_true1[:,0], y_pred_mean[:,0])
loss1 = mse(y_true1[:,1], y_pred_mean[:,1])
return (loss0 + loss1)/2.
def mean_squared_percentage_error(y_true, y_pred):
var = 0.2*K.square(y_true)
mean = y_pred
y = y_true
return K.mean(K.square(y - mean)/var + CLIP*K.square(K.minimum(mean,0)), axis=-1)
def mseC(y_true, y_pred, C=C, output_dim=output_dim, yscale_dim=yscale_dim):
assert yscale_dim == 2 and output_dim == 4
y_true1 = K.mean(K.reshape(y_true, (y_true.shape[0]/C, C, output_dim)), axis=1)
y_pred = K.reshape(y_pred, (y_pred.shape[0]/C, C, output_dim))
yscale = y_pred[:,:,output_dim-2].dimshuffle([0,1,'x'])
yslice = y_pred[:,:-1,output_dim-1].dimshuffle([0,1,'x'])
y_pred = y_pred[:,:,:2] * yscale
y_pred = (y_pred[:,:-1,:] + y_pred[:,1:,:])/2.
# the + K.sqrt(y_pred[:,:-1,:2]*y_pred[:,1:,:2]))/3. term can't be used here because y_pred was already averaged
y_pred = y_pred * yslice
y_pred = K.sum(y_pred, axis=1) # /K.sum(yslice, axis=1)
loss0 = mean_squared_percentage_error(y_true1[:,0], y_pred[:,0])
loss1 = mean_squared_percentage_error(y_true1[:,1], y_pred[:,1])
return (loss0 + loss1)/2.
# same unpacking of y_pred done by mseC but in numpy
def np_C(y_pred, C=C, output_dim=output_dim, yscale_dim=yscale_dim):
assert yscale_dim == 2 and output_dim == 4
y_pred = np.reshape(y_pred, (y_pred.shape[0]/C, C, output_dim))
yscale = y_pred[:,:,output_dim-2][:,:,None]
yslice = y_pred[:,:-1,output_dim-1][:,:,None]
y_pred = y_pred[:,:,:2]*yscale
y_pred = (y_pred[:,:-1,:] + y_pred[:,1:,:])/2.
# + K.sqrt(y_pred[:,:-1,:2]*y_pred[:,1:,:2]))/3.
y_pred = y_pred * yslice
y_pred = np.sum(y_pred, axis=1) # /np.sum(yslice, axis=1)
return y_pred
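A toy walk-through of the numpy versions above (a sketch, assuming with_std=True so output_dim == 6): one study with C=3 slices, unit normalized areas, a pixel-to-ml scale of 10 and slice spacing 2.
# columns: [sys, dia, logstd_sys, logstd_dia, yscale, slice_spacing]
y_toy = np.array([[1., 1., 0., 0., 10., 2.]]*3)
print(np_study(y_toy, C=3))   # ~ [[40, 40, 20, 20]]: trapezoid-like volume and its std per measure
# NLL of a single normalized measurement: mean 1.0, true 1.1, variance 0.04
print(np_nll(np.array([1.1]), np.array([1.0]), np.array([0.04])))   # ~ -2.97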
In [46]:
output_dim,C
Out[46]:
In [47]:
W_start = graph.get_weights()
W_start = [w.copy() for w in W_start]
In [48]:
from keras.optimizers import SGD, Adam
optimizer = Adam(lr=LR)
# optimizer = SGD(1e-5)
if output_dim == 6:
loss = nll_study
elif output_dim == 4:
loss = mseC
else:
assert False
graph.compile(optimizer=optimizer, loss={'output': loss})
In [49]:
import random
idx = np.arange(Ntraining)
# seed the split
random.seed(seed)
np.random.seed(seed)
np.random.shuffle(idx)
In [50]:
split_size = Ntraining // 3
split_size
Out[50]:
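The training studies are split into three folds of consecutive chunks of the shuffled idx; a sketch of the fold boundaries used further down (valid for isplit >= 0):
istart = isplit*split_size
iend = (isplit+1)*split_size if isplit < 2 else Ntraining
print(istart, iend, len(idx[istart:iend]))   # the held-out fold for this run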
In [51]:
from scipy.stats import linregress
from tqdm import tqdm
def test_call(ipred, index):
gen = valgen if ipred == 0 else traingen
return next(gen.flow(np.array([index]), C=0, batch_size=1, shuffle=False))
def pred(itest, npred=1, seed=seed, n_jobs=-1):
"""
npred - when making predictions, perform 1 prediction using valgen and npred-1 using traingen, then average
n_jobs - using a value different from 1 may cause results to be non-deterministic
"""
n = len(itest)
if output_dim == 6:
yp_corrected = np.zeros((n,4))
elif output_dim == 4:
yp_corrected = np.zeros((n,2))
else:
assert False
if npred > 1:
# reset seed so we will get deterministic results when calling traingen
last_seed = np.random.randint(10e6)
random.seed(seed)
np.random.seed(seed)
# each batch will have all slices from the same example, different batches will have different sizes
for i, index in enumerate(itest):
# reset seed so we will get deterministic results when calling traingen
random.seed(seed)
np.random.seed(seed)
process = Parallel(n_jobs=n_jobs if npred > 1 else 1, verbose=0, backend='threading')
test_data = process(delayed(test_call)(ipred,index) for ipred in range(npred))
data = {}
for d in test_data:
for k,v in d.iteritems():
if k in data:
data[k] = np.concatenate((data[k],v),axis=0)
else:
data[k] = v
nslices = len(data['input'])//npred
assert nslices*npred == len(data['input'])
predict = graph.predict(data, batch_size=nslices*npred)
yp = predict['output']
# collapse all slices to one result
yp = np_study(yp, C=nslices) if output_dim == 6 else np_C(yp, C=nslices)
assert len(yp) == npred
if output_dim == 6:
# move from std to var before averaging
yp[:,2:4] = yp[:,2:4]**2
yp = yp.mean(axis=0)
yp_corrected[i,:2] = yp[:2]*ystd # we did not remove ymean so we don't add it here
if output_dim == 6:
# move back from var to std
yp_corrected[i,2:4] = np.sqrt(yp[2:4])*ystd
if npred > 1:
# return seed to where it was so the training will not repeat itself
random.seed(last_seed)
np.random.seed(last_seed)
return yp_corrected
def test_pred(itest, plot=False, npred=1, seed=seed, n_jobs=-1):
yp_corrected = pred(itest, npred=npred, seed=seed, n_jobs=n_jobs)
itest_in_Nt = np.where(itest < Ntraining)[0]
rmse = 0.
avg_score = 0.
if len(itest_in_Nt):
itest0 = itest[itest_in_Nt]
for measure in range(2):
r = np.sqrt(np.mean(np.square(yp_corrected[itest_in_Nt,measure]-Y[itest0,measure])))
rmse += r/2.
print "RMSE", r,
print linregress(Y[itest0,measure], yp_corrected[itest_in_Nt,measure])
if plot:
if measure == 0:
plt.figure(figsize=(12,5))
plt.subplot(1,2,measure+1)
if output_dim == 6:
plt.errorbar(Y[itest0,measure],yp_corrected[itest_in_Nt,measure],
yerr=yp_corrected[itest_in_Nt,measure+2],
fmt='.',alpha=0.3)
else:
plt.scatter(Y[itest0,measure],yp_corrected[itest_in_Nt,measure])
if output_dim == 6:
score = np_nll(yp_corrected[itest_in_Nt,measure]/ystd[measure],
Y[itest0,measure]/ystd[measure],
(yp_corrected[itest_in_Nt,measure+2]/ystd[measure])**2)
print 'score',score
avg_score += score/2.
print 'avg', rmse, avg_score
return yp_corrected, rmse, avg_score
In [52]:
from keras.callbacks import Callback
class SaveBestModel(Callback):
def __init__(self, fn, isplit, start=0, histories=[], best_val_loss=np.Inf):
super(Callback, self).__init__()
self.epoch_counter = start
self.fn = fn
self.isplit = isplit
self.histories = histories
self.best_val_loss = best_val_loss
def on_epoch_end(self, epoch, logs={}):
i = self.epoch_counter
self.epoch_counter += 1
isplit = self.isplit
val_loss_label = 'val_loss' if isplit >= 0 else 'loss'
val_loss = logs.get(val_loss_label)
self.histories.append((logs.get('loss'),val_loss))
fname = '%s.history.%d.pkl'%(self.fn,isplit)
with open(os.path.join(temp_dir,fname), 'wb') as fp:
pickle.dump(self.histories,fp,-1)
awscp(fname, upload=True)
if val_loss < self.best_val_loss:
print val_loss
print
test_pred(itest)
self.best_val_loss = val_loss
fname = '%s.weights.iter-%d.%d.hdf5'%(self.fn,i,isplit)
print "SAVING MODEL", self.best_val_loss
self.model.save_weights(os.path.join(temp_dir,fname), overwrite=True)
awscp(fname, upload=True)
In [53]:
batch_size = C*5 # must be a multiple of C
In [54]:
# this version of set/load weights is capable of doing transfer learning by
# copying just the bottom layers from a model which has a different
# top structure.
def set_weights(graph, weights):
for i, layer in enumerate(graph.nodes.values()):
nb_param = len(layer.get_weights())
try:
layer.set_weights(weights[:nb_param])
weights = weights[nb_param:]
except:
print "load failed at", i
break
def load_weights(graph, filepath):
'''Load weights from a HDF5 file.
'''
import h5py
f = h5py.File(filepath)
g = f['graph']
weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
set_weights(graph, weights)
f.close()
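Because set_weights stops at the first layer whose weights fail to load, weights saved from a model with a different top (for example the .fc1 variant without fc2) can still initialize the shared convolutional layers below it.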
In [55]:
if isplit >= 0:
istart = isplit*split_size
if isplit < 2 and isplit >= 0:
iend = (isplit+1)*split_size
else:
iend = Ntraining
itest = idx[istart:iend]
itrain = np.concatenate((idx[0:istart],idx[iend:Ntraining]), axis=0)
validation_data = next(valgen.flow(itest, batch_size=len(itest)*C, shuffle=False))
else:
itest = np.arange(Ntraining)
itrain = np.arange(Ntraining)
# small validation set just to keep the code the same for the split and all-data cases
validation_size = 4
validation_data = next(valgen.flow(np.arange(validation_size), batch_size=validation_size*C, shuffle=False))
In [56]:
start
Out[56]:
In [61]:
if pre_load:
print 'Loading', pre_load
fname = os.path.join(temp_dir, pre_load)
load_weights(graph, fname)
_, _, avg_score = test_pred(itest, plot=True, npred=1);
callback = SaveBestModel(FN, isplit, start=start, best_val_loss=avg_score)
else:
graph.set_weights(W_start)
assert start == 0
callback = SaveBestModel(FN, isplit)
In [ ]:
if start == 0:
# seed the generator flow
random.seed(seed)
np.random.seed(seed)
gen = traingen.flow(itrain, batch_size=batch_size, shuffle=True)
nb_epoch = NB_EPOCH_0 - start
if nb_epoch > 0:
graph.fit_generator(gen,
samples_per_epoch=len(itrain)*C,
nb_epoch=nb_epoch, verbose=1,
validation_data=validation_data,
nb_worker=8, callbacks=[callback])
In [ ]:
nb_epoch = NB_EPOCH_1
if start > NB_EPOCH_0:
nb_epoch -= start - NB_EPOCH_0
if nb_epoch > 0:
optimizer = Adam(lr=LR/3.)
graph.compile(optimizer=optimizer, loss={'output': loss})
graph.fit_generator(gen,
samples_per_epoch=len(itrain)*C,
nb_epoch=nb_epoch, verbose=1,
validation_data=validation_data,
nb_worker=8, callbacks=[callback])
In [ ]:
fn0 = '%s.weights.iter-'%FN
odir = out_dir
if not odir.endswith('/'):
odir += '/'
if out_dir.startswith('s3://'):
res = !aws s3 ls {odir} | grep {fn0} | grep {'.%d.hdf5'%isplit}
else:
res = !ls -1 {odir} | grep {fn0} | grep {'.%d.hdf5'%isplit}
res = [r.split()[-1] for r in res]
def widx(x):
return -int(x.split('.')[-3].split('-')[1])
res = sorted(res,key=widx)[0]
itr0 = -widx(res)
pre_load = res
print pre_load
awscp(pre_load)
In [ ]:
itr0
In [ ]:
fname = os.path.join(temp_dir, pre_load)
load_weights(graph, fname)
yp_corrected, _, _ = test_pred(itest, plot=True, npred=12, seed=seed, n_jobs=-1) # npred=12 is better (in jumps of 4)
yp_test, _, _ = test_pred(np.arange(Ntraining,Na), npred=12, seed=seed, n_jobs=-1)
In [ ]:
fname_yp = '%s.yp.iter-%d.%d.pkl'%(FN,itr0,isplit)
print fname_yp
with open(os.path.join(temp_dir, fname_yp), 'wb') as fp:
pickle.dump((yp_corrected,Y[itest],itest,yp_test),fp,-1)
awscp(fname_yp, upload=True)
Self-terminate if running on an AWS EC2 instance.
In [ ]:
!aws ec2 terminate-instances --instance-ids `curl http://169.254.169.254/latest/meta-data/instance-id`
In [ ]: