In [1]:
##MULTI TASK DEEP LEARNING NEURAL NETWORK

%matplotlib inline
import pandas as pd
import numpy as np
import scipy as sp
import torch
from bayes_opt import BayesianOptimization
from torch.autograd import Variable
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from math import sqrt
import random
import itertools

#Load some of the data
exp_data = pd.read_csv('../exp.tab', sep='\t', index_col=0)
cnv_data = pd.read_csv('../cnv.tab', sep='\t', index_col=0)
ydat = pd.read_csv('../labels.tab', sep='\t', index_col=0)
train_activity_data = pd.read_csv('../train_activity.tab', sep='\t')
test_activity_data = pd.read_csv('../test_activity.tab', sep ='\t')

#labels
traininglabels = train_activity_data.columns[1:]
testinglabels = test_activity_data.columns[1:]

#concatenate two data frames
frames = [exp_data, cnv_data]

xdatw = pd.concat(frames)
traininglabels


Out[1]:
Index(['786O_KIDNEY', 'A1207_CENTRAL_NERVOUS_SYSTEM',
       'A172_CENTRAL_NERVOUS_SYSTEM', 'A204_SOFT_TISSUE', 'A2058_SKIN',
       'A549_LUNG', 'AGS_STOMACH', 'AML193_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE',
       'ASPC1_PANCREAS', 'BT20_BREAST',
       ...
       'TE9_OESOPHAGUS', 'THP1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE',
       'TOV112D_OVARY', 'TYKNU_OVARY', 'U178_CENTRAL_NERVOUS_SYSTEM',
       'U343_CENTRAL_NERVOUS_SYSTEM', 'U87MG_CENTRAL_NERVOUS_SYSTEM',
       'UOK101_KIDNEY', 'VCAP_PROSTATE', 'ZR7530_BREAST'],
      dtype='object', length=142)

In [2]:
#Deep Learning Net Class

class EssentialityNet:

    def __init__(self):
        self.inputnum = xdatw.shape[0]
        self.trainscores = []
        self.testscoreslist = []
        self.learning_rate = 0.00009
        self.H = 100
        self.n_iter = 300 #training iterations
        self.minimum = 100000
        self.stopcounter = 3
        self.layernum = 1
        self.layers = []
                
        #model
        self.model = torch.nn.Sequential(
        torch.nn.Linear(self.inputnum, self.H),
        torch.nn.ReLU(),
        torch.nn.Linear(self.H, 1138),
        )
        
        #set loss function and optimizer
        self.loss = torch.nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
    
    #plot scores
    def plot(self, trainscores, testscores):
        x = np.arange(self.n_iter)
        plt.plot(x, self.trainscores, label='Train')
        plt.title('Training vs Test Accuracy')
        plt.xlabel('NN Training Iterations')
        plt.ylabel('Accuracy')
    
        plt.plot(np.asarray(x), np.asarray(testscores), label='Test') #plot
        plt.legend()
        
    #sets the proper method
    def setModel(self, Layernum, Neuronnum):  
        
        self.layernum = int(round(Layernum))
        self.H = int(round(Neuronnum))
        
        #initial input layer
        self.layers.append(torch.nn.Linear(self.inputnum, self.H))
        
        for n in range(self.layernum):
            if n != 0:
                self.layers.append(torch.nn.Linear(self.H, self.H))
            self.layers.append(torch.nn.ReLU())
            
        self.layers.append(torch.nn.Linear(self.H, 1138))
        
        #set the method to whatever layers were chosen
        self.model = torch.nn.Sequential(*self.layers)
    
    def setRegularization(self, L2Reg):
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay= L2Reg)

    def fit(self, xtrain, ytrain, xtest, ytest):
      
        #convert to variables
        xtrain_var = Variable(torch.FloatTensor(xtrain))
        xtest_var = Variable(torch.FloatTensor(xtest))
        ytrain_var = Variable(torch.FloatTensor(ytrain))
        ytest_var = Variable(torch.FloatTensor(ytest))
        
        for t in range(self.n_iter):
        
            #calculate loss
            ypred = self.model(xtrain_var)

            diff = self.loss(ypred, ytrain_var)
            self.trainscores.append(diff.data[0])
            
            #test performance
            ypredtest = self.model(xtest_var)
            difftest = self.loss(ypredtest, ytest_var)
            
            #find the best point
            if t > 10 and self.minimum < difftest.data[0]:
                self.stopcounter -= 1

                if self.stopcounter == 0:
                    self.n_iter = t
                    self.trainscores.pop()
                    break
            elif t > 10 and self.stopcounter < 3:
                self.stopcounter += 1
            
            self.minimum = difftest.data[0]
            
            self.testscoreslist.append(difftest.data[0])
            
            #zero gradients
            self.optimizer.zero_grad()
            #backpropagate
            diff.backward() 
            #update weights
            self.optimizer.step() 

    # predict with the test data
    def predict(self, X):
        
        X_var = Variable(torch.FloatTensor(X))
        return self.model(X_var)

In [3]:
#other functions for running the nn

def figureoutnetwork(layernum, neuronnum, l2reg):
    n = EssentialityNet()
    n.setModel(layernum, neuronnum)
    n.setRegularization(l2reg)
            
    n.fit(xtrain_val, ytrain_val, xtest_val, ytest_val)
    predictions = n.predict(xtest)
    return(calculateRMSE(predictions, ytest))

def figureoutnetwork3(neuronnum, l2reg):
    n = EssentialityNet()
    n.setModel(3, neuronnum)
    n.setRegularization(l2reg)
            
    n.fit(xtrain_val, ytrain_val, xtest_val, ytest_val)
    predictions = n.predict(xtest)
    return(calculateRMSE(predictions, ytest))
    
#calculate RMSE function
def calculateRMSE(predicts, actuals):
    mses = []  
    multitaskrmses = []
    preds = predicts.data.numpy()

    for i in range(preds.shape[1]):
        mses.append(((preds[:,i] - actuals[:,i])**2).mean())
        multitaskrmses.append(sqrt(mses[i]))
            
    return(np.mean(multitaskrmses))

In [11]:
#sample runs

# figureoutnetwork(5,200,0.002) #good
# figureoutnetwork(3,356,0.013) 

# figureoutnetwork(3,356,0.012) #--> 1.0088
# figureoutnetwork(3,350,0.011) 1.01
# figureoutnetwork(3,358,0.013) --> 1.011
figureoutnetwork(3,358,0.012)
#BEST SO FAR 1.008


Out[11]:
1.0114680793730153

In [5]:
#best ~1000 tasks
top_tasks = pd.read_csv("../combined_stats.tab", sep='\t')
tasks = top_tasks.iloc[:,0].values
    
ydat_best = ydat.transpose()[tasks]
ydat_best = ydat_best.transpose()

In [6]:
#index the data with the proper labels
xtrain_not_norm = xdatw[traininglabels].transpose()
xtest_not_norm = xdatw[testinglabels].transpose()
ytrain = ydat_best[traininglabels].transpose().values
ytest = ydat_best[testinglabels].transpose().values
    
#normalize inputs
xtrain = preprocessing.scale(xtrain_not_norm)
xtest = preprocessing.scale(xtest_not_norm)

#create validation set
xtrain_val, xtest_val, ytrain_val, ytest_val = train_test_split(xtrain, ytrain, test_size=0.2, random_state=434)

In [10]:
#sample network 1
n = EssentialityNet()
n.setModel(3, 355)
n.setRegularization(0.013)
            
n.fit(xtrain_val, ytrain_val, xtest_val, ytest_val)
predictions = n.predict(xtest)

In [172]:
#sample network 2
n2 = EssentialityNet()
n2.setModel(3, 350)
n2.setRegularization(0.012)
            
n2.fit(xtrain_val, ytrain_val, xtest_val, ytest_val)
predictions2 = n2.predict(xtest)

calculateRMSE(predictions2, ytest)


Out[172]:
1.0071005369030421

In [22]:
all_layers = [m for m in n.model.modules()][1:]

In [33]:
results = [all_layers[0](Variable(torch.FloatTensor(xtraind)))]
for layer in all_layers[1:]:
    results.append(layer(results[-1]))

In [78]:
tissues = ytraindat.index.map(lambda x: '_'.join(x.split('_')[1:])).values
np.unique(tissues)


Out[78]:
array(['BONE', 'BREAST', 'CENTRAL_NERVOUS_SYSTEM', 'ENDOMETRIUM',
       'HAEMATOPOIETIC_AND_LYMPHOID_TISSUE', 'KIDNEY', 'LARGE_INTESTINE',
       'LIVER', 'LUNG', 'OESOPHAGUS', 'OVARY', 'PANCREAS', 'PLEURA',
       'PROSTATE', 'SKIN', 'SMALL_INTESTINE', 'SOFT_TISSUE', 'STOMACH',
       'URINARY_TRACT'], dtype=object)

In [211]:
#PCA

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
sns.set_context('poster')
sns.set_palette(sns.color_palette("Set2", 22))
# sns.
last_layer_data = results[-1].data.numpy()
pca_tx = PCA(n_components=15).fit_transform(last_layer_data)
#tsne_tx = TSNE(n_components=2).fit_transform(pca_tx)
for tissue in np.unique(tissues):
    mask = tissues == tissue
    plt.scatter(pca_tx[mask, 0], pca_tx[mask, 1], label=tissue)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title("PCA of Different Cell Lines")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-211-4b0a32cffbb9> in <module>()
     11     plt.scatter(pca_tx[mask, 0], pca_tx[mask, 1], label=tissue)
     12 
---> 13 plt.xlabel('PC1')
     14 plt.ylabel('PC2')
     15 plt.title("PCA of Different Cell Lines")

TypeError: 'str' object is not callable

In [43]:
pca_tx


Out[43]:
array([[  3.35713174, -11.31866975],
       [ 21.98670631,  -4.63599279],
       [  6.12154625, -12.66230587],
       [  1.85565211, -18.26681846],
       [  5.57552753, -14.72323718],
       [ -1.29106478,  -6.29364497],
       [-20.03284309,  10.56670956],
       [ 13.26908714,  18.24568581],
       [-13.22756971,  -9.16450544],
       [-10.11575253, -11.97885387],
       [-13.23097574,  15.50898129],
       [-11.99998292,  -4.32379958],
       [-11.53382081, -13.26629366],
       [ 16.31846984,  -0.71189697],
       [ 19.67287668,  12.5917366 ],
       [  7.86389781, -10.26470401],
       [ -9.92000796,  -3.11171267],
       [ 12.8739056 , -12.47425396],
       [  1.43138064,  -1.24707758],
       [-12.71711155,  17.63021712],
       [  7.17640973, -12.95589618],
       [ 21.92950679,   6.16968055],
       [-20.99750806,  -1.71528777],
       [-22.74972479,  11.71246415],
       [  0.39668555,  -3.66945736],
       [  8.20083056,  -7.47478579],
       [ 16.00192794, -10.0763362 ],
       [-10.95468478, -13.05347315],
       [-27.46316233,   3.3253503 ],
       [  9.16500654,   3.52885821],
       [ 25.29739434,  22.98276653],
       [  4.09907797,  14.86859969],
       [-25.77438072,  11.56928898],
       [ -2.45445493,  -4.2837843 ],
       [ -4.68508024,  12.04425743],
       [-21.19998687,  19.79323258],
       [  1.99163819,  10.83790781],
       [  8.41638712,  -2.90808388],
       [ -6.51887921,  31.64817483],
       [ -9.53554763, -11.15372888],
       [ 16.4449964 ,  28.6072913 ],
       [  1.95070839, -17.51321752],
       [-18.97558031,   5.11366164],
       [ 14.00274666, -13.4887849 ],
       [  1.77201152, -18.4711647 ],
       [ -9.38172746, -11.84557249],
       [-12.34437545,  -9.31384577],
       [-16.96150676,  16.05878793],
       [-18.66251917,  16.68945808],
       [ -9.14039555,  -5.48855632],
       [ 32.53602838,   1.49550858],
       [-14.39150384,  -4.70280329],
       [-11.7447335 ,  -4.17444048],
       [ 14.71226599,   6.43640264],
       [ 20.91767734,  30.91561237],
       [-16.92664221,   0.92283553],
       [  7.81801689, -18.54186528],
       [ 12.81914373,  -4.33097079],
       [  7.4964681 , -12.55196221],
       [-13.161959  ,  20.75294047],
       [ -1.16189168, -14.61079287],
       [-29.94130879,   3.74560788],
       [ -5.26765397,  -7.88082549],
       [ -9.69296625, -11.15067041],
       [ -9.53761295,   5.54125019],
       [  0.9478478 ,   9.6038771 ],
       [  2.5080636 , -14.08392077],
       [  9.45077235,   6.3470124 ],
       [ 18.87379646,  -9.5544723 ],
       [ 22.48204654,  -2.26107223],
       [ 13.52992745,  -8.16983774],
       [ 14.54599771,  -0.25183586],
       [  2.2537736 ,  -4.39558882],
       [-15.63434993,   6.837689  ],
       [ -7.07521825,  -7.70038384],
       [ -7.55861396,  -3.72174125],
       [  2.79719828,   9.98070343],
       [ 18.03244074,  19.09843412],
       [ 15.35924212,  20.06876637],
       [ 13.95673935,   8.85162608],
       [ -4.58985723,   7.96095769],
       [-23.73786983,  15.02906412],
       [-13.02275645,  -1.81522441],
       [ -1.28347516, -11.36888446],
       [  7.09011262, -11.0821724 ],
       [  6.84836172, -12.48553549],
       [  0.76664869,  -6.9400051 ],
       [ -3.1587502 ,  -0.18406135],
       [  1.18221813,  -2.67893215],
       [  3.79786715, -15.86322684],
       [ -1.41534256,  -0.0658898 ],
       [ -7.76271139,   8.60491209],
       [ -9.72021991,  -1.20697292],
       [-16.9659776 ,  -1.73653688],
       [ 22.33907677,  26.04354046],
       [-11.91226864, -10.66097336],
       [ 16.05385964,  19.26930509],
       [ 15.01522248,  25.04696401],
       [-17.15911958,  -2.63876611],
       [ -2.69318278,   7.89558402],
       [ 24.74980124,  -2.11908715],
       [ -5.02174449, -11.24097079],
       [-16.52744441,  -0.32046606],
       [-15.89928288,  -2.89274825],
       [-33.23318056,  11.35462421],
       [-10.73563714, -14.79309986],
       [ -4.22073741, -22.07516326],
       [ 11.06913489,  11.22946241],
       [  1.42799227, -20.96311417],
       [-24.26584486,  12.21159007],
       [-17.16030992,  -5.04781791],
       [-14.8343178 ,  -8.9659829 ],
       [ 16.33869739,  36.14503989],
       [ 12.4884063 ,  19.16356109],
       [ 20.57260516,  -9.75867513],
       [ 17.09681753,  -6.72043553],
       [ -8.64496671,  -9.17304782],
       [  6.15369207, -18.64096889],
       [  3.33238009,  11.68774397],
       [ -3.85549976,  -2.87142556],
       [ -0.86267143, -11.13926338],
       [ -1.30291237,  -5.17783345],
       [ -1.34281503,  -8.7080693 ],
       [ -0.31665795,  -7.22608772],
       [ -9.2943109 ,   9.40096226],
       [-15.66857714,   3.3235727 ],
       [ -7.0425875 ,   3.08748016],
       [ 19.02389692,  -9.67637183],
       [ -7.00527417, -14.60906632],
       [ 26.22531421,  -1.01171026],
       [ 24.68799912,   1.41129359],
       [ -2.24155879,  -4.85524766],
       [  2.47231257,  -2.82259047],
       [  8.71673072,   8.04150353],
       [ 11.42928462,   3.02063186],
       [ -1.7331674 , -18.47857508],
       [  8.33726363, -15.91805841],
       [ 35.21888463,   1.27689279],
       [  8.86162772, -13.61844948],
       [ 14.01737828,  -6.27393565],
       [ -4.09552858,  30.3765034 ],
       [-20.83288617,  28.09180315]])

In [11]:
calculateRMSE(predictions, ytestdat)


Out[11]:
1.008651225931414

In [14]:
#print RMSEs and correlations without function

mses = []
cors = []
multitaskrmses = []

preds = predictions.data.numpy()
ytests = ytestdat.values

for i in range(preds.shape[1]):
    mses.append(((preds[:,i] - ytests[:,i])**2).mean())
    multitaskrmses.append(sqrt(mses[i]))
    
    cor = str(sp.stats.spearmanr(ytests[:,i], preds[:,i]))
    cors.append(float(cor[28:37]))
    

print(np.mean(multitaskrmses))
print(np.mean(cors))


1.00865122593
0.328818188752

In [6]:
#LOAD STUFF FOR COMPARISONS
myrmses = np.load('../myrmses2.npy')
mycors = np.load('../mycors2.npy')

myrmses = myrmses.tolist()
myrmses = list(myrmses.values())

mycors =  mycors.tolist()

In [7]:
#PLOT RMSES
plt.scatter(myrmses, multitaskrmses2)
sns.set_context('poster')

plt.xlabel('Single-Task')
plt.ylabel('Multi-Task')
plt.title('RMSE of Single-Task DNN vs Multi-Task DNN')
plt.plot([0.5, 2.8], [0.5, 2.8], color = 'red')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-084048c182da> in <module>()
      1 #PLOT RMSES
----> 2 plt.scatter(myrmses, multitaskrmses2)
      3 sns.set_context('poster')
      4 
      5 plt.xlabel('Single-Task')

NameError: name 'multitaskrmses2' is not defined

In [189]:
#PLOT CORRELATIONS
plt.scatter(mycors, cors2)
plt.xlabel('Single-Task')
plt.ylabel('Multi-Task')
plt.title('Correlation of Single-Task DNN vs Multi-Task DNN')
plt.plot([-0.2, 1], [-0.2, 1], color = 'red')


Out[189]:
[<matplotlib.lines.Line2D at 0x7f2fc5684160>]

In [221]:
vladormses = top_tasks['jklm.rmse'].tolist()

print(np.mean(vladormses))


0.999549503206

In [224]:
plt.hist(multitaskrmses,40,label = "DNN")
plt.hist(vladormses,40, label = "JKLM")
plt.title("DNN RMSEs vs JKLM RMSEs")
plt.xlabel('RMSE scores')
# plt.ylabel('2 layer DNN')
plt.legend()


Out[224]:
<matplotlib.legend.Legend at 0x7f98d12ba588>

In [146]:
#Grid search for network parameters

x = [[[figureoutnetwork(k,i,j) for i in [200,350,450,500]] for j in [0.005,0.01,0.02,0.03]] for k in [3,4,5,6]]

In [120]:
neuronnum = [200,350,450,500]
l2reg = [0.005,0.01,0.02,0.03]

In [97]:
heatmap3layer = pd.DataFrame(x[0], columns=neuronnum, index=l2reg)
# heatmap3layer.insert(0,0.0001,supplementalgrid[0])
heatmap3layer.loc[0.0001] = supplementalgrid[0]
# heatmap3layer.index = heatmap
heatmap4layer= pd.DataFrame(x[1], columns=neuronnum, index=l2reg)
heatmap4layer.loc[0.0001] = supplementalgrid[1]
heatmap5layer= pd.DataFrame(x[2], columns=neuronnum, index=l2reg)
heatmap5layer.loc[0.0001] = supplementalgrid[2]
heatmap6layer= pd.DataFrame(x[3], columns=neuronnum, index=l2reg)
heatmap6layer.loc[0.0001] = supplementalgrid[3]
heatmap6layer = heatmap6layer.sort_index()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-97-0e49c3ccf40a> in <module>()
----> 1 heatmap3layer = pd.DataFrame(x[0], columns=neuronnum, index=l2reg)
      2 # heatmap3layer.insert(0,0.0001,supplementalgrid[0])
      3 heatmap3layer.loc[0.0001] = supplementalgrid[0]
      4 # heatmap3layer.index = heatmap
      5 heatmap4layer= pd.DataFrame(x[1], columns=neuronnum, index=l2reg)

NameError: name 'x' is not defined

In [208]:
# h3 = sns.heatmap(heatmap3layer, vmin = 1.005, vmax = 1.079, annot=True, fmt="f")
# h3.set_title("3 Layer Network RMSE Map")
h3


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-208-98b068d485a2> in <module>()
      2 # h3 = sns.heatmap(heatmap3layer, vmin = 1.005, vmax = 1.079, annot=True, fmt="f")
      3 # h3.set_title("3 Layer Network RMSE Map")
----> 4 h3

NameError: name 'h3' is not defined

In [183]:
h4 = sns.heatmap(heatmap4layer, vmin = 1.005, vmax = 1.079, annot=True, fmt="f")
h4.set_title("4 Layer Network RMSE Map")


Out[183]:
<matplotlib.text.Text at 0x7f9948abdeb8>

In [184]:
h5 = sns.heatmap(heatmap5layer, vmin = 1.005, vmax = 1.079, annot=True, fmt="f")
h5.set_title("5 Layer Network RMSE Map")


Out[184]:
<matplotlib.text.Text at 0x7f9948973c18>

In [186]:
h6 = sns.heatmap(heatmap6layer, vmin = 1.005, vmax = 1.079, annot=True, fmt="f")
h6.set_title("6 Layer Network RMSE Map")


Out[186]:
<matplotlib.text.Text at 0x7f99486f4a20>

In [21]:
#add to combined stats
combinedstats = pd.read_csv("../combined_stats.tab", sep='\t')
combinedstats['dnn.rmse'] = myrmses

combinedstatsrmses = combinedstats[['jklm.rmse', 'ranger.rmse', 'mkl.d9.rmse', 'rf.d9.rmse', 'glmm.dense.rmse', 'glmm.sparse.rmse', 'dnn.rmse']]
combinedstatsrmses


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-528295c02e0f> in <module>()
      1 #add to combined stats
      2 combinedstats = pd.read_csv("../combined_stats.tab", sep='\t')
----> 3 combinedstats['dnn.rmse'] = myrmses
      4 
      5 combinedstatsrmses = combinedstats[['jklm.rmse', 'ranger.rmse', 'mkl.d9.rmse', 'rf.d9.rmse', 'glmm.dense.rmse', 'glmm.sparse.rmse', 'dnn.rmse']]

NameError: name 'myrmses' is not defined

In [19]:
#Best RMSE performance among methods

best_rmse = {'dnn.rmse':0,'glmm.dense.rmse':0, 'mkl.d9.rmse':0, 'rf.d9.rmse':0,'jklm.rmse':0, 'glmm.sparse.rmse':0, 'ranger.rmse':0}

for i in range(1138):
    best_rmse[np.argmin(combinedstatsrmses.iloc[i])] += 1
#     print(np.argmin(combinedstatsrmses.iloc[i]))

best_rmse


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-d07a4cddf49d> in <module>()
      4 
      5 for i in range(1138):
----> 6     best_rmse[np.argmin(combinedstatsrmses.iloc[i])] += 1
      7 #     print(np.argmin(combinedstatsrmses.iloc[i]))
      8 

NameError: name 'combinedstatsrmses' is not defined

In [22]:
colors = 'rgbkymc'
keys = []

keys = [s.replace(".rmse", "").upper() for s in list(whodidbest.keys())]

plt.bar(range(len(best_rmse)), best_rmse.values(), align='center', color=colors)
plt.xticks(range(len(best_rmse)), keys, rotation='vertical')
plt.title('Number of Essentiality Scores Predicted Best for Each Method')

plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-91fe13911b7e> in <module>()
      2 keys = []
      3 
----> 4 keys = [s.replace(".rmse", "").upper() for s in list(whodidbest.keys())]
      5 
      6 plt.bar(range(len(best_rmse)), best_rmse.values(), align='center', color=colors)

NameError: name 'whodidbest' is not defined

In [1]:
predictions


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-1ad18da0080a> in <module>()
----> 1 predictions

NameError: name 'predictions' is not defined