In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import scipy as sp
import torch
from bayes_opt import BayesianOptimization
from torch.autograd import Variable
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from math import sqrt
import random
import itertools
# Load the expression, copy-number (CNV), label, and activity data
exp_data = pd.read_csv('../exp.tab', sep='\t', index_col=0)
cnv_data = pd.read_csv('../cnv.tab', sep='\t', index_col=0)
ydat = pd.read_csv('../labels.tab', sep='\t', index_col=0)
train_activity_data = pd.read_csv('../train_activity.tab', sep='\t')
test_activity_data = pd.read_csv('../test_activity.tab', sep='\t')
# Best ~1000 tasks (1138 selected via combined_stats)
top_tasks = pd.read_csv('../combined_stats.tab', sep='\t')
tasks = top_tasks.iloc[:, 0].values
ydat_best = ydat.transpose()[tasks]
ydat_best = ydat_best.transpose()
# Stack expression and CNV features into a single feature matrix (rows are features)
frames = [exp_data, cnv_data]
xdatw = pd.concat(frames)
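As a quick sanity check on the concatenation (a minimal sketch, not in the original run, assuming exp.tab and cnv.tab share the same cell-line columns):
# Hypothetical sanity check: rows should add up, columns should match
assert xdatw.shape[0] == exp_data.shape[0] + cnv_data.shape[0]
assert list(exp_data.columns) == list(cnv_data.columns)
print(xdatw.shape)  # (n_features, n_cell_lines)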
In [2]:
# Deep learning net class
class EssentialityNet:
    def __init__(self):
        self.inputnum = xdatw.shape[0]
        self.trainscores = []
        self.testscoreslist = []
        self.learning_rate = 0.00009
        self.H = 100
        self.n_iter = 300  # training iterations
        self.minimum = 100000
        self.stopcounter = 3
        self.layernum = 1
        self.layers = []
        # model: one hidden layer by default
        self.model = torch.nn.Sequential(
            torch.nn.Linear(self.inputnum, self.H),
            torch.nn.ReLU(),
            torch.nn.Linear(self.H, 1138),
        )
        # set loss function and optimizer
        self.loss = torch.nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
    # plot train vs. test loss curves
    def plot(self, trainscores, testscores):
        x = np.arange(self.n_iter)
        plt.plot(x, trainscores, label='Train')
        plt.title('Training vs Test Accuracy')
        plt.xlabel('NN Training Iterations')
        plt.ylabel('Accuracy')
        plt.plot(np.asarray(x), np.asarray(testscores), label='Test')
        plt.legend()
    # build a model with the chosen number of hidden layers and neurons
    def setModel(self, Layernum, Neuronnum):
        self.layernum = int(round(Layernum))
        self.H = int(round(Neuronnum))
        # initial input layer
        self.layers.append(torch.nn.Linear(self.inputnum, self.H))
        for n in range(self.layernum):
            if n != 0:
                self.layers.append(torch.nn.Linear(self.H, self.H))
            self.layers.append(torch.nn.ReLU())
        self.layers.append(torch.nn.Linear(self.H, 1138))
        # set the model to whatever layers were chosen
        self.model = torch.nn.Sequential(*self.layers)
    def setRegularization(self, L2Reg):
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=L2Reg)
    def fit(self, xtrain, ytrain, xtest, ytest):
        # convert to autograd Variables (legacy PyTorch 0.3-style API)
        xtrain_var = Variable(torch.FloatTensor(xtrain))
        xtest_var = Variable(torch.FloatTensor(xtest))
        ytrain_var = Variable(torch.FloatTensor(ytrain))
        ytest_var = Variable(torch.FloatTensor(ytest))
        for t in range(self.n_iter):
            # calculate training loss
            ypred = self.model(xtrain_var)
            diff = self.loss(ypred, ytrain_var)
            self.trainscores.append(diff.data[0])
            # test performance
            ypredtest = self.model(xtest_var)
            difftest = self.loss(ypredtest, ytest_var)
            # early stopping: stop once the test loss has risen on three more
            # steps than it has improved (counter hits zero)
            if t > 10 and self.minimum < difftest.data[0]:
                self.stopcounter -= 1
                if self.stopcounter == 0:
                    self.n_iter = t
                    self.trainscores.pop()
                    break
            elif t > 10 and self.stopcounter < 3:
                self.stopcounter += 1
            self.minimum = difftest.data[0]
            self.testscoreslist.append(difftest.data[0])
            # zero gradients, backpropagate, update weights
            self.optimizer.zero_grad()
            diff.backward()
            self.optimizer.step()
    # predict on new data
    def predict(self, X):
        X_var = Variable(torch.FloatTensor(X))
        return self.model(X_var)
# Helper functions for running the net
def figureoutnetwork(layernum, neuronnum, l2reg):
    n = EssentialityNet()
    n.setModel(layernum, neuronnum)
    n.setRegularization(l2reg)
    n.fit(xtrain_val, ytrain_val, xtest_val, ytest_val)
    predictions = n.predict(xtest)
    # return(calculateRMSE(predictions, ytest))
    saveRMSE(predictions, ytest)

def figureoutnetwork3(neuronnum, l2reg):
    n = EssentialityNet()
    n.setModel(3, neuronnum)
    n.setRegularization(l2reg)
    n.fit(xtrain_val, ytrain_val, xtest_val, ytest_val)
    predictions = n.predict(xtest)
    return calculateRMSE(predictions, ytest)
# Compute the mean per-task RMSE
def calculateRMSE(predicts, actuals):
    mses = []
    multitaskrmses = []
    preds = predicts.data.numpy()
    for i in range(preds.shape[1]):
        mses.append(((preds[:, i] - actuals[:, i]) ** 2).mean())
        multitaskrmses.append(sqrt(mses[i]))
    print(len(multitaskrmses))
    return np.mean(multitaskrmses)
# Compute per-task RMSEs and save them to disk (fileno is set by the outer loop)
def saveRMSE(predicts, actuals):
    mses = []
    multitaskrmses = []
    preds = predicts.data.numpy()
    for i in range(preds.shape[1]):
        mses.append(((preds[:, i] - actuals[:, i]) ** 2).mean())
        multitaskrmses.append(sqrt(mses[i]))
    # write one RMSE per line
    with open('rmses_' + str(fileno) + '.tab', 'w') as rmses_file:
        for item in multitaskrmses:
            rmses_file.write("%s\n" % item)
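Because figureoutnetwork3 returns a scalar RMSE, it can be wired straight into the BayesianOptimization import that otherwise goes unused above. A minimal sketch, assuming bayes_opt's standard maximize API; the parameter bounds here are illustrative, not the values used in the original run:
# Hypothetical hyperparameter search; bayes_opt maximizes, so negate the RMSE
def neg_rmse(neuronnum, l2reg):
    return -figureoutnetwork3(neuronnum, l2reg)

bo = BayesianOptimization(neg_rmse, {'neuronnum': (50, 500), 'l2reg': (0.0001, 0.05)})
bo.maximize(init_points=5, n_iter=10)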
In [5]:
for fileno in range(10):
    # run each trial on a fresh random train/test split
    traininglabels = random.sample(range(0, 206), 142)
    traininglabels.sort()
    testinglabels = random.sample([x for x in range(206) if x not in traininglabels], 64)
    # index the data with the chosen labels
    xtrain_not_norm = xdatw.iloc[:, traininglabels].transpose()
    xtest_not_norm = xdatw.iloc[:, testinglabels].transpose()
    ytrain = ydat_best.iloc[:, traininglabels].transpose().values
    ytest = ydat_best.iloc[:, testinglabels].transpose().values
    # normalize inputs
    xtrain = preprocessing.scale(xtrain_not_norm)
    xtest = preprocessing.scale(xtest_not_norm)
    # carve a validation set out of the training data
    xtrain_val, xtest_val, ytrain_val, ytest_val = train_test_split(xtrain, ytrain, test_size=0.2, random_state=434)
    # uncomment the next line to retrain (slow)
    # figureoutnetwork(3, 356, 0.012)
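Since 142 + 64 = 206, the two samples partition the cell lines exactly; a hypothetical assertion (not in the original run) makes that explicit:
# Check: train and test indices are disjoint and together cover all 206 lines
assert len(set(traininglabels) & set(testinglabels)) == 0
assert len(set(traininglabels) | set(testinglabels)) == 206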
In [22]:
# Open and concatenate the per-trial RMSE files
rmse_frames = [pd.read_csv('rmses_%d.tab' % i, header=None) for i in range(10)]
rmses_total = pd.concat(rmse_frames, axis=1)
In [49]:
# Average RMSE per task across the ten trials
rmse_means = rmses_total.mean(axis=1)
dnnlist = list(rmse_means.values)
print(np.mean(dnnlist))
# save to file (uncomment to write mean_rmses.tab)
# with open('mean_rmses.tab', 'w') as mean_rmses_file:
#     for item in dnnlist:
#         mean_rmses_file.write("%s\n" % item)
In [46]:
# Compare the DNN against the other methods
top_tasks['dnn.rmse'] = dnnlist
top_tasks = top_tasks[['jklm.rmse', 'ranger.rmse', 'mkl.d9.rmse', 'rf.d9.rmse', 'glmm.dense.rmse', 'glmm.sparse.rmse', 'dnn.rmse']]
top_tasks
Out[46]:
In [47]:
# Count, for each task, which method achieves the lowest RMSE
best_rmse = {'dnn.rmse': 0, 'glmm.dense.rmse': 0, 'mkl.d9.rmse': 0, 'rf.d9.rmse': 0, 'jklm.rmse': 0, 'glmm.sparse.rmse': 0, 'ranger.rmse': 0}
for i in range(1138):
    best_rmse[top_tasks.iloc[i].idxmin()] += 1
best_rmse
Out[47]:
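The same tally can be computed in one line with pandas (a sketch equivalent to the loop above, using idxmin across columns):
# Equivalent one-liner: label of the lowest-RMSE method per task, tallied
top_tasks.idxmin(axis=1).value_counts()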
In [48]:
# Plot which method did best per task
colors = 'rgbkymc'
keys = [s.replace('.rmse', '').upper() for s in best_rmse.keys()]
plt.bar(range(len(best_rmse)), list(best_rmse.values()), align='center', color=colors)
plt.xticks(range(len(best_rmse)), keys, rotation='vertical')
plt.title('Number of Essentiality Scores Predicted Best for Each Method')
plt.show()