In [1]:
import theano
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import MDBN


Using gpu device 0: GeForce GTX 1070 (CNMeM is enabled with initial size: 75.0% of memory, cuDNN 5005)

In [2]:
import AML
me_DBN, ge_DBN, sm_DBN, dm_DBN, top_DBN = AML.load_network('Exp_2016-12-18_1435_run_0.npz','../MDBN_run')


Adding a layer with 559 input and 40 outputs
Adding a layer with 19937 input and 400 outputs
Adding a layer with 400 input and 40 outputs
Adding a layer with 1683 input and 40 outputs
Adding a layer with 120 input and 24 outputs
Adding a layer with 24 input and 3 outputs

In [3]:
datafiles = AML.prepare_AML_TCGA_datafiles('../data')

In [4]:
ME_output, _ = me_DBN.MLP_output_from_datafile(datafiles['ME'], datadir='../data')
GE_output, _ = ge_DBN.MLP_output_from_datafile(datafiles['GE'], datadir='../data')
SM_output, _ = sm_DBN.MLP_output_from_datafile(datafiles['SM'], datadir='../data')

In [5]:
joint_layer = np.concatenate([ME_output, GE_output, SM_output],axis=1)
plt.imshow(joint_layer, cmap='gray',interpolation='none')
plt.axis('tight')


Out[5]:
(-0.5, 119.5, 169.5, -0.5)

In [6]:
top_output = top_DBN.get_output(theano.shared(joint_layer,borrow=True))
plt.imshow((top_output>0.8)*np.ones_like(top_output)-(top_output<0.2)*np.ones_like(top_output),interpolation='none',extent=[0,3,385,0])
plt.colorbar()
plt.axis('tight')
plt.xticks(np.arange(0.5,3.5,1),('0','1','2'))


Out[6]:
([<matplotlib.axis.XTick at 0x7fb79f814450>,
  <matplotlib.axis.XTick at 0x7fb79f814e50>,
  <matplotlib.axis.XTick at 0x7fb79f769190>],
 <a list of 3 Text xticklabel objects>)

In [7]:
plt.imshow(top_output, interpolation='none',extent=[0,3,385,0])
plt.axis('tight')
plt.colorbar()
plt.xticks(np.arange(0.5,3.5,1),('0','1','2'))


Out[7]:
([<matplotlib.axis.XTick at 0x7fb79f714990>,
  <matplotlib.axis.XTick at 0x7fb79f71f090>,
  <matplotlib.axis.XTick at 0x7fb79f631910>],
 <a list of 3 Text xticklabel objects>)

In [8]:
plt.hist(top_output)


Out[8]:
([array([ 66.,   5.,   4.,   1.,   2.,   3.,   5.,   4.,   7.,  73.]),
  array([ 93.,   5.,   4.,   4.,   1.,   5.,   2.,   3.,   3.,  50.]),
  array([ 86.,   7.,   5.,   6.,   5.,   1.,   3.,   6.,   6.,  45.])],
 array([  2.86037452e-08,   9.99999661e-02,   1.99999904e-01,
          2.99999841e-01,   3.99999779e-01,   4.99999716e-01,
          5.99999654e-01,   6.99999591e-01,   7.99999529e-01,
          8.99999466e-01,   9.99999404e-01]),
 <a list of 3 Lists of Patches objects>)

In [25]:
plt.hist(top_output[:,2])


Out[25]:
(array([ 86.,   7.,   5.,   6.,   5.,   1.,   3.,   6.,   6.,  45.]),
 array([  1.25142790e-07,   9.99985033e-02,   1.99996881e-01,
          2.99995260e-01,   3.99993638e-01,   4.99992016e-01,
          5.99990394e-01,   6.99988772e-01,   7.99987150e-01,
          8.99985529e-01,   9.99983907e-01]),
 <a list of 10 Patch objects>)

In [26]:
code = (top_output[:,0:2] > 0.5) * np.ones_like(top_output[:,0:3])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-26-9f2ab70cd261> in <module>()
----> 1 code = (top_output[:,0:2] > 0.5) * np.ones_like(top_output[:,0:3])

ValueError: operands could not be broadcast together with shapes (170,2) (170,3) 

In [15]:
from utils import find_unique_classes
U = find_unique_classes(code)
cl = U[0]

In [16]:
cl


Out[16]:
array([ 2.,  5.,  4.,  6.,  6.,  0.,  3.,  4.,  1.,  4.,  5.,  4.,  5.,
        4.,  6.,  4.,  4.,  2.,  0.,  1.,  1.,  5.,  0.,  0.,  1.,  1.,
        1.,  1.,  1.,  5.,  4.,  2.,  2.,  5.,  1.,  4.,  0.,  1.,  7.,
        1.,  6.,  0.,  4.,  3.,  7.,  7.,  4.,  4.,  1.,  1.,  2.,  4.,
        1.,  1.,  7.,  6.,  4.,  1.,  4.,  2.,  1.,  6.,  6.,  4.,  2.,
        0.,  4.,  6.,  1.,  6.,  4.,  6.,  1.,  1.,  7.,  4.,  4.,  4.,
        2.,  7.,  7.,  0.,  5.,  6.,  4.,  2.,  7.,  3.,  1.,  4.,  4.,
        6.,  1.,  5.,  4.,  0.,  6.,  2.,  0.,  1.,  0.,  3.,  6.,  7.,
        2.,  6.,  1.,  4.,  1.,  3.,  7.,  0.,  7.,  2.,  0.,  7.,  4.,
        6.,  4.,  1.,  4.,  4.,  0.,  1.,  6.,  1.,  4.,  4.,  4.,  4.,
        7.,  4.,  1.,  4.,  4.,  4.,  5.,  6.,  4.,  0.,  6.,  0.,  2.,
        1.,  2.,  4.,  2.,  1.,  4.,  1.,  4.,  6.,  2.,  5.,  4.,  2.,
        4.,  4.,  2.,  2.,  0.,  2.,  6.,  0.,  4.,  2.,  0.,  7.,  6.,  2.])

In [17]:
plt.hist(cl,bins=8)


Out[17]:
(array([ 19.,  32.,  22.,   5.,  46.,  10.,  22.,  14.]),
 array([ 0.   ,  0.875,  1.75 ,  2.625,  3.5  ,  4.375,  5.25 ,  6.125,  7.   ]),
 <a list of 8 Patch objects>)

Check Survival curves for the different classes


In [18]:
import csv
id=[]
with open('../data/'+datafiles['ME']) as f:
    my_csv = csv.reader(f,delimiter='\t')
    id = my_csv.next()

In [19]:
stat={}
with open('../data/AML/AML_clinical_data2.csv') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        patient_id=row[0]
        stat[patient_id]=(row[4],row[7],row[6])

In [20]:
import re
time_list = []
event_list = []
group_list = []
print('The following case IDs were  not found in clinical data')
for index, key in enumerate(id[1:]):
    m = re.match('TCGA-\w+-\d+', key)
    patient_id = m.group(0)
    if patient_id in stat:
        patient_stat = stat[patient_id]
        add_group = True
        try:
            time_list.append(float(patient_stat[2]))
            event_list.append(1)
        except ValueError:
            try:
                time_list.append(float(patient_stat[1]))
                event_list.append(0)
            except ValueError:
                print('No data for %s' % patient_id)
                add_group = False
        if add_group:
            group_list.append(cl[index])
    else:
        print(patient_id)


The following case IDs were  not found in clinical data
No data for TCGA-AB-2887
No data for TCGA-AB-2891
No data for TCGA-AB-2918
No data for TCGA-AB-2921
No data for TCGA-AB-2930
No data for TCGA-AB-2940
No data for TCGA-AB-2943
No data for TCGA-AB-2946
No data for TCGA-AB-2975

In [21]:
from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()
kmf.fit(time_list,event_observed=event_list)
kmf.plot()


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb79c3ecc90>

In [22]:
T=np.array(time_list)
E=np.array(event_list)
ix = (np.array(group_list) == 0)
kmf.fit(T[ix], E[ix], label='group 0')
ax=kmf.plot()
for i in range(1,4):
    ix=(np.array(group_list)==i)
    kmf.fit(T[ix], E[ix], label='group %d' % i)
    kmf.plot(ax=ax)



In [23]:
T=np.array(time_list)
E=np.array(event_list)
ix = (np.array(group_list) == 4)
kmf.fit(T[ix], E[ix], label='group 4')
ax=kmf.plot()
for i in range(5,8):
    ix=(np.array(group_list)==i)
    kmf.fit(T[ix], E[ix], label='group %d' % i)
    kmf.plot(ax=ax)



In [ ]: