This is a demo to run Deopen


In [1]:
#glance at original data (bed format) 
!wc -l ./Deopen/demo/positive.bed
! head -n 5 ./Deopen/demo/positive.bed
!wc -l ./Deopen/demo/positive.bed
! head -n 5 ./Deopen/demo/negative.bed


10000 ./Deopen/demo/positive.bed
chr1	9995	10995	id1	1	+	.
chr1	564753	565753	id2	1	+	.
chr1	565935	566935	id3	1	+	.
chr1	567904	568904	id4	1	+	.
chr1	569403	570403	id5	1	+	.
10000 ./Deopen/demo/positive.bed
chr1	100783702	100784702	id2	1	+	.
chr1	121471114	121472114	id3	1	+	.
chr1	219901357	219902357	id4	1	+	.
chr1	26268350	26269350	id5	1	+	.
chr1	190807427	190808427	id6	1	+	.

In [2]:
#generate data for model training
#Gen_data.py is given the positive and negative BED files (-pos / -neg), the
#genome FASTA (-genome) and the output path (-out). The output file name is
#spelled "preprocessd" and the later cells read it under that same spelling.
! python ./Deopen/src/Gen_data.py -pos ./Deopen/demo/positive.bed -neg ./Deopen/demo/negative.bed -genome ./genome.fa -out ./Deopen/demo/preprocessd_data.hkl


Training data generation is finished!

In [3]:
#peek at the preprocessed data (including label, one-hot coding matrix, kmer features, etc.)
import hickle as hkl

# file written by the data-generation step above
preprocessed_path = './Deopen/demo/preprocessd_data.hkl'
data = hkl.load(preprocessed_path)

# list the entries stored in the file
data.keys()


Out[3]:
['y', 'spot', 'kmer', 'seq', 'mat']

In [4]:
#perform classification experiment
#THEANO_FLAGS selects the GPU device and float32 for this one command only.
#The script reads the preprocessed file (-in) and writes its results to
#Deopen_pre.hkl (-out), which the evaluation cell below loads.
#NOTE(review): presumably needs a CUDA-capable GPU with Theano's gpu backend - confirm before running on CPU-only machines.
! THEANO_FLAGS='device=gpu,floatX=float32' python ./Deopen/src/Deopen_classification.py -in ./Deopen/demo/preprocessd_data.hkl -out ./Deopen/demo/Deopen_pre.hkl


Using gpu device 0: Tesla K80 (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 5110)
/opt/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
/opt/anaconda2/lib/python2.7/site-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module.
  "downsample module has been moved to the theano.tensor.signal.pool module.")
Using Lasagne.layers.dnn (faster)
Initializing weights (1/5) ...
Initializing weights (2/5) ...
Initializing weights (3/5) ...
Initializing weights (4/5) ...
Initializing weights (5/5) ...
Loading pre-training weights...
Loaded parameters to layer 'conv2ddnn2' (shape 128x1x4x13).
Loaded parameters to layer 'conv2ddnn2' (shape 128).
Loaded parameters to layer 'conv2ddnn3' (shape 128x128x1x13).
Loaded parameters to layer 'conv2ddnn3' (shape 128).
Loaded parameters to layer 'conv2ddnn4' (shape 128x128x1x13).
Loaded parameters to layer 'conv2ddnn4' (shape 128).
Loaded parameters to layer 'conv2ddnn6' (shape 128x128x1x7).
Loaded parameters to layer 'conv2ddnn6' (shape 128).
Loaded parameters to layer 'conv2ddnn7' (shape 128x128x1x7).
Loaded parameters to layer 'conv2ddnn7' (shape 128).
Loaded parameters to layer 'conv2ddnn8' (shape 128x128x1x7).
Loaded parameters to layer 'conv2ddnn8' (shape 128).
Loaded parameters to layer 'conv2ddnn10' (shape 128x128x1x5).
Loaded parameters to layer 'conv2ddnn10' (shape 128).
Loaded parameters to layer 'conv2ddnn11' (shape 128x128x1x5).
Loaded parameters to layer 'conv2ddnn11' (shape 128).
Loaded parameters to layer 'conv2ddnn12' (shape 128x128x1x5).
Loaded parameters to layer 'conv2ddnn12' (shape 128).
Loaded parameters to layer 'dense14' (shape 512x256).
Loaded parameters to layer 'dense14' (shape 256).
Loaded parameters to layer 'dense18' (shape 4096x128).
Loaded parameters to layer 'dense18' (shape 128).
Loaded parameters to layer 'dense21' (shape 384x256).
Loaded parameters to layer 'dense21' (shape 256).
Loaded parameters to layer 'dense22' (shape 256x2).
Loaded parameters to layer 'dense22' (shape 2).
Continue to train...
# Neural Network with 1778434 learnable parameters

## Layer information

  #    name  size
---  ------  ---------
  0          1x4x2024
  1          1x4x1000
  2          128x1x988
  3          128x1x976
  4          128x1x964
  5          128x1x192
  6          128x1x186
  7          128x1x180
  8          128x1x174
  9          128x1x34
 10          128x1x30
 11          128x1x26
 12          128x1x22
 13          128x1x4
 14          256
 15          1x4x1024
 16          1x4x1024
 17          4096
 18          128
 19          384
 20          384
 21          256
 22          2

  epoch    trn loss    val loss    trn/val    valid acc  dur
-------  ----------  ----------  ---------  -----------  ------
      1     0.20303     0.25412    0.79896      0.90061  38.53s
      2     0.17277     0.25786    0.67001      0.90061  38.49s
      3     0.13865     0.28575    0.48522      0.89395  38.89s
      4     0.10612     0.31969    0.33194      0.89728  38.60s
      5     0.07984     0.36698    0.21757      0.90006  38.61s
      6     0.05715     0.41298    0.13838      0.89617  38.60s
Early stopping.
Best valid loss was 0.254116 at epoch 1.
Loaded parameters to layer 'conv2ddnn2' (shape 128x1x4x13).
Loaded parameters to layer 'conv2ddnn2' (shape 128).
Loaded parameters to layer 'conv2ddnn3' (shape 128x128x1x13).
Loaded parameters to layer 'conv2ddnn3' (shape 128).
Loaded parameters to layer 'conv2ddnn4' (shape 128x128x1x13).
Loaded parameters to layer 'conv2ddnn4' (shape 128).
Loaded parameters to layer 'conv2ddnn6' (shape 128x128x1x7).
Loaded parameters to layer 'conv2ddnn6' (shape 128).
Loaded parameters to layer 'conv2ddnn7' (shape 128x128x1x7).
Loaded parameters to layer 'conv2ddnn7' (shape 128).
Loaded parameters to layer 'conv2ddnn8' (shape 128x128x1x7).
Loaded parameters to layer 'conv2ddnn8' (shape 128).
Loaded parameters to layer 'conv2ddnn10' (shape 128x128x1x5).
Loaded parameters to layer 'conv2ddnn10' (shape 128).
Loaded parameters to layer 'conv2ddnn11' (shape 128x128x1x5).
Loaded parameters to layer 'conv2ddnn11' (shape 128).
Loaded parameters to layer 'conv2ddnn12' (shape 128x128x1x5).
Loaded parameters to layer 'conv2ddnn12' (shape 128).
Loaded parameters to layer 'dense14' (shape 512x256).
Loaded parameters to layer 'dense14' (shape 256).
Loaded parameters to layer 'dense18' (shape 4096x128).
Loaded parameters to layer 'dense18' (shape 128).
Loaded parameters to layer 'dense21' (shape 384x256).
Loaded parameters to layer 'dense21' (shape 256).
Loaded parameters to layer 'dense22' (shape 256x2).
Loaded parameters to layer 'dense22' (shape 2).
Model training finished.
Accuracy score is 0.9075
ROC AUC score is 0.966586412415

In [5]:
#evaluate performance using ROC curve
import hickle as hkl
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import precision_recall_curve

# The classification step saved a two-element list, unpacked here as
# y_prob and y_test (presumably predicted scores and true labels - confirm
# against Deopen_classification.py).
[y_prob, y_test] = hkl.load('./Deopen/demo/Deopen_pre.hkl')

AUC = metrics.roc_auc_score(y_test, y_prob)

# Use separate names for the two threshold arrays so the precision-recall
# call does not overwrite the ROC thresholds.
fpr, tpr, roc_thresholds = metrics.roc_curve(y_test, y_prob)
# Precision/recall are computed for reference; they are not plotted in this cell.
precision, recall, pr_thresholds = precision_recall_curve(y_test, y_prob)

# Explicit figure/axes with labels so the plot stands alone when skimmed.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='AUC=%1.3f' % AUC)
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.set_title('Deopen ROC curve')
ax.legend(loc=4)  # 4 is matplotlib's code for 'lower right'
plt.show()


Out[5]:
[<matplotlib.lines.Line2D at 0x7f7266234b90>]
Out[5]:
<matplotlib.legend.Legend at 0x7f728efa2650>

In [ ]: