In [ ]:
import os
import numpy as np
from bionlp.util import io, func, plot
%matplotlib inline

In [ ]:
# Root directory of the cached figure data; every `fig_caches` entry is joined
# onto this path before it is loaded by the extraction cell.
DATA_PATH = '../../data'

# NOTE(review): every `fig_caches` assignment in this cell is commented out, so a
# fresh-kernel "Run All" stops with a NameError in the extraction cell, which
# iterates over `fig_caches`. Uncomment exactly ONE line to choose the .npz
# caches to merge.
# Order matters: when `proc_label` is True, the labels of the first file are
# prefixed with 'GeSgnExt-' and those of the second file with 'CREEDS-'.

# --- Ontology ROC caches paired with their `orig_*` counterparts ---
# fig_caches = ['do_dron_go_roc/roc_disease_ontology.npz', 'orig_do_dron_go_roc/roc_disease_ontology.npz']
# fig_caches = ['do_dron_go_roc/roc_drug_ontology.npz', 'orig_do_dron_go_roc/roc_drug_ontology.npz']
# fig_caches = ['do_dron_go_roc/roc_gene_ontology.npz', 'orig_do_dron_go_roc/roc_gene_ontology.npz']
# fig_caches = ['dgidb_roc/roc_dgidb_ontology.npz', 'orig_dgidb_roc/roc_dgidb_ontology.npz']
# fig_caches = ['disgenet_roc/roc_disgenet_ontology.npz', 'orig_disgenet_roc/roc_disgenet_ontology.npz']
# fig_caches = ['disgenet_dgidb_roc/roc_disgenet_and_dgidb_ontology.npz', 'orig_disgenet_dgidb_roc/roc_disgenet_and_dgidb_ontology.npz']

# --- Same first entries, second entry taken from the `orig_*_r10` directories ---
# fig_caches = ['do_dron_go_roc/roc_disease_ontology.npz', 'orig_do_dron_go_roc_r10/roc_disease_ontology.npz']
# fig_caches = ['do_dron_go_roc/roc_drug_ontology.npz', 'orig_do_dron_go_roc_r10/roc_drug_ontology.npz']
# fig_caches = ['do_dron_go_roc/roc_gene_ontology.npz', 'orig_do_dron_go_roc_r10/roc_gene_ontology.npz']
# fig_caches = ['dgidb_roc/roc_dgidb_ontology.npz', 'orig_dgidb_roc_r10/roc_dgidb_ontology.npz']
# fig_caches = ['disgenet_roc/roc_disgenet_ontology.npz', 'orig_disgenet_roc_r10/roc_disgenet_ontology.npz']
# fig_caches = ['disgenet_dgidb_roc/roc_disgenet_and_dgidb_ontology.npz', 'orig_disgenet_dgidb_roc_r10/roc_disgenet_and_dgidb_ontology.npz']

# --- Same first entries, second entry taken from the `orig_*_r50` directories ---
# fig_caches = ['do_dron_go_roc/roc_disease_ontology.npz', 'orig_do_dron_go_roc_r50/roc_disease_ontology.npz']
# fig_caches = ['do_dron_go_roc/roc_drug_ontology.npz', 'orig_do_dron_go_roc_r50/roc_drug_ontology.npz']
# fig_caches = ['do_dron_go_roc/roc_gene_ontology.npz', 'orig_do_dron_go_roc_r50/roc_gene_ontology.npz']
# fig_caches = ['dgidb_roc/roc_dgidb_ontology.npz', 'orig_dgidb_roc_r50/roc_dgidb_ontology.npz']
# fig_caches = ['disgenet_roc/roc_disgenet_ontology.npz', 'orig_disgenet_roc_r50/roc_disgenet_ontology.npz']
# fig_caches = ['disgenet_dgidb_roc/roc_disgenet_and_dgidb_ontology.npz', 'orig_disgenet_dgidb_roc_r50/roc_disgenet_and_dgidb_ontology.npz']

# --- Other pairings: gene ontology vs. gene regulation, and the roc_*/prc_*
# --- caches stored under the gse_cv / gsm_cv directories ---
# fig_caches = ['do_dron_go_roc/roc_gene_ontology.npz', 'roc_gene_regulation.npz']
# fig_caches = ['gse_cv/roc_all.npz', 'orig_gse_cv/roc_all.npz']
# fig_caches = ['gsm_cv/0/roc_0.npz', 'orig_gsm_cv/0/roc_all.npz']
# fig_caches = ['gsm_cv/1/roc_1.npz', 'orig_gsm_cv/1/roc_all.npz']
# fig_caches = ['gsm_cv/2/roc_2.npz', 'orig_gsm_cv/2/roc_all.npz']
# fig_caches = ['gse_cv/prc_all.npz', 'orig_gse_cv/prc_all.npz']
# fig_caches = ['gsm_cv/0/prc_0.npz', 'orig_gsm_cv/0/prc_all.npz']
# fig_caches = ['gsm_cv/1/prc_1.npz', 'orig_gsm_cv/1/prc_all.npz']
# fig_caches = ['gsm_cv/2/prc_2.npz', 'orig_gsm_cv/2/prc_all.npz']

# Module-level switch on bionlp's plot module; its exact effect is not visible
# from this notebook -- NOTE(review): presumably a monitoring/interactive flag,
# confirm against bionlp.util.plot.
plot.MON = False
# When True, the processing cell prepends a per-file prefix to every label
# before the caches are merged.
proc_label = True

In [ ]:
# Extract data
# Load every cache listed in `fig_caches` (paths relative to DATA_PATH, chosen in
# the configuration cell), plot it on its own, and collect its entries by key.
# Afterwards merged_data[key] is a list with one value per cache that contains
# that key, in `fig_caches` order.
merged_data = {}
for fc in fig_caches:
    fig_file = io.read_npz(os.path.join(DATA_PATH, fc))
    # Draw the individual cache before it is merged with the others.
    plot.plot_data(fig_file)
    # Use .items() instead of the Python 2-only .iteritems() so this cell runs
    # under both Python 2 and Python 3.
    for k, v in fig_file.items():
        merged_data.setdefault(k, []).append(v)

In [ ]:
# NOTE: this cell rewrites `merged_data` in place (each per-cache list is
# replaced by its merged value), so it is not idempotent -- re-run the
# extraction cell before running this one again.

# Process the group information
# When every cache carries a non-None 'groups' entry, shift the groups of each
# later cache by the total number of 'data' rows in the caches before it.
# NOTE(review): presumably this keeps group indices pointing at the right rows
# once 'data' is concatenated below -- confirm against the cache format.
# (`in` replaces the Python 2-only dict.has_key.)
if ('groups' in merged_data and all([x.item() is not None for x in merged_data['groups']])):
    num_data = [x.shape[0] for x in merged_data['data']]
    cumnum = np.cumsum(num_data)
    for i in range(1, len(merged_data['groups'])):
        merged_data['groups'][i] += cumnum[i-1]

# Process the labels
# Prefix the labels of the i-th cache with the i-th entry of `lb_prefix` so the
# two sources stay distinguishable after merging. Caches beyond the number of
# available prefixes are left unprefixed (same pairing as zipping the prefixes
# with the cache indices).
lb_prefix = ['GeSgnExt-', 'CREEDS-']
if ('labels' in merged_data and proc_label):
    for i, prfx in enumerate(lb_prefix[:len(merged_data['labels'])]):
        merged_data['labels'][i] = np.array([prfx+lb for lb in merged_data['labels'][i]])

# Merge the parameters
# Entries whose first value has a non-empty shape are concatenated across the
# caches; entries with an empty shape tuple keep the first cache's value.
# list(...) snapshots the items because values are reassigned inside the loop,
# and .items() replaces the Python 2-only .iteritems().
for k, v in list(merged_data.items()):
    if (v[0].shape):
        merged_data[k] = np.concatenate(v)
    else:
        merged_data[k] = v[0]

In [ ]:
# Axis-limit presets for the merged figure. Only `nomargin_box` is applied
# below; the two zoom-in presets are defined here but not used by this cell.
nomargin_box = {'xlim': [0, 1], 'ylim': [0, 1]}
roc_zoomin_box = {'xlim': [0, 0.4], 'ylim': [0, 1]}
prc_zoomin_box = {'xlim': [0.4, 1], 'ylim': [0, 1]}

# Plot the merged data with an empty title and fmt='pdf'; the plot config is the
# legend/colormap settings combined with the chosen axis limits.
plot.plot_data(
    merged_data,
    title='',
    fmt='pdf',
    plot_cfg=func.update_dict(
        {'legend_fontsize': 10, 'cmap': 'jet'},
        nomargin_box
    )
)