Create Run Object for Firehose Download


In [1]:
import NotebookImport
from Imports import *


importing IPython notebook from Imports
Populating the interactive namespace from numpy and matplotlib
changing to source dirctory

In [3]:
from Data.Containers import Run, Cancer

In [4]:
# Directory for this Firehose run, built from the output root and run date
# (OUT_PATH and RUN_DATE are not defined in this notebook; presumably they
# come from the star-import of `Imports` — confirm there).
data_path = '{0}/Firehose__{1}/'.format(OUT_PATH, RUN_DATE)
# Results live in a fixed sub-directory of the run directory.
result_path = '{0}ucsd_analyses/'.format(data_path)

In [5]:
# Lookup table read from the local ExtraData file, indexed by its first
# column. With squeeze=True a single remaining column is returned as a Series.
# NOTE(review): the `squeeze=` keyword was removed from the pandas readers in
# pandas 2.0; this cell assumes the older pandas the notebook was written for.
cancer_codes = pd.read_table('../ExtraData/diseaseStudy.txt',
                             index_col=0, squeeze=True)

# Per-run table published alongside the analyses for RUN_DATE; rows with any
# missing value are dropped right after loading.
run_dir = 'http://gdac.broadinstitute.org/runs'
f = '{}/analyses__{}/ingested_data.tsv'.format(run_dir, RUN_DATE)
sample_matrix = pd.read_table(f, index_col=0).dropna()

# Row labels that must not be treated as individual cancers (presumably
# pooled cohorts and the totals row — confirm against the downloaded table).
excluded_rows = ['PANCAN12', 'COADREAD', 'Totals']
# A boolean mask with .loc replaces the deprecated .ix lookup by a label
# list: it keeps each remaining row exactly once (a label list returns extra
# copies when the index has duplicate labels) and avoids the Python-level
# scan over the index.
sample_matrix = sample_matrix.loc[~sample_matrix.index.isin(excluded_rows)]

In [6]:
# Assemble the Run object from the configuration constants and the tables
# loaded above, one positional argument per line (order matters).
run = Run(
    RUN_DATE,
    VERSION,
    data_path,
    result_path,
    PARAMETERS,
    cancer_codes,
    sample_matrix,
    DESCRIPTION,
)
# Save the run via its own save() method before displaying it.
run.save()
# Bare last expression: shows the Run's repr as the cell output.
run


Out[6]:
Run object for TCGA Analysis
Firehose run date: 2014_07_15
Code version: all
Comment: Updating analysis for updated dataset.

Initialize Data Objects into the File Hierarchy


In [8]:
# Build the Cancer object for cancer code `c` and initialize its data,
# passing save=True.
# NOTE(review): `c` is not assigned in any cell visible above this one, so
# on a fresh kernel this cell presumably raises NameError unless `c` comes
# from the star-import of `Imports`. TODO: assign `c` explicitly, or drop
# the cell if it is only a leftover one-off experiment.
cancer_obj = Cancer(c, run) 
cancer_obj.initialize_data(run, save=True)

In [102]:
from Processing.InitializeReal import initialize_real
from Processing.InitializeMut import initialize_mut
from Processing.InitializeCN import initialize_cn

In [103]:
def init(c, run):
    """
    Run every initialization step for one cancer, reporting failures.

    The steps are attempted in a fixed order and independently of each
    other: when a step raises, one line of the form
    ``<cancer code><TAB><step label>`` is printed and the remaining steps
    are still attempted.

    Only ``Exception`` subclasses are treated as step failures, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate and a long batch of
    calls can still be interrupted.

    Parameters
    ----------
    c : str
        Cancer code; forwarded to every initializer and used as the first
        field of the failure line.
    run : Run
        Run object; passed to ``Cancer`` and ``initialize_data``, and its
        ``report_path`` attribute is forwarded to the ``initialize_*``
        functions.

    Returns
    -------
    None
    """
    def init_cancer_object():
        # Build the Cancer object and initialize its data with save=True.
        cancer_obj = Cancer(c, run)
        cancer_obj.initialize_data(run, save=True)

    # (label printed on failure, zero-argument callable) in execution order.
    # Wrapping each call in a function/lambda defers every name and attribute
    # lookup (including run.report_path) until the call happens inside the
    # try block below, so a failing lookup is reported like any other error.
    steps = [
        ('all', init_cancer_object),
        ('mRNASeq', lambda: initialize_real(c, run.report_path, 'mRNASeq',
                                            create_meta_features=True)),
        ('RPPA', lambda: initialize_real(c, run.report_path, 'RPPA',
                                         create_meta_features=True,
                                         create_real_features=False)),
        ('miRNASeq', lambda: initialize_real(c, run.report_path, 'miRNASeq',
                                             create_meta_features=False)),
        ('CN', lambda: initialize_cn(c, run.report_path, 'CN_broad')),
        ('mut', lambda: initialize_mut(c, run.report_path,
                                       create_meta_features=True)),
    ]

    for label, step in steps:
        try:
            step()
        except Exception:
            # Single parenthesized argument: prints the same text under
            # both Python 2 and Python 3.
            print(c + '\t' + label)

In [104]:
# Run every initialization step for each cancer in the run. init() prints
# one "<cancer><TAB><step>" line per failed step, so the output below is the
# list of (cancer, data layer) pairs that could not be initialized.
# NOTE(review): the loop variable `cancer` stays in the notebook namespace
# after this cell finishes.
for cancer in run.cancers:
    init(cancer, run)


ACC	mRNASeq
ACC	RPPA
ACC	miRNASeq
BRCA	RPPA
CESC	RPPA
CHOL	all
CHOL	mRNASeq
CHOL	RPPA
CHOL	miRNASeq
CHOL	CN
CHOL	mut
DLBC	all
DLBC	mRNASeq
DLBC	RPPA
DLBC	miRNASeq
DLBC	CN
DLBC	mut
ESCA	mRNASeq
ESCA	RPPA
ESCA	mut
GBM	miRNASeq
KICH	RPPA
KIRP	RPPA
LAML	all
LAML	mRNASeq
LAML	RPPA
LAML	miRNASeq
LAML	CN
LAML	mut
LIHC	RPPA
MESO	mRNASeq
MESO	RPPA
MESO	miRNASeq
MESO	mut
PAAD	RPPA
PCPG	RPPA
SARC	RPPA
SARC	mut
SKCM	RPPA
STAD	mRNASeq
STAD	RPPA
TGCT	all
TGCT	mRNASeq
TGCT	RPPA
TGCT	miRNASeq
TGCT	CN
TGCT	mut
THYM	all
THYM	mRNASeq
THYM	RPPA
THYM	miRNASeq
THYM	CN
THYM	mut
UCS	mRNASeq
UCS	RPPA
UCS	miRNASeq