In [2]:

    
# Move to the parent directory; the cell output shows the resulting working directory.
cd ..









    



/cellar/users/agross/TCGA_Code/DX/Notebooks



In [3]:

    
import NotebookImport
from DX_screen import *









    




importing IPython notebook from DX_screen






    




importing IPython notebook from Imports






    




importing IPython notebook from /cellar/users/agross/anaconda2/lib/python2.7/site-packages/MethylTools/Probe_Annotations



In [4]:

    
# Change into ../DX/Notebooks/ relative to the current directory (the cell output shows where that lands).
# NOTE(review): presumably needed because the notebook imports above changed the working directory -- confirm.
cd ../DX/Notebooks/









    



/cellar/users/agross/TCGA_Code/DX/Notebooks



In [5]:

    
from Imports import *
from Preprocessing.ClinicalDataFilters import *









    




importing IPython notebook from Preprocessing/ClinicalDataFilters






    




importing IPython notebook from Preprocessing/ClinicalData






    



PCPG
UCS
TGCT
CHOL
THYM
MESO
FPPP






    



/cellar/users/agross/anaconda2/lib/python2.7/site-packages/Processing/ProcessClinicalDataPortal.py:37: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead
  f['vitalstatus'] = f['daystodeath'].isnull()

uPA protease



In [6]:

    
# Interactive source lookup for paired_bp_tn_split; scratch cell that can be dropped from a final notebook.
paired_bp_tn_split??



In [7]:

    
# Distinct, non-missing entries of `codes` for the labels found in the first
# column level of matched_rna.
cc = codes.ix[matched_rna.columns.get_level_values(0)].dropna().unique()

# One ttest_rel result per code, computed on the PLAU row restricted to
# ti(codes == c); the final transpose puts one code on each row.
# NOTE(review): `.ix` was removed in pandas 1.0, so this cell needs a legacy pandas.
plau_by_code = {}
for c in cc:
    plau_by_code[c] = ttest_rel(matched_rna.ix['PLAU'].ix[ti(codes == c)])
r = pd.DataFrame(plau_by_code).T



In [8]:

    
# Plot PLAU with paired_bp_tn_split, restricted to a hand-picked set of codes.
# NOTE: this rebinds `cc`, which the previous cell used for the full list of codes.
cc = ['HNSC', 'LUSC', 'LUAD', 'BLCA', 'THCA', 'BRCA', 'COAD', 'READ']
fig, ax = subplots(figsize=(7, 3))
# `.loc` replaces `.ix` (deprecated in pandas 0.20, removed in 1.0); for a
# string row label the two perform the same label-based lookup.
paired_bp_tn_split(matched_rna.loc['PLAU'], codes[codes.isin(cc)], ax=ax)
# NOTE(review): absolute, user-specific output path -- adjust when running elsewhere.
fig.savefig('/cellar/users/agross/figures/plau.pdf')



In [ ]:



In [30]:

    
# Show the per-code test results ordered by the 'p' column, smallest first.
# DataFrame.sort was removed in pandas 0.20; prefer sort_values and fall back
# to the legacy method only on pandas versions that do not provide it.
r.sort_values('p') if hasattr(r, 'sort_values') else r.sort('p')



In [10]:

    
# ttest_rel on the whole PLAU row (no split by code).
# `.loc` replaces `.ix` (deprecated in pandas 0.20, removed in 1.0); for a
# string row label the two perform the same label-based lookup.
ttest_rel(matched_rna.loc['PLAU'])









    Out[10]:





p    6.13e-16
t    8.29e+00
dtype: float64

tPA protease



In [5]:

    
# Plot PLAT with paired_bp_tn_split across all codes.
# `.loc` replaces `.ix` (deprecated in pandas 0.20, removed in 1.0); for a
# string row label the two perform the same label-based lookup.
paired_bp_tn_split(matched_rna.loc['PLAT'], codes)

Collagenase



In [31]:

    
# Plot MMP1 with paired_bp_tn_split across all codes.
# `.loc` replaces `.ix` (deprecated in pandas 0.20, removed in 1.0); for a
# string row label the two perform the same label-based lookup.
paired_bp_tn_split(matched_rna.loc['MMP1'], codes)

Elastases



In [6]:

    
# Gene symbols plotted in the elastase panel below (one subplot per entry).
g = [
    'CELA1',
    'CELA2A',
    'CELA2B',
    'CELA3A',
    'CELA3B',
    'CTRC',
    'ELANE',
    'MMP12',
]



In [13]:

    
# Interactive docstring lookup for paired_bp_tn_split; scratch cell that can be dropped from a final notebook.
paired_bp_tn_split?



In [17]:

    
# One stacked panel per gene in `g`, all sharing the x axis.
# The row count comes from len(g) instead of a literal 8, so the cell no longer
# fails (or leaves empty panels) when `g` has been rebound to a list of a
# different length by another cell.
fig, axs = subplots(len(g), 1, figsize=(15, 20), sharex=True)
for panel, gene in zip(axs, g):
    # `.loc` replaces `.ix` (deprecated in pandas 0.20, removed in 1.0); for a
    # string row label the two perform the same label-based lookup.
    paired_bp_tn_split(matched_rna.loc[gene], codes, ax=panel, data_type='')

Cathepsins



In [27]:

    
# Gene symbols plotted in the cathepsin panel below (one subplot per entry).
g = [
    'CTSA', 'CTSB', 'CTSC', 'CTSD', 'CTSE',
    'CTSF', 'CTSG', 'CTSH', 'CTSK', 'CTSL1',
    'CTSL2', 'CTSO', 'CTSS', 'CTSW', 'CTSZ',
]
# Display the panel size; the plotting cell needs this many subplot rows.
len(g)









    Out[27]:





15



In [30]:

    
# One stacked panel per gene in `g`, all sharing the x axis.
# The row count comes from len(g) instead of a literal 15, so the cell stays
# in step with whatever list `g` currently holds.
fig, axs = subplots(len(g), 1, figsize=(15, 40), sharex=True)
for panel, gene in zip(axs, g):
    # `.loc` replaces `.ix` (deprecated in pandas 0.20, removed in 1.0); for a
    # string row label the two perform the same label-based lookup.
    paired_bp_tn_split(matched_rna.loc[gene], codes, ax=panel, data_type='')

Is there a way for you to query TCGA about all extracellular proteases in an unbiased fashion, i.e., not by asking about specific proteases by name but by asking about all extracellular proteases at once?

If yes, can you please help me do this?

If no, the data that you already have are really useful. Can we put them in the same table, ranking the most highly expressed proteases for all cancers, with HNSCC being the first cancer on the x-axis (similar to panel a in the figure inserted above)?



In [ ]:

	p	t
THCA	9.19e-16	10.96
HNSC	1.17e-14	11.58
LUAD	7.37e-14	9.82
LUSC	9.43e-14	10.46
KIRC	8.75e-13	-8.69
COAD	2.75e-12	9.88
BRCA	3.31e-12	7.82
PRAD	8.73e-12	-8.79
KICH	1.23e-10	-10.73
LIHC	8.18e-06	4.98
KIRP	5.05e-05	-4.70
CESC	9.36e-04	32.66
BLCA	2.60e-03	3.49
READ	7.71e-03	3.53
CHOL	1.70e-02	3.00
PCPG	3.34e-02	5.34
UCEC	8.09e-02	1.83
THYM	2.58e-01	-2.33
PAAD	3.58e-01	1.18
SARC	9.74e-01	0.04