In [2]:
cd ..
In [3]:
import NotebookImport
from DX_screen import *
In [4]:
cd ../DX/Notebooks/
In [5]:
from Imports import *
from Preprocessing.ClinicalDataFilters import *
uPA protease
In [6]:
# Interactive introspection: `??` displays the source of paired_bp_tn_split.
# NOTE(review): development leftover - consider removing from the final notebook.
paired_bp_tn_split??
In [7]:
# Run ttest_rel on PLAU expression separately for every cancer code and collect
# the results in `r` (one row per code after the transpose).
# `.ix` was removed in pandas 1.0, so the label lookups are expressed with
# `.loc` and boolean masks instead.
plau_expr = matched_rna.loc['PLAU']  # hoisted: identical for every cancer code
plau_patients = plau_expr.index.get_level_values(0)

# Codes of the patients that actually have an entry in matched_rna.
cc = codes[codes.index.isin(plau_patients)].dropna().unique()

# NOTE(review): the mask keeps matched_rna's column order, whereas
# `.ix[labels]` followed the label list - assumes ttest_rel does not depend on
# row order; confirm against its implementation.
r = pd.DataFrame({
    code: ttest_rel(plau_expr[plau_patients.isin(ti(codes == code))])
    for code in cc
}).T
In [8]:
# PLAU expression plot restricted to a hand-picked list of cancer codes,
# saved as a PDF.
fig, ax = subplots(figsize=(7, 3))
cc = ['HNSC', 'LUSC', 'LUAD', 'BLCA', 'THCA', 'BRCA', 'COAD', 'READ']
# `.loc` replaces `.ix` (removed in pandas 1.0) for the label-based row lookup.
paired_bp_tn_split(matched_rna.loc['PLAU'], codes[codes.isin(cc)], ax=ax)
# NOTE(review): absolute, user-specific output path - will fail on other machines.
fig.savefig('/cellar/users/agross/figures/plau.pdf')
In [ ]:
In [30]:
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
# Displays `r` ordered by the 'p' column (ascending by default).
r.sort_values('p')
Out[30]:
In [10]:
# ttest_rel on PLAU across all samples in matched_rna (no per-code split).
# `.loc` replaces the removed `.ix` indexer for this label-based row lookup.
ttest_rel(matched_rna.loc['PLAU'])
Out[10]:
tPA protease
In [5]:
# PLAT expression split by cancer code.
# `.loc` replaces the removed `.ix` indexer for this label-based row lookup.
paired_bp_tn_split(matched_rna.loc['PLAT'], codes)
Collagenase
In [31]:
# MMP1 expression split by cancer code.
# `.loc` replaces the removed `.ix` indexer for this label-based row lookup.
paired_bp_tn_split(matched_rna.loc['MMP1'], codes)
Elastases
In [6]:
# Gene symbols iterated over by the multi-panel plot later in this section.
g = [
    'CELA1', 'CELA2A', 'CELA2B', 'CELA3A', 'CELA3B',
    'CTRC', 'ELANE', 'MMP12',
]
In [13]:
# Interactive introspection: `?` displays the docstring of paired_bp_tn_split.
# NOTE(review): development leftover - consider removing from the final notebook.
paired_bp_tn_split?
In [17]:
# One stacked panel per gene in `g`, all sharing the x-axis.
# The row count follows len(g) instead of a hard-coded 8, so editing the gene
# list cannot leave the axes grid out of sync with it.
fig, axs = subplots(len(g), 1, figsize=(15, 20), sharex=True)
for panel, gene in zip(axs, g):
    # `.loc` replaces the removed `.ix` indexer for the label-based row lookup.
    paired_bp_tn_split(matched_rna.loc[gene], codes, ax=panel,
                       data_type='')
Cathepsin
In [27]:
# Gene symbols iterated over by the multi-panel plot that follows; the
# trailing expression displays how many there are.
g = [
    'CTSA', 'CTSB', 'CTSC', 'CTSD', 'CTSE',
    'CTSF', 'CTSG', 'CTSH', 'CTSK', 'CTSL1',
    'CTSL2', 'CTSO', 'CTSS', 'CTSW', 'CTSZ',
]
len(g)
Out[27]:
In [30]:
# One stacked panel per gene in `g`, all sharing the x-axis.
# The row count follows len(g) instead of a hard-coded 15, so editing the gene
# list cannot leave the axes grid out of sync with it.
fig, axs = subplots(len(g), 1, figsize=(15, 40), sharex=True)
for panel, gene in zip(axs, g):
    # `.loc` replaces the removed `.ix` indexer for the label-based row lookup.
    paired_bp_tn_split(matched_rna.loc[gene], codes, ax=panel,
                       data_type='')
Is there a way for you to query TCGA about all extracellular proteases in an unbiased fashion? i.e. not by asking about specific proteases by name but asking about all extracellular proteases?
If yes, can you please help me do this?
If no, the data that you already have are really useful - can we put them in the same table, ranking the most highly expressed proteases for all cancers, with HNSCC being the first cancer on the x axis (similar to panel a in the figure inserted above)?
In [ ]: