In [74]:
%matplotlib inline
In [77]:
import pandas as pd
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# Local directory that holds the GTEx files. The trailing slash is required:
# file names are appended to this value by plain string concatenation.
GTEX = '/cellar/users/agross/Data/DiffExp/GTEX/'
In [61]:
# Base URL that every remote path in `files` is appended to.
GTEX_WEB = 'http://gtexportal.org/static/datasets/gtex_analysis_v4/'

# One (remote path relative to GTEX_WEB, local file name) pair per file.
# Other cells address entries by position, so the order must not change.
files = [
    (
        'annotations/GTEx_Data_V4_Annotations_SampleAttributesDS.txt',
        'SampleAttributes.txt',
    ),
    (
        'annotations/GTEx_Data_V4_Annotations_SubjectPhenotypes_DS.txt',
        'SubjectPhenotypes.txt',
    ),
    (
        'rna_seq_data/GTEx_Analysis_V4_RNA-seq_RNA-SeQCv1.1.8_gene_rpkm.gct.gz',
        'rna_gene_rpkm.gct.gz',
    ),
]
In [64]:
def download_files():
for f in files:
source = GTEX_WEB + f[0]
target = GTEX + f[1]
!curl $source -o $target
f = GTEX + files[2][1]
!gunzip $f
# Reuse earlier downloads when all three files are already in GTEX. The third
# entry is looked up without its '.gz' suffix because download_files() unpacks
# it with gunzip.
if not os.path.isdir(GTEX):
    # os.listdir() raises on a missing directory, which would abort the cell
    # before any download could start; create the directory first.
    os.makedirs(GTEX)
d = os.listdir(GTEX)
if (files[0][1] in d) and (files[1][1] in d) and (files[2][1][:-3] in d):
    # A parenthesised single argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('Using downloaded files...')
else:
    print('Downloading GTEX files')
    download_files()
In [54]:
# Two annotation tables, each indexed by its first column.
meta = pd.read_table(GTEX + 'SampleAttributes.txt', index_col=0)
gtex_pat = pd.read_table(GTEX + 'SubjectPhenotypes.txt', index_col=0)

# Expression matrix: the first two lines of the file are skipped and the
# first two columns form a two-level row index.
gtex = pd.read_table(
    GTEX + 'rna_gene_rpkm.gct',
    index_col=[0, 1],
    skiprows=2
)
# Keep only the second index level as the row labels
# (presumably the gene symbol -- confirm against the file header).
gtex.index = gtex.index.get_level_values(1)
In [ ]:
# One label per row of the sample-attribute table, taken from its 'SMTSD'
# column; used as the grouping variable when plotting.
# NOTE(review): presumably the detailed tissue-type field -- confirm against
# the GTEx data dictionary.
tissue_type = meta['SMTSD']
In [84]:
def plot_tissues_across_gene(gene, log=True):
    """Draw horizontal violins of one gene's values, grouped by tissue.

    Parameters
    ----------
    gene : label
        Row label to look up in the module-level ``gtex`` table.
    log : bool, default True
        If True, plot log2 of the values with everything below -4 raised
        to -4 (this includes the -inf that log2 yields for zeros).
        If False, plot the values as stored.

    The module-level ``tissue_type`` series supplies the grouping. A new
    10x15 figure is created for the plot; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(10, 15))
    # Label-based lookup; .loc replaces the deprecated .ix indexer.
    expression = gtex.loc[gene]
    if log:
        # Series.clip(lower=...) replaces the deprecated clip_lower().
        v = np.log2(expression).dropna().clip(lower=-4)
    else:
        v = expression.dropna()
    # NOTE(review): groupby=, vert= and inner='points' look like an early
    # seaborn violinplot signature -- confirm against the installed version
    # before upgrading seaborn.
    sns.violinplot(v, groupby=tissue_type,
                   cut=.1, inner='points', ax=ax, vert=False)
In [88]:
#Do not import
# Example: per-tissue violins of log2-transformed values for the GABRD row.
plot_tissues_across_gene('GABRD', log=True)