In [1]:
# Execute the shared project init notebook in this kernel's namespace.
# NOTE(review): later cells use `pd` without importing it in this notebook, so it is
# presumably defined by this init notebook -- confirm.
%run ~/relmapping/annot/notebooks/__init__.ipynb


/mnt/home3/jj374/anaconda36/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
os.getcwd(): /mnt/b2/scratch/ahringer/jj374/lab/relmapping

In [2]:
# Expression table from Boeck et al., 2016 (Supplemental Table S13).
# Download command (kept for provenance, not executed here):
#!cd ~/relmapping/wget; wget -m --no-parent http://genome.cshlp.org/content/suppl/2016/09/20/gr.202663.115.DC1/Supplemental_Table_S13.gz
# NOTE(review): the download is a .gz but the path below is .txt -- presumably the
# archive was decompressed by hand; confirm.
fp_ = 'wget/genome.cshlp.org/content/suppl/2016/09/20/gr.202663.115.DC1/Supplemental_Table_S13.txt'
# sep=r'\s+' is the documented equivalent of delim_whitespace=True, which is
# deprecated in recent pandas releases.
# (index_col='WormbaseName' was considered but is intentionally left off.)
df_expr = pd.read_csv(fp_, sep=r'\s+')
len(df_expr)


Out[2]:
20391

In [3]:
# Figure used for the comparison (download command kept for provenance, not executed here):
#!cd ~/relmapping/wget; wget -m --no-parent http://genome.cshlp.org/content/suppl/2016/09/20/gr.202663.115.DC1/Supplemental_Fig_S4.docx
# Sanity check: count genes above 0.02 in a single stage and compare with the
# values read off the published figures.
n_above_l1 = int((df_expr['L1'] > 0.02).sum())
n_above_de = int((df_expr['DE'] > 0.02).sum())
print(n_above_l1)  # eyeballed from fig4: 13.5K for L1
print(n_above_de)  # eyeballed from fig5: 15K for DE


13316
15028

In [4]:
# Preview the time points roughly matched with wt LE to YA
# (columns at positions 16..23 of the expression table).
df_expr.iloc[:, 16:24].head()


Out[4]:
587min 626min 665min L1 L2 L3 L4 YA
0 0.3078 0.3364 0.3074 0.113352 0.079734 0.025379 0.050510 0.045095
1 2.8029 3.1333 2.8126 0.896642 0.611569 0.415548 0.387748 0.299345
2 1.4648 1.2548 1.1278 0.217031 0.269412 0.165271 0.116517 0.041802
3 0.0832 0.0676 0.0682 0.108792 0.111028 0.152491 0.252174 0.475804
4 0.5604 0.5636 0.5500 0.012563 0.532903 0.052147 0.254605 0.016723

In [6]:
# Calculate the number of genes with dcpm above each threshold used in
# Boeck et al., 2016 in any of the selected stages.
# Columns 16..23 are the 587min ... YA columns previewed in the cell above.
stage_cols_ = df_expr.columns[16:24]
for thr_, label_ in [
    (0.02, "Fig S4 threshold"),
    (0.07, "Fig 1 & DE analyses' threshold"),
]:
    # A gene counts if at least one stage exceeds the threshold; use the vectorised
    # Series.sum() rather than Python's builtin sum() over the boolean Series.
    n_ = int((df_expr[stage_cols_] > thr_).any(axis=1).sum())
    print("%d of %d genes with dcpm > %.2f in development (%s)" % (n_, len(df_expr), thr_, label_))
# After the loop n_ holds the count for the last (0.07) threshold, as before.


18173 of 20391 genes with dcpm > 0.02 in development (Fig S4 threshold)
15746 of 20391 genes with dcpm > 0.07 in development (Fig 1 & DE analyses' threshold)

In [ ]: