In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [2]:
# Start from seaborn's 'poster' plotting context and override the default
# figure size and axes-title size before installing it notebook-wide.
c = sns.plotting_context('poster')
c.update({
    'figure.figsize': np.array([10.66, 10.66]),
    'axes.titlesize': 22,
})
sns.set_context(c)
sns.set_style('whitegrid')

# Display the context that is now active.
sns.plotting_context()
Out[2]:
In [3]:
# (x, y) position, in axes-fraction coordinates (used with transform=ax.transAxes),
# at which the bold panel letters 'A', 'B', ... are drawn on every figure below.
label_loc = (-.1, 1.05)
In [4]:
# Raise pandas' column display limit to 999 for the .head() previews in this notebook.
pd.set_option('display.max_columns', 999)
In [5]:
# Allelic-imbalance calls. Columns used later in this notebook: line, fusion_id,
# flag_AI_V / flag_AI_M and q5_mean_theta_V / q5_mean_theta_M.
# The path is relative to the notebook's working directory.
ai = pd.read_csv('../../data/all_cis_calls_sbs.csv')
ai.head()
Out[5]:
In [6]:
# Cis/trans estimates in long form. Columns used later in this notebook: line,
# fusion_id, mating_status ('M' / 'V'), c_i and T_i_1a.
cis = pd.read_csv('../../data/cis_trans_estimates_v13.csv')
cis.head()
Out[6]:
In [7]:
# Reshape the long cis/trans table to "side by side" form: one row per
# (line, fusion_id), with mating_status moved into the column labels.
cis_sbs = (
    cis
    .set_index(['line', 'fusion_id', 'mating_status'])
    .unstack()
)

# Collapse the two-level column labels into single names joined by '_'.
cis_sbs.columns = ['_'.join(level_values).strip() for level_values in cis_sbs.columns.values]

# Turn the (line, fusion_id) index back into ordinary columns.
cis_sbs = cis_sbs.reset_index()
cis_sbs.head()
Out[7]:
In [8]:
# Cis/trans variance table. Columns used later in this notebook: mating_status,
# cis_var and trans_var.
variance = pd.read_csv('../../data/cis_trans_variance_data.csv')
variance.head()
Out[8]:
In [9]:
# R^2 table in long form, indexed below by fusion_id and mating_status. The
# R2_cis, R2_diff_trans and R2_diff_int columns are plotted later in this notebook.
r2_data = pd.read_csv('../../data/cis_trans_r2_data.csv')
r2_data.head()
Out[9]:
In [10]:
# Reshape the long R^2 table to one row per fusion_id, with mating_status
# moved into the column labels.
r2_data_sbs = (
    r2_data
    .set_index(['fusion_id', 'mating_status'])
    .unstack()
)

# Collapse the two-level column labels into single names joined by '_'.
r2_data_sbs.columns = ['_'.join(level_values).strip() for level_values in r2_data_sbs.columns.values]

# Turn the fusion_id index back into an ordinary column.
r2_data_sbs = r2_data_sbs.reset_index()
r2_data_sbs.head()
Out[10]:
In [11]:
def giveProp(x):
    """Return the flagged proportion for each mating status.

    Parameters
    ----------
    x : DataFrame (or group) exposing ``flag_AI_V`` and ``flag_AI_M`` columns.

    Returns
    -------
    pd.Series indexed by ``'Virgin'`` and ``'Mated'``; each value is the sum of
    the corresponding flag column divided by its count of non-missing entries.
    """
    flag_columns = (('Virgin', 'flag_AI_V'), ('Mated', 'flag_AI_M'))
    labels = [label for label, _ in flag_columns]
    proportions = [x[column].sum() / x[column].count() for _, column in flag_columns]
    return pd.Series(data=proportions, index=labels)
# Proportion of flagged rows per line, computed separately for Virgin and Mated.
grp = ai.groupby('line')
propAI = grp.apply(giveProp)

# Two panels side by side (a stacked 2x1 layout at 10.66 x 10.66 is the alternative).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(21.83, 6))

# Panel A: kernel density of the mean theta values, with a dashed reference line at 0.5.
ax1.axvline(0.5, color='r', alpha=0.4, ls='--')
theta_curves = [
    (ai.q5_mean_theta_V, 'Allelic Imbalance Virgin'),
    (ai.q5_mean_theta_M, 'Allelic Imbalance Mated'),
]
for theta, curve_label in theta_curves:
    sns.distplot(theta, ax=ax1, hist=False, kde_kws={'lw': 4}, label=curve_label)
ax1.set_xlim(0, 1)
ax1.set_xlabel('Allelic Imbalance')
ax1.set_ylabel('Density')

# Panel B: per-line proportions as bars, ordered by descending Virgin proportion.
propAI = propAI.sort_values(by='Virgin', ascending=False)
propAI.plot.bar(ax=ax2)

# Bold panel letters placed in axes coordinates.
for axis, letter in ((ax1, 'A'), (ax2, 'B')):
    axis.text(label_loc[0], label_loc[1], letter, transform=axis.transAxes, fontweight='bold')

plt.tight_layout(pad=2)
plt.savefig('../../output/AI_density.eps')
In [12]:
# Per-line subsets of the allelic-imbalance table (kept as notebook-level names).
r324 = ai[ai.line == 'r324']
r365 = ai[ai.line == 'r365']
w47 = ai[ai.line == 'w47']

# One panel per line: Mated theta on x, Virgin theta on y, shared y axis.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, figsize=(21.83, 8))

panels = [
    # (axes, line name / panel title, data subset, panel letter)
    (ax1, 'r324', r324, 'A'),
    (ax2, 'r365', r365, 'B'),
    (ax3, 'w47', w47, 'C'),
]
for ax, line_name, subset, letter in panels:
    # y = x reference diagonal.
    ax.plot([0, 1], [0, 1])
    # x/y are passed by keyword because newer seaborn releases make them
    # keyword-only; keywords are accepted by older releases as well.
    # The point colour is given as 'color' (not 'c'), matching the other
    # regplot calls in this notebook: regplot passes its own `color` to
    # scatter, and matplotlib rejects a call that supplies both `c` and `color`.
    sns.regplot(
        x=subset.q5_mean_theta_M,
        y=subset.q5_mean_theta_V,
        ax=ax,
        ci=None,
        line_kws={'color': 'r'},
        scatter_kws={'color': 'k'},
    )
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(line_name)
    ax.set_xlabel('Allelic Imbalance\nMated')
    # Only the left-most panel carries the y label; the y axis is shared.
    ax.set_ylabel('Allelic Imbalance\nVirgin' if ax is ax1 else '')
    # Bold panel letter in axes coordinates.
    ax.text(label_loc[0], label_loc[1], letter, transform=ax.transAxes, fontweight='bold')

plt.tight_layout(pad=2)
plt.savefig('../../output/AI_mated_vs_virgin_3Lines.eps')
In [13]:
# Split the estimate and variance tables by mating status ('M' and 'V').
cisM = cis.loc[cis['mating_status'].eq('M')]
cisV = cis.loc[cis['mating_status'].eq('V')]
varianceM = variance.loc[variance['mating_status'].eq('M')]
varianceV = variance.loc[variance['mating_status'].eq('V')]
In [33]:
# 2x2 grid of kernel-density plots: estimates in the left column, variances in
# the right column; Virgin on the top row, Mated on the bottom row.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)

kde_panels = [
    # (axes, title, source frame, cis column, trans column, x range, panel letter)
    (ax1, 'Virgin Estimate', cisV, 'c_i', 'T_i_1a', (-20, 20), 'A'),
    (ax2, "Virgin Variance", varianceV, 'cis_var', 'trans_var', (-2000, 2000), 'B'),
    (ax3, 'Mated Estimate', cisM, 'c_i', 'T_i_1a', (-20, 20), 'C'),
    (ax4, 'Mated Variance', varianceM, 'cis_var', 'trans_var', (-2000, 2000), 'D'),
]
for axis, heading, frame, cis_col, trans_col, x_range, letter in kde_panels:
    axis.set_title(heading)
    # The x range is only given on the first curve of each panel.
    frame[cis_col].plot(kind='kde', ax=axis, xlim=x_range, label='cis')
    frame[trans_col].plot(kind='kde', ax=axis, label='trans')

# Bold panel letters in axes coordinates.
for axis, heading, frame, cis_col, trans_col, x_range, letter in kde_panels:
    axis.text(label_loc[0], label_loc[1], letter, transform=axis.transAxes, fontweight='bold')

# Blank the x tick labels on the top row and the y tick labels everywhere.
for axis in (ax1, ax2):
    axis.set_xticklabels('')
for axis in (ax1, ax2, ax3, ax4):
    axis.set_yticklabels('')

# Fixed tick positions on the two variance panels.
for axis in (ax2, ax4):
    axis.set_xticks([-2000, -1000, 0, 1000, 2000])

# Only the bottom row is labelled on x; the right column has no y label.
ax3.set_xlabel('Mean cis/trans Estimate')
ax4.set_xlabel('Mean cis/trans Variance')
for axis in (ax2, ax4):
    axis.set_ylabel('')

# One legend built from panel A's curves, positioned outside the axes; the
# tight_layout rect leaves room for it on the right.
handles, labels = ax1.get_legend_handles_labels()
plt.legend(handles, labels, loc=(1.1, 1))
plt.tight_layout(pad=2, rect=[0, 0, .9, 1])
plt.savefig('../../output/estimate_and_variance_distribution.eps')
In [15]:
# fusion_id used to select the rows plotted in the figures titled 'Gene: Men'.
Men = 'S52856_SI'
In [16]:
# Rows of the side-by-side cis/trans table for the single fusion_id stored in `Men`.
hai = cis_sbs[cis_sbs.fusion_id == Men]
hai.head()
Out[16]:
In [17]:
# Four scatter panels for the rows in `hai`, each with a fitted regression line
# and the pairwise correlation (pandas' default method) rounded to 4 decimals:
#   A: trans vs cis, Virgin     B: trans vs cis, Mated
#   C: cis, Virgin vs Mated     D: trans, Virgin vs Mated
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)

wide_ticks = [-.20, -.10, 0, .10, .20]
narrow_ticks = [-.06, -.03, 0, .03, .06]

panels = [
    # (axes, letter, title, x column, y column, x ticks, x label, y label)
    (ax1, 'A', 'Virgin', 'T_i_1a_V', 'c_i_V', wide_ticks, 'Trans Effects', 'Cis Effects'),
    (ax2, 'B', 'Mated', 'T_i_1a_M', 'c_i_M', wide_ticks, 'Trans Effects', 'Cis Effects'),
    # Panels C and D: the axis labels now name the quantity actually plotted
    # (they were previously swapped between the 'Cis' and 'Trans' panels).
    (ax3, 'C', 'Cis', 'c_i_V', 'c_i_M', narrow_ticks, 'Virgin Cis Effects', 'Mated Cis Effects'),
    (ax4, 'D', 'Trans', 'T_i_1a_V', 'T_i_1a_M', wide_ticks, 'Virgin Trans Effects', 'Mated Trans Effects'),
]
for ax, letter, title, x_col, y_col, ticks, x_label, y_label in panels:
    ax.set_xticks(ticks)
    ax.set_title(title)
    # x/y are passed by keyword (newer seaborn releases make them keyword-only).
    # ci=None is the documented way to switch the confidence band off and
    # matches the other regplot calls in this notebook.
    sns.regplot(
        x=hai[x_col],
        y=hai[y_col],
        ax=ax,
        ci=None,
        line_kws={'color': 'r'},
        scatter_kws={'color': 'k'},
    )
    # Off-diagonal entry of the 2x2 correlation matrix.
    cc = hai[[x_col, y_col]].corr().round(4)
    ax.text(.1, .9, r'$\rho = {}$'.format(cc.values[0, 1]), transform=ax.transAxes, fontsize=18)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # Bold panel letter in axes coordinates.
    ax.text(label_loc[0], label_loc[1], letter, transform=ax.transAxes, fontweight='bold')

fig.suptitle(r'Gene: $\mathit{Men}$')
plt.tight_layout(pad=2)
plt.savefig('../../output/Men_estimate_and_variance_distribution.eps')
In [37]:
def normalize(x, size=10000, random_state=None):
    """Simulate draws from a normal distribution fitted to ``x``.

    Despite its name, this does not rescale ``x``: it returns fresh random
    draws from a normal distribution whose mean and standard deviation are
    ``x.mean()`` and ``x.std()``.

    Parameters
    ----------
    x : object exposing ``mean()`` and ``std()`` (e.g. a pandas Series).
    size : int, optional
        Number of values to draw. Defaults to 10000.
    random_state : int or None, optional
        Seed for a dedicated ``numpy.random.RandomState``. When ``None``
        (the default) NumPy's global generator is used, so the result follows
        whatever ``np.random.seed`` state is active.

    Returns
    -------
    pd.Series of length ``size`` holding the simulated values.
    """
    # A private generator keeps seeded calls reproducible without touching
    # the global random state.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    return pd.Series(rng.normal(x.mean(), x.std(), size=size))
# Seed NumPy's global generator so the simulated draws below, and therefore the
# saved figure, are the same on every run.
np.random.seed(0)

# Two stacked panels (Virgin on top, Mated below). Each shows kernel densities
# of values simulated by `normalize` from the cis, trans and interaction
# R^2 columns.
fig, (ax1, ax2) = plt.subplots(2, 1)

# Panel A: Virgin.
cis_V = normalize(r2_data_sbs.R2_cis_V)
cis_V.plot(kind='kde', label=r'$R^2$ cis effect', ax=ax1)
trans_V = normalize(r2_data_sbs.R2_diff_trans_V)
trans_V.plot(kind='kde', label=r'$R^2$ trans effect', ax=ax1)
int_V = normalize(r2_data_sbs.R2_diff_int_V)
int_V.plot(kind='kde', label=r'$R^2$ interaction effect', ax=ax1)
ax1.set_title('Virgin')

# Panel B: Mated.
cis_M = normalize(r2_data_sbs.R2_cis_M)
cis_M.plot(kind='kde', label=r'$R^2$ cis effect', ax=ax2)
trans_M = normalize(r2_data_sbs.R2_diff_trans_M)
trans_M.plot(kind='kde', label=r'$R^2$ trans effect', ax=ax2)
int_M = normalize(r2_data_sbs.R2_diff_int_M)
# Plot the simulated series like the other five curves; the raw
# R2_diff_int_M column was previously plotted here and int_M went unused.
int_M.plot(kind='kde', label=r'$R^2$ interaction effect', ax=ax2)
ax2.set_title('Mated')

# Bold panel letters in axes coordinates.
ax1.text(label_loc[0], label_loc[1], 'A', transform=ax1.transAxes, fontweight='bold')
ax2.text(label_loc[0], label_loc[1], 'B', transform=ax2.transAxes, fontweight='bold')

# One legend built from panel A's curves, positioned outside the axes; the
# tight_layout rect leaves room for it on the right.
h, l = ax1.get_legend_handles_labels()
plt.legend(h, l, loc=(1.1, 1))
plt.tight_layout(pad=2, rect=[0, 0, .75, 1])
plt.savefig('../../output/cis_trans_int_effects_distribution.eps')
In [19]:
# Join the side-by-side cis/trans estimates with the allelic-imbalance calls,
# keeping only (line, fusion_id) pairs present in both tables.
merged = cis_sbs.merge(ai, on=['line', 'fusion_id'], how='inner')

# Virgin cis x trans interaction term. It is computed from merged's own columns:
# the merge result carries a fresh row index, so multiplying cis_sbs columns and
# assigning them here would align on unrelated index labels and put values on
# the wrong rows (or leave NaN).
merged['int_V'] = merged['c_i_V'] * merged['T_i_1a_V']
In [20]:
# Rows of the merged table for the fusion_id stored in `Men` (defined earlier in
# this notebook), rather than repeating the literal id here.
men = merged[merged.fusion_id == Men]
In [21]:
# Three scatter panels for the rows in `men`: q5_mean_theta_V on the y axis
# against the Virgin cis estimate, trans estimate and their product.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(21.83, 6))

scatter_panels = [
    # (axes, x column, x label, y label)
    (ax1, 'c_i_V', 'Cis effects', 'Mean Allelic Imbalance'),
    (ax2, 'T_i_1a_V', 'Trans Effects', ''),
    (ax3, 'int_V', 'Cis x Trans Effects', ''),
]
for axis, x_column, x_text, y_text in scatter_panels:
    men.plot(x_column, 'q5_mean_theta_V', kind='scatter', ax=axis, s=60)
    # Replace the column-name labels pandas puts on the axes.
    axis.set_ylabel(y_text)
    axis.set_xlabel(x_text)

fig.suptitle(r'Gene: $\mathit{Men}$')
plt.tight_layout(pad=2)
plt.savefig('../../output/cis_trans_int_effects_examples.eps')
In [ ]: