In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [46]:
# Mapping (metadata) table; the file's first column becomes the row index.
path_map = '../../data/mapping-files/emp_qiime_mapping_qc_filtered.tsv'
df_map = pd.read_csv(
    path_map,
    index_col=0,
    sep='\t',
)
In [47]:
# Load the metadata table from the ".quartiles.tsv" file (first column is the
# row index). This copy of df_qrt feeds the alpha-diversity export below.
df_qrt = pd.read_csv(
    '../../data/effect-size/emp_qiime_mapping_qc_filtered.tsv.quartiles.tsv',
    sep='\t',
    index_col=0,
)
# Loader for the ".deciles.tsv" variant, currently disabled:
#df_dec = pd.read_csv('../../data/effect-size/emp_qiime_mapping_qc_filtered.tsv.deciles.tsv', sep='\t', index_col=0)
In [48]:
# Attach the 'adiv_observed_otus' column from the mapping table, aligned on the
# row index of df_qrt. A single label-based .loc lookup replaces the per-row
# Python loop; on current pandas a label missing from df_map raises KeyError,
# as the original per-row lookup did.
df_qrt['adiv_observed_otus'] = df_map.loc[df_qrt.index, 'adiv_observed_otus'].values
# Remove the oxygen column before export. Reassigning (rather than inplace=True)
# keeps the step explicit about which object is being replaced.
df_qrt = df_qrt.drop('oxygen_mg_per_l', axis=1)
In [49]:
# Write the alpha-diversity table (df_qrt as modified above) as tab-separated text.
path_adiv = '../../data/effect-size/emp_qiime_mapping_qc_filtered.quartiles.adiv.tsv'
df_qrt.to_csv(path_or_buf=path_adiv, sep='\t')
In [7]:
# Re-read the quartile metadata table; this replaces the df_qrt that was
# modified for the alpha-diversity export. Used next for the unweighted
# UniFrac beta-diversity export.
df_qrt = pd.read_csv(
    '../../data/effect-size/emp_qiime_mapping_qc_filtered.tsv.quartiles.tsv',
    sep='\t',
    index_col=0,
)
In [8]:
# Unweighted-UniFrac principal coordinates: the file has no header row, its
# first column is the row index, and the remaining columns are named PC1..PC10
# (the assignment below requires exactly ten data columns).
path_bdiv = '../../data/beta-div/emp_90_gg_1k_unweighted_unifrac.txt.pc.first_ten'
df_bdiv = pd.read_csv(path_bdiv, header=None, index_col=0, sep='\t')
df_bdiv.columns = ['PC{}'.format(k) for k in range(1, 11)]
In [9]:
# Copy PC1..PC10 from df_bdiv onto df_qrt, matching rows by index label.
# The rows are aligned once with .loc instead of doing a per-row chained lookup
# for every column; on current pandas a label missing from df_bdiv raises KeyError.
pcs_aligned = df_bdiv.loc[df_qrt.index]
for col in ['PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10']:
    # .values drops the index so the assignment is positional, like the original list.
    df_qrt[col] = pcs_aligned[col].values
In [10]:
# Write df_qrt (metadata plus the PC columns added above) as tab-separated text.
# Note: path_bdiv is reused here for the OUTPUT path; it held the input path before.
path_bdiv = '../../data/effect-size/emp_qiime_mapping_qc_filtered.quartiles.bdiv_uw.tsv'
df_qrt.to_csv(path_or_buf=path_bdiv, sep='\t')
In [11]:
# Re-read the quartile metadata table; this replaces the df_qrt that received
# the unweighted PC columns. Used next for the weighted UniFrac export.
df_qrt = pd.read_csv(
    '../../data/effect-size/emp_qiime_mapping_qc_filtered.tsv.quartiles.tsv',
    sep='\t',
    index_col=0,
)
In [12]:
# Weighted-UniFrac principal coordinates: the file has no header row, its first
# column is the row index, and the remaining columns are named PC1..PC10
# (the assignment below requires exactly ten data columns).
path_bdiv = '../../data/beta-div/emp_90_gg_1k_weighted_unifrac.txt.pc.first_ten'
df_bdiv = pd.read_csv(path_bdiv, header=None, index_col=0, sep='\t')
df_bdiv.columns = ['PC{}'.format(k) for k in range(1, 11)]
In [13]:
# Copy PC1..PC10 from df_bdiv (weighted UniFrac at this point) onto df_qrt,
# matching rows by index label. The rows are aligned once with .loc instead of
# doing a per-row chained lookup for every column; on current pandas a label
# missing from df_bdiv raises KeyError.
pcs_aligned = df_bdiv.loc[df_qrt.index]
for col in ['PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10']:
    # .values drops the index so the assignment is positional, like the original list.
    df_qrt[col] = pcs_aligned[col].values
In [14]:
# Write df_qrt (metadata plus the PC columns added above) as tab-separated text.
# Note: path_bdiv is reused here for the OUTPUT path; it held the input path before.
path_bdiv = '../../data/effect-size/emp_qiime_mapping_qc_filtered.quartiles.bdiv_w.tsv'
df_qrt.to_csv(path_or_buf=path_bdiv, sep='\t')
In [15]:
# df_dec is not created by any active cell in this notebook (its only loader
# is commented out in the cell that first loads df_qrt), so on a fresh kernel
# this section would fail with NameError. Load it here, before first use, from
# the path given in that disabled loader.
df_dec = pd.read_csv('../../data/effect-size/emp_qiime_mapping_qc_filtered.tsv.deciles.tsv', sep='\t', index_col=0)
# Number of rows per distinct study_id value.
df_dec.study_id.value_counts()
Out[15]:
In [16]:
# Number of rows per distinct host_scientific_name value.
df_dec['host_scientific_name'].value_counts()
Out[16]:
In [17]:
# Number of rows per distinct latitude_deg value.
df_dec['latitude_deg'].value_counts()
Out[17]:
In [18]:
# Number of rows per distinct longitude_deg value.
df_dec['longitude_deg'].value_counts()
Out[18]:
In [19]:
# Number of rows per distinct envo_biome_3 value.
df_dec['envo_biome_3'].value_counts()
Out[19]:
In [20]:
# Number of rows per distinct empo_3 value.
df_dec['empo_3'].value_counts()
Out[20]:
In [21]:
# Number of rows per distinct temperature_deg_c value.
df_dec['temperature_deg_c'].value_counts()
Out[21]:
In [22]:
# Number of rows per distinct ph value.
df_dec['ph'].value_counts()
Out[22]:
In [23]:
# Number of rows per distinct salinity_psu value.
df_dec['salinity_psu'].value_counts()
Out[23]:
In [25]:
# NOTE(review): this check is disabled and the notebook does not record why —
# confirm whether it should be restored or the cell removed.
#df_dec.oxygen_mg_per_l.value_counts()
In [26]:
# Number of rows per distinct nitrate_umol_per_l value.
df_dec['nitrate_umol_per_l'].value_counts()
Out[26]:
In [2]:
# Plot appearance for the effect-size bar charts: 'ticks' style with white
# y-tick colour, and the 'poster' context scaled up by 1.5.
sns.set_style(style='ticks', rc={'ytick.color': 'white'})
sns.set_context(context='poster', font_scale=1.5)
# Single named colour; not referenced by the other cells shown in this notebook.
mycolor = sns.xkcd_rgb['light blue']
In [3]:
# Metadata factors to keep when filtering the effect-size result tables
# (matched against their 'column_name' column).
cols = [
    'study_id',
    'host_scientific_name',
    'latitude_deg',
    'longitude_deg',
    'envo_biome_3',
    'empo_3',
    'temperature_deg_c',
    'ph',
    'salinity_psu',
    'oxygen_mg_per_l',
    'nitrate_umol_per_l',
]
In [4]:
# Effect-size results for observed OTUs, quartile version (tab-separated,
# first column used as the row index).
results_q = pd.read_csv(
    '../../data/effect-size/' + 'mannwhitneyu.alpha_map.adiv_observed_otus.quartiles.tsv',
    index_col=0,
    sep='\t',
)
In [5]:
# Effect-size results for observed OTUs, decile version (tab-separated,
# first column used as the row index).
results_d = pd.read_csv(
    '../../data/effect-size/' + 'mannwhitneyu.alpha_map.adiv_observed_otus.deciles.tsv',
    index_col=0,
    sep='\t',
)
In [7]:
# Keep only the rows whose column_name is one of the factors in `cols`,
# ordered from largest to smallest effect_size.
myresults_q = (
    results_q[results_q['column_name'].isin(cols)]
    .sort_values('effect_size', ascending=False)
)
In [8]:
# Keep only the rows whose column_name is one of the factors in `cols`,
# ordered from largest to smallest effect_size.
myresults_d = (
    results_d[results_d['column_name'].isin(cols)]
    .sort_values('effect_size', ascending=False)
)
In [9]:
# Fixed colour per metadata factor so every chart uses the same colour for the
# same factor. The 12-colour 'Paired' palette is built once and indexed,
# instead of being rebuilt for each of the eleven entries.
paired_palette = sns.color_palette('Paired', 12)
dict_colors = {
    # odd palette positions
    'study_id': paired_palette[1],
    'empo_3': paired_palette[3],
    'envo_biome_3': paired_palette[5],
    'latitude_deg': paired_palette[7],
    'longitude_deg': paired_palette[9],
    # even palette positions
    'host_scientific_name': paired_palette[0],
    'salinity_psu': paired_palette[2],
    'temperature_deg_c': paired_palette[4],
    'ph': paired_palette[6],
    'nitrate_umol_per_l': paired_palette[8],
    'oxygen_mg_per_l': paired_palette[10],
}
In [11]:
# Horizontal bars: one per factor in myresults_d, coloured via dict_colors.
# The y-axis label is intentionally left empty for this figure.
ax = sns.barplot(data=myresults_d, x='effect_size', y='column_name', palette=dict_colors)
ax.set_xlabel('Mean pairwise effect size')
ax.set_ylabel('')
plt.tight_layout()
plt.savefig('adiv_effect_size_deciles.pdf')
In [12]:
# Horizontal bars: one per factor in myresults_q, coloured via dict_colors.
ax = sns.barplot(data=myresults_q, x='effect_size', y='column_name', palette=dict_colors)
ax.set_xlabel('Mean pairwise effect size')
ax.set_ylabel('Factor')
plt.tight_layout()
plt.savefig('adiv_effect_size_quartiles.pdf')
In [13]:
# Effect-size results for weighted UniFrac, quartile version (tab-separated,
# first column used as the row index).
results_w = pd.read_csv(
    '../../data/effect-size/' + 'emp_90_gg_1k_weighted_unifrac.emp_qiime_mapping_qc_filtered.quartiles.tsv',
    index_col=0,
    sep='\t',
)
In [14]:
# Keep only the rows whose column_name is one of the factors in `cols`,
# ordered from largest to smallest effect_size.
myresults_w = (
    results_w[results_w['column_name'].isin(cols)]
    .sort_values('effect_size', ascending=False)
)
In [15]:
# Horizontal bars: one per factor in myresults_w, coloured via dict_colors.
ax = sns.barplot(data=myresults_w, x='effect_size', y='column_name', palette=dict_colors)
ax.set_xlabel('Mean pairwise effect size')
ax.set_ylabel('Factor')
plt.tight_layout()
plt.savefig('bdiv_weighted_effect_size_quartiles.pdf')
In [16]:
# Effect-size results for unweighted UniFrac, quartile version (tab-separated,
# first column used as the row index).
results_uw = pd.read_csv(
    '../../data/effect-size/' + 'emp_90_gg_1k_unweighted_unifrac.emp_qiime_mapping_qc_filtered.quartiles.tsv',
    index_col=0,
    sep='\t',
)
In [17]:
# Keep only the rows whose column_name is one of the factors in `cols`,
# ordered from largest to smallest effect_size.
myresults_uw = (
    results_uw[results_uw['column_name'].isin(cols)]
    .sort_values('effect_size', ascending=False)
)
In [18]:
# Horizontal bars: one per factor in myresults_uw, coloured via dict_colors.
ax = sns.barplot(data=myresults_uw, x='effect_size', y='column_name', palette=dict_colors)
ax.set_xlabel('Mean pairwise effect size')
ax.set_ylabel('Factor')
plt.tight_layout()
plt.savefig('bdiv_unweighted_effect_size_quartiles.pdf')
In [17]:
# Switch the y-tick colour to black for the figure drawn below.
sns.set_style(style='ticks', rc={'ytick.color': 'black'})
In [18]:
# Location of the Excel workbook that is read into df_rda in the next cell.
path_rda = '../../data/effect-size/rda_values.xlsx'
In [19]:
# Read the workbook at path_rda with pandas' default sheet and header handling.
df_rda = pd.read_excel(io=path_rda)
In [20]:
# Replace missing cells with 0 so they add nothing to the stacked bars below.
df_rda = df_rda.fillna(0)
In [21]:
# Display the full table (missing values were already replaced with 0).
df_rda
Out[21]:
In [22]:
# List of colour names; the plotting cell shown below takes its colours from
# dict_colors and does not reference this list.
mycolors = [
    "dark grey",
    "windows blue",
    "amber",
    "faded green",
    "dusty purple",
]
In [23]:
# Stacked bar chart of the df_rda columns: one bar per row of df_rda, one
# segment per factor, coloured via dict_colors. The five copy-pasted
# bar/bottom pairs are replaced by one loop, and the bar positions are derived
# from the tick labels instead of repeating np.arange(6).
# NOTE(review): the labels assume df_rda has exactly these six rows in this
# order — confirm against the workbook.
rda_labels = ['alpha w/ study_id', 'alpha w/o study_id', 'unweighted w/ study_id', 'unweighted w/o study_id',
              'weighted w/ study_id', 'weighted w/o study_id']
positions = np.arange(len(rda_labels))

plt.figure(figsize=(10,10))
bottom = None  # bottom=None is plt.bar's default, i.e. the first layer starts at 0
for factor in ['study_id', 'empo_3', 'envo_biome_3', 'latitude_deg', 'longitude_deg']:
    heights = df_rda[factor]
    plt.bar(positions, heights, bottom=bottom, color=dict_colors[factor])
    # Running total of the layers drawn so far becomes the base of the next layer.
    bottom = heights if bottom is None else bottom + heights
plt.xticks(positions, rda_labels, rotation=270)
plt.tight_layout()
plt.savefig('rda.pdf')
In [ ]: