In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
import h5py as h5
#import empcolors
%run ../../code/colors-and-styles/empcolors.py
%matplotlib inline
In [2]:
# Input tables were pre-filtered by metadata presence/absence with QIIME:
#   filter_samples_from_otu_table.py -i emp_cr_gg_13_8.biom -m emp_qiime_mapping_all_emp.tsv -s 'Description:*' -o emp_cr_gg_13_8_filt.biom --output_mapping_fp emp_qiime_mapping_all_emp_filt.tsv

# Per-sample sums of the QC-filtered table (headerless TSV; column 0 becomes the index).
emp_gg_otus_sampsum = pd.read_csv(
    '../../data/predicted-rrna-copy-number/emp_cr_gg_13_8.qc_filtered_filt_summary_samplesum.txt',
    sep='\t',
    index_col=0,
    header=None,
).sort_index()

# Per-sample sums of the normalized table, read the same way.
emp_gg_otus_norm_sampsum = pd.read_csv(
    '../../data/predicted-rrna-copy-number/emp_cr_gg_13_8.normalized_qcfilt_summary_samplesum.txt',
    sep='\t',
    index_col=0,
    header=None,
).sort_index()

# Sample metadata (mapping file): first row is the header, first column the index.
emp_gg_otus_meta = pd.read_csv(
    '../../data/mapping-files/emp_qiime_mapping_qc_filtered.tsv',
    sep='\t',
    index_col=0,
    header=0,
).sort_index()
In [4]:
# With header=None the sum files have integer column labels; column 1 holds the per-sample sum.
raw_sample_sums = emp_gg_otus_sampsum[1]
norm_sample_sums = emp_gg_otus_norm_sampsum[1]

# Ratio of normalized to raw sums; its reciprocal is stored as the average copy number.
norm_to_raw_ratio = np.divide(norm_sample_sums, raw_sample_sums)

emp_gg_otus_meta['sampsum'] = raw_sample_sums
emp_gg_otus_meta['normsampsum'] = norm_sample_sums
emp_gg_otus_meta['copynumberdepletion'] = norm_to_raw_ratio
emp_gg_otus_meta['averagecopy'] = np.divide(1, norm_to_raw_ratio)
In [5]:
# Show the first five rows to confirm the derived columns were attached.
emp_gg_otus_meta.head(5)
Out[5]:
In [16]:
# Save the EMPO annotations and average copy number per sample.
# The metadata frame was loaded with index_col=0, so the sample ID is the
# index rather than a column (presumably the '#SampleID' column of the
# mapping file -- verify against the TSV header). Selecting '#SampleID' via
# .loc raises KeyError on pandas >= 1.0 and produced an all-NaN duplicate
# column on older versions, so only real columns are selected here and the
# index is written under the '#SampleID' header instead.
emp_gg_otus_meta.loc[:, ['empo_0', 'empo_1', 'empo_2', 'empo_3', 'averagecopy']].to_csv(
    '../../data/predicted-rrna-copy-number/emp_rrna_averagecopy_empo.csv',
    index_label='#SampleID',
)
In [7]:
def format_figure():
    """Apply the shared histogram styling to the current pyplot figure.

    Sets the x/y axis labels (font size 14), the tick label font size (12),
    fixes the x-axis range to [0, 8], and calls ``plt.tight_layout()``.
    """
    axis_labels = (
        (plt.xlabel, 'Average community 16S copy number'),
        (plt.ylabel, 'Number of samples'),
    )
    for set_label, label_text in axis_labels:
        set_label(label_text, fontsize=14)

    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(fontsize=12)

    plt.xlim([0, 8])
    plt.tight_layout()
In [9]:
# Histogram of average copy number over every sample, in the 'EMP sample' color.
# (Earlier pandas one-liner, kept for reference:
#  emp_gg_otus_meta['averagecopy'].plot(kind='hist',legend=False,bins=200,linewidth=0))
all_sample_copies = emp_gg_otus_meta['averagecopy'].dropna()
plt.hist(
    all_sample_copies,
    bins=200,
    linewidth=0,
    color=get_empo_cat_color('EMP sample'),
)
format_figure()
plt.savefig('copy_number_16s_emp_empo0.pdf')
In [10]:
# Overlaid histograms of average copy number, one per empo_1 category, drawn
# in the order the categories first appear in the metadata.
# (A groupby('empo_1') + .plot(kind='hist') variant was tried earlier.)
for empo1_label in emp_gg_otus_meta.empo_1.unique():
    in_category = emp_gg_otus_meta.empo_1 == empo1_label
    category_copies = emp_gg_otus_meta.loc[in_category, 'averagecopy'].dropna()
    plt.hist(
        category_copies,
        label=empo1_label,
        bins=200,
        linewidth=0,
        color=get_empo_cat_color(empo1_label),
        alpha=0.7,
    )
# Uncomment to add a legend:
#plt.legend(loc=1,prop={'size':8})
format_figure()
plt.savefig('copy_number_16s_emp_empo1.pdf')
In [11]:
# Overlaid histograms of average copy number for a fixed, explicitly ordered
# list of empo_2 categories.
# (A groupby('empo_2') + .plot(kind='hist') variant was tried earlier.)
empo2_labels = ('Animal', 'Non-saline', 'Plant', 'Saline')
for empo2_label in empo2_labels:
    in_category = emp_gg_otus_meta.empo_2 == empo2_label
    category_copies = emp_gg_otus_meta.loc[in_category, 'averagecopy'].dropna()
    plt.hist(
        category_copies,
        label=empo2_label,
        bins=200,
        linewidth=0,
        color=get_empo_cat_color(empo2_label),
        alpha=0.8,
    )
# Uncomment to add a legend:
#plt.legend(loc=1,prop={'size':8})
format_figure()
plt.savefig('copy_number_16s_emp_empo2.pdf')
In [12]:
# Overlaid histograms of average copy number, one per empo_3 category, drawn
# in the order the categories first appear in the metadata.
# (A groupby('empo_3') + .plot(kind='hist') variant was tried earlier.)
for empo3_label in emp_gg_otus_meta.empo_3.unique():
    in_category = emp_gg_otus_meta.empo_3 == empo3_label
    category_copies = emp_gg_otus_meta.loc[in_category, 'averagecopy'].dropna()
    plt.hist(
        category_copies,
        label=empo3_label,
        bins=200,
        linewidth=0,
        color=get_empo_cat_color(empo3_label),
        alpha=0.5,
    )
# Uncomment to add a legend:
#plt.legend(loc=1,prop={'size':8})
format_figure()
plt.savefig('copy_number_16s_emp_empo3.pdf')
In [13]:
# Box plot of average copy number per empo_2 category, with rotated tick labels.
bp = emp_gg_otus_meta.boxplot(
    column='averagecopy',
    by='empo_2',
    grid=False,
    rot=90,
)
# Blank out the x label and axes title, then fix the y range.
plt.xlabel('')
plt.title('')
plt.ylim(0, 8)
Out[13]:
In [14]:
# Box plot of average copy number per empo_3 category, with rotated tick labels.
bp = emp_gg_otus_meta.boxplot(
    column='averagecopy',
    by='empo_3',
    grid=False,
    rot=90,
)
# Blank out the x label and axes title, then fix the y range.
plt.xlabel('')
plt.title('')
plt.ylim(0, 8)
Out[14]:
In [15]:
# Histograms of average copy number, one per env_material group, via the
# pandas groupby plotting interface.
biom_grouped = emp_gg_otus_meta.groupby('env_material')
biom_grouped['averagecopy'].plot(
    kind='hist',
    bins=200,
    alpha=0.5,
    linewidth=0,
    legend=True,
)
# Legend in the upper-right corner (loc code 1) with a very small font.
plt.legend(loc='upper right', prop={'size': 3})
plt.xlabel('average community 16S copy number')
#plt.savefig('copy_number_16s_emp_env_material.pdf', dpi=300, format='pdf')
plt.xlim(0, 8)
Out[15]:
In [ ]: