In [1]:
# Execute GLOBALS.py in this notebook's namespace. Later cells use MAIN_DIR,
# which no cell shown here assigns -- presumably it is defined by GLOBALS.py
# (TODO confirm).
%run GLOBALS.py


3.5.2 |Continuum Analytics, Inc.| (default, Jul  2 2016, 17:52:12) 
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]

In [2]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local elviz_utils / abundance_utils helpers) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib

# Select the TkAgg backend before pyplot is imported.
# NOTE(review): a later cell runs `%matplotlib inline`; confirm whether this
# explicit backend choice is still needed.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

In [4]:
#import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
#import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Run configuration.
# import_original_data: when True, the reduced table is rebuilt from the raw
#   elviz csv files and the csv copies under results/ are rewritten; when
#   False, results/reduced_data--all_taxonomy_remains.csv is loaded instead.
# write_excel: when True, the write_excel_files(...) calls further down run.
import_original_data = True
write_excel = True

In [6]:
# Show the working directory; relative paths such as '../' used below are
# resolved against it.
print(os.getcwd())


/Users/janet/elvizAnalysis/ipython_notebooks

In [7]:
# Apply seaborn's "whitegrid" style to the plots made in this session.
sns.set(style="whitegrid")

In [8]:
# Import the helper that reads the csv translating 127_HOW14-style sample labels to weeks and replicates (the table itself is loaded in the next cell).

from elviz_utils import IMPORT_METAINFO_TYPES, read_sample_info

In [9]:
# Load the sample metadata table and preview it. The preview below shows the
# columns ID, oxy, rep, week and project.
sample_info = read_sample_info(MAIN_DIR)

sample_info.head()


Out[9]:
ID oxy rep week project
0 1_LOW4 Low 1 4 1056013
1 13_LOW5 Low 1 5 1056037
2 25_LOW6 Low 1 6 1056061
3 37_LOW7 Low 1 7 1056085
4 49_LOW8 Low 1 8 1056109

In [10]:
from abundance_utils import read_and_reduce_elviz_csv

# Example of reducing a single elviz file, left disabled for reference.
# NOTE(review): `filepath` is not assigned in any cell shown here; define it
# before re-enabling this call.
#read_and_reduce_elviz_csv(
#    filename = 'elviz-contigs-1056169.csv',
#    filepath=filepath,
#    sample_info = sample_info)

In [11]:
# List the contents of the notebook directory (interactive check only; the
# output is not captured or used by any later cell).
! ls


Bacteriovoracaceae_and_Myxococcaceae--Families.ipynb
GLOBALS.py
calculate_abundances-Copy1.ipynb
calculate_abundances.ipynb
demo_elviz_pca_module.ipynb
depreciated
elviz_abundance_plotting.ipynb
heatmap_all_below--burkholderiales_at_differing_depths.ipynb
plot_bar_charts.ipynb
plots

In [12]:
from abundance_utils import get_elviz_filenames

# Collect the elviz csv file names and preview the first four.
# NOTE(review): main_dir is hard-coded to '../' here while other cells use
# MAIN_DIR -- confirm the two always refer to the same directory.
elviz_files = get_elviz_filenames(main_dir='../')
elviz_files[0:4]


Out[12]:
['elviz-contigs-1056013.csv',
 'elviz-contigs-1056016.csv',
 'elviz-contigs-1056019.csv',
 'elviz-contigs-1056022.csv']

In [13]:
from elviz_utils import make_directory

# Prepare the output directories used by the rest of the notebook
# (make_directory presumably creates the directory when missing -- confirm in
# elviz_utils).
# Both paths are built with os.path.join. The previous version concatenated
# "/plots" with a separator but 'results' without one, so the second path was
# only correct when MAIN_DIR happened to end in a slash.
for output_subdir in ('plots', 'results'):
    make_directory(dirpath=os.path.join(MAIN_DIR, output_subdir))

In [14]:
from abundance_utils import read_and_reduce_all

In [15]:
from abundance_utils import project_number_from_filename

In [16]:
# Preview the raw-data directory path that is passed to read_and_reduce_all
# in the next cell.
os.path.join(MAIN_DIR, 'raw_data')


Out[16]:
'../raw_data'

In [17]:
# Obtain the reduced abundance table: either reload the csv written by an
# earlier run, or rebuild it from every raw elviz file and report its size.
if not import_original_data:
    cached_csv = MAIN_DIR + "/results/reduced_data--all_taxonomy_remains.csv"
    data_reduced = pd.read_csv(cached_csv)
else:
    raw_data_dir = os.path.join(MAIN_DIR, 'raw_data')
    data_reduced = read_and_reduce_all(
        filename_list=elviz_files,
        filepath=raw_data_dir,
        sample_info=sample_info,
    )
    # DataFrame.size is the total cell count (rows x columns).
    print(data_reduced.size)

data_reduced.head()


elviz-contigs-1056016.csv
elviz-contigs-1056019.csv
elviz-contigs-1056022.csv
elviz-contigs-1056025.csv
elviz-contigs-1056028.csv
elviz-contigs-1056031.csv
elviz-contigs-1056034.csv
elviz-contigs-1056037.csv
elviz-contigs-1056040.csv
elviz-contigs-1056043.csv
elviz-contigs-1056046.csv
elviz-contigs-1056049.csv
elviz-contigs-1056052.csv
elviz-contigs-1056055.csv
elviz-contigs-1056058.csv
elviz-contigs-1056061.csv
elviz-contigs-1056064.csv
elviz-contigs-1056067.csv
elviz-contigs-1056070.csv
elviz-contigs-1056073.csv
elviz-contigs-1056076.csv
elviz-contigs-1056079.csv
elviz-contigs-1056082.csv
elviz-contigs-1056085.csv
elviz-contigs-1056088.csv
elviz-contigs-1056091.csv
elviz-contigs-1056094.csv
elviz-contigs-1056097.csv
elviz-contigs-1056100.csv
elviz-contigs-1056103.csv
elviz-contigs-1056106.csv
elviz-contigs-1056109.csv
elviz-contigs-1056112.csv
elviz-contigs-1056115.csv
elviz-contigs-1056118.csv
elviz-contigs-1056121.csv
elviz-contigs-1056124.csv
elviz-contigs-1056127.csv
elviz-contigs-1056130.csv
elviz-contigs-1056133.csv
elviz-contigs-1056136.csv
elviz-contigs-1056139.csv
elviz-contigs-1056142.csv
elviz-contigs-1056145.csv
elviz-contigs-1056148.csv
elviz-contigs-1056151.csv
elviz-contigs-1056154.csv
elviz-contigs-1056157.csv
elviz-contigs-1056160.csv
elviz-contigs-1056163.csv
elviz-contigs-1056166.csv
elviz-contigs-1056169.csv
elviz-contigs-1056172.csv
elviz-contigs-1056175.csv
elviz-contigs-1056178.csv
elviz-contigs-1056181.csv
elviz-contigs-1056184.csv
elviz-contigs-1056187.csv
elviz-contigs-1056190.csv
elviz-contigs-1056193.csv
elviz-contigs-1056196.csv
elviz-contigs-1056199.csv
elviz-contigs-1056202.csv
elviz-contigs-1056205.csv
elviz-contigs-1056208.csv
elviz-contigs-1056211.csv
elviz-contigs-1056214.csv
elviz-contigs-1056217.csv
elviz-contigs-1056220.csv
elviz-contigs-1056223.csv
elviz-contigs-1056226.csv
elviz-contigs-1056229.csv
elviz-contigs-1056232.csv
elviz-contigs-1056235.csv
elviz-contigs-1056238.csv
elviz-contigs-1056241.csv
elviz-contigs-1056244.csv
elviz-contigs-1056247.csv
elviz-contigs-1056250.csv
elviz-contigs-1056253.csv
elviz-contigs-1056256.csv
elviz-contigs-1056259.csv
elviz-contigs-1056262.csv
elviz-contigs-1056265.csv
elviz-contigs-1056268.csv
elviz-contigs-1056271.csv
elviz-contigs-1056274.csv
841815
Out[17]:
Kingdom Phylum Class Order Family Genus Length fraction of reads project ID oxy rep week
0 Bacteria Proteobacteria Gammaproteobacteria Methylococcales Methylococcaceae Methylobacter 9948861 0.205558 1056013 1_LOW4 Low 1 4
1 Bacteria Proteobacteria Betaproteobacteria Methylophilales Methylophilaceae Methylotenera 5066955 0.185898 1056013 1_LOW4 Low 1 4
2 Bacteria Proteobacteria other 3930509 0.075027 1056013 1_LOW4 Low 1 4
3 Bacteria Proteobacteria Gammaproteobacteria other 5620690 0.073601 1056013 1_LOW4 Low 1 4
4 Bacteria Bacteroidetes Flavobacteriia Flavobacteriales Flavobacteriaceae Flavobacterium 4654774 0.065548 1056013 1_LOW4 Low 1 4

In [18]:
# When the table was rebuilt from the raw files, save it as csv (without the
# index) so a later run with import_original_data = False can reload it.
if import_original_data:
    data_reduced.to_csv(MAIN_DIR + "/results/reduced_data--all_taxonomy_remains.csv", index=False)

In [19]:
# Preview the first rows of the reduced table.
data_reduced.head()


Out[19]:
Kingdom Phylum Class Order Family Genus Length fraction of reads project ID oxy rep week
0 Bacteria Proteobacteria Gammaproteobacteria Methylococcales Methylococcaceae Methylobacter 9948861 0.205558 1056013 1_LOW4 Low 1 4
1 Bacteria Proteobacteria Betaproteobacteria Methylophilales Methylophilaceae Methylotenera 5066955 0.185898 1056013 1_LOW4 Low 1 4
2 Bacteria Proteobacteria other 3930509 0.075027 1056013 1_LOW4 Low 1 4
3 Bacteria Proteobacteria Gammaproteobacteria other 5620690 0.073601 1056013 1_LOW4 Low 1 4
4 Bacteria Bacteroidetes Flavobacteriia Flavobacteriales Flavobacteriaceae Flavobacterium 4654774 0.065548 1056013 1_LOW4 Low 1 4

In [20]:
# Sanity check on the reduction: each sample ID should have at most one
# Methylophilus row.
methylophilus_only = data_reduced[data_reduced['Genus'] == 'Methylophilus']
# Enforce the invariant instead of relying on reading the printed table; a
# duplicate ID would mean rows were not collapsed per genus during reduction.
assert methylophilus_only['ID'].is_unique, \
    "expected at most one Methylophilus row per sample ID"
# Print the per-sample rows ordered by Length for visual inspection.
print(methylophilus_only[['ID', 'Length', 'fraction of reads']].sort_values('Length'))


            ID   Length  fraction of reads
219    63_LOW9    15882           0.000028
49    73_LOW10    20437           0.000760
99     51_LOW8    29583           0.000217
100   74_LOW10    33515           0.000278
26   122_LOW14    39064           0.001171
59    92_HOW11    42204           0.000607
103   75_LOW10    45610           0.000124
27   124_LOW14    46654           0.001526
22   121_LOW14    47061           0.001651
29     61_LOW9    52709           0.001694
76      4_LOW4    52969           0.000506
28   112_LOW13    55629           0.001615
74     13_LOW5    58884           0.000652
67      9_HOW4    60376           0.000780
33      8_HOW4    61368           0.001438
116  123_LOW14    68043           0.000175
41     49_LOW8    71714           0.000932
48      3_LOW4    72753           0.001035
52      2_LOW4    73660           0.000926
48      1_LOW4    74310           0.001062
88     39_LOW7    77028           0.000347
56     14_LOW5    88088           0.000793
57     27_LOW6    88622           0.000875
50     15_LOW5    93077           0.000953
57     37_LOW7    96499           0.000638
57     57_HOW8    96525           0.000506
36     10_HOW4   101934           0.001388
39      7_HOW4   105754           0.001432
34   100_LOW12   108658           0.001117
61     25_LOW6   108785           0.000540
..         ...      ...                ...
6     97_LOW12  2984166           0.015834
2     80_HOW10  3001616           0.087030
11    91_HOW11  3011404           0.016662
1      32_HOW6  3017162           0.347133
8     94_HOW11  3083901           0.028545
1    128_HOW14  3096023           0.205689
7     99_LOW12  3812409           0.022149
0      70_HOW9  3885996           0.371372
0      58_HOW8  4742705           0.363455
1      33_HOW6  4901475           0.188410
1      45_HOW7  5125910           0.198458
0      31_HOW6  5194359           0.383851
1      19_HOW5  5334592           0.100792
0    115_HOW13  5336219           0.531892
1     88_LOW11  5341441           0.068999
1    106_HOW12  5385211           0.115445
0    127_HOW14  5404569           0.360175
3    118_HOW13  5426623           0.021599
0    104_HOW12  5430800           0.644610
1     93_HOW11  5430835           0.071349
1    130_HOW14  5449081           0.082110
1    103_HOW12  5450134           0.214184
1    117_HOW13  5577034           0.158158
7     87_LOW11  5643812           0.025104
2     81_HOW10  5647252           0.045446
2     82_HOW10  5690423           0.045236
1      44_HOW7  5795352           0.252276
0      43_HOW7  5805202           0.407512
1    129_HOW14  5873847           0.121108
1    105_HOW12  6111634           0.301190

[88 rows x 3 columns]

In [21]:
# When write_excel is set, pass the full-taxonomy table to write_excel_files,
# targeting the results directory. The import lives inside the guard; the
# genus-level export further down reuses write_excel_files under the same flag.
if write_excel:
    from abundance_utils import write_excel_files
    write_excel_files(dataframe = data_reduced, filepath = MAIN_DIR + '/results')

In [22]:
from abundance_utils import reduce_to_genus_only
# Derive the genus-only table from the full-taxonomy table and preview it; the
# preview below keeps Genus as the only taxonomy column.
data_reduced_genus = reduce_to_genus_only(data_reduced)
data_reduced_genus.head()


Out[22]:
ID rep week oxy Genus Length fraction of reads project
7268 118_HOW13 4 13 High Methylobacter 4326741 0.820344 1056250
11786 130_HOW14 4 14 High Methylobacter 4428101 0.776829 1056274
2432 106_HOW12 4 12 High Methylobacter 4325001 0.623003 1056226
45856 82_HOW10 4 10 High Methylobacter 5488758 0.589218 1056178
14497 16_LOW5 4 5 Low other 40011267 0.548044 137285980

In [23]:
# When the data was rebuilt from the raw files, also save the genus-only table
# as csv (without the index) under results/.
if import_original_data:
    data_reduced_genus.to_csv(
        MAIN_DIR + "/results/reduced_data--genus_only.csv", index=False)

In [24]:
# Preview the first three rows of the genus-only table.
data_reduced_genus.head(3)


Out[24]:
ID rep week oxy Genus Length fraction of reads project
7268 118_HOW13 4 13 High Methylobacter 4326741 0.820344 1056250
11786 130_HOW14 4 14 High Methylobacter 4428101 0.776829 1056274
2432 106_HOW12 4 12 High Methylobacter 4325001 0.623003 1056226

In [25]:
# Group the genus-only rows by replicate, week and oxygen condition.
# NOTE(review): this grouping is not used by any later cell shown here.
by_repl_and_week_Genus = data_reduced_genus.groupby(['rep','week','oxy'])

In [26]:
# Genus-level counterpart of the earlier export: hand the genus-only table to
# write_excel_files with by_genus=True. Relies on write_excel_files having been
# imported by the earlier write_excel-guarded cell.
if write_excel:
    write_excel_files(
        dataframe=data_reduced_genus,
        filepath=MAIN_DIR + '/results',
        by_genus=True,
    )

In [ ]: