In [1]:
# Execute GLOBALS.py inside the notebook namespace.
# NOTE(review): MAIN_DIR, used throughout the cells below, is presumably
# defined there — confirm.
%run GLOBALS.py
In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to the helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
In [3]:
import matplotlib
# Select the TkAgg backend before pyplot is imported.
# NOTE(review): a later cell runs `%matplotlib inline`, which switches the
# backend again — confirm whether forcing TkAgg here is still required.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
In [4]:
#import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
#import matplotlib.pyplot as plt
%matplotlib inline
In [5]:
# Notebook switches.
# import_original_data: when True, the reduced table is rebuilt with
#   read_and_reduce_all and the csv copies under MAIN_DIR/results are
#   rewritten; when False, the previously saved reduced csv is loaded instead.
import_original_data = True
# write_excel: when True, write_excel_files is called for the reduced tables.
write_excel = True
In [6]:
# Show the current working directory; relative paths such as '../' used
# below resolve against it.
print(os.getcwd())
In [7]:
# Use seaborn's "whitegrid" style for subsequent plots.
sns.set(style="whitegrid")
In [8]:
# Import the csv that translates the 127_HOW14 type labels to weeks and replicates.
from elviz_utils import IMPORT_METAINFO_TYPES, read_sample_info
In [9]:
# Load the sample metadata through read_sample_info and preview the first rows.
sample_info = read_sample_info(MAIN_DIR)
sample_info.head()
Out[9]:
In [10]:
from abundance_utils import read_and_reduce_elviz_csv
#read_and_reduce_elviz_csv(
# filename = 'elviz-contigs-1056169.csv',
# filepath=filepath,
# sample_info = sample_info)
In [11]:
# Shell escape: list the contents of the current working directory.
! ls
In [12]:
from abundance_utils import get_elviz_filenames
# Collect the Elviz file names.
# NOTE(review): main_dir is hard-coded to '../' here, while the other cells
# build their paths from MAIN_DIR — confirm both point at the same directory.
elviz_files = get_elviz_filenames(main_dir='../')
# Preview the first four entries.
elviz_files[0:4]
Out[12]:
In [13]:
from elviz_utils import make_directory
# Create the output directories used by the rest of the notebook.
# The paths are built with os.path.join so the separator is always present:
# the previous `MAIN_DIR + 'results'` omitted the '/' that every later cell
# uses (`MAIN_DIR + '/results'`), which yields a different directory whenever
# MAIN_DIR does not end with a path separator.
make_directory(dirpath=os.path.join(MAIN_DIR, "plots"))
make_directory(dirpath=os.path.join(MAIN_DIR, "results"))
In [14]:
from abundance_utils import read_and_reduce_all
In [15]:
from abundance_utils import project_number_from_filename
In [16]:
# Display the raw-data directory path; the same expression is passed as
# `filepath` to read_and_reduce_all in the next cell.
os.path.join(MAIN_DIR, 'raw_data')
Out[16]:
In [17]:
# Build `data_reduced`: either recompute it from the files listed in
# `elviz_files`, or reload the csv copy that a previous run saved under
# MAIN_DIR/results, depending on `import_original_data`.
if import_original_data:
    data_reduced = read_and_reduce_all(
        filename_list=elviz_files,
        filepath=os.path.join(MAIN_DIR, 'raw_data'),
        sample_info=sample_info,
    )
    # For a DataFrame, .size is the total number of cells (rows x columns).
    print(data_reduced.size)
else:
    data_reduced = pd.read_csv(
        MAIN_DIR + "/results/reduced_data--all_taxonomy_remains.csv"
    )

# Preview the first rows whichever branch produced the table.
data_reduced.head()
Out[17]:
In [18]:
# Persist the freshly computed table so later runs can set
# import_original_data = False and load this csv instead.
if import_original_data:
    data_reduced.to_csv(
        MAIN_DIR + "/results/reduced_data--all_taxonomy_remains.csv",
        index=False,
    )
In [19]:
# Preview the first rows of the reduced table.
data_reduced.head()
Out[19]:
In [20]:
# Sanity check on one genus: after the reduction each ID is expected to
# appear in only one row. Print the Methylophilus rows ordered by Length.
methylophilus_only = data_reduced.loc[data_reduced['Genus'] == 'Methylophilus']
print(
    methylophilus_only[['ID', 'Length', 'fraction of reads']]
    .sort_values('Length')
)
In [21]:
# Optionally export the reduced table with write_excel_files.
# The import sits under the same flag as the later by-genus export cell,
# which reuses the imported name.
if write_excel:
    from abundance_utils import write_excel_files
    write_excel_files(
        dataframe=data_reduced,
        filepath=MAIN_DIR + '/results',
    )
In [22]:
from abundance_utils import reduce_to_genus_only
# Derive the genus-level table from the reduced data and preview it.
data_reduced_genus = reduce_to_genus_only(data_reduced)
data_reduced_genus.head()
Out[22]:
In [23]:
# Save the genus-level table as csv; done only when the data was rebuilt
# in this run (import_original_data is True).
if import_original_data:
    data_reduced_genus.to_csv(
        MAIN_DIR + "/results/reduced_data--genus_only.csv",
        index=False,
    )
In [24]:
# Preview the first three rows of the genus-level table.
data_reduced_genus.head(3)
Out[24]:
In [25]:
# Group the genus-level table by replicate, week and oxygen columns.
# Note: despite the variable name, 'oxy' is also one of the grouping keys.
by_repl_and_week_Genus = data_reduced_genus.groupby(['rep','week','oxy'])
In [26]:
# Optionally export the genus-level table as well, using the by_genus
# option of write_excel_files.
if write_excel:
    write_excel_files(
        dataframe=data_reduced_genus,
        filepath=MAIN_DIR + '/results',
        by_genus=True,
    )
In [ ]: