In [ ]:
%load_ext autoreload
%autoreload 2

In [ ]:
# Execute GLOBALS.py and load the names it defines into this notebook's namespace.
# NOTE(review): MAIN_DIR, used by later cells, is presumably defined there — confirm.
%run GLOBALS.py

In [ ]:
import matplotlib

# Select the TkAgg backend before pyplot is imported.
# NOTE(review): a later cell runs `%matplotlib inline`, which presumably switches
# the backend again — confirm whether forcing TkAgg here is still needed.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

In [ ]:
#import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
#import matplotlib.pyplot as plt
%matplotlib inline

In [ ]:
import elviz_utils
import abundance_utils
import abundance_plot_utils

Use the taxa dictionaries defined below to aggregate abundances and write summary spreadsheets (TSV files) for Mila.


In [ ]:
# Read the reduced-data CSV from the results directory under MAIN_DIR.
reduced_data_path = MAIN_DIR + "/results/reduced_data--all_taxonomy_remains.csv"
data_reduced = pd.read_csv(reduced_data_path)

In [ ]:
# Taxa to aggregate, keyed by taxonomic rank: one phylum and three orders.
major_groups_dict = dict(
    Phylum=['Bacteroidetes'],
    Order=['Burkholderiales', 'Methylophilales', 'Methylococcales'],
)

In [ ]:
# Aggregate abundances for the taxa listed in major_groups_dict.
# The reduced table is already loaded in `data_reduced`, so reuse it instead of
# parsing the same CSV a second time. A copy is passed so the helper still
# receives its own independent frame, exactly as it did when the file was re-read.
major_groups_df = abundance_plot_utils.aggregate_mixed_taxonomy(
    dataframe=data_reduced.copy(),
    taxa_dict=major_groups_dict,
    main_dir=MAIN_DIR)

# Preview the first rows of the aggregated table.
major_groups_df.head()

In [ ]:
# Genus names to aggregate; the dict has a single 'Genus' key.
m_dict = dict(
    Genus=[
        'Methylobacter', 'Methylovulum',
        'Methylomonas', 'Methylomicrobium',
        'Methyloglobulus', 'Methylococcus',
        'Methylocaldum', 'Methylosarcina',
    ],
)

In [ ]:
# Aggregate abundances for the genera listed in m_dict.
# The reduced table is already loaded in `data_reduced`, so reuse it instead of
# parsing the same CSV again. A copy is passed so the helper still receives its
# own independent frame, exactly as it did when the file was re-read.
m_df = abundance_plot_utils.aggregate_mixed_taxonomy(
    dataframe=data_reduced.copy(),
    taxa_dict=m_dict,
    main_dir=MAIN_DIR)

# Preview the first rows of the aggregated table.
m_df.head()

In [ ]:
# Shell out to list the contents of ../results (relative to the notebook's
# working directory) before the cells below write new files there.
! ls ../results

In [ ]:
# Write the major-group table as a tab-separated file, omitting the index column.
# NOTE(review): the input CSV is read from MAIN_DIR + "/results" while this path is
# relative to the working directory — confirm both point at the same directory.
major_groups_filename = '../results/4_major_group_abundances.tsv'
major_groups_df.to_csv(
    path_or_buf=major_groups_filename,
    index=False,
    sep='\t',
)

In [ ]:
# Write the genus-level table as a tab-separated file, omitting the index column.
# NOTE(review): the filename mentions Methylococcales and Methylophilales, while
# m_dict only has a 'Genus' key — confirm the name matches the table's contents.
m_filename = '../results/Methylococcales_and_Methylophilales_abundances.tsv'
m_df.to_csv(
    path_or_buf=m_filename,
    index=False,
    sep='\t',
)