In [1]:
from IPython.display import display
import pandas as pd

In [2]:
# Input files, given relative to the notebook's working directory.
# Taxonomy lookup: read below as a header-less, tab-separated table keyed by OTU id.
old_map = '../analysis_data/old_taxa.txt'
# OTU count table: read below as a tab-separated table (the .gz suffix suggests gzip).
old_study = '../analysis_data/old_study.txt.gz'

In [3]:
# Load the count table. header=1 takes the column names from the file's second
# line (anything above it is skipped); the first column becomes the row index.
pd_old = pd.read_csv(
    old_study,
    sep='\t',
    header=1,
    index_col=0,
)

In [4]:
# Remove the two 'MP_NegC_*' columns (presumably negative-control samples,
# judging by their names -- confirm against the study metadata) so they do not
# contribute to the per-taxon totals computed further down.
#
# A non-mutating drop with errors='ignore' is used instead of `del` so the cell
# can be re-run safely: `del pd_old[...]` raises KeyError the second time
# because the columns are already gone.
pd_old = pd_old.drop(['MP_NegC_1', 'MP_NegC_2'], axis=1, errors='ignore')

In [5]:
# Load the taxonomy lookup. The file has no header row, so pandas numbers the
# columns; column 0 becomes the index and column 1 is given a readable name.
pd_old_map = (
    pd.read_csv(old_map, sep='\t', header=None, index_col=0)
    .rename(columns={1: 'taxonomy'})
)
# Label the index; presumably chosen to match the count table's index name.
pd_old_map.index.name = '#OTU ID'

In [6]:
# Attach the taxonomy string to every row of counts by matching on the shared
# index, then make that string the new row index. how='inner' keeps only ids
# present in both tables, so rows without a taxonomy entry are dropped.
pd_old_complete = (
    pd_old
    .join(pd_old_map, how='inner')
    .set_index(['taxonomy'])
)

In [7]:
# Row-wise total over all columns, largest first. The index is the taxonomy
# string and may contain duplicates at this stage.
old_sum = (
    pd_old_complete
    .sum(axis=1)
    .sort_values(ascending=False)
)
old_sum


Out[7]:
taxonomy
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Rhodobacter;                            215369.0
k__Bacteria; p__Firmicutes; c__Bacilli; o__Exiguobacterales; f__Exiguobacteraceae; g__Exiguobacterium; s__Exiguobacterium                    80004.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Thioclava;                               73688.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Rivulariaceae;                                                         64685.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae;                                             60680.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Kaistobacter;                          58333.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae;                                           57955.0
k__Bacteria; p__Cyanobacteria; c__Oscillatoriophycideae; o__Oscillatoriales; f__Phormidiaceae; g__Microcoleus; s__Microcoleus                52812.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Nostoc;                                                49108.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Rhizobiaceae; g__Ensifer;                                         41888.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Erythrobacteraceae; g__Porphyrobacter; s__Porphyrobacter     40445.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Rivulariaceae;                                                         34880.0
k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Stramenopiles;                                                                             33749.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Rhodobacter;                             30469.0
k__Bacteria; p__Verrucomicrobia; c__Verrucomicrobiae; o__Verrucomicrobiales; f__Verrucomicrobiaceae; g__Luteolibacter;                       27118.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae;                                                           26761.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__;                                                                 25295.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Phaeobacter;                             25143.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae;                                                           21939.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae;                                           21040.0
k__Bacteria; p__Planctomycetes; c__Planctomycea; o__Pirellulales; f__; g__Rhodopirellula;                                                    20698.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Methylophilales; f__Methylophilaceae; g__Methylotenera;                            19902.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Delftia;                                    19344.0
k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Aeromonadales; f__Aeromonadaceae; g__Aeromonas;                                   19150.0
k__Bacteria; p__Cyanobacteria; c__Oscillatoriophycideae; o__Chroococcales; f__Cyanobacteriaceae;                                             19144.0
k__Bacteria; p__Bacteroidetes; c__Sphingobacteria; o__Sphingobacteriales; f__Flexibacteraceae; g__Cytophaga;                                 18834.0
k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__Luteimonas;                              18551.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Gloeotrichia; s__Gloeotrichia                          17660.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae;                                             17408.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Sphingopyxis;                          17275.0
                                                                                                                                              ...   
k__Bacteria; p__Chlamydiae; c__Chlamydiae o__Chlamydiales;                                                                                       1.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria                                                                                                1.0
k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Syntrophobacterales; f__Syntrophobacteraceae;                                         1.0
k__Bacteria; p__Acidobacteria; c__; o__; f__Koribacteraceae; g__Candidatus                                                                       1.0
k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Myxococcales; f__Myxococcaceae; g__Anaeromyxobacter;                                  1.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Clostridiaceae;                                                                  1.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Clostridiaceae; g__Clostridium; s__Clostridium                                   1.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales;                                                                                     1.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodospirillales; f__Rhodospirillaceae;                                               1.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Coriobacteriales; f__Coriobacteriaceae; g__Collinsella; s__Collinsella                      0.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Hyphomicrobiaceae;                                                    0.0
k__Bacteria; p__Gemmatimonadetes; c__Gemmatimonadetes                                                                                            0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Thermomonosporaceae; g__Actinoallomurus; s__Actinoallomurus             0.0
k__Bacteria; p__Acidobacteria; c__Acidobacteria o__Acidobacteriales;                                                                             0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Acidimicrobiales; f__CL500-29;                                                              0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Acidimicrobiales; f__EB1017;                                                                0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Actinomycetaceae; g__Varibaculum; s__Varibaculum                        0.0
k__Bacteria; p__Acidobacteria; c__Solibacteres; o__Solibacterales; f__Solibacteraceae; g__Candidatus                                             0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Acidimicrobiales; f__EB1017;                                                                0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__MC47;                                                                                       0.0
k__Bacteria; p__Chloroflexi; c__Anaerolineae; o__SJA-101; f__SHA-31;                                                                             0.0
k__Bacteria; p__Tenericutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__; s__Eubacterium                              0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Nocardioidaceae; g__Nocardioides;                                       0.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia; s__Burkholderia                 0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium; s__Corynebacterium              0.0
k__Bacteria; p__Acidobacteria; c__Solibacteres; o__Solibacterales; f__Solibacteraceae; g__Candidatus                                             0.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia;                                 0.0
k__Bacteria; p__Tenericutes; c__Mollicutes; o__Entomoplasmatales; f__Spiroplasmataceae; g__Spiroplasma; s__Spiroplasma                           0.0
k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Myxococcales; f__Myxococcaceae; g__Anaeromyxobacter;                                  0.0
k__Bacteria; p__Aquificae; c__Aquificae o__Aquificales; f__Aquificaceae; g__Thermocrinis; s__Thermocrinis                                        0.0
dtype: float64

In [8]:
# Several rows can carry the same taxonomy string; collapse them into a single
# total per distinct string and rank the result from largest to smallest.
old_group = (
    old_sum
    .groupby(level=0)
    .sum()
    .sort_values(ascending=False)
)
old_group


Out[8]:
taxonomy
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Rhodobacter;                            293754.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae;                                          165550.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Rivulariaceae;                                                        104734.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae;                                            103557.0
k__Bacteria; p__Firmicutes; c__Bacilli; o__Exiguobacterales; f__Exiguobacteraceae; g__Exiguobacterium; s__Exiguobacterium                    88510.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Nostoc;                                                84379.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Thioclava;                               83049.0
k__Bacteria; p__Bacteroidetes; c__Sphingobacteria; o__Sphingobacteriales; f__Saprospiraceae;                                                 73607.0
k__Bacteria; p__Verrucomicrobia; c__Verrucomicrobiae; o__Verrucomicrobiales; f__Verrucomicrobiaceae; g__Luteolibacter;                       67324.0
k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Stramenopiles;                                                                             66928.0
k__Bacteria; p__Bacteroidetes; c__Sphingobacteria; o__Sphingobacteriales; f__Flexibacteraceae; g__Cytophaga;                                 66522.0
k__Bacteria; p__Bacteroidetes; c__Sphingobacteria; o__Sphingobacteriales; f__;                                                               65522.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Kaistobacter;                          60619.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae;                                                           57803.0
k__Bacteria; p__Planctomycetes; c__Planctomycea; o__Pirellulales; f__; g__Rhodopirellula;                                                    55378.0
k__Bacteria; p__Cyanobacteria; c__Oscillatoriophycideae; o__Oscillatoriales; f__Phormidiaceae; g__Microcoleus; s__Microcoleus                53096.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Rhizobiaceae; g__Ensifer;                                         43464.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Erythrobacteraceae; g__Porphyrobacter; s__Porphyrobacter     40557.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__;                                                                 37940.0
k__Bacteria; p__Planctomycetes; c__Planctomycea; o__Gemmatales; f__Gemmataceae; g__Gemmata;                                                  37271.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Phaeobacter;                             25143.0
k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Nostocales; f__Nostocaceae; g__Dolichospermum;                                        24773.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Sphingopyxis;                          24769.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Methylophilales; f__Methylophilaceae; g__Methylotenera;                            24605.0
k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Chromatiales; f__Sinobacteraceae;                                                 22745.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Rhodocyclales; f__Rhodocyclaceae; g__Methyloversatilis;                            21579.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__; g__Methylibium;                                              21451.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Comamonadaceae;                                                21425.0
k__Bacteria; p__Cyanobacteria; c__Oscillatoriophycideae; o__Chroococcales; f__Cyanobacteriaceae;                                             19644.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Comamonadaceae; g__Delftia;                                    19489.0
                                                                                                                                              ...   
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Nocardiopsaceae; g__Nocardiopsis; s__Nocardiopsis                       1.0
k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Thiotrichales; f__Thiotrichaceae; g__Thioploca; s__Thioploca                          1.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodospirillales; f__Rhodospirillaceae; g__Rhodocista; s__Rhodocista                  1.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Acidimicrobiales; f__TK06;                                                                  1.0
k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Oceanospirillales; f__OM60; g__Congregibacter; s__Congregibacter                      1.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Thermosinus; s__Thermosinus                                  1.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Microbacteriaceae; g__Yonghaparkia;                                     1.0
k__Bacteria; p__Bacteroidetes; c__Sphingobacteria; o__Sphingobacteriales; f__Balneolaceae; g__Balneola; s__Balneola                              1.0
k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Hyphomicrobiaceae; g__Devosia; s__Candidatus                          1.0
k__Bacteria; p__SBR1093; c__EC214;                                                                                                               1.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Micromonosporaceae; g__Virgisporangium; s__Virgisporangium              1.0
k__Bacteria; p__Planctomycetes; c__vadinHA49;                                                                                                    1.0
k__Bacteria; p__Planctomycetes; c__Planctomycea; o__Pirellulales;                                                                                1.0
k__Bacteria; p__Chloroflexi;                                                                                                                     1.0
k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Salinicoccus; s__Salinicoccus                                    1.0
k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Chromatiales; f__Thiotrichaceae;                                                      1.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Rhodocyclales; f__Rhodocyclaceae; g__Azospira;                                         1.0
k__Bacteria; p__Tenericutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__p-75-a5;                                      1.0
k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Nitrosomonadales; f__Nitrosomonadaceae; g__Nitrosovibrio; s__Nitrosovibrio             1.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Thermoanaerobacterales; f__Thermoanaerobacteraceae; g__Thermoanaerobacter;                         1.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae; g__Peptostreptococcus;                                    1.0
k__Bacteria; p__Tenericutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__Clostridium; s__Clostridium                   1.0
k__Bacteria; p__Chloroflexi; c__Dehalococcoidetes; o__Dehalococcoidales; f__Dehalococcoidaceae; g__Dehalococcoides;                              1.0
k__Bacteria; p__Planctomycetes; c__Planctomycea; o__Gemmatales; f__Isosphaeraceae; g__Singulisphaera; s__Singulisphaera                          1.0
k__Bacteria; p__Tenericutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__Allobaculum; s__Allobaculum                   1.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium; s__Corynebacterium              0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Coriobacteriales; f__Coriobacteriaceae; g__Collinsella; s__Collinsella                      0.0
k__Bacteria; p__Actinobacteria; c__Actinobacteria o__Actinomycetales; f__Actinomycetaceae; g__Varibaculum; s__Varibaculum                        0.0
k__Bacteria; p__Aquificae; c__Aquificae o__Aquificales; f__Aquificaceae; g__Thermocrinis; s__Thermocrinis                                        0.0
k__Bacteria; p__Tenericutes; c__Mollicutes; o__Entomoplasmatales; f__Spiroplasmataceae; g__Spiroplasma; s__Spiroplasma                           0.0
dtype: float64

In [9]:
# Re-order the grouped totals alphabetically by taxonomy string.
# Rebinding (rather than inplace=True) keeps the cell idempotent and chainable.
old_group = old_group.sort_index()
# Show the raw totals as a NumPy array. `Series.get_values()` was deprecated in
# pandas 0.25 and removed in 1.0; `.values` returns the same array on both old
# and current pandas versions.
old_group.values


Out[9]:
array([   9.,   30.,   41., ...,  107.,   11.,   15.])

In [10]:
# Turn each taxonomy string into a tuple of tokens (split on single spaces) and
# use those tuples as a MultiIndex, so totals can be rolled up level by level.
# NOTE(review): tokens keep their trailing ';' (e.g. 'p__Proteobacteria;'), and
# a token that appears both with and without ';' forms two distinct labels.
#
# `.values` replaces `Series.get_values()`, which was removed in pandas 1.0.
old_org = pd.DataFrame(
    old_group.values,
    index=pd.MultiIndex.from_tuples(
        [tuple(lineage.split(' ')) for lineage in old_group.index]
    ),
)
# The former set_option/reset_option pair wrapped a disabled display() call and
# only forced 'display.max_rows' back to the library default. To view every row
# without touching global options, wrap display(old_org) in
# pd.option_context('display.max_rows', None).

In [22]:
# Name every index level after its position (0, 1, 2, ...) so levels can be
# addressed by number.
level_ids = list(range(old_org.index.nlevels))
old_org.index = old_org.index.set_names(level_ids)
# Sum the totals over the first two index levels and list the largest groups
# first (column 0 holds the totals).
old_org.groupby(level=[0, 1]).sum().sort_values(by=0, ascending=False)


Out[22]:
0
0 1
k__Bacteria; p__Proteobacteria; 1514474.0
p__Cyanobacteria; 526554.0
p__Bacteroidetes; 318190.0
p__Firmicutes; 133312.0
p__Planctomycetes; 132029.0
p__Verrucomicrobia; 122162.0
p__Actinobacteria; 46439.0
p__Acidobacteria; 15619.0
p__Chloroflexi; 10231.0
p__Gemmatimonadetes; 9531.0
p__Thermi; 4666.0
p__Tenericutes; 4655.0
p__Armatimonadetes; 4349.0
p__Chlamydiae; 3371.0
p__Chlorobi; 2265.0
p__TM6; 1572.0
p__SR1; 1246.0
p__Fusobacteria; 1082.0
p__Nitrospirae; 713.0
k__Archaea; p__Euryarchaeota; 454.0
k__Bacteria; p__GN02; 436.0
p__SM2F11; 383.0
p__Spirochaetes; 370.0
p__Elusimicrobia; 281.0
p__Hyd24-12; 242.0
k__Archaea; p__Crenarchaeota; 231.0
k__Bacteria; p__TM7; 196.0
p__GOUTA4; 148.0
p__Thermotogae; 142.0
p__NKB19; 122.0
... ...
p__WS3; 113.0
p__OP11; 95.0
p__SAR406; 87.0
p__Synergistetes; 78.0
p__Deferribacteres; 76.0
p__Aquificae; 68.0
p__GN1; 58.0
p__AD3; 32.0
p__SPAM; 28.0
p__TG3; 27.0
p__OP9; 26.0
p__ZB3; 15.0
p__NC10; 14.0
p__WPS-2; 14.0
p__MVP-15; 13.0
p__OP3; 11.0
p__ZB2; 11.0
p__SC3; 8.0
p__BRC1; 8.0
p__GN04; 7.0
p__49S1_2B; 7.0
p__OP8; 6.0
p__SBR1093; 5.0
p__Lentisphaerae; 4.0
p__LCP-89; 4.0
p__Dictyoglomi; 2.0
p__GN06; 2.0
p__KSB1; 2.0
p__OP1; 1.0
p__HDBW-WB69; 1.0

61 rows × 1 columns


In [ ]: