In [1]:
%run GLOBALS.py
3.5.1 |Continuum Analytics, Inc.| (default, Dec 7 2015, 11:24:55)
[GCC 4.2.1 (Apple Inc. build 5577)]
In [2]:
import matplotlib as mpl
mpl.use('TkAgg')
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
%matplotlib inline
In [4]:
import elviz_utils
In [5]:
elviz_utils.prepare_plot_dir("./plots/")
Out[5]:
'./plots/'
In [6]:
from elviz_pca import import_elviz_data
tmp = import_elviz_data(main_dir = MAIN_DIR)
tmp.head()
loading ..//results/reduced_data--all_taxonomy_remains.csv
Out[6]:
fraction of reads
Genus
ID
Abiotrophia
123_LOW14
3.638477e-07
22_HOW5
2.447423e-06
44_HOW7
1.349533e-06
4_LOW4
1.266953e-05
52_LOW8
2.539954e-06
In [7]:
tmpg = import_elviz_data(main_dir = MAIN_DIR, genus_only=False)
tmpg.head()
loading ..//results/reduced_data--all_taxonomy_remains.csv
Out[7]:
fraction of reads
Kingdom
Phylum
Class
Order
Family
Genus
ID
Archaea
Crenarchaeota
Thermoprotei
Acidilobales
Acidilobaceae
Acidilobus
10_HOW4
0.000002
19_HOW5
0.000091
22_HOW5
0.000007
25_LOW6
0.000001
28_LOW6
0.000002
In [8]:
from elviz_pca import pivot_for_pca
pivot_for_pca(dataframe=tmp).head()
Out[8]:
ID
100_LOW12
103_HOW12
104_HOW12
105_HOW12
106_HOW12
109_LOW13
10_HOW4
110_LOW13
111_LOW13
112_LOW13
...
88_LOW11
8_HOW4
91_HOW11
92_HOW11
93_HOW11
94_HOW11
97_LOW12
98_LOW12
99_LOW12
9_HOW4
Genus
Abiotrophia
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000e+00
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000
0.0
0.000000
Acaricomes
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000e+00
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000
0.0
0.000000
Acetivibrio
0.000012
0.000000
0.000002
0.000000
0.000000
0.000505
0.000000
0.000101
1.176477e-07
1.754130e-05
...
0.000010
0.000008
0.000001
0.000000
0.000002
0.000000
2.329513e-07
0.000757
0.0
0.000000
Acetobacter
0.000000
0.000002
0.000003
0.000001
0.000008
0.000000
0.000202
0.000185
1.835304e-06
5.814795e-07
...
0.000003
0.000025
0.000003
0.000016
0.000000
0.000008
9.318053e-07
0.000000
0.0
0.000066
Acetobacterium
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000016
0.000000
1.882363e-07
2.907397e-07
...
0.000000
0.000066
0.000000
0.000000
0.000000
0.000000
0.000000e+00
0.000000
0.0
0.000038
5 rows × 88 columns
In [9]:
tmpg.head(2)
Out[9]:
fraction of reads
Kingdom
Phylum
Class
Order
Family
Genus
ID
Archaea
Crenarchaeota
Thermoprotei
Acidilobales
Acidilobaceae
Acidilobus
10_HOW4
0.000002
19_HOW5
0.000091
In [10]:
tmpg.unstack(6).head()
Out[10]:
fraction of reads
ID
100_LOW12
103_HOW12
104_HOW12
105_HOW12
106_HOW12
109_LOW13
10_HOW4
110_LOW13
111_LOW13
112_LOW13
...
88_LOW11
8_HOW4
91_HOW11
92_HOW11
93_HOW11
94_HOW11
97_LOW12
98_LOW12
99_LOW12
9_HOW4
Kingdom
Phylum
Class
Order
Family
Genus
Archaea
Crenarchaeota
Thermoprotei
Acidilobales
Acidilobaceae
Acidilobus
NaN
NaN
NaN
NaN
NaN
NaN
0.000002
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
7.826701e-07
NaN
NaN
NaN
0.000013
Caldisphaeraceae
Caldisphaera
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
0.000002
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Desulfurococcales
Desulfurococcaceae
Aeropyrum
NaN
NaN
NaN
NaN
NaN
NaN
0.000002
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Desulfurococcus
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.000014
Ignisphaera
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
6.946126e-07
0.000007
5 rows × 88 columns
In [11]:
import_elviz_data(main_dir = MAIN_DIR, genus_only=False).unstack(6).head()
loading ..//results/reduced_data--all_taxonomy_remains.csv
Out[11]:
fraction of reads
ID
100_LOW12
103_HOW12
104_HOW12
105_HOW12
106_HOW12
109_LOW13
10_HOW4
110_LOW13
111_LOW13
112_LOW13
...
88_LOW11
8_HOW4
91_HOW11
92_HOW11
93_HOW11
94_HOW11
97_LOW12
98_LOW12
99_LOW12
9_HOW4
Kingdom
Phylum
Class
Order
Family
Genus
Archaea
Crenarchaeota
Thermoprotei
Acidilobales
Acidilobaceae
Acidilobus
NaN
NaN
NaN
NaN
NaN
NaN
0.000002
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
7.826701e-07
NaN
NaN
NaN
0.000013
Caldisphaeraceae
Caldisphaera
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
0.000002
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Desulfurococcales
Desulfurococcaceae
Aeropyrum
NaN
NaN
NaN
NaN
NaN
NaN
0.000002
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
Desulfurococcus
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.000014
Ignisphaera
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
6.946126e-07
0.000007
5 rows × 88 columns
In [12]:
pivot_for_pca(dataframe = import_elviz_data(main_dir = MAIN_DIR, genus_only=False),
genus_only=False).head()
loading ..//results/reduced_data--all_taxonomy_remains.csv
Out[12]:
fraction of reads
ID
100_LOW12
103_HOW12
104_HOW12
105_HOW12
106_HOW12
109_LOW13
10_HOW4
110_LOW13
111_LOW13
112_LOW13
...
88_LOW11
8_HOW4
91_HOW11
92_HOW11
93_HOW11
94_HOW11
97_LOW12
98_LOW12
99_LOW12
9_HOW4
Kingdom
Phylum
Class
Order
Family
Genus
Archaea
Crenarchaeota
Thermoprotei
Acidilobales
Acidilobaceae
Acidilobus
0.0
0.0
0.0
0.0
0.0
0.0
0.000002
0.0
0.0
0.0
...
0.0
0.0
0.000000
0.0
0.0
7.826701e-07
0.0
0.0
0.000000e+00
0.000013
Caldisphaeraceae
Caldisphaera
0.0
0.0
0.0
0.0
0.0
0.0
0.000000
0.0
0.0
0.0
...
0.0
0.0
0.000002
0.0
0.0
0.000000e+00
0.0
0.0
0.000000e+00
0.000000
Desulfurococcales
Desulfurococcaceae
Aeropyrum
0.0
0.0
0.0
0.0
0.0
0.0
0.000002
0.0
0.0
0.0
...
0.0
0.0
0.000000
0.0
0.0
0.000000e+00
0.0
0.0
0.000000e+00
0.000000
Desulfurococcus
0.0
0.0
0.0
0.0
0.0
0.0
0.000000
0.0
0.0
0.0
...
0.0
0.0
0.000000
0.0
0.0
0.000000e+00
0.0
0.0
0.000000e+00
0.000014
Ignisphaera
0.0
0.0
0.0
0.0
0.0
0.0
0.000000
0.0
0.0
0.0
...
0.0
0.0
0.000000
0.0
0.0
0.000000e+00
0.0
0.0
6.946126e-07
0.000007
5 rows × 88 columns
In [13]:
from elviz_pca import sort_by_variance
In [14]:
df = sort_by_variance(main_dir=MAIN_DIR)
loading ..//results/reduced_data--all_taxonomy_remains.csv
In [15]:
df.head()
Out[15]:
ID
100_LOW12
103_HOW12
104_HOW12
105_HOW12
106_HOW12
109_LOW13
10_HOW4
110_LOW13
111_LOW13
112_LOW13
...
8_HOW4
91_HOW11
92_HOW11
93_HOW11
94_HOW11
97_LOW12
98_LOW12
99_LOW12
9_HOW4
variance
Genus
Methylobacter
0.488347
0.095143
0.108404
0.503628
0.623003
0.549671
0.108557
0.624100
0.591209
0.426575
...
0.084548
0.389379
0.233440
0.485999
0.434607
0.574470
0.599362
0.551515
0.140160
0.037289
Methylophilus
0.001117
0.214184
0.644610
0.301190
0.115445
0.001215
0.001388
0.004370
0.001814
0.001615
...
0.001438
0.016662
0.000607
0.071349
0.028545
0.015834
0.037089
0.022149
0.000780
0.020903
Methylosarcina
0.004356
0.001051
0.001257
0.000265
0.000425
0.008294
0.029223
0.008252
0.006342
0.003870
...
0.224498
0.003789
0.003180
0.001436
0.003096
0.003236
0.007644
0.005952
0.027935
0.009851
other
0.290945
0.285678
0.108489
0.134121
0.149535
0.285467
0.344256
0.275103
0.261152
0.293225
...
0.279652
0.281977
0.296174
0.254365
0.265055
0.308956
0.273024
0.276484
0.320078
0.007776
Methylotenera
0.095586
0.302930
0.019329
0.002308
0.011839
0.078632
0.267243
0.002985
0.024772
0.151965
...
0.196150
0.030921
0.180772
0.018022
0.028454
0.030945
0.003611
0.019807
0.253807
0.007023
5 rows × 89 columns
In [16]:
'109_LOW13' in df.columns
Out[16]:
True
In [17]:
from elviz_pca import plot_variance
In [18]:
print(MAIN_DIR)
../
In [19]:
plot_variance(main_dir=MAIN_DIR)
loading ..//results/reduced_data--all_taxonomy_remains.csv
In [20]:
from elviz_pca import run_pca
In [21]:
run_pca(main_dir=MAIN_DIR, top_percent=20)
loading ..//results/reduced_data--all_taxonomy_remains.csv
number of rows to keep: 195
(195, 89)
principal components' contribution to variance:
[ 0.26763483 0.11956674 0.05648729 0.00973877 0.00523754 0.00372123
0.00151147 0.00079967 0.00040653]
(first 10)
../elviz_pca.py:86: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
data.sort(columns='variance', ascending=False, inplace=True)
Out[21]:
(Genus Methylobacter Methylophilus Methylosarcina other \
ID
100_LOW12 0.488347 0.001117 0.004356 0.290945
103_HOW12 0.095143 0.214184 0.001051 0.285678
104_HOW12 0.108404 0.644610 0.001257 0.108489
105_HOW12 0.503628 0.301190 0.000265 0.134121
106_HOW12 0.623003 0.115445 0.000425 0.149535
109_LOW13 0.549671 0.001215 0.008294 0.285467
10_HOW4 0.108557 0.001388 0.029223 0.344256
110_LOW13 0.624100 0.004370 0.008252 0.275103
111_LOW13 0.591209 0.001814 0.006342 0.261152
112_LOW13 0.426575 0.001615 0.003870 0.293225
115_HOW13 0.139907 0.531892 0.002331 0.168982
116_HOW13 0.577665 0.008565 0.002411 0.250966
117_HOW13 0.627980 0.158158 0.000006 0.134911
118_HOW13 0.820344 0.021599 0.000002 0.108220
121_LOW14 0.485900 0.001651 0.005868 0.261519
122_LOW14 0.480962 0.001171 0.005191 0.269987
123_LOW14 0.600027 0.000175 0.007641 0.245795
124_LOW14 0.448345 0.001526 0.003970 0.269143
127_HOW14 0.293167 0.360175 0.004369 0.207862
128_HOW14 0.287315 0.205689 0.003683 0.298903
129_HOW14 0.675325 0.121108 0.000002 0.137863
130_HOW14 0.776829 0.082110 0.000423 0.089942
13_LOW5 0.296807 0.000652 0.012291 0.360497
14_LOW5 0.191132 0.000793 0.010412 0.498935
15_LOW5 0.228825 0.000953 0.008665 0.373895
16_LOW5 0.163048 0.001315 0.005561 0.548044
19_HOW5 0.168388 0.100792 0.028376 0.400307
1_LOW4 0.205558 0.001062 0.013274 0.347778
20_HOW5 0.227233 0.002000 0.105138 0.339238
21_HOW5 0.063475 0.003918 0.362765 0.250594
... ... ... ... ...
61_LOW9 0.259341 0.001694 0.003472 0.298027
62_LOW9 0.217050 0.001391 0.003062 0.359054
63_LOW9 0.560437 0.000028 0.004037 0.308443
64_LOW9 0.328550 0.003173 0.002825 0.325029
67_HOW9 0.346218 0.009057 0.005759 0.478253
68_HOW9 0.300044 0.204176 0.028888 0.334460
69_HOW9 0.564852 0.038001 0.019171 0.282701
70_HOW9 0.165028 0.371372 0.001719 0.362728
73_LOW10 0.416043 0.000760 0.005120 0.276537
74_LOW10 0.409655 0.000278 0.005007 0.305787
75_LOW10 0.568736 0.000124 0.003166 0.243238
76_LOW10 0.531414 0.001128 0.004893 0.263700
79_HOW10 0.037890 0.396289 0.000315 0.430442
7_HOW4 0.070583 0.001432 0.036926 0.380766
80_HOW10 0.546378 0.087030 0.006830 0.283472
81_HOW10 0.592378 0.045446 0.007616 0.240311
82_HOW10 0.589218 0.045236 0.006280 0.241687
85_LOW11 0.507074 0.113178 0.006249 0.291065
86_LOW11 0.527974 0.102666 0.005124 0.277371
87_LOW11 0.604069 0.025104 0.007358 0.239312
88_LOW11 0.387438 0.068999 0.041732 0.308863
8_HOW4 0.084548 0.001438 0.224498 0.279652
91_HOW11 0.389379 0.016662 0.003789 0.281977
92_HOW11 0.233440 0.000607 0.003180 0.296174
93_HOW11 0.485999 0.071349 0.001436 0.254365
94_HOW11 0.434607 0.028545 0.003096 0.265055
97_LOW12 0.574470 0.015834 0.003236 0.308956
98_LOW12 0.599362 0.037089 0.007644 0.273024
99_LOW12 0.551515 0.022149 0.005952 0.276484
9_HOW4 0.140160 0.000780 0.027935 0.320078
Genus Methylotenera Flavobacterium Methylomonas Acidovorax \
ID
100_LOW12 0.095586 0.005594 0.014736 0.013642
103_HOW12 0.302930 0.013650 0.002367 0.024963
104_HOW12 0.019329 0.045588 0.002900 0.027520
105_HOW12 0.002308 0.000029 0.001635 0.018101
106_HOW12 0.011839 0.000216 0.002661 0.032060
109_LOW13 0.078632 0.006842 0.011319 0.004489
10_HOW4 0.267243 0.009625 0.012908 0.064535
110_LOW13 0.002985 0.000440 0.013217 0.002308
111_LOW13 0.024772 0.000724 0.020545 0.006153
112_LOW13 0.151965 0.014213 0.011408 0.022253
115_HOW13 0.009936 0.046919 0.010341 0.021375
116_HOW13 0.075257 0.002467 0.008076 0.030716
117_HOW13 0.000889 0.003578 0.000895 0.028380
118_HOW13 0.002262 0.000936 0.002363 0.032375
121_LOW14 0.170419 0.005126 0.011976 0.004675
122_LOW14 0.167625 0.004891 0.012005 0.004777
123_LOW14 0.042407 0.000704 0.016780 0.002562
124_LOW14 0.178582 0.004939 0.017238 0.007345
127_HOW14 0.005573 0.017450 0.016127 0.038585
128_HOW14 0.089049 0.000072 0.008891 0.026710
129_HOW14 0.000184 0.001955 0.000746 0.036585
130_HOW14 0.015372 0.000837 0.000802 0.023159
13_LOW5 0.101496 0.083633 0.011619 0.013728
14_LOW5 0.096627 0.060451 0.027743 0.014986
15_LOW5 0.089280 0.117195 0.020931 0.022595
16_LOW5 0.126366 0.050466 0.009806 0.019322
19_HOW5 0.033117 0.002275 0.043006 0.001442
1_LOW4 0.185898 0.065548 0.018689 0.015060
20_HOW5 0.125292 0.001835 0.072769 0.006741
21_HOW5 0.168471 0.003863 0.016795 0.025674
... ... ... ... ...
61_LOW9 0.331278 0.006651 0.007525 0.014102
62_LOW9 0.274918 0.001279 0.008003 0.006629
63_LOW9 0.007504 0.003088 0.008996 0.019030
64_LOW9 0.246226 0.000529 0.006297 0.003401
67_HOW9 0.007988 0.005884 0.011834 0.004166
68_HOW9 0.010294 0.030109 0.011053 0.015059
69_HOW9 0.003570 0.002371 0.015089 0.004932
70_HOW9 0.003893 0.007440 0.008325 0.004133
73_LOW10 0.187773 0.003965 0.010439 0.017471
74_LOW10 0.099970 0.001549 0.012147 0.048268
75_LOW10 0.029720 0.003759 0.012660 0.072677
76_LOW10 0.092487 0.000464 0.010041 0.014600
79_HOW10 0.000672 0.055729 0.000761 0.030499
7_HOW4 0.261409 0.062637 0.011203 0.025060
80_HOW10 0.003109 0.001336 0.012387 0.003589
81_HOW10 0.005587 0.000533 0.018573 0.002594
82_HOW10 0.005653 0.000538 0.020742 0.002402
85_LOW11 0.004074 0.013902 0.010533 0.004840
86_LOW11 0.003424 0.009308 0.014454 0.006078
87_LOW11 0.006104 0.000168 0.025904 0.003199
88_LOW11 0.040359 0.008034 0.018548 0.015316
8_HOW4 0.196150 0.023124 0.025193 0.024959
91_HOW11 0.030921 0.078917 0.007996 0.092666
92_HOW11 0.180772 0.033624 0.007741 0.062884
93_HOW11 0.018022 0.000622 0.004140 0.058415
94_HOW11 0.028454 0.002074 0.008123 0.039256
97_LOW12 0.030945 0.007054 0.011502 0.005406
98_LOW12 0.003611 0.001496 0.014469 0.005372
99_LOW12 0.019807 0.000631 0.024177 0.006382
9_HOW4 0.253807 0.036372 0.018612 0.025357
Genus Pseudomonas Bacteriovorax ... Deinococcus \
ID ...
100_LOW12 0.001855 1.025529e-06 ... 4.987309e-05
103_HOW12 0.001656 1.446747e-02 ... 6.072379e-06
104_HOW12 0.000988 1.695339e-02 ... 1.033160e-04
105_HOW12 0.001304 0.000000e+00 ... 4.957451e-05
106_HOW12 0.022840 6.153012e-07 ... 7.347420e-06
109_LOW13 0.001554 3.383238e-07 ... 2.392433e-06
10_HOW4 0.005225 3.064996e-02 ... 6.988440e-05
110_LOW13 0.001507 0.000000e+00 ... 3.979480e-07
111_LOW13 0.002180 4.284728e-04 ... 1.990599e-05
112_LOW13 0.001634 3.246594e-06 ... 8.571977e-05
115_HOW13 0.002117 6.529131e-03 ... 4.788005e-04
116_HOW13 0.001120 5.352354e-07 ... 2.854589e-06
117_HOW13 0.000912 2.144745e-06 ... 1.064017e-05
118_HOW13 0.000862 1.494405e-07 ... 7.258539e-07
121_LOW14 0.001454 3.615421e-05 ... 4.533444e-06
122_LOW14 0.001591 0.000000e+00 ... 8.431820e-06
123_LOW14 0.001715 5.043839e-04 ... 5.071128e-05
124_LOW14 0.001786 3.634653e-06 ... 5.302959e-05
127_HOW14 0.003559 2.767306e-06 ... 8.363412e-06
128_HOW14 0.002172 8.369986e-03 ... 3.495655e-05
129_HOW14 0.000619 1.545167e-07 ... 1.228408e-05
130_HOW14 0.000575 0.000000e+00 ... 5.494032e-07
13_LOW5 0.005804 7.257288e-05 ... 9.557242e-05
14_LOW5 0.004226 7.652634e-04 ... 5.609193e-05
15_LOW5 0.006466 1.746223e-02 ... 9.084235e-05
16_LOW5 0.002843 7.803558e-06 ... 3.373151e-05
19_HOW5 0.007375 1.312414e-04 ... 8.530689e-04
1_LOW4 0.006089 6.063271e-06 ... 1.321793e-04
20_HOW5 0.003867 8.555681e-05 ... 8.795311e-05
21_HOW5 0.003073 1.627067e-02 ... 1.581507e-04
... ... ... ... ...
61_LOW9 0.002201 9.356987e-03 ... 1.833224e-05
62_LOW9 0.003392 2.869770e-02 ... 7.713761e-05
63_LOW9 0.002061 2.380148e-02 ... 5.843710e-05
64_LOW9 0.003843 3.078009e-06 ... 3.951767e-05
67_HOW9 0.001934 3.334894e-05 ... 2.765252e-05
68_HOW9 0.001393 2.261371e-05 ... 3.034101e-05
69_HOW9 0.002013 4.148536e-06 ... 1.093312e-05
70_HOW9 0.001512 7.398572e-06 ... 1.466975e-04
73_LOW10 0.003266 6.214532e-03 ... 3.544825e-05
74_LOW10 0.003773 1.685557e-02 ... 3.161783e-05
75_LOW10 0.001202 3.089562e-03 ... 1.553640e-05
76_LOW10 0.003867 5.593907e-06 ... 1.800965e-05
79_HOW10 0.001704 5.565752e-06 ... 3.943597e-05
7_HOW4 0.003147 1.890409e-02 ... 8.004216e-05
80_HOW10 0.001448 2.000265e-06 ... 1.236230e-05
81_HOW10 0.001973 1.173321e-06 ... 2.642319e-05
82_HOW10 0.002216 7.440835e-07 ... 2.415791e-05
85_LOW11 0.002109 1.203665e-06 ... 1.700662e-05
86_LOW11 0.001528 6.765265e-07 ... 7.780055e-06
87_LOW11 0.002263 6.675595e-07 ... 5.674256e-06
88_LOW11 0.001571 0.000000e+00 ... 1.053374e-05
8_HOW4 0.004678 2.109896e-02 ... 7.317771e-05
91_HOW11 0.003133 2.035730e-02 ... 3.917161e-05
92_HOW11 0.060534 2.648556e-02 ... 2.100244e-05
93_HOW11 0.027832 2.001212e-06 ... 1.628259e-05
94_HOW11 0.091009 6.087434e-07 ... 3.348089e-06
97_LOW12 0.001333 5.823783e-07 ... 5.552006e-06
98_LOW12 0.001795 2.357715e-07 ... 2.711372e-06
99_LOW12 0.001674 8.580509e-07 ... 7.313862e-06
9_HOW4 0.008610 1.757489e-02 ... 2.513789e-04
Genus Saccharospirillum Denitrovibrio Pseudoxanthomonas Thiocapsa \
ID
100_LOW12 2.860686e-06 4.318017e-07 7.324436e-05 1.149672e-05
103_HOW12 3.881314e-06 1.001630e-06 2.015779e-05 6.385388e-05
104_HOW12 2.823268e-05 0.000000e+00 1.356058e-05 3.117821e-05
105_HOW12 7.813846e-07 3.472821e-07 3.286157e-05 1.128667e-06
106_HOW12 0.000000e+00 5.791070e-07 6.587342e-06 8.324663e-07
109_LOW13 4.349878e-07 2.899919e-06 1.464459e-05 6.113995e-06
10_HOW4 0.000000e+00 1.658274e-05 3.833412e-05 2.293587e-05
110_LOW13 3.122361e-06 0.000000e+00 5.816163e-07 1.530569e-07
111_LOW13 6.512975e-05 6.642858e-04 1.047064e-05 8.745928e-05
112_LOW13 1.889808e-06 2.093326e-04 5.606431e-05 1.362116e-04
115_HOW13 5.975797e-06 0.000000e+00 6.917189e-06 9.782297e-06
116_HOW13 0.000000e+00 0.000000e+00 1.413735e-04 2.033895e-06
117_HOW13 0.000000e+00 0.000000e+00 7.024738e-05 1.108583e-05
118_HOW13 5.764134e-07 2.561837e-07 1.067432e-06 1.216873e-06
121_LOW14 8.216867e-07 0.000000e+00 2.011716e-05 1.011525e-05
122_LOW14 7.432303e-07 2.819150e-07 1.499275e-05 5.125727e-06
123_LOW14 6.626577e-05 0.000000e+00 1.523612e-05 8.504941e-06
124_LOW14 0.000000e+00 0.000000e+00 4.005388e-05 1.579620e-04
127_HOW14 4.304698e-06 0.000000e+00 1.236063e-05 8.547899e-06
128_HOW14 5.064393e-05 0.000000e+00 6.120797e-06 1.963189e-05
129_HOW14 0.000000e+00 0.000000e+00 5.454441e-05 6.154916e-06
130_HOW14 0.000000e+00 0.000000e+00 8.241047e-07 4.512954e-07
13_LOW5 6.029609e-05 8.391724e-06 2.840754e-04 4.273563e-05
14_LOW5 3.555484e-05 8.163494e-05 1.309240e-04 1.488939e-04
15_LOW5 2.123587e-05 3.391841e-06 1.187144e-04 5.884107e-05
16_LOW5 2.240376e-05 2.026408e-05 2.237859e-04 6.998029e-05
19_HOW5 2.763254e-04 0.000000e+00 3.730129e-04 1.134889e-04
1_LOW4 6.184536e-05 7.795634e-06 2.886117e-04 9.112230e-05
20_HOW5 1.393405e-05 1.686286e-06 5.680120e-06 1.380979e-04
21_HOW5 1.131144e-05 0.000000e+00 4.294158e-06 9.384306e-05
... ... ... ... ...
61_LOW9 1.499004e-04 3.130199e-05 1.471568e-05 2.519124e-05
62_LOW9 4.245494e-05 3.635413e-05 3.576912e-05 2.976860e-04
63_LOW9 1.945811e-06 3.640550e-06 4.644840e-06 9.666288e-06
64_LOW9 5.599991e-05 1.687941e-06 1.191488e-05 5.113467e-05
67_HOW9 0.000000e+00 1.050796e-06 1.648090e-05 1.083979e-05
68_HOW9 2.670463e-06 0.000000e+00 1.210231e-05 7.784116e-06
69_HOW9 7.778504e-07 0.000000e+00 4.839958e-06 4.221999e-05
70_HOW9 1.287156e-04 0.000000e+00 5.208399e-05 6.026651e-06
73_LOW10 2.489045e-04 4.518029e-05 2.099763e-05 1.578361e-04
74_LOW10 2.141853e-05 1.083166e-04 1.529895e-05 2.051079e-04
75_LOW10 0.000000e+00 5.288988e-07 1.818090e-05 5.024538e-06
76_LOW10 4.720712e-05 2.701448e-05 5.184597e-06 3.097114e-05
79_HOW10 0.000000e+00 1.474686e-06 7.780636e-04 2.673464e-05
7_HOW4 4.664111e-06 0.000000e+00 6.048299e-05 1.193110e-04
80_HOW10 4.262861e-07 0.000000e+00 5.344972e-06 1.298533e-05
81_HOW10 8.842149e-05 0.000000e+00 5.444210e-06 8.865615e-05
82_HOW10 9.043094e-05 0.000000e+00 8.581763e-06 1.458404e-05
85_LOW11 3.494511e-07 0.000000e+00 3.063522e-04 9.435181e-06
86_LOW11 1.479902e-06 0.000000e+00 1.202526e-04 4.676489e-05
87_LOW11 8.874833e-05 0.000000e+00 8.047801e-06 8.322242e-05
88_LOW11 0.000000e+00 0.000000e+00 2.827478e-05 1.570821e-05
8_HOW4 7.567240e-05 0.000000e+00 1.496817e-05 5.621379e-05
91_HOW11 1.491565e-04 4.117425e-05 5.839694e-05 1.834417e-05
92_HOW11 2.268660e-05 3.269247e-06 1.248258e-05 2.922509e-04
93_HOW11 0.000000e+00 0.000000e+00 5.198602e-05 4.684655e-06
94_HOW11 1.156612e-05 4.696020e-06 8.044109e-06 3.261125e-06
97_LOW12 6.212035e-07 1.164757e-06 2.787651e-05 7.765044e-06
98_LOW12 2.357715e-07 0.000000e+00 5.658516e-06 1.414629e-06
99_LOW12 8.171913e-05 6.289513e-04 6.741828e-06 2.737591e-06
9_HOW4 1.508273e-05 3.961122e-06 2.190805e-04 7.495661e-05
Genus Chlorobaculum Thiorhodospira Pseudorhodobacter Thermomonospora \
ID
100_LOW12 0.000000e+00 2.491496e-04 5.473086e-05 2.698760e-07
103_HOW12 1.377241e-05 5.133351e-06 1.401655e-04 1.383501e-05
104_HOW12 3.034457e-05 2.778807e-06 1.834013e-06 7.102631e-05
105_HOW12 0.000000e+00 0.000000e+00 2.078049e-04 0.000000e+00
106_HOW12 0.000000e+00 5.501517e-06 2.747139e-04 1.445958e-04
109_LOW13 0.000000e+00 3.331040e-04 1.691619e-07 1.933279e-07
10_HOW4 0.000000e+00 7.193032e-05 3.768804e-06 0.000000e+00
110_LOW13 0.000000e+00 4.477221e-04 2.448911e-07 0.000000e+00
111_LOW13 0.000000e+00 1.277654e-05 2.258835e-05 4.470612e-07
112_LOW13 0.000000e+00 2.001743e-04 4.680910e-05 1.599069e-06
115_HOW13 1.125578e-05 1.964645e-06 4.747893e-06 1.799288e-04
116_HOW13 4.638707e-07 1.248883e-06 0.000000e+00 3.218549e-05
117_HOW13 1.030592e-06 8.634690e-07 1.181560e-04 7.019167e-06
118_HOW13 0.000000e+00 0.000000e+00 1.707892e-07 0.000000e+00
121_LOW14 0.000000e+00 2.126185e-04 4.816784e-07 0.000000e+00
122_LOW14 0.000000e+00 2.177665e-04 4.869440e-07 0.000000e+00
123_LOW14 4.548097e-07 6.690250e-05 6.048968e-06 2.728858e-07
124_LOW14 0.000000e+00 2.013961e-04 9.646370e-05 5.451980e-07
127_HOW14 2.398331e-06 0.000000e+00 0.000000e+00 3.811502e-04
128_HOW14 7.027582e-06 7.027582e-06 3.491121e-06 0.000000e+00
129_HOW14 0.000000e+00 0.000000e+00 1.845960e-04 4.712760e-06
130_HOW14 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
13_LOW5 9.634943e-05 4.739770e-05 0.000000e+00 3.387770e-05
14_LOW5 7.585888e-05 3.388620e-05 0.000000e+00 0.000000e+00
15_LOW5 9.806845e-05 1.459966e-05 0.000000e+00 0.000000e+00
16_LOW5 1.189413e-04 1.308984e-05 0.000000e+00 6.129569e-05
19_HOW5 1.426537e-05 1.120095e-04 0.000000e+00 4.850225e-05
1_LOW4 3.412755e-05 1.067136e-04 0.000000e+00 3.187548e-05
20_HOW5 0.000000e+00 3.425823e-05 0.000000e+00 0.000000e+00
21_HOW5 0.000000e+00 2.022444e-04 1.361562e-06 0.000000e+00
... ... ... ... ...
61_LOW9 1.101805e-04 1.138594e-04 8.336804e-05 1.629325e-04
62_LOW9 1.593732e-04 5.515799e-05 5.515799e-06 1.638025e-05
63_LOW9 1.001779e-04 1.795168e-05 1.192594e-06 6.829170e-05
64_LOW9 1.631345e-04 3.455314e-05 3.375881e-06 2.104961e-04
67_HOW9 7.742705e-07 1.825619e-04 4.369098e-06 6.636604e-07
68_HOW9 5.170471e-06 1.174436e-04 0.000000e+00 1.136367e-06
69_HOW9 0.000000e+00 2.769580e-04 0.000000e+00 5.185669e-07
70_HOW9 0.000000e+00 2.871234e-05 1.974586e-05 1.156333e-05
73_LOW10 1.640292e-04 4.346981e-05 3.104228e-04 2.923153e-04
74_LOW10 2.083717e-04 2.702815e-05 2.743612e-05 1.341208e-04
75_LOW10 3.761792e-05 3.371730e-06 0.000000e+00 1.738755e-05
76_LOW10 1.237481e-04 2.121592e-05 5.457470e-07 1.508308e-04
79_HOW10 0.000000e+00 1.018009e-05 8.562695e-07 1.907578e-05
7_HOW4 0.000000e+00 4.363201e-06 7.003689e-04 1.895735e-05
80_HOW10 3.279124e-07 2.426551e-04 7.214072e-07 0.000000e+00
81_HOW10 2.815971e-06 1.553477e-05 1.412679e-05 0.000000e+00
82_HOW10 6.944779e-07 1.875090e-05 1.612181e-05 0.000000e+00
85_LOW11 1.164837e-06 4.022183e-04 0.000000e+00 0.000000e+00
86_LOW11 0.000000e+00 3.085807e-04 0.000000e+00 2.959803e-07
87_LOW11 0.000000e+00 6.449367e-05 8.010714e-06 0.000000e+00
88_LOW11 0.000000e+00 2.882919e-04 1.355218e-06 1.478420e-06
8_HOW4 0.000000e+00 2.544589e-05 0.000000e+00 0.000000e+00
91_HOW11 2.155640e-04 1.610121e-05 9.965130e-05 2.528932e-04
92_HOW11 2.286492e-04 3.774495e-05 3.200891e-04 1.071917e-04
93_HOW11 1.819283e-06 1.046088e-06 2.197240e-04 1.122953e-04
94_HOW11 1.095738e-05 5.304764e-06 0.000000e+00 1.529685e-04
97_LOW12 0.000000e+00 2.378045e-04 5.047278e-07 0.000000e+00
98_LOW12 0.000000e+00 2.636318e-04 1.886172e-06 0.000000e+00
99_LOW12 3.677361e-07 1.135896e-05 4.629389e-05 1.144068e-06
9_HOW4 0.000000e+00 2.254792e-05 0.000000e+00 2.102442e-05
Genus Acetivibrio
ID
100_LOW12 1.176660e-05
103_HOW12 0.000000e+00
104_HOW12 1.834013e-06
105_HOW12 0.000000e+00
106_HOW12 0.000000e+00
109_LOW13 5.048758e-04
10_HOW4 0.000000e+00
110_LOW13 1.008339e-04
111_LOW13 1.176477e-07
112_LOW13 1.754130e-05
115_HOW13 0.000000e+00
116_HOW13 0.000000e+00
117_HOW13 8.634690e-07
118_HOW13 0.000000e+00
121_LOW14 8.777880e-05
122_LOW14 0.000000e+00
123_LOW14 2.501453e-06
124_LOW14 5.415633e-06
127_HOW14 1.660383e-06
128_HOW14 0.000000e+00
129_HOW14 1.545167e-07
130_HOW14 0.000000e+00
13_LOW5 2.486437e-05
14_LOW5 3.876376e-05
15_LOW5 0.000000e+00
16_LOW5 0.000000e+00
19_HOW5 1.141229e-05
1_LOW4 6.756216e-06
20_HOW5 2.218797e-06
21_HOW5 6.598340e-06
... ...
61_LOW9 0.000000e+00
62_LOW9 1.053016e-05
63_LOW9 8.787534e-07
64_LOW9 2.283684e-06
67_HOW9 9.401856e-07
68_HOW9 6.846613e-05
69_HOW9 1.199186e-04
70_HOW9 1.665904e-06
73_LOW10 0.000000e+00
74_LOW10 0.000000e+00
75_LOW10 0.000000e+00
76_LOW10 0.000000e+00
79_HOW10 0.000000e+00
7_HOW4 1.068232e-05
80_HOW10 1.094571e-04
81_HOW10 0.000000e+00
82_HOW10 0.000000e+00
85_LOW11 0.000000e+00
86_LOW11 1.259185e-04
87_LOW11 0.000000e+00
88_LOW11 1.041054e-05
8_HOW4 8.315649e-06
91_HOW11 1.441900e-06
92_HOW11 0.000000e+00
93_HOW11 1.819283e-06
94_HOW11 0.000000e+00
97_LOW12 2.329513e-07
98_LOW12 7.574945e-04
99_LOW12 0.000000e+00
9_HOW4 0.000000e+00
[88 rows x 195 columns],
array([[ -1.45183902e-01, -4.43559278e-02, -6.18029430e-03, ...,
-1.10379246e-05, -1.20558162e-05, -4.30583905e-18],
[ 3.04310547e-01, -5.99313942e-02, -1.06616197e-01, ...,
-2.93373287e-06, 2.56006189e-06, 6.08079659e-18],
[ 4.16983266e-01, 4.62073675e-01, -1.41411252e-01, ...,
-2.61051774e-05, -1.28536286e-05, -4.79587408e-18],
...,
[ -2.40217437e-01, 5.57753502e-02, 5.96392185e-03, ...,
2.38252590e-06, -1.30558343e-06, 5.90263380e-17],
[ -2.01587763e-01, 2.42893633e-02, 4.39742628e-03, ...,
5.30032061e-05, 6.26252750e-06, 1.70798900e-18],
[ 1.89478030e-01, -2.09399049e-01, -2.76366128e-02, ...,
1.48036863e-05, -1.46649930e-05, -3.06181764e-17]]),
array([ 5.33528314e-01, 2.67634832e-01, 1.19566736e-01,
5.64872882e-02, 9.73877069e-03, 5.23753878e-03,
3.72122950e-03, 1.51146779e-03, 7.99670204e-04,
4.06529861e-04, 2.52276494e-04, 2.26661401e-04,
2.11853247e-04, 1.22702604e-04, 1.17321196e-04,
8.07104363e-05, 6.77714287e-05, 4.69968256e-05,
3.56713590e-05, 2.69334278e-05, 2.44514171e-05,
2.07915042e-05, 1.95676424e-05, 1.54009765e-05,
1.23611121e-05, 1.08871656e-05, 9.61299243e-06,
6.70202907e-06, 6.64270681e-06, 5.79694538e-06,
4.56282719e-06, 4.45952680e-06, 3.64203895e-06,
3.45469255e-06, 3.01981071e-06, 2.92312261e-06,
2.21611075e-06, 2.02000013e-06, 1.68889186e-06,
1.53493008e-06, 1.47709182e-06, 1.26335529e-06,
1.08088399e-06, 9.59795506e-07, 9.55174931e-07,
9.30004095e-07, 8.49260478e-07, 7.70955738e-07,
6.82049835e-07, 6.54014192e-07, 6.12417644e-07,
5.70379049e-07, 4.55756295e-07, 4.29478122e-07,
4.04513930e-07, 3.56006056e-07, 3.48711849e-07,
3.40547146e-07, 3.14090379e-07, 2.92223847e-07,
2.84528979e-07, 2.53052760e-07, 2.41606005e-07,
2.19906645e-07, 2.12612391e-07, 1.95247436e-07,
1.74436710e-07, 1.63810321e-07, 1.49118227e-07,
1.40312196e-07, 1.23439194e-07, 1.22591391e-07,
1.11688771e-07, 1.07695317e-07, 9.16945284e-08,
8.84770241e-08, 8.14949411e-08, 6.90421862e-08,
6.66115348e-08, 6.55742463e-08, 5.77192858e-08,
5.40072522e-08, 5.09158840e-08, 4.28843181e-08,
2.90590699e-08, 2.72866010e-08, 1.66379267e-08,
4.85593985e-33]))
In [22]:
from elviz_pca import plot_pca_results
In [23]:
import elviz_utils
tmp = elviz_utils.read_sample_info(main_dir=MAIN_DIR)
In [24]:
tmp.head()
Out[24]:
ID
oxy
rep
week
project
0
1_LOW4
Low
1
4
1056013
1
13_LOW5
Low
1
5
1056037
2
25_LOW6
Low
1
6
1056061
3
37_LOW7
Low
1
7
1056085
4
49_LOW8
Low
1
8
1056109
In [25]:
type(tmp['rep'][0])
Out[25]:
numpy.int64
In [26]:
import elviz_pca
pca_input, data_transformed, variances = elviz_pca.run_pca(
main_dir=MAIN_DIR, top_percent=20, genus_only=False)
loading ..//results/reduced_data--all_taxonomy_remains.csv
number of rows to keep: 223
(223, 89)
principal components' contribution to variance:
[ 0.25107297 0.11279974 0.02895022 0.01037814 0.00521771 0.00357079
0.00308673 0.00267789 0.00149589]
(first 10)
../elviz_pca.py:86: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
data.sort(columns='variance', ascending=False, inplace=True)
In [27]:
elviz_pca.colnames_to_sample_info_array(pca_input, main_dir=MAIN_DIR)
Out[27]:
ID
oxy
rep
week
project
0
100_LOW12
Low
4
12
1056214
1
103_HOW12
High
1
12
1056217
2
104_HOW12
High
2
12
1056220
3
105_HOW12
High
3
12
1056223
4
106_HOW12
High
4
12
1056226
5
109_LOW13
Low
1
13
1056229
6
10_HOW4
High
4
4
1056034
7
110_LOW13
Low
2
13
1056232
8
111_LOW13
Low
3
13
1056235
9
112_LOW13
Low
4
13
1056238
10
115_HOW13
High
1
13
1056241
11
116_HOW13
High
2
13
1056244
12
117_HOW13
High
3
13
1056247
13
118_HOW13
High
4
13
1056250
14
121_LOW14
Low
1
14
1056253
15
122_LOW14
Low
2
14
1056256
16
123_LOW14
Low
3
14
1056259
17
124_LOW14
Low
4
14
1056262
18
127_HOW14
High
1
14
1056265
19
128_HOW14
High
2
14
1056268
20
129_HOW14
High
3
14
1056271
21
130_HOW14
High
4
14
1056274
22
13_LOW5
Low
1
5
1056037
23
14_LOW5
Low
2
5
1056040
24
15_LOW5
Low
3
5
1056043
25
16_LOW5
Low
4
5
1056046
26
19_HOW5
High
1
5
1056049
27
1_LOW4
Low
1
4
1056013
28
20_HOW5
High
2
5
1056052
29
21_HOW5
High
3
5
1056055
...
...
...
...
...
...
58
61_LOW9
Low
1
9
1056133
59
62_LOW9
Low
2
9
1056136
60
63_LOW9
Low
3
9
1056139
61
64_LOW9
Low
4
9
1056142
62
67_HOW9
High
1
9
1056145
63
68_HOW9
High
2
9
1056148
64
69_HOW9
High
3
9
1056151
65
70_HOW9
High
4
9
1056154
66
73_LOW10
Low
1
10
1056157
67
74_LOW10
Low
2
10
1056160
68
75_LOW10
Low
3
10
1056163
69
76_LOW10
Low
4
10
1056166
70
79_HOW10
High
1
10
1056169
71
7_HOW4
High
1
4
1056025
72
80_HOW10
High
2
10
1056172
73
81_HOW10
High
3
10
1056175
74
82_HOW10
High
4
10
1056178
75
85_LOW11
Low
1
11
1056181
76
86_LOW11
Low
2
11
1056184
77
87_LOW11
Low
3
11
1056187
78
88_LOW11
Low
4
11
1056190
79
8_HOW4
High
2
4
1056028
80
91_HOW11
High
1
11
1056193
81
92_HOW11
High
2
11
1056196
82
93_HOW11
High
3
11
1056199
83
94_HOW11
High
4
11
1056202
84
97_LOW12
Low
1
12
1056205
85
98_LOW12
Low
2
12
1056208
86
99_LOW12
Low
3
12
1056211
87
9_HOW4
High
3
4
1056031
88 rows × 5 columns
In [28]:
pca_input.tail(88)
Out[28]:
Kingdom
Bacteria
Phylum
Proteobacteria
Bacteroidetes
Proteobacteria
Bacteroidetes
Deinococcus-Thermus
Proteobacteria
Deferribacteres
Proteobacteria
Chlorobi
Proteobacteria
Class
Gammaproteobacteria
Betaproteobacteria
Gammaproteobacteria
Betaproteobacteria
Flavobacteriia
Gammaproteobacteria
Betaproteobacteria
Gammaproteobacteria
...
Betaproteobacteria
Sphingobacteriia
Deinococci
Gammaproteobacteria
Deltaproteobacteria
Deferribacteres
Gammaproteobacteria
Chlorobia
Gammaproteobacteria
Order
Methylococcales
Methylophilales
Methylococcales
Methylophilales
Flavobacteriales
Methylococcales
Burkholderiales
Pseudomonadales
...
Burkholderiales
Sphingobacteriales
Deinococcales
Oceanospirillales
Desulfobacterales
Deferribacterales
Xanthomonadales
Chromatiales
Chlorobiales
Chromatiales
Family
Methylococcaceae
Methylophilaceae
Methylococcaceae
Methylophilaceae
Flavobacteriaceae
Methylococcaceae
Comamonadaceae
Pseudomonadaceae
...
Comamonadaceae
Sphingobacteriaceae
Deinococcaceae
Saccharospirillaceae
Desulfobacteraceae
Deferribacteraceae
Xanthomonadaceae
Chromatiaceae
Chlorobiaceae
Ectothiorhodospiraceae
Genus
Methylobacter
Methylophilus
Methylosarcina
Methylotenera
Flavobacterium
other
Methylomonas
other
Acidovorax
Pseudomonas
...
Verminephrobacter
Sphingobacterium
Deinococcus
Saccharospirillum
other
Denitrovibrio
Pseudoxanthomonas
Thiocapsa
Chlorobaculum
Thiorhodospira
ID
fraction of reads
100_LOW12
0.488347
0.001117
0.004356
0.095586
0.005594
0.070054
0.014736
0.023301
0.013642
0.001855
...
0.000037
0.000071
4.987309e-05
2.860686e-06
1.537754e-04
4.318017e-07
7.324436e-05
1.149672e-05
0.000000e+00
2.491496e-04
103_HOW12
0.095143
0.214184
0.001051
0.302930
0.013650
0.015208
0.002367
0.044032
0.024963
0.001656
...
0.000058
0.000017
6.072379e-06
3.881314e-06
2.748221e-05
1.001630e-06
2.015779e-05
6.385388e-05
1.377241e-05
5.133351e-06
104_HOW12
0.108404
0.644610
0.001257
0.019329
0.045588
0.010212
0.002900
0.013116
0.027520
0.000988
...
0.000019
0.000105
1.033160e-04
2.823268e-05
1.595035e-05
0.000000e+00
1.356058e-05
3.117821e-05
3.034457e-05
2.778807e-06
105_HOW12
0.503628
0.301190
0.000265
0.002308
0.000029
0.017908
0.001635
0.015355
0.018101
0.001304
...
0.000071
0.000003
4.957451e-05
7.813846e-07
0.000000e+00
3.472821e-07
3.286157e-05
1.128667e-06
0.000000e+00
0.000000e+00
106_HOW12
0.623003
0.115445
0.000425
0.011839
0.000216
0.034644
0.002661
0.007422
0.032060
0.022840
...
0.000039
0.000006
7.347420e-06
0.000000e+00
6.514954e-07
5.791070e-07
6.587342e-06
8.324663e-07
0.000000e+00
5.501517e-06
109_LOW13
0.549671
0.001215
0.008294
0.078632
0.006842
0.101824
0.011319
0.003845
0.004489
0.001554
...
0.000201
0.000001
2.392433e-06
4.349878e-07
5.099023e-06
2.899919e-06
1.464459e-05
6.113995e-06
0.000000e+00
3.331040e-04
10_HOW4
0.108557
0.001388
0.029223
0.267243
0.009625
0.086553
0.012908
0.027507
0.064535
0.005225
...
0.000328
0.000268
6.988440e-05
0.000000e+00
6.579256e-05
1.658274e-05
3.833412e-05
2.293587e-05
0.000000e+00
7.193032e-05
110_LOW13
0.624100
0.004370
0.008252
0.002985
0.000440
0.096872
0.013217
0.002676
0.002308
0.001507
...
0.000177
0.000001
3.979480e-07
3.122361e-06
1.385471e-04
0.000000e+00
5.816163e-07
1.530569e-07
0.000000e+00
4.477221e-04
111_LOW13
0.591209
0.001814
0.006342
0.024772
0.000724
0.045191
0.020545
0.065785
0.006153
0.002180
...
0.000029
0.000010
1.990599e-05
6.512975e-05
1.517890e-04
6.642858e-04
1.047064e-05
8.745928e-05
0.000000e+00
1.277654e-05
112_LOW13
0.426575
0.001615
0.003870
0.151965
0.014213
0.060510
0.011408
0.033445
0.022253
0.001634
...
0.000090
0.000047
8.571977e-05
1.889808e-06
3.280513e-05
2.093326e-04
5.606431e-05
1.362116e-04
0.000000e+00
2.001743e-04
115_HOW13
0.139907
0.531892
0.002331
0.009936
0.046919
0.020536
0.010341
0.019402
0.021375
0.002117
...
0.000021
0.000102
4.788005e-04
5.975797e-06
7.940442e-06
0.000000e+00
6.917189e-06
9.782297e-06
1.125578e-05
1.964645e-06
116_HOW13
0.577665
0.008565
0.002411
0.075257
0.002467
0.033401
0.008076
0.002461
0.030716
0.001120
...
0.000072
0.000003
2.854589e-06
0.000000e+00
1.106153e-06
0.000000e+00
1.413735e-04
2.033895e-06
4.638707e-07
1.248883e-06
117_HOW13
0.627980
0.158158
0.000006
0.000889
0.003578
0.023004
0.000895
0.013047
0.028380
0.000912
...
0.000095
0.000008
1.064017e-05
0.000000e+00
8.356151e-07
0.000000e+00
7.024738e-05
1.108583e-05
1.030592e-06
8.634690e-07
118_HOW13
0.820344
0.021599
0.000002
0.002262
0.000936
0.036120
0.002363
0.001366
0.032375
0.000862
...
0.000022
0.000002
7.258539e-07
5.764134e-07
1.494405e-07
2.561837e-07
1.067432e-06
1.216873e-06
0.000000e+00
0.000000e+00
121_LOW14
0.485900
0.001651
0.005868
0.170419
0.005126
0.073538
0.011976
0.004159
0.004675
0.001454
...
0.000156
0.000003
4.533444e-06
8.216867e-07
6.035147e-05
0.000000e+00
2.011716e-05
1.011525e-05
0.000000e+00
2.126185e-04
122_LOW14
0.480962
0.001171
0.005191
0.167625
0.004891
0.080840
0.012005
0.004433
0.004777
0.001591
...
0.000195
0.000002
8.431820e-06
7.432303e-07
3.972438e-06
2.819150e-07
1.499275e-05
5.125727e-06
0.000000e+00
2.177665e-04
123_LOW14
0.600027
0.000175
0.007641
0.042407
0.000704
0.052921
0.016780
0.033425
0.002562
0.001715
...
0.000010
0.000058
5.071128e-05
6.626577e-05
1.263461e-04
0.000000e+00
1.523612e-05
8.504941e-06
4.548097e-07
6.690250e-05
124_LOW14
0.448345
0.001526
0.003970
0.178582
0.004939
0.057056
0.017238
0.017823
0.007345
0.001786
...
0.000026
0.000019
5.302959e-05
0.000000e+00
5.775464e-05
0.000000e+00
4.005388e-05
1.579620e-04
0.000000e+00
2.013961e-04
127_HOW14
0.293167
0.360175
0.004369
0.005573
0.017450
0.025932
0.016127
0.034102
0.038585
0.003559
...
0.000035
0.000077
8.363412e-06
4.304698e-06
4.661373e-05
0.000000e+00
1.236063e-05
8.547899e-06
2.398331e-06
0.000000e+00
128_HOW14
0.287315
0.205689
0.003683
0.089049
0.000072
0.039051
0.008891
0.027352
0.026710
0.002172
...
0.000038
0.000007
3.495655e-05
5.064393e-05
5.767151e-05
0.000000e+00
6.120797e-06
1.963189e-05
7.027582e-06
7.027582e-06
129_HOW14
0.675325
0.121108
0.000002
0.000184
0.001955
0.027595
0.000746
0.010546
0.036585
0.000619
...
0.000081
0.000013
1.228408e-05
0.000000e+00
1.570920e-06
0.000000e+00
5.454441e-05
6.154916e-06
0.000000e+00
0.000000e+00
130_HOW14
0.776829
0.082110
0.000423
0.015372
0.000837
0.009626
0.000802
0.002346
0.023159
0.000575
...
0.000009
0.000004
5.494032e-07
0.000000e+00
0.000000e+00
0.000000e+00
8.241047e-07
4.512954e-07
0.000000e+00
0.000000e+00
13_LOW5
0.296807
0.000652
0.012291
0.101496
0.083633
0.054318
0.011619
0.021709
0.013728
0.005804
...
0.000068
0.000138
9.557242e-05
6.029609e-05
3.731209e-04
8.391724e-06
2.840754e-04
4.273563e-05
9.634943e-05
4.739770e-05
14_LOW5
0.191132
0.000793
0.010412
0.096627
0.060451
0.042446
0.027743
0.023251
0.014986
0.004226
...
0.000083
0.000267
5.609193e-05
3.555484e-05
1.197569e-04
8.163494e-05
1.309240e-04
1.488939e-04
7.585888e-05
3.388620e-05
15_LOW5
0.228825
0.000953
0.008665
0.089280
0.117195
0.041856
0.020931
0.022999
0.022595
0.006466
...
0.000084
0.000115
9.084235e-05
2.123587e-05
7.034384e-05
3.391841e-06
1.187144e-04
5.884107e-05
9.806845e-05
1.459966e-05
16_LOW5
0.163048
0.001315
0.005561
0.126366
0.050466
0.027777
0.009806
0.019255
0.019322
0.002843
...
0.000043
0.000139
3.373151e-05
2.240376e-05
1.422261e-04
2.026408e-05
2.237859e-04
6.998029e-05
1.189413e-04
1.308984e-05
19_HOW5
0.168388
0.100792
0.028376
0.033117
0.002275
0.046880
0.043006
0.001692
0.001442
0.007375
...
0.000027
0.000082
8.530689e-04
2.763254e-04
1.641046e-04
0.000000e+00
3.730129e-04
1.134889e-04
1.426537e-05
1.120095e-04
1_LOW4
0.205558
0.001062
0.013274
0.185898
0.065548
0.052938
0.018689
0.024610
0.015060
0.006089
...
0.000069
0.000063
1.321793e-04
6.184536e-05
6.912129e-05
7.795634e-06
2.886117e-04
9.112230e-05
3.412755e-05
1.067136e-04
20_HOW5
0.227233
0.002000
0.105138
0.125292
0.001835
0.151221
0.072769
0.009883
0.006741
0.003867
...
0.000029
0.000201
8.795311e-05
1.393405e-05
2.209922e-05
1.686286e-06
5.680120e-06
1.380979e-04
0.000000e+00
3.425823e-05
21_HOW5
0.063475
0.003918
0.362765
0.168471
0.003863
0.071065
0.016795
0.017478
0.025674
0.003073
...
0.000087
0.000036
1.581507e-04
1.131144e-05
1.663201e-04
0.000000e+00
4.294158e-06
9.384306e-05
0.000000e+00
2.022444e-04
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
61_LOW9
0.259341
0.001694
0.003472
0.331278
0.006651
0.020826
0.007525
0.034864
0.014102
0.002201
...
0.000035
0.000133
1.833224e-05
1.499004e-04
1.106170e-04
3.130199e-05
1.471568e-05
2.519124e-05
1.101805e-04
1.138594e-04
62_LOW9
0.217050
0.001391
0.003062
0.274918
0.001279
0.029445
0.008003
0.044680
0.006629
0.003392
...
0.000078
0.000117
7.713761e-05
4.245494e-05
1.469208e-04
3.635413e-05
3.576912e-05
2.976860e-04
1.593732e-04
5.515799e-05
63_LOW9
0.560437
0.000028
0.004037
0.007504
0.003088
0.031230
0.008996
0.063068
0.019030
0.002061
...
0.000156
0.000048
5.843710e-05
1.945811e-06
1.550372e-05
3.640550e-06
4.644840e-06
9.666288e-06
1.001779e-04
1.795168e-05
64_LOW9
0.328550
0.003173
0.002825
0.246226
0.000529
0.033354
0.006297
0.057873
0.003401
0.003843
...
0.000045
0.000251
3.951767e-05
5.599991e-05
1.584678e-04
1.687941e-06
1.191488e-05
5.113467e-05
1.631345e-04
3.455314e-05
67_HOW9
0.346218
0.009057
0.005759
0.007988
0.005884
0.048541
0.011834
0.009022
0.004166
0.001934
...
0.000014
0.000618
2.765252e-05
0.000000e+00
3.152387e-05
1.050796e-06
1.648090e-05
1.083979e-05
7.742705e-07
1.825619e-04
68_HOW9
0.300044
0.204176
0.028888
0.010294
0.030109
0.047593
0.011053
0.017736
0.015059
0.001393
...
0.000098
0.000446
3.034101e-05
2.670463e-06
1.840347e-04
0.000000e+00
1.210231e-05
7.784116e-06
5.170471e-06
1.174436e-04
69_HOW9
0.564852
0.038001
0.019171
0.003570
0.002371
0.099160
0.015089
0.005411
0.004932
0.002013
...
0.000236
0.000011
1.093312e-05
7.778504e-07
8.249536e-05
0.000000e+00
4.839958e-06
4.221999e-05
0.000000e+00
2.769580e-04
70_HOW9
0.165028
0.371372
0.001719
0.003893
0.007440
0.036695
0.008325
0.077607
0.004133
0.001512
...
0.000014
0.000090
1.466975e-04
1.287156e-04
7.011494e-05
0.000000e+00
5.208399e-05
6.026651e-06
0.000000e+00
2.871234e-05
73_LOW10
0.416043
0.000760
0.005120
0.187773
0.003965
0.030843
0.010439
0.026816
0.017471
0.003266
...
0.000027
0.000107
3.544825e-05
2.489045e-04
2.095634e-04
4.518029e-05
2.099763e-05
1.578361e-04
1.640292e-04
4.346981e-05
74_LOW10
0.409655
0.000278
0.005007
0.099970
0.001549
0.054321
0.012147
0.018187
0.048268
0.003773
...
0.000062
0.000049
3.161783e-05
2.141853e-05
6.027787e-05
1.083166e-04
1.529895e-05
2.051079e-04
2.083717e-04
2.702815e-05
75_LOW10
0.568736
0.000124
0.003166
0.029720
0.003759
0.042890
0.012660
0.023468
0.072677
0.001202
...
0.000064
0.000055
1.553640e-05
0.000000e+00
2.591604e-05
5.288988e-07
1.818090e-05
5.024538e-06
3.761792e-05
3.371730e-06
76_LOW10
0.531414
0.001128
0.004893
0.092487
0.000464
0.038097
0.010041
0.014717
0.014600
0.003867
...
0.000045
0.000247
1.800965e-05
4.720712e-05
1.334352e-04
2.701448e-05
5.184597e-06
3.097114e-05
1.237481e-04
2.121592e-05
79_HOW10
0.037890
0.396289
0.000315
0.000672
0.055729
0.006543
0.000761
0.093931
0.030499
0.001704
...
0.000085
0.000089
3.943597e-05
0.000000e+00
5.484882e-05
1.474686e-06
7.780636e-04
2.673464e-05
0.000000e+00
1.018009e-05
7_HOW4
0.070583
0.001432
0.036926
0.261409
0.062637
0.046281
0.011203
0.055767
0.025060
0.003147
...
0.000022
0.000170
8.004216e-05
4.664111e-06
1.903258e-04
0.000000e+00
6.048299e-05
1.193110e-04
0.000000e+00
4.363201e-06
80_HOW10
0.546378
0.087030
0.006830
0.003109
0.001336
0.094087
0.012387
0.003521
0.003589
0.001448
...
0.000181
0.000049
1.236230e-05
4.262861e-07
7.204235e-05
0.000000e+00
5.344972e-06
1.298533e-05
3.279124e-07
2.426551e-04
81_HOW10
0.592378
0.045446
0.007616
0.005587
0.000533
0.053661
0.018573
0.026196
0.002594
0.001973
...
0.000013
0.000072
2.642319e-05
8.842149e-05
1.248883e-04
0.000000e+00
5.444210e-06
8.865615e-05
2.815971e-06
1.553477e-05
82_HOW10
0.589218
0.045236
0.006280
0.005653
0.000538
0.048754
0.020742
0.025953
0.002402
0.002216
...
0.000009
0.000069
2.415791e-05
9.043094e-05
1.302642e-04
0.000000e+00
8.581763e-06
1.458404e-05
6.944779e-07
1.875090e-05
85_LOW11
0.507074
0.113178
0.006249
0.004074
0.013902
0.080024
0.010533
0.008222
0.004840
0.002109
...
0.000225
0.000014
1.700662e-05
3.494511e-07
1.697556e-04
0.000000e+00
3.063522e-04
9.435181e-06
1.164837e-06
4.022183e-04
86_LOW11
0.527974
0.102666
0.005124
0.003424
0.009308
0.089673
0.014454
0.006971
0.006078
0.001528
...
0.000196
0.000016
7.780055e-06
1.479902e-06
5.727220e-04
0.000000e+00
1.202526e-04
4.676489e-05
0.000000e+00
3.085807e-04
87_LOW11
0.604069
0.025104
0.007358
0.006104
0.000168
0.044198
0.025904
0.045354
0.003199
0.002263
...
0.000019
0.000030
5.674256e-06
8.874833e-05
1.409292e-06
0.000000e+00
8.047801e-06
8.322242e-05
0.000000e+00
6.449367e-05
88_LOW11
0.387438
0.068999
0.041732
0.040359
0.008034
0.058913
0.018548
0.029895
0.015316
0.001571
...
0.000068
0.000075
1.053374e-05
0.000000e+00
1.242489e-04
0.000000e+00
2.827478e-05
1.570821e-05
0.000000e+00
2.882919e-04
8_HOW4
0.084548
0.001438
0.224498
0.196150
0.023124
0.060101
0.025193
0.021193
0.024959
0.004678
...
0.000079
0.000340
7.317771e-05
7.567240e-05
3.450994e-04
0.000000e+00
1.496817e-05
5.621379e-05
0.000000e+00
2.544589e-05
91_HOW11
0.389379
0.016662
0.003789
0.030921
0.078917
0.032652
0.007996
0.047431
0.092666
0.003133
...
0.000121
0.000072
3.917161e-05
1.491565e-04
4.902459e-05
4.117425e-05
5.839694e-05
1.834417e-05
2.155640e-04
1.610121e-05
92_HOW11
0.233440
0.000607
0.003180
0.180772
0.033624
0.045716
0.007741
0.037343
0.062884
0.060534
...
0.000233
0.000059
2.100244e-05
2.268660e-05
5.082194e-05
3.269247e-06
1.248258e-05
2.922509e-04
2.286492e-04
3.774495e-05
93_HOW11
0.485999
0.071349
0.001436
0.018022
0.000622
0.026948
0.004140
0.045792
0.058415
0.027832
...
0.000149
0.000023
1.628259e-05
0.000000e+00
8.641596e-07
0.000000e+00
5.198602e-05
4.684655e-06
1.819283e-06
1.046088e-06
94_HOW11
0.434607
0.028545
0.003096
0.028454
0.002074
0.034105
0.008123
0.020138
0.039256
0.091009
...
0.000117
0.000020
3.348089e-06
1.156612e-05
1.443591e-05
4.696020e-06
8.044109e-06
3.261125e-06
1.095738e-05
5.304764e-06
97_LOW12
0.574470
0.015834
0.003236
0.030945
0.007054
0.107628
0.011502
0.006552
0.005406
0.001333
...
0.000217
0.000004
5.552006e-06
6.212035e-07
4.621366e-04
1.164757e-06
2.787651e-05
7.765044e-06
0.000000e+00
2.378045e-04
98_LOW12
0.599362
0.037089
0.007644
0.003611
0.001496
0.097214
0.014469
0.005485
0.005372
0.001795
...
0.000199
0.000005
2.711372e-06
2.357715e-07
1.006744e-04
0.000000e+00
5.658516e-06
1.414629e-06
0.000000e+00
2.636318e-04
99_LOW12
0.551515
0.022149
0.005952
0.019807
0.000631
0.045992
0.024177
0.085400
0.006382
0.001674
...
0.000029
0.000035
7.313862e-06
8.171913e-05
1.258066e-04
6.289513e-04
6.741828e-06
2.737591e-06
3.677361e-07
1.135896e-05
9_HOW4
0.140160
0.000780
0.027935
0.253807
0.036372
0.046760
0.018612
0.034299
0.025357
0.008610
...
0.000135
0.000144
2.513789e-04
1.508273e-05
1.394010e-04
3.961122e-06
2.190805e-04
7.495661e-05
0.000000e+00
2.254792e-05
88 rows × 223 columns
In [29]:
plot_pca_results(main_dir=MAIN_DIR)
loading ..//results/reduced_data--all_taxonomy_remains.csv
number of rows to keep: 223
(223, 89)
principal components' contribution to variance:
[ 0.25107297 0.11279974 0.02895022 0.01037814 0.00521771 0.00357079
0.00308673 0.00267789 0.00149589]
(first 10)
preparing colors for 7 pre-oxygen-switch samples and 4 post-switch samples
[[0.8423298817793848, 0.8737404427964184, 0.7524954030731037], [0.7294791289837665, 0.828271237142885, 0.6624387159929747], [0.6081786671884791, 0.7816177694090097, 0.6017787295654944], [0.4926406966923403, 0.7267784825949665, 0.5693088339489007], [0.39493837953687966, 0.6591653753197486, 0.5567106633592679], [0.3256286372570367, 0.5824294714811111, 0.551260440725878], [0.2775060539006816, 0.4896743690748741, 0.5382978119096076]]
11
{'col': 'rep', 'ylim': (-0.45, 0.5), 'row': 'oxy', 'xlim': (-0.45, 0.5)}
../elviz_pca.py:86: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
data.sort(columns='variance', ascending=False, inplace=True)
<matplotlib.figure.Figure at 0x113d299e8>
In [30]:
from elviz_pca import build_color_palette
In [31]:
build_color_palette(num_items=14-4+1, weeks_before_switch=7)
preparing colors for 7 pre-oxygen-switch samples and 4 post-switch samples
[[0.8423298817793848, 0.8737404427964184, 0.7524954030731037], [0.7294791289837665, 0.828271237142885, 0.6624387159929747], [0.6081786671884791, 0.7816177694090097, 0.6017787295654944], [0.4926406966923403, 0.7267784825949665, 0.5693088339489007], [0.39493837953687966, 0.6591653753197486, 0.5567106633592679], [0.3256286372570367, 0.5824294714811111, 0.551260440725878], [0.2775060539006816, 0.4896743690748741, 0.5382978119096076]]
11
Out[31]:
[[0.8423298817793848, 0.8737404427964184, 0.7524954030731037],
[0.7294791289837665, 0.828271237142885, 0.6624387159929747],
[0.6081786671884791, 0.7816177694090097, 0.6017787295654944],
[0.4926406966923403, 0.7267784825949665, 0.5693088339489007],
[0.39493837953687966, 0.6591653753197486, 0.5567106633592679],
[0.3256286372570367, 0.5824294714811111, 0.551260440725878],
[0.2775060539006816, 0.4896743690748741, 0.5382978119096076],
(0.48385432959999997, 0.62204984959999998, 0.97480820260000001),
(0.75361061799999995, 0.83023285099999999, 0.96087115700000003),
(0.94734540359999997, 0.79469550479999995, 0.71699050580000001),
(0.93183129659999997, 0.51908552320000001, 0.40647960859999999)]
In [32]:
plot_pca_results(main_dir=MAIN_DIR, top_percent=20, genus_only=False)
loading ..//results/reduced_data--all_taxonomy_remains.csv
number of rows to keep: 223
(223, 89)
principal components' contribution to variance:
[ 0.25107297 0.11279974 0.02895022 0.01037814 0.00521771 0.00357079
0.00308673 0.00267789 0.00149589]
(first 10)
preparing colors for 7 pre-oxygen-switch samples and 4 post-switch samples
[[0.8423298817793848, 0.8737404427964184, 0.7524954030731037], [0.7294791289837665, 0.828271237142885, 0.6624387159929747], [0.6081786671884791, 0.7816177694090097, 0.6017787295654944], [0.4926406966923403, 0.7267784825949665, 0.5693088339489007], [0.39493837953687966, 0.6591653753197486, 0.5567106633592679], [0.3256286372570367, 0.5824294714811111, 0.551260440725878], [0.2775060539006816, 0.4896743690748741, 0.5382978119096076]]
11
{'col': 'rep', 'ylim': (-0.45, 0.5), 'row': 'oxy', 'xlim': (-0.45, 0.5)}
../elviz_pca.py:86: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
data.sort(columns='variance', ascending=False, inplace=True)
<matplotlib.figure.Figure at 0x113d29978>
In [33]:
from elviz_utils import read_sample_info
In [34]:
read_sample_info(main_dir=MAIN_DIR).head()
Out[34]:
ID
oxy
rep
week
project
0
1_LOW4
Low
1
4
1056013
1
13_LOW5
Low
1
5
1056037
2
25_LOW6
Low
1
6
1056061
3
37_LOW7
Low
1
7
1056085
4
49_LOW8
Low
1
8
1056109
In [35]:
type(read_sample_info(main_dir=MAIN_DIR).rep[0])
Out[35]:
numpy.int64
In [36]:
import seaborn as sns
In [37]:
plot_pca_results(main_dir=MAIN_DIR, top_percent=20, genus_only=False, facet_row=False)
loading ..//results/reduced_data--all_taxonomy_remains.csv
number of rows to keep: 223
(223, 89)
principal components' contribution to variance:
[ 0.25107297 0.11279974 0.02895022 0.01037814 0.00521771 0.00357079
0.00308673 0.00267789 0.00149589]
(first 10)
preparing colors for 7 pre-oxygen-switch samples and 4 post-switch samples
[[0.8423298817793848, 0.8737404427964184, 0.7524954030731037], [0.7294791289837665, 0.828271237142885, 0.6624387159929747], [0.6081786671884791, 0.7816177694090097, 0.6017787295654944], [0.4926406966923403, 0.7267784825949665, 0.5693088339489007], [0.39493837953687966, 0.6591653753197486, 0.5567106633592679], [0.3256286372570367, 0.5824294714811111, 0.551260440725878], [0.2775060539006816, 0.4896743690748741, 0.5382978119096076]]
11
{'ylim': (-0.45, 0.5), 'xlim': (-0.45, 0.5)}
../elviz_pca.py:86: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
data.sort(columns='variance', ascending=False, inplace=True)
<matplotlib.figure.Figure at 0x113d298d0>
In [ ]:
In [ ]:
Content source: JanetMatsen/elvizAnalysis
Similar notebooks: