In [1]:
# Calour provides the experiment object and the I/O helpers used below.
import calour as ca
# Level 11 sits just above Python's DEBUG (10), so the INFO and WARNING
# messages printed while loading the data are shown.
# NOTE(review): assumes Calour follows the standard `logging` numeric levels - confirm.
ca.set_log_level(11)
import numpy as np
import matplotlib.pyplot as plt
# Interactive matplotlib backend.
# NOTE(review): no plotting call is visible in this notebook, so this magic
# (and the np/plt imports) may be unnecessary here - confirm before removing.
%matplotlib notebook
/Users/amnon/miniconda3/envs/calour/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
We use the chronic fatigue syndrome data from:
Giloteaux, L., Goodrich, J.K., Walters, W.A., Levine, S.M., Ley, R.E. and Hanson, M.R., 2016.
Reduced diversity and altered composition of the gut microbiome in individuals with myalgic encephalomyelitis/chronic fatigue syndrome.
Microbiome, 4(1), p.30.
In [2]:
# Load the feature table (biom) together with its per-sample metadata file.
# normalize / min_reads: presumably rescale every sample to 10000 total reads
# and drop samples with fewer than 1000 reads - confirm against the Calour
# `read_amplicon` documentation.
cfs = ca.read_amplicon(
    'data/chronic-fatigue-syndrome.biom',
    'data/chronic-fatigue-syndrome.sample.txt',
    normalize=10000,
    min_reads=1000,
)
2018-03-04 12:36:35 INFO loaded 87 samples, 2129 features
2018-03-04 12:36:35 WARNING These have metadata but do not have data - dropped: {'ERR1331814'}
2018-03-04 12:36:35 INFO After filtering, 87 remaining
Experiment class
Calour stores the experiment as two pandas.DataFrame objects (sample_metadata and feature_metadata) and a (sparse or dense) data matrix.
The order in the dataframes and the table is synchronized, so entry number X in the sample_metadata dataframe always corresponds to row X in the data matrix (and similarly entry Y in the feature_metadata always corresponds to column Y in the data matrix).
In [3]:
# One-line summary of the experiment: source file name plus sample and feature counts.
print(cfs)
AmpliconExperiment ("chronic-fatigue-syndrome.biom") with 87 samples, 2129 features
In [4]:
# Per-sample metadata table (DataFrame indexed by #SampleID), one row per sample.
cfs.sample_metadata
Out[4]:
BioSample_s
Experiment_s
MBases_l
MBytes_l
Run_s
SRA_Sample_s
Sample_Name_s
Assay_Type_s
AssemblyName_s
BioProject_s
...
Description
Subject
Emotional_well_being
Role_physical
Bell
Physical_functioning
Pain
Age
BMI
_calour_original_abundance
#SampleID
ERR1331798
SAMEA3904128
ERX1403418
43
29
ERR1331798
ERS1091262
LR16
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
72.0
0.0
20.0
75.0
23.0
50
37.59
62629.0
ERR1331812
SAMEA3904142
ERX1403432
77
54
ERR1331812
ERS1091276
LR72
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
56.0
NaN
30.0
60.0
68.0
64
22.85
96404.0
ERR1331836
SAMEA3904166
ERX1403456
83
56
ERR1331836
ERS1091300
LR42
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
35
30.66
105470.0
ERR1331831
SAMEA3904161
ERX1403451
38
26
ERR1331831
ERS1091295
IC10
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
45
22.24
50560.0
ERR1331815
SAMEA3904145
ERX1403435
49
33
ERR1331815
ERS1091279
LR75
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
NaN
NaN
NaN
NaN
NaN
41
32.30
66414.0
ERR1331870
SAMEA3904200
ERX1403490
61
42
ERR1331870
ERS1091334
LR31
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
72.0
0.0
NaN
0.0
20.0
50
21.96
97011.0
ERR1331791
SAMEA3904121
ERX1403411
52
35
ERR1331791
ERS1091255
LR08
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
50.0
0.0
10.0
45.0
10.0
45
25.23
77673.0
ERR1331854
SAMEA3904184
ERX1403474
46
31
ERR1331854
ERS1091318
LR51
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
46
28.34
59655.0
ERR1331853
SAMEA3904183
ERX1403473
73
48
ERR1331853
ERS1091317
IC21
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
56.0
0.0
20.0
40.0
33.0
34
25.70
100206.0
ERR1331838
SAMEA3904168
ERX1403458
24
16
ERR1331838
ERS1091302
LR40
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
8.0
0.0
50.0
50.0
23.0
27
26.47
34044.0
ERR1331796
SAMEA3904126
ERX1403416
55
38
ERR1331796
ERS1091260
LR15
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
68.0
0.0
50.0
35.0
58.0
43
23.49
74744.0
ERR1331820
SAMEA3904150
ERX1403440
66
44
ERR1331820
ERS1091284
IC06
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
47
24.36
106959.0
ERR1331804
SAMEA3904134
ERX1403424
53
35
ERR1331804
ERS1091268
LR69
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
48.0
0.0
20.0
25.0
33.0
50
17.70
75014.0
ERR1331868
SAMEA3904198
ERX1403488
41
27
ERR1331868
ERS1091332
LR33
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
88.0
0.0
20.0
5.0
10.0
64
16.30
64515.0
ERR1331789
SAMEA3904119
ERX1403409
36
24
ERR1331789
ERS1091253
LR04
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
64.0
25.0
50.0
75.0
45.0
53
24.03
52549.0
ERR1331803
SAMEA3904133
ERX1403423
35
23
ERR1331803
ERS1091267
LR80
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
12.0
0.0
30.0
35.0
35.0
52
23.80
54079.0
ERR1331827
SAMEA3904157
ERX1403447
28
19
ERR1331827
ERS1091291
IC15
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
48
25.92
41310.0
ERR1331842
SAMEA3904172
ERX1403462
44
29
ERR1331842
ERS1091306
LR24
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
54
27.19
66931.0
ERR1331829
SAMEA3904159
ERX1403449
46
31
ERR1331829
ERS1091293
IC12
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
24
21.52
69574.0
ERR1331787
SAMEA3904117
ERX1403407
52
36
ERR1331787
ERS1091251
LR01
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
32.0
0.0
40.0
25.0
0.0
50
24.89
70387.0
ERR1331866
SAMEA3904196
ERX1403486
67
45
ERR1331866
ERS1091330
LR35
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
56.0
0.0
20.0
15.0
0.0
19
27.44
93480.0
ERR1331861
SAMEA3904191
ERX1403481
63
43
ERR1331861
ERS1091325
LR56
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
68.0
0.0
20.0
40.0
33.0
60
23.57
84020.0
ERR1331845
SAMEA3904175
ERX1403465
25
17
ERR1331845
ERS1091309
LR27
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
64.0
75.0
90.0
95.0
90.0
48
21.45
40701.0
ERR1331797
SAMEA3904127
ERX1403417
52
36
ERR1331797
ERS1091261
LR17
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
52.0
0.0
40.0
40.0
23.0
67
32.89
61849.0
ERR1331839
SAMEA3904169
ERX1403459
91
62
ERR1331839
ERS1091303
LR41
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
16.0
0.0
40.0
55.0
68.0
50
25.18
119409.0
ERR1331852
SAMEA3904182
ERX1403472
37
25
ERR1331852
ERS1091316
IC20
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
92.0
100.0
NaN
100.0
90.0
34
25.20
51815.0
ERR1331855
SAMEA3904185
ERX1403475
42
28
ERR1331855
ERS1091319
LR50
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
42
17.41
59722.0
ERR1331871
SAMEA3904201
ERX1403491
32
22
ERR1331871
ERS1091335
LR30
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
80.0
0.0
40.0
35.0
45.0
59
21.61
47760.0
ERR1331790
SAMEA3904120
ERX1403410
67
44
ERR1331790
ERS1091254
LR05
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
32.0
0.0
20.0
20.0
23.0
51
20.98
101434.0
ERR1331830
SAMEA3904160
ERX1403450
67
47
ERR1331830
ERS1091294
IC11
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
36
30.13
85726.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
ERR1331851
SAMEA3904181
ERX1403471
73
50
ERR1331851
ERS1091315
LR29
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
48.0
0.0
40.0
35.0
45.0
62
26.62
92925.0
ERR1331856
SAMEA3904186
ERX1403476
18
12
ERR1331856
ERS1091320
LR53
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
92.0
0.0
50.0
65.0
90.0
63
30.54
22203.0
ERR1331872
SAMEA3904202
ERX1403492
45
31
ERR1331872
ERS1091336
LR38
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
50
24.89
63500.0
ERR1331793
SAMEA3904123
ERX1403413
44
29
ERR1331793
ERS1091257
LR52
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
88.0
0.0
50.0
65.0
68.0
33
22.86
64368.0
ERR1331819
SAMEA3904149
ERX1403439
55
37
ERR1331819
ERS1091283
IC05
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
33
21.77
73568.0
ERR1331817
SAMEA3904147
ERX1403437
47
32
ERR1331817
ERS1091281
IC09
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
55
46.86
69072.0
ERR1331858
SAMEA3904188
ERX1403478
63
43
ERR1331858
ERS1091322
LR55
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
48
25.06
89176.0
ERR1331833
SAMEA3904163
ERX1403453
48
32
ERR1331833
ERS1091297
LR47
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
54
28.19
64426.0
ERR1331834
SAMEA3904164
ERX1403454
43
30
ERR1331834
ERS1091298
LR44
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
49
41.40
59181.0
ERR1331810
SAMEA3904140
ERX1403430
74
51
ERR1331810
ERS1091274
LR78
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
84.0
88.0
40.0
40.0
0.0
63
24.53
100251.0
ERR1331823
SAMEA3904153
ERX1403443
54
37
ERR1331823
ERS1091287
IC19
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
43
26.15
76190.0
ERR1331848
SAMEA3904178
ERX1403468
58
39
ERR1331848
ERS1091312
LR22
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
56.0
100.0
50.0
70.0
65.0
56
22.24
84987.0
ERR1331807
SAMEA3904137
ERX1403427
50
34
ERR1331807
ERS1091271
LR65
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
88.0
0.0
20.0
25.0
10.0
54
29.95
68514.0
ERR1331800
SAMEA3904130
ERX1403420
65
44
ERR1331800
ERS1091264
LR10
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
76.0
25.0
20.0
40.0
60.0
61
25.06
84449.0
ERR1331824
SAMEA3904154
ERX1403444
59
40
ERR1331824
ERS1091288
IC18
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
48
34.15
80196.0
ERR1331841
SAMEA3904171
ERX1403461
38
27
ERR1331841
ERS1091305
LR49
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
60
30.89
48256.0
ERR1331865
SAMEA3904195
ERX1403485
36
24
ERR1331865
ERS1091329
LR36
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
20
20.80
47898.0
ERR1331809
SAMEA3904139
ERX1403429
41
28
ERR1331809
ERS1091273
LR79
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
56.0
0.0
20.0
6.0
45.0
53
28.34
55874.0
ERR1331862
SAMEA3904192
ERX1403482
74
51
ERR1331862
ERS1091326
IC3
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
52
21.79
107258.0
ERR1331846
SAMEA3904176
ERX1403466
84
57
ERR1331846
ERS1091310
LR20
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
92.0
50.0
60.0
40.0
45.0
71
20.94
110218.0
ERR1331811
SAMEA3904141
ERX1403431
43
29
ERR1331811
ERS1091275
LR73
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
36.0
25.0
50.0
50.0
33.0
39
28.66
63968.0
ERR1331835
SAMEA3904165
ERX1403455
76
52
ERR1331835
ERS1091299
LR45
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
50
23.43
94907.0
ERR1331832
SAMEA3904162
ERX1403452
37
25
ERR1331832
ERS1091296
LR46
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
72.0
0.0
30.0
56.0
23.0
65
37.20
49032.0
ERR1331859
SAMEA3904189
ERX1403479
53
36
ERR1331859
ERS1091323
LR54
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
76.0
0.0
30.0
15.0
10.0
63
25.79
77150.0
ERR1331816
SAMEA3904146
ERX1403436
59
40
ERR1331816
ERS1091280
LR74
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
64.0
25.0
30.0
50.0
NaN
51
24.39
89870.0
ERR1331818
SAMEA3904148
ERX1403438
54
38
ERR1331818
ERS1091282
IC04
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
32
32.98
71017.0
ERR1331792
SAMEA3904122
ERX1403412
32
22
ERR1331792
ERS1091256
LR09
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
24.0
100.0
90.0
80.0
65.0
43
22.14
41598.0
ERR1331857
SAMEA3904187
ERX1403477
61
42
ERR1331857
ERS1091321
IC8
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
NaN
NaN
NaN
NaN
NaN
53
29.02
89919.0
ERR1331850
SAMEA3904180
ERX1403470
54
36
ERR1331850
ERS1091314
LR28
AMPLICON
<not provided>
PRJEB13092
...
NaN
Control
60.0
75.0
NaN
80.0
90.0
50
29.65
72977.0
ERR1331795
SAMEA3904125
ERX1403415
56
38
ERR1331795
ERS1091259
LR18
AMPLICON
<not provided>
PRJEB13092
...
NaN
Patient
88.0
0.0
20.0
55.0
55.0
62
24.39
80308.0
87 rows × 59 columns
In [5]:
# Per-feature metadata table: one row per feature (indexed by its sequence) with a taxonomy column.
cfs.feature_metadata
Out[5]:
taxonomy
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGACGCTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGGTGTCTTGAGTACAGTAGAGGCAGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGGTTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGCGACCTTGAGTGCAACAGAGGTAGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGTGCGGCAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTGTCGTACTTGAGTATCGGAGAGGTAAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGG
k__Bacteria;p__Proteobacteria;c__Gammaproteoba...
AACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGACCGGCAAGTTGGAAGTGAAAACTATGGGCTCAACCCATAAATTGCTTTCAAAACTGCTGGCCTTGAGTAGTGCAGAGGTAGGTGGAATTCCCGG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
AACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAAACCATGGGCTCAACCCATGAATTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGATGGAATTCCCGG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGCAGTCTTGAGTGCAGTAGAGGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCTATGGGCTCAACCCATAAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCGAGCGTTGTCCGGAATGATTGGGCGTAAAGGGCGTGTAGGCGGCTTTATAAGTCTGGAGTGAAAGTCCTGTTTTCAAGATGGGAATTGCTTTGGATACTGTAGGGCTTGAGTGCAGGAGAGGTTATCGGAATTCCCGG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGCAGCAAGTCTGATGTGAAAGGCAGGGGCTTAACCCCTGGACTGCATTGGAAACTGCTGTGCTTGAGTGCCGGAGGGGTAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCACGCCAAGTCAGCGGTGAAATTTCCGGGCTCAACCCGGAGTGTGCCGTTGAAACTGGCGAGCTAGAGTACACAAGAGGCAGGCGGAATGCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGCATGATAAGTCTGATGTGAAAACCCAAGGCTCAACCATGGGACTGCATTGGAAACTGTCGTGCTGGAGTGTCGGAGAGGTGAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGCATGATAAGTCTGATGTGAAAACCCAAGGCTCAACCATGGGACTGCATTGGAAACTGTCGTGCTGGAGTGTCGGAGAGGTAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGAGCTGCAAGTCAGATGTGAAATCTCTGGGCTCAACCCAGAAACTGCATTTGAAACTGTAGCCCTTGAGTATCGGAGAGGCAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTGATAAGTCTGAAGTTAAAGGCTGTGGCTCAACCATAGTTCGCTTTGGAAACTGTCAAACTTGAGTGCAGAAGGGGAGAGTGGAATTCCATGT
k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactob...
TACGTAGGGAGCGAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGTAGGCGGGATAGCAAGTCAGATGTGAAAACTATGGGCTCAACCTGTAGATTGCATTTGAAACTGTTGTTCTTGAGTGAAGTAGAGGTAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGACTGGCAAGTCTGATGTGAAAGGCGGGGGCTCAACCCCTGGACTGCATTGGAAACTGTTAGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAAGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGGCTTTTAAGTCAGCGGTCAAATGTCACGGCTCAACCGTGGCCAGCCGTTGAAACTGTAAGCCTTGAGTCTGCACAGGGCACATGGAATTCGTGGT
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTGTGGCAAGTCTGATGTGAAAGGCATGGGCTCAACCTGTGGACTGCATTGGAAACTGTCATACTTGAGTGCCGGAGGGGTAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCACGCCAAGTCAGCGGTGAAATTTCCGGGCTCAACCCGGAGTGTGCCGTTGAAACTGGCGAGCTAGAGTACACAAGAGGCAGGCGGAATGCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGTGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGGAGTGCAAGTCAGATGTGAAAACTATGGGCTCAACCCATAGCCTGCATTTGAAACTGTACTTCTTGAGTGATGGAGAGGCAGGCGGAATTCCCTG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTGGTGATTTAAGTCAGCGGTGAAAGTTTGTGGCTCAACCATAAAATTGCCGTTGAAACTGGGTTACTTGAGTGTGTTTGAGGTAGGCGGAATGCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGGAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCATGCAAGTCAGATGTGAAATCTCAGGGCTTAACCCTGAAACTGCATTTGAAACTGTATGTCTTGAGTGCCGGAGAGGTAATCGGAATTCCTTG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGAGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGCGCGCAGGCGGGCCGGTAAGTTGGAAGTGAAATCTATGGGCTTAACCCATAAACTGCTTTCAAAACTGCTGGTCTTGAGTGATGGAGAGGCAGGCGGAATTCCGTG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCACGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTTGGAACTGCTGAGCTAGAGTGTCGGAGAGGCAAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGATGGGCAAGTCTGATGTGAAAACCCGGGGCTCAACCCCGGGACTGCATTGGAAACTGTTCATCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
...
...
TACGGAAGGTCCAGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGCAGGCGGACCTTTAAGTCAGCTGTGAAATACGGCGGCTCAACCGTCGAACTGCAGTTGATACTGGAGGTCTTGAGTGCACACAGGGATACTGGAATTCATGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCATGTAGGCGGGCTTTTAAGTCCGACGTGAAAATGCGGGGCTTAACCCCGTATGGCGTTGGATACTGGAAGTCTTGAGTGCAGGAGAGGAAAGGGGAATTCCCAGT
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGTGGCGAGCGTTATCCGGAATCATTGGGCGTAAAGAGGGAGCAGGCGGCCGCAAGGGTCTGTGGTGAAAGACCGAAGCTAAACTTCGGTGAGCCATGGAAACCGGGCGGCTAGAGTGCGGAAGAGGATCGTGGAATTCCATGT
k__Bacteria;p__Firmicutes;c__Erysipelotrichi;o...
TACGTAGGTGGCGAGCGTTATCCGGAATGATTGGGCGTAAAGGGTACGTAGGCGGCATGATAAGTCTGGAGTGAAAGGCTACAGCTCAACTGTAGTATGCTCTGGAAACTGTCAAGCTAGAGTGCAGAAGAGGGCAATGGAACTCCATGT
k__Bacteria;p__Firmicutes;c__Erysipelotrichi;o...
TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGCGGGTTAAGCGTGTTGTGAAATGTAGGGGCTCAACCTCTGCACTGCAGCGCGAACTGGCTTGCTTGAGTACGCACAACGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAAGGTCCAGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGCAGGCGGACCTTTAAGTCAGCTGTGAAATACGGCGGCTCAACCGTCGAACTGCAGTTGATACTGGAGGTCTTGAGTGCACACAGGGGTACTGGAATTCATGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTATGGAGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGTGTAGGTGGCCATGCAAGTCAGAAGTGAAAATCCGGGGCTCAACCTCGGAACTGCTTTTGAAACTGTAAGGCTGGAGTGCAGGAGGGGTGAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGTGAGGTAAGCGTGTTGTGAAATGTAGGCGCCCAACGTCTGCACTGCAGCGCGAACTGCCCCACTTGAGTGTGCGCAACGCCGGCGGAACTCGTCG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGTCTGTTAAGCGTGTTGTGAAATGTCGGGGCTCAACCTGGGCATTGCAGCGCGAACTGGCAGACTTGAGTGCACGGGAAGTAGGCGGAATTCGTCG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGTGGCGAGCGTTATCCGGATTTACTGGGTGTAAAGGGCGCGTAGGCGGGAATGCAAGTCAGATGTGAAATCCAAGGGCTCAACCCTTGAACTGCATTTGAAACTGCATTTCTTGAGTGTCGGAGAGGTTGACGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGCCGGGTCGGCAAGTCAGATGTGAAATCTATGGGCTCAACTCATAAACTGCATTTGAAACTGTTGATCTTGAGTATCGGAGAGGCAATCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATCGCAGTTGATACTGGCAGTCTTGAGTACAGCAGAGGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTATGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCGCGTAGGCGGCCTGGCAGGCCGGGAGTCAAATCCGGGGGCTCAACCCCCGCCCGCTCCCGGAACCTTTAGGCTTGAGTCTGGCAGGGGAGGGTGGAATACCCGGT
k__Bacteria;p__Actinobacteria;c__Coriobacterii...
TACGTAGGGAGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGCGCGCAGGTGGGCCGGTAAGTTGGAAGTGAAATCTATGGGCTTAACCCATAAACTGCTTTCAAAACTGCTGGTCTTGAGTGATGGAGAGGCAGGCGGAATTCCGTG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGCCGTGCAAGTCCATCTTAAAAGCGTGGGGCTTAACCCCATGAGGGGATGGAAACTGCATGGCTGGAGTGTCGGAGGGGAAAGTGGAATTCCTAGT
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTTAACCGTAAAATTGCAGTTGATACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGTGACAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGCGTAGGCGGACTATCAAGTCAGTCGTGAAATACCGGGGCTTAACCCCGGGGCTGCGATTGAAACTGACAGCCTTGAGTATCGGAGAGGAAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGGTGTAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGG
k__Bacteria;p__Proteobacteria;c__Gammaproteoba...
TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAATCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGGGGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGAGAAGCAAGTCAGTGGTGAAAGGTATGGGCTTAACCCATAGACTGCCATTGAAACTGTTTTCCTTGAGTGAAGTAGAGGCAGGCGGAATTCCGAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTATGGAGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGTGCTGCAAGTCTGATGTGAAAACCCGGGGCTCAACCCCGGGACTGCATTGGAAACTGTAGGACTAGAGTGTCGGAGGGGTAAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAGCAGCAAGTCTGATGTGAAAGGCGGGGGCTCAACCCCCCGGACTGCATTGGAAACTGTTGATCTTGAGTACCGGAGAGGTAAGCGGAATTCCTA
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTATGGAGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGTGTAGGTGGTCATGCAAGTCAGAAGTGAAAATCTGGGGCTCAACCCCGGAACTGCTTTTGAAACTGTAAGGCTGGAGTGCAGGAGGGGTGAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTATGGAGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGTGTAGGTGGCCAGGCAAGTCAGAAGTGAAAGCCCAGGGCTCAACCCCGGGACTGCTTTTGAAACTGCAGGGCTAGAGTGCAGGAGGGGCAAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
CACGGGGGATGCGAGCGTTATCCGGATTCATTGGGTTTAAAGGGAGCGTAGGCGGCCCGACAAGTCAGCGGTAAAAGACTGCAGCTAAACTGTAGCGCGCCGTTGAAACTGCCGGGCTCGAGTGCAGACGAGGTTGGCGGAACAGGTGAA
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGGAGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGTGCGTAGGCGGATTGGCAAGTCAGAAGTGAAATCCATGGGCTTAACCCATGAACTGCTTTTGAAACTGTTAGTCTTGAGTGAAGCAGAGGTAGGCGGAATTCCCGG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCATGTAGGCGGTTCCCTAAGTCGGTCGTGAAAATGCGGTGCTTAACGCCGTATGGCGATCGAAACTGGGGGACTTGAGTGCAGGAGAGGAAAGGGGAACTCCCAGT
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTAATTAAGTTAGTGGTCAAATCCGGAGGCTTCACTTCCGATCGCCATTAAAACTGATTAGCTAGAGAATGGACGAGGTAGGCGGAATAAGTTAA
k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o_...
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTGTGTAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTGGAAACTGTATAGCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
TACGTAGGGGGCAAGCGTTATCCGGAATTACTGGGTGTAAAGGGTGCGTAGGTGGTATGGCAAGTCAGAAGTGAAAACCCAGGGCTTAACTCTGGGACTGCTTTTGAAACTGTCAGACTAGAGTGTAGGAGAGGTAAGCGGAATTCCTAG
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clo...
2129 rows × 1 columns
In [6]:
# The abundance matrix (samples x features); right after loading it is a sparse CSR matrix.
cfs.data
Out[6]:
<87x2129 sparse matrix of type '<class 'numpy.float64'>'
with 21995 stored elements in Compressed Sparse Row format>
When loading the data, it is by default loaded as a scipy.sparse CSR matrix (which is more memory efficient for sparse data).
We can force Calour to load the data as a dense numpy 2D array using the sparse=False parameter in the read_amplicon() function.
We can also convert between sparse and dense using the sparse attribute of the experiment.
In [7]:
# Assigning to `sparse` converts the stored matrix in place; False gives a dense numpy array.
cfs.sparse=False
cfs.data
Out[7]:
array([[3.17744176e+03, 9.53232528e+02, 7.34643695e+02, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[2.59231982e+03, 3.83801502e+00, 3.63055475e+00, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[1.12373187e+03, 4.78524699e+02, 0.00000000e+00, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
...,
[3.01104327e+03, 0.00000000e+00, 2.22422402e-01, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[3.40518245e+02, 1.77466873e+03, 0.00000000e+00, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[5.02440604e+02, 0.00000000e+00, 0.00000000e+00, ...,
3.39941226e+01, 5.10534442e+00, 1.36972655e+00]])
In [8]:
# Convert the stored matrix back to the sparse (CSR) representation.
cfs.sparse=True
cfs.data
Out[8]:
<87x2129 sparse matrix of type '<class 'numpy.float64'>'
with 21995 stored elements in Compressed Sparse Row format>
In [9]:
# sparse=None appears to return the data in its current storage format (sparse at this point).
dat = cfs.get_data(sparse=None)
dat
Out[9]:
<87x2129 sparse matrix of type '<class 'numpy.float64'>'
with 21995 stored elements in Compressed Sparse Row format>
In [10]:
# sparse=True requests the data as a sparse matrix.
dat = cfs.get_data(sparse=True)
dat
Out[10]:
<87x2129 sparse matrix of type '<class 'numpy.float64'>'
with 21995 stored elements in Compressed Sparse Row format>
In [11]:
# sparse=False requests the data as a dense numpy array, even though it is stored sparse.
dat = cfs.get_data(sparse=False)
dat
Out[11]:
array([[3.17744176e+03, 9.53232528e+02, 7.34643695e+02, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[2.59231982e+03, 3.83801502e+00, 3.63055475e+00, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[1.12373187e+03, 4.78524699e+02, 0.00000000e+00, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
...,
[3.01104327e+03, 0.00000000e+00, 2.22422402e-01, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[3.40518245e+02, 1.77466873e+03, 0.00000000e+00, ...,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[5.02440604e+02, 0.00000000e+00, 0.00000000e+00, ...,
3.39941226e+01, 5.10534442e+00, 1.36972655e+00]])
In [12]:
# copy=False (with no format change) hands back the experiment's own matrix object,
# so the identity check below evaluates to True.
dat = cfs.get_data(sparse=None, copy=False)
dat is cfs.data
Out[12]:
True
In [13]:
# copy=True returns a separate object, so the identity check below evaluates to False.
dat = cfs.get_data(sparse=None, copy=True)
dat is cfs.data
Out[13]:
False
In [14]:
# Indexing the experiment with (sample ID, feature ID) returns the single abundance value for that pair.
cfs['ERR1331815','TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGACGCTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGGTGTCTTGAGTACAGTAGAGGCAGGCGGAATTCGTGG']
Out[14]:
1407.0828439786792
In [15]:
# Write the experiment to disk using 'cfs' as the file-name prefix (files are listed in the next cell).
cfs.save('cfs')
In [16]:
# Shell escape: list the files produced by the save call (biom table plus feature and sample text files).
!ls cfs*
cfs.biom cfs_feature.txt cfs_sample.txt
In [ ]:
Content source: RNAer/Calour
Similar notebooks: