In [1]:
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.4-3
In [2]:
mf = read.csv('../data/mapping_PDalpha.txt', stringsAsFactors=FALSE, sep='\t')
colnames(mf)[1] = 'SampleName'
In [3]:
head(mf)
SampleName BarcodeSequence LinkerPrimerSequence Experiment_Design_Description Library_Construction_Protocol Linker Platform Center_Name Center_Project Instrument_Model ⋯ OHV1D2CT OHVD2CT OHVDTOT OHV1DTOT OHSEAS VDstatus Description ratio_activation ratio_catabolism alpha_pd
BI0023 TCTGGTGACATT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq ⋯ 1: Yes 1: Yes 25.8 0.0393 3:SUMMER sufficiency Orwoll.BI0023.BI 0.001523256 0.06860465 22.49523
BI0056 CAAGCATGCCTA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq ⋯ 1: Yes 1: Yes 39.2 0.0619 2:SPRING sufficiency Orwoll.BI0056.BI 0.001579082 0.09974490 14.80982
BI0131 CTATTTGCGACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq ⋯ 1: Yes 1: Yes 23.1 0.0521 2:SPRING sufficiency Orwoll.BI0131.BI 0.002255411 0.06450216 17.61783
BI0153 ATCGGCGTTACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq ⋯ 1: Yes 1: Yes 27.3 0.0431 2:SPRING sufficiency Orwoll.BI0153.BI 0.001578755 0.07838828 14.79724
BI0215 CCTCTCGTGATC GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq ⋯ 1: Yes 1: Yes 33.0 0.0502 4:FALL sufficiency Orwoll.BI0215.BI 0.001521212 0.10969697 15.39670
BI0353 TGCCATCTGAAT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study 16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq ⋯ 1: Yes 1: Yes 19.5 0.0455 2:SPRING deficiency Orwoll.BI0353.BI 0.002333333 0.09179487 10.69057
In [4]:
dim(mf)
- 599
- 68
In [5]:
# convert categorial to factors
vars_cat = c('Sex','GIERACE', 'SITE', 'TUDRAMT', 'TURSMOKE', 'M1ADEPR', 'M1VITMND', 'M1ANTIB', 'M1PROBI',
'OHSEAS', 'VDstatus', 'OHV1D2CT', 'OHVD2CT')
mf[vars_cat] = lapply(mf[vars_cat], factor)
# convert continuous to numeric
vars_cts = c('Latitude', 'Longitude', 'Age', 'Height', 'Weight', 'BMI', 'PASCORE', 'DTVITD',
'OHV1D3', 'OHV24D3', 'OHVD3', 'OHVD2', 'OHV1D2', 'OHVDTOT', 'OHV1DTOT', 'alpha_pd')
In [6]:
length(vars_cat)
length(vars_cts)
13
16
In [7]:
dat = mf[c(vars_cat, vars_cts)]
dim(dat)
head(dat)
- 599
- 29
Sex GIERACE SITE TUDRAMT TURSMOKE M1ADEPR M1VITMND M1ANTIB M1PROBI OHSEAS ⋯ PASCORE DTVITD OHV1D3 OHV24D3 OHVD3 OHVD2 OHV1D2 OHVDTOT OHV1DTOT alpha_pd
male 1:WHITE Birmingham 1: Less than one drink per week M:Not Applicable 0: No 0: No 0: No 0: No 3:SUMMER ⋯ 91.00000 250.90 0.0393 1.77 25.8 0 0 25.8 0.0393 22.49523
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 1: Yes 1: Yes 0: No 2:SPRING ⋯ 199.17857 72.97 0.0619 3.91 39.2 0 0 39.2 0.0619 14.80982
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 1: Yes 0: No 0: No 2:SPRING ⋯ 161.71429 312.15 0.0521 1.49 23.1 0 0 23.1 0.0521 17.61783
male 1:WHITE Birmingham 4: 6-13 drinks per week 1:PAST 0: No 1: Yes 0: No 0: No 2:SPRING ⋯ 88.21429 323.52 0.0431 2.14 27.3 0 0 27.3 0.0431 14.79724
male 1:WHITE Birmingham 3: 3-5 drinks per week 1:PAST 0: No 0: No 0: No 0: No 4:FALL ⋯ 256.82143 31.95 0.0502 3.62 33.0 0 0 33.0 0.0502 15.39670
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 0: No 1: Yes 0: No 2:SPRING ⋯ 179.57143 156.06 0.0455 1.79 19.5 0 0 19.5 0.0455 10.69057
In [8]:
summary(dat)
Sex GIERACE SITE
male:599 1:WHITE :520 Birmingham : 75
2:AFRICAN AMERICAN: 24 Minneapolis: 91
3:ASIAN : 34 Palo Alto : 86
4:HISPANIC : 12 Pittsburgh : 92
5:OTHER : 9 Portland :121
San Diego :134
TUDRAMT TURSMOKE M1ADEPR
. : 2 0:NO :230 0: No :546
0:None drinker :230 1:PAST :289 1: Yes: 53
1: Less than one drink per week: 78 2:CURRENT : 9
2: 1-2drinks per week : 66 M:Not Applicable: 71
3: 3-5 drinks per week : 90
4: 6-13 drinks per week :106
5: 14 or more drinks per week : 27
M1VITMND M1ANTIB M1PROBI OHSEAS
0: No :154 0: No :558 0: No :573 1:WINTER : 6
1: Yes:445 1: Yes: 41 1: Yes: 26 2:SPRING :211
3:SUMMER :220
4:FALL :130
Missing:Not collected : 32
VDstatus OHV1D2CT
: 11 0: No : 4
deficiency : 40 1: Yes :563
Missing:Not collected : 32 Missing:Not collected : 32
sufficiency :516
OHVD2CT Latitude Longitude Age
0: No :119 Min. :32.72 Min. :-122.7 Min. :78.00
1: Yes :448 1st Qu.:33.52 1st Qu.:-122.1 1st Qu.:81.00
Missing:Not collected : 32 Median :40.44 Median :-117.2 Median :83.00
Mean :39.13 Mean :-105.9 Mean :84.24
3rd Qu.:44.98 3rd Qu.: -86.8 3rd Qu.:87.00
Max. :45.52 Max. : -80.0 Max. :98.00
Height Weight BMI PASCORE
Min. :153.8 Min. : 51.40 Min. :17.60 Min. : 0.00
1st Qu.:167.3 1st Qu.: 71.00 1st Qu.:24.51 1st Qu.: 70.14
Median :172.1 Median : 79.00 Median :26.72 Median :116.75
Mean :172.1 Mean : 80.17 Mean :27.01 Mean :122.48
3rd Qu.:176.7 3rd Qu.: 87.50 3rd Qu.:28.98 3rd Qu.:165.79
Max. :194.7 Max. :127.60 Max. :42.93 Max. :359.14
DTVITD OHV1D3 OHV24D3 OHVD3
Min. : 0.429 Min. :0.01070 Min. : 0.300 Min. : 7.80
1st Qu.: 77.963 1st Qu.:0.04410 1st Qu.: 2.175 1st Qu.: 27.40
Median :128.125 Median :0.05550 Median : 3.180 Median : 33.65
Mean :164.396 Mean :0.05777 Mean : 3.431 Mean : 35.23
3rd Qu.:211.643 3rd Qu.:0.06630 3rd Qu.: 4.235 3rd Qu.: 41.83
Max. :982.830 Max. :0.15600 Max. :14.070 Max. :104.00
NA's :15 NA's :32 NA's :32 NA's :43
OHVD2 OHV1D2 OHVDTOT OHV1DTOT
Min. : 0.0000 Min. :0.00000 Min. : 7.80 Min. :0.01070
1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.: 28.10 1st Qu.:0.04425
Median : 0.0000 Median :0.00000 Median : 34.15 Median :0.05550
Mean : 0.7691 Mean :0.00018 Mean : 36.01 Mean :0.05795
3rd Qu.: 0.0000 3rd Qu.:0.00000 3rd Qu.: 42.30 3rd Qu.:0.06645
Max. :59.0000 Max. :0.03780 Max. :104.00 Max. :0.15600
NA's :32 NA's :32 NA's :43 NA's :32
alpha_pd
Min. : 3.206
1st Qu.:12.900
Median :16.074
Mean :16.451
3rd Qu.:20.063
Max. :29.953
In [9]:
dat = dat[complete.cases(dat), ]
print(dim(dat))
alpha = dat$alpha_pd
dat = dat[, -which(names(dat) %in% 'alpha_pd')]
dim(dat)
[1] 545 29
- 545
- 28
In [10]:
mod0 <- rda(alpha ~ 1., dat) # Model with intercept only
mod1 <- rda(alpha ~ ., dat) # Model with all explanatory variables
In [11]:
step.res <- ordiR2step(mod0, mod1, perm.max = 1000)
Step: R2.adj= 0
Call: alpha ~ 1
R2.adjusted
<All variables> 0.1259630505
+ OHV1DTOT 0.0455455639
+ OHV1D3 0.0446292301
+ SITE 0.0311618389
+ GIERACE 0.0271660306
+ M1ANTIB 0.0211519590
+ Latitude 0.0210409870
+ TUDRAMT 0.0187397874
+ M1ADEPR 0.0098967724
+ OHV24D3 0.0061390817
+ Longitude 0.0053765637
+ OHVD2CT 0.0041786581
+ PASCORE 0.0032152201
+ BMI 0.0029544630
+ Age 0.0025306259
+ OHSEAS 0.0018635360
+ Weight 0.0006867532
+ OHVD2 0.0003109701
<none> 0.0000000000
+ Sex 0.0000000000
+ M1PROBI -0.0007188472
+ OHVDTOT -0.0012109491
+ VDstatus -0.0012901636
+ M1VITMND -0.0015829009
+ Height -0.0016186020
+ OHVD3 -0.0017627462
+ OHV1D2CT -0.0017909375
+ OHV1D2 -0.0017937839
+ DTVITD -0.0018356540
+ TURSMOKE -0.0052206388
Df AIC F Pr(>F)
+ OHV1DTOT 1 1659.7 26.959 0.002 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step: R2.adj= 0.04554556
Call: alpha ~ OHV1DTOT
R2.adjusted
<All variables> 0.12596305
+ SITE 0.07518566
+ GIERACE 0.06971679
+ Latitude 0.06745386
+ M1ANTIB 0.06240655
+ OHVDTOT 0.05848702
+ OHVD3 0.05621401
+ TUDRAMT 0.05570484
+ M1ADEPR 0.05230161
+ Age 0.05080285
+ Longitude 0.04962648
+ OHSEAS 0.04880212
+ M1VITMND 0.04864340
+ OHVD2CT 0.04801510
+ M1PROBI 0.04661057
+ OHV24D3 0.04567053
<none> 0.04554556
+ Sex 0.04554556
+ BMI 0.04527385
+ PASCORE 0.04504960
+ OHVD2 0.04460698
+ Height 0.04437866
+ Weight 0.04411556
+ VDstatus 0.04411456
+ DTVITD 0.04384848
+ OHV1D2 0.04381955
+ OHV1D3 0.04381955
+ OHV1D2CT 0.04380537
+ TURSMOKE 0.04040278
Df AIC F Pr(>F)
+ SITE 5 1647.5 4.4806 0.002 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step: R2.adj= 0.07518566
Call: alpha ~ OHV1DTOT + SITE
R2.adjusted
<All variables> 0.12596305
+ GIERACE 0.09741708
+ M1ANTIB 0.08656495
+ OHVDTOT 0.08454035
+ OHVD3 0.08381350
+ M1ADEPR 0.08193039
+ TUDRAMT 0.08149368
+ Age 0.07957713
+ M1VITMND 0.07898872
+ M1PROBI 0.07581920
+ PASCORE 0.07567500
+ OHV24D3 0.07533269
+ OHVD2CT 0.07519435
<none> 0.07518566
+ Sex 0.07518566
+ Latitude 0.07518566
+ Longitude 0.07518566
+ OHSEAS 0.07509863
+ BMI 0.07446073
+ DTVITD 0.07382140
+ OHV1D2 0.07376042
+ OHV1D3 0.07376042
+ Weight 0.07374421
+ Height 0.07372653
+ VDstatus 0.07361439
+ OHVD2 0.07359777
+ OHV1D2CT 0.07352021
+ TURSMOKE 0.07019275
Df AIC F Pr(>F)
+ GIERACE 4 1638.2 4.3129 0.004 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step: R2.adj= 0.09741708
Call: alpha ~ OHV1DTOT + SITE + GIERACE
R2.adjusted
<All variables> 0.12596305
+ OHVDTOT 0.11006721
+ M1ANTIB 0.10952791
+ OHVD3 0.10906921
+ M1ADEPR 0.10592749
+ M1VITMND 0.10035140
+ Age 0.10013792
+ OHSEAS 0.09913744
+ PASCORE 0.09909125
+ M1PROBI 0.09866066
+ OHV24D3 0.09863145
+ Weight 0.09830159
+ BMI 0.09827145
+ TUDRAMT 0.09783283
<none> 0.09741708
+ Sex 0.09741708
+ Latitude 0.09741708
+ Longitude 0.09741708
+ OHVD2CT 0.09715362
+ DTVITD 0.09648618
+ Height 0.09621369
+ OHV1D2 0.09595488
+ OHV1D3 0.09595488
+ OHVD2 0.09594473
+ OHV1D2CT 0.09573675
+ VDstatus 0.09573438
+ TURSMOKE 0.09276304
Df AIC F Pr(>F)
+ OHVDTOT 1 1631.4 8.5906 0.008 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step: R2.adj= 0.1100672
Call: alpha ~ OHV1DTOT + SITE + GIERACE + OHVDTOT
R2.adjusted
<All variables> 0.1259631
+ M1ANTIB 0.1227722
+ M1ADEPR 0.1162019
+ Age 0.1129860
+ OHSEAS 0.1120651
+ Weight 0.1118310
+ BMI 0.1117963
+ PASCORE 0.1117143
+ M1PROBI 0.1113392
+ TUDRAMT 0.1102671
<none> 0.1100672
+ Sex 0.1100672
+ Latitude 0.1100672
+ Longitude 0.1100672
+ OHVD2CT 0.1099989
+ OHV24D3 0.1098021
+ M1VITMND 0.1094061
+ OHV1D2 0.1092404
+ OHV1D3 0.1092404
+ VDstatus 0.1090778
+ Height 0.1089457
+ DTVITD 0.1088647
+ OHV1D2CT 0.1085747
+ OHVD3 0.1085589
+ OHVD2 0.1085589
+ TURSMOKE 0.1055434
Df AIC F Pr(>F)
+ M1ANTIB 1 1624.6 8.7195 0.004 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step: R2.adj= 0.1227722
Call: alpha ~ OHV1DTOT + SITE + GIERACE + OHVDTOT + M1ANTIB
R2.adjusted
+ M1ADEPR 0.1290980
<All variables> 0.1259631
+ OHSEAS 0.1258564
+ Age 0.1252768
+ TUDRAMT 0.1247094
+ PASCORE 0.1242834
+ Weight 0.1237718
+ BMI 0.1236242
+ M1PROBI 0.1228087
<none> 0.1227722
+ Sex 0.1227722
+ Latitude 0.1227722
+ Longitude 0.1227722
+ OHVD2CT 0.1227436
+ OHV1D2 0.1224839
+ OHV1D3 0.1224839
+ OHV24D3 0.1220017
+ Height 0.1216943
+ M1VITMND 0.1216492
+ OHV1D2CT 0.1215532
+ VDstatus 0.1215267
+ DTVITD 0.1214938
+ OHVD3 0.1213308
+ OHVD2 0.1213308
+ TURSMOKE 0.1180293
In [15]:
table = step.res$anova
table
R2.adj Df AIC F Pr(>F)
+ OHV1DTOT 0.04554556 1 1659.710 26.959109 0.002
+ SITE 0.07518566 5 1647.475 4.480606 0.002
+ GIERACE 0.09741708 4 1638.147 4.312855 0.004
+ OHVDTOT 0.11006721 1 1631.433 8.590649 0.008
+ M1ANTIB 0.12277224 1 1624.572 8.719521 0.004
<All variables> 0.12596305 NA NA NA NA
In [ ]:
# Step: R2.adj= 0
# Call: alpha ~ 1
# R2.adjusted
# <All variables> 0.1259630505
# + OHV1DTOT 0.0455455639
# + SITE 0.0311618389
# + GIERACE 0.0271660306
# + M1ANTIB 0.0211519590
# + OHVDTOT -0.0012109491
In [ ]:
# explain negative adjusted R^2: https://stats.stackexchange.com/questions/183265/what-does-negative-r-squared-mean
In [16]:
table$ES.independent = c(0.0455455639, 0.0311618389, 0.0271660306, 0.0012109491, 0.0211519590, 0.1259630505)
table$ES.RDA = c(table$R2.adj[1], table$R2.adj[2]-table$R2.adj[1],
table$R2.adj[3]-table$R2.adj[2], table$R2.adj[4]-table$R2.adj[3],
table$R2.adj[5]-table$R2.adj[4], table$R2.adj[6]-table$R2.adj[5])
table = table[-6, ]
In [17]:
step.res$call
rda(formula = alpha ~ OHV1DTOT + SITE + GIERACE + OHVDTOT + M1ANTIB,
data = dat)
In [18]:
table
R2.adj Df AIC F Pr(>F) ES.independent ES.RDA
+ OHV1DTOT 0.04554556 1 1659.710 26.959109 0.002 0.045545564 0.04554556
+ SITE 0.07518566 5 1647.475 4.480606 0.002 0.031161839 0.02964010
+ GIERACE 0.09741708 4 1638.147 4.312855 0.004 0.027166031 0.02223142
+ OHVDTOT 0.11006721 1 1631.433 8.590649 0.008 0.001210949 0.01265013
+ M1ANTIB 0.12277224 1 1624.572 8.719521 0.004 0.021151959 0.01270502
In [ ]:
Content source: serenejiang/MrOS_VitaminD
Similar notebooks: