In [1]:
library(vegan)


Loading required package: permute
Loading required package: lattice
This is vegan 2.4-3

In [2]:
mf = read.csv('../data/mapping_PDalpha.txt', stringsAsFactors=FALSE, sep='\t')
colnames(mf)[1] = 'SampleName'

In [3]:
head(mf)


SampleNameBarcodeSequenceLinkerPrimerSequenceExperiment_Design_DescriptionLibrary_Construction_ProtocolLinkerPlatformCenter_NameCenter_ProjectInstrument_ModelOHV1D2CTOHVD2CTOHVDTOTOHV1DTOTOHSEASVDstatusDescriptionratio_activationratio_catabolismalpha_pd
BI0023 TCTGGTGACATT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 25.8 0.0393 3:SUMMER sufficiency Orwoll.BI0023.BI 0.001523256 0.06860465 22.49523
BI0056 CAAGCATGCCTA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 39.2 0.0619 2:SPRING sufficiency Orwoll.BI0056.BI 0.001579082 0.09974490 14.80982
BI0131 CTATTTGCGACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 23.1 0.0521 2:SPRING sufficiency Orwoll.BI0131.BI 0.002255411 0.06450216 17.61783
BI0153 ATCGGCGTTACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 27.3 0.0431 2:SPRING sufficiency Orwoll.BI0153.BI 0.001578755 0.07838828 14.79724
BI0215 CCTCTCGTGATC GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 33.0 0.0502 4:FALL sufficiency Orwoll.BI0215.BI 0.001521212 0.10969697 15.39670
BI0353 TGCCATCTGAAT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 19.5 0.0455 2:SPRING deficiency Orwoll.BI0353.BI 0.002333333 0.09179487 10.69057

In [4]:
dim(mf)


  1. 599
  2. 68

In [5]:
# convert categorial to factors
vars_cat = c('Sex','GIERACE', 'SITE', 'TUDRAMT', 'TURSMOKE', 'M1ADEPR', 'M1VITMND', 'M1ANTIB', 'M1PROBI', 
               'OHSEAS', 'VDstatus', 'OHV1D2CT', 'OHVD2CT')
mf[vars_cat] = lapply(mf[vars_cat], factor)

# convert continuous to numeric
vars_cts = c('Latitude', 'Longitude', 'Age', 'Height', 'Weight', 'BMI', 'PASCORE', 'DTVITD', 
             'OHV1D3', 'OHV24D3', 'OHVD3', 'OHVD2', 'OHV1D2', 'OHVDTOT', 'OHV1DTOT', 'alpha_pd')

In [6]:
length(vars_cat)
length(vars_cts)


13
16

In [7]:
dat = mf[c(vars_cat, vars_cts)]
dim(dat)
head(dat)


  1. 599
  2. 29
SexGIERACESITETUDRAMTTURSMOKEM1ADEPRM1VITMNDM1ANTIBM1PROBIOHSEASPASCOREDTVITDOHV1D3OHV24D3OHVD3OHVD2OHV1D2OHVDTOTOHV1DTOTalpha_pd
male 1:WHITE Birmingham 1: Less than one drink per weekM:Not Applicable 0: No 0: No 0: No 0: No 3:SUMMER 91.00000 250.90 0.0393 1.77 25.8 0 0 25.8 0.0393 22.49523
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 1: Yes 1: Yes 0: No 2:SPRING 199.17857 72.97 0.0619 3.91 39.2 0 0 39.2 0.0619 14.80982
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 1: Yes 0: No 0: No 2:SPRING 161.71429 312.15 0.0521 1.49 23.1 0 0 23.1 0.0521 17.61783
male 1:WHITE Birmingham 4: 6-13 drinks per week 1:PAST 0: No 1: Yes 0: No 0: No 2:SPRING 88.21429 323.52 0.0431 2.14 27.3 0 0 27.3 0.0431 14.79724
male 1:WHITE Birmingham 3: 3-5 drinks per week 1:PAST 0: No 0: No 0: No 0: No 4:FALL 256.82143 31.95 0.0502 3.62 33.0 0 0 33.0 0.0502 15.39670
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 0: No 1: Yes 0: No 2:SPRING 179.57143 156.06 0.0455 1.79 19.5 0 0 19.5 0.0455 10.69057

In [8]:
summary(dat)


   Sex                    GIERACE             SITE    
 male:599   1:WHITE           :520   Birmingham : 75  
            2:AFRICAN AMERICAN: 24   Minneapolis: 91  
            3:ASIAN           : 34   Palo Alto  : 86  
            4:HISPANIC        : 12   Pittsburgh : 92  
            5:OTHER           :  9   Portland   :121  
                                     San Diego  :134  
                                                      
                            TUDRAMT                TURSMOKE     M1ADEPR   
 .                              :  2   0:NO            :230   0: No :546  
 0:None drinker                 :230   1:PAST          :289   1: Yes: 53  
 1: Less than one drink per week: 78   2:CURRENT       :  9               
 2: 1-2drinks per week          : 66   M:Not Applicable: 71               
 3: 3-5 drinks per week         : 90                                      
 4: 6-13 drinks per week        :106                                      
 5: 14 or more drinks per week  : 27                                      
   M1VITMND     M1ANTIB      M1PROBI                       OHSEAS   
 0: No :154   0: No :558   0: No :573   1:WINTER              :  6  
 1: Yes:445   1: Yes: 41   1: Yes: 26   2:SPRING              :211  
                                        3:SUMMER              :220  
                                        4:FALL                :130  
                                        Missing:Not collected : 32  
                                                                    
                                                                    
                   VDstatus                     OHV1D2CT  
                       : 11   0: No                 :  4  
 deficiency            : 40   1: Yes                :563  
 Missing:Not collected : 32   Missing:Not collected : 32  
 sufficiency           :516                               
                                                          
                                                          
                                                          
                   OHVD2CT       Latitude       Longitude           Age       
 0: No                 :119   Min.   :32.72   Min.   :-122.7   Min.   :78.00  
 1: Yes                :448   1st Qu.:33.52   1st Qu.:-122.1   1st Qu.:81.00  
 Missing:Not collected : 32   Median :40.44   Median :-117.2   Median :83.00  
                              Mean   :39.13   Mean   :-105.9   Mean   :84.24  
                              3rd Qu.:44.98   3rd Qu.: -86.8   3rd Qu.:87.00  
                              Max.   :45.52   Max.   : -80.0   Max.   :98.00  
                                                                              
     Height          Weight            BMI           PASCORE      
 Min.   :153.8   Min.   : 51.40   Min.   :17.60   Min.   :  0.00  
 1st Qu.:167.3   1st Qu.: 71.00   1st Qu.:24.51   1st Qu.: 70.14  
 Median :172.1   Median : 79.00   Median :26.72   Median :116.75  
 Mean   :172.1   Mean   : 80.17   Mean   :27.01   Mean   :122.48  
 3rd Qu.:176.7   3rd Qu.: 87.50   3rd Qu.:28.98   3rd Qu.:165.79  
 Max.   :194.7   Max.   :127.60   Max.   :42.93   Max.   :359.14  
                                                                  
     DTVITD            OHV1D3           OHV24D3           OHVD3       
 Min.   :  0.429   Min.   :0.01070   Min.   : 0.300   Min.   :  7.80  
 1st Qu.: 77.963   1st Qu.:0.04410   1st Qu.: 2.175   1st Qu.: 27.40  
 Median :128.125   Median :0.05550   Median : 3.180   Median : 33.65  
 Mean   :164.396   Mean   :0.05777   Mean   : 3.431   Mean   : 35.23  
 3rd Qu.:211.643   3rd Qu.:0.06630   3rd Qu.: 4.235   3rd Qu.: 41.83  
 Max.   :982.830   Max.   :0.15600   Max.   :14.070   Max.   :104.00  
 NA's   :15        NA's   :32        NA's   :32       NA's   :43      
     OHVD2             OHV1D2           OHVDTOT          OHV1DTOT      
 Min.   : 0.0000   Min.   :0.00000   Min.   :  7.80   Min.   :0.01070  
 1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 28.10   1st Qu.:0.04425  
 Median : 0.0000   Median :0.00000   Median : 34.15   Median :0.05550  
 Mean   : 0.7691   Mean   :0.00018   Mean   : 36.01   Mean   :0.05795  
 3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 42.30   3rd Qu.:0.06645  
 Max.   :59.0000   Max.   :0.03780   Max.   :104.00   Max.   :0.15600  
 NA's   :32        NA's   :32        NA's   :43       NA's   :32       
    alpha_pd     
 Min.   : 3.206  
 1st Qu.:12.900  
 Median :16.074  
 Mean   :16.451  
 3rd Qu.:20.063  
 Max.   :29.953  
                 

In [9]:
dat = dat[complete.cases(dat), ]
print(dim(dat))
alpha = dat$alpha_pd
dat = dat[, -which(names(dat) %in% 'alpha_pd')]
dim(dat)


[1] 545  29
  1. 545
  2. 28

In [10]:
mod0 <- rda(alpha ~ 1., dat)  # Model with intercept only
mod1 <- rda(alpha ~ ., dat)  # Model with all explanatory variables

In [11]:
step.res <- ordiR2step(mod0, mod1, perm.max = 1000)


Step: R2.adj= 0 
Call: alpha ~ 1 
 
                  R2.adjusted
<All variables>  0.1259630505
+ OHV1DTOT       0.0455455639
+ OHV1D3         0.0446292301
+ SITE           0.0311618389
+ GIERACE        0.0271660306
+ M1ANTIB        0.0211519590
+ Latitude       0.0210409870
+ TUDRAMT        0.0187397874
+ M1ADEPR        0.0098967724
+ OHV24D3        0.0061390817
+ Longitude      0.0053765637
+ OHVD2CT        0.0041786581
+ PASCORE        0.0032152201
+ BMI            0.0029544630
+ Age            0.0025306259
+ OHSEAS         0.0018635360
+ Weight         0.0006867532
+ OHVD2          0.0003109701
<none>           0.0000000000
+ Sex            0.0000000000
+ M1PROBI       -0.0007188472
+ OHVDTOT       -0.0012109491
+ VDstatus      -0.0012901636
+ M1VITMND      -0.0015829009
+ Height        -0.0016186020
+ OHVD3         -0.0017627462
+ OHV1D2CT      -0.0017909375
+ OHV1D2        -0.0017937839
+ DTVITD        -0.0018356540
+ TURSMOKE      -0.0052206388

           Df    AIC      F Pr(>F)   
+ OHV1DTOT  1 1659.7 26.959  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.04554556 
Call: alpha ~ OHV1DTOT 
 
                R2.adjusted
<All variables>  0.12596305
+ SITE           0.07518566
+ GIERACE        0.06971679
+ Latitude       0.06745386
+ M1ANTIB        0.06240655
+ OHVDTOT        0.05848702
+ OHVD3          0.05621401
+ TUDRAMT        0.05570484
+ M1ADEPR        0.05230161
+ Age            0.05080285
+ Longitude      0.04962648
+ OHSEAS         0.04880212
+ M1VITMND       0.04864340
+ OHVD2CT        0.04801510
+ M1PROBI        0.04661057
+ OHV24D3        0.04567053
<none>           0.04554556
+ Sex            0.04554556
+ BMI            0.04527385
+ PASCORE        0.04504960
+ OHVD2          0.04460698
+ Height         0.04437866
+ Weight         0.04411556
+ VDstatus       0.04411456
+ DTVITD         0.04384848
+ OHV1D2         0.04381955
+ OHV1D3         0.04381955
+ OHV1D2CT       0.04380537
+ TURSMOKE       0.04040278

       Df    AIC      F Pr(>F)   
+ SITE  5 1647.5 4.4806  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.07518566 
Call: alpha ~ OHV1DTOT + SITE 
 
                R2.adjusted
<All variables>  0.12596305
+ GIERACE        0.09741708
+ M1ANTIB        0.08656495
+ OHVDTOT        0.08454035
+ OHVD3          0.08381350
+ M1ADEPR        0.08193039
+ TUDRAMT        0.08149368
+ Age            0.07957713
+ M1VITMND       0.07898872
+ M1PROBI        0.07581920
+ PASCORE        0.07567500
+ OHV24D3        0.07533269
+ OHVD2CT        0.07519435
<none>           0.07518566
+ Sex            0.07518566
+ Latitude       0.07518566
+ Longitude      0.07518566
+ OHSEAS         0.07509863
+ BMI            0.07446073
+ DTVITD         0.07382140
+ OHV1D2         0.07376042
+ OHV1D3         0.07376042
+ Weight         0.07374421
+ Height         0.07372653
+ VDstatus       0.07361439
+ OHVD2          0.07359777
+ OHV1D2CT       0.07352021
+ TURSMOKE       0.07019275

          Df    AIC      F Pr(>F)   
+ GIERACE  4 1638.2 4.3129  0.004 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.09741708 
Call: alpha ~ OHV1DTOT + SITE + GIERACE 
 
                R2.adjusted
<All variables>  0.12596305
+ OHVDTOT        0.11006721
+ M1ANTIB        0.10952791
+ OHVD3          0.10906921
+ M1ADEPR        0.10592749
+ M1VITMND       0.10035140
+ Age            0.10013792
+ OHSEAS         0.09913744
+ PASCORE        0.09909125
+ M1PROBI        0.09866066
+ OHV24D3        0.09863145
+ Weight         0.09830159
+ BMI            0.09827145
+ TUDRAMT        0.09783283
<none>           0.09741708
+ Sex            0.09741708
+ Latitude       0.09741708
+ Longitude      0.09741708
+ OHVD2CT        0.09715362
+ DTVITD         0.09648618
+ Height         0.09621369
+ OHV1D2         0.09595488
+ OHV1D3         0.09595488
+ OHVD2          0.09594473
+ OHV1D2CT       0.09573675
+ VDstatus       0.09573438
+ TURSMOKE       0.09276304

          Df    AIC      F Pr(>F)   
+ OHVDTOT  1 1631.4 8.5906  0.008 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.1100672 
Call: alpha ~ OHV1DTOT + SITE + GIERACE + OHVDTOT 
 
                R2.adjusted
<All variables>   0.1259631
+ M1ANTIB         0.1227722
+ M1ADEPR         0.1162019
+ Age             0.1129860
+ OHSEAS          0.1120651
+ Weight          0.1118310
+ BMI             0.1117963
+ PASCORE         0.1117143
+ M1PROBI         0.1113392
+ TUDRAMT         0.1102671
<none>            0.1100672
+ Sex             0.1100672
+ Latitude        0.1100672
+ Longitude       0.1100672
+ OHVD2CT         0.1099989
+ OHV24D3         0.1098021
+ M1VITMND        0.1094061
+ OHV1D2          0.1092404
+ OHV1D3          0.1092404
+ VDstatus        0.1090778
+ Height          0.1089457
+ DTVITD          0.1088647
+ OHV1D2CT        0.1085747
+ OHVD3           0.1085589
+ OHVD2           0.1085589
+ TURSMOKE        0.1055434

          Df    AIC      F Pr(>F)   
+ M1ANTIB  1 1624.6 8.7195  0.004 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.1227722 
Call: alpha ~ OHV1DTOT + SITE + GIERACE + OHVDTOT + M1ANTIB 
 
                R2.adjusted
+ M1ADEPR         0.1290980
<All variables>   0.1259631
+ OHSEAS          0.1258564
+ Age             0.1252768
+ TUDRAMT         0.1247094
+ PASCORE         0.1242834
+ Weight          0.1237718
+ BMI             0.1236242
+ M1PROBI         0.1228087
<none>            0.1227722
+ Sex             0.1227722
+ Latitude        0.1227722
+ Longitude       0.1227722
+ OHVD2CT         0.1227436
+ OHV1D2          0.1224839
+ OHV1D3          0.1224839
+ OHV24D3         0.1220017
+ Height          0.1216943
+ M1VITMND        0.1216492
+ OHV1D2CT        0.1215532
+ VDstatus        0.1215267
+ DTVITD          0.1214938
+ OHVD3           0.1213308
+ OHVD2           0.1213308
+ TURSMOKE        0.1180293


In [15]:
table = step.res$anova
table


R2.adjDfAICFPr(>F)
+ OHV1DTOT0.04554556 1 1659.710 26.959109 0.002
+ SITE0.07518566 5 1647.475 4.480606 0.002
+ GIERACE0.09741708 4 1638.147 4.312855 0.004
+ OHVDTOT0.11006721 1 1631.433 8.590649 0.008
+ M1ANTIB0.12277224 1 1624.572 8.719521 0.004
<All variables>0.12596305NA NA NA NA

In [ ]:
# Step: R2.adj= 0 
# Call: alpha ~ 1 
 
#                   R2.adjusted
# <All variables>  0.1259630505
# + OHV1DTOT       0.0455455639
# + SITE           0.0311618389
# + GIERACE        0.0271660306
# + M1ANTIB        0.0211519590
# + OHVDTOT       -0.0012109491

In [ ]:
# explain negative adjusted R^2: https://stats.stackexchange.com/questions/183265/what-does-negative-r-squared-mean

In [16]:
table$ES.independent = c(0.0455455639, 0.0311618389, 0.0271660306, 0.0012109491, 0.0211519590, 0.1259630505)
table$ES.RDA = c(table$R2.adj[1], table$R2.adj[2]-table$R2.adj[1], 
                       table$R2.adj[3]-table$R2.adj[2], table$R2.adj[4]-table$R2.adj[3],
                       table$R2.adj[5]-table$R2.adj[4], table$R2.adj[6]-table$R2.adj[5])
table = table[-6, ]

In [17]:
step.res$call


rda(formula = alpha ~ OHV1DTOT + SITE + GIERACE + OHVDTOT + M1ANTIB, 
    data = dat)

In [18]:
table


R2.adjDfAICFPr(>F)ES.independentES.RDA
+ OHV1DTOT0.04554556 1 1659.710 26.959109 0.002 0.0455455640.04554556
+ SITE0.07518566 5 1647.475 4.480606 0.002 0.0311618390.02964010
+ GIERACE0.09741708 4 1638.147 4.312855 0.004 0.0271660310.02223142
+ OHVDTOT0.11006721 1 1631.433 8.590649 0.008 0.0012109490.01265013
+ M1ANTIB0.12277224 1 1624.572 8.719521 0.004 0.0211519590.01270502

In [ ]: