In [1]:
library(vegan)


Loading required package: permute
Loading required package: lattice
This is vegan 2.4-3

In [2]:
mf = read.csv('../data/mapping_Shannonalpha.txt', stringsAsFactors=FALSE, sep='\t')
colnames(mf)[1] = 'SampleName'

In [3]:
head(mf)


SampleNameBarcodeSequenceLinkerPrimerSequenceExperiment_Design_DescriptionLibrary_Construction_ProtocolLinkerPlatformCenter_NameCenter_ProjectInstrument_ModelOHV1D2CTOHVD2CTOHVDTOTOHV1DTOTOHSEASVDstatusDescriptionratio_activationratio_catabolismalpha_shannon
BI0023 TCTGGTGACATT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 25.8 0.0393 3:SUMMER sufficiency Orwoll.BI0023.BI 0.001523256 0.06860465 5.621879
BI0056 CAAGCATGCCTA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 39.2 0.0619 2:SPRING sufficiency Orwoll.BI0056.BI 0.001579082 0.09974490 3.766687
BI0131 CTATTTGCGACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 23.1 0.0521 2:SPRING sufficiency Orwoll.BI0131.BI 0.002255411 0.06450216 3.945446
BI0153 ATCGGCGTTACA GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 27.3 0.0431 2:SPRING sufficiency Orwoll.BI0153.BI 0.001578755 0.07838828 4.801318
BI0215 CCTCTCGTGATC GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 33.0 0.0502 4:FALL sufficiency Orwoll.BI0215.BI 0.001521212 0.10969697 5.229841
BI0353 TGCCATCTGAAT GGACTACHVGGGTWTCTAAT 16S stool samples sequenced for MrOS Vitamin D study16S rRNA v4 GT Illumina BI MrOS Illumina MiSeq 1: Yes 1: Yes 19.5 0.0455 2:SPRING deficiency Orwoll.BI0353.BI 0.002333333 0.09179487 4.111526

In [4]:
dim(mf)


  1. 599
  2. 68

In [5]:
# convert categorial to factors
vars_cat = c('Sex','GIERACE', 'SITE', 'TUDRAMT', 'TURSMOKE', 'M1ADEPR', 'M1VITMND', 'M1ANTIB', 'M1PROBI', 
               'OHSEAS', 'VDstatus', 'OHV1D2CT', 'OHVD2CT')
mf[vars_cat] = lapply(mf[vars_cat], factor)

# convert continuous to numeric
vars_cts = c('Latitude', 'Longitude', 'Age', 'Height', 'Weight', 'BMI', 'PASCORE', 'DTVITD', 
             'OHV1D3', 'OHV24D3', 'OHVD3', 'OHVD2', 'OHV1D2', 'OHVDTOT', 'OHV1DTOT', 'alpha_shannon')

In [6]:
length(vars_cat)
length(vars_cts)


13
16

In [7]:
dat = mf[c(vars_cat, vars_cts)]
dim(dat)
head(dat)


  1. 599
  2. 29
SexGIERACESITETUDRAMTTURSMOKEM1ADEPRM1VITMNDM1ANTIBM1PROBIOHSEASPASCOREDTVITDOHV1D3OHV24D3OHVD3OHVD2OHV1D2OHVDTOTOHV1DTOTalpha_shannon
male 1:WHITE Birmingham 1: Less than one drink per weekM:Not Applicable 0: No 0: No 0: No 0: No 3:SUMMER 91.00000 250.90 0.0393 1.77 25.8 0 0 25.8 0.0393 5.621879
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 1: Yes 1: Yes 0: No 2:SPRING 199.17857 72.97 0.0619 3.91 39.2 0 0 39.2 0.0619 3.766687
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 1: Yes 0: No 0: No 2:SPRING 161.71429 312.15 0.0521 1.49 23.1 0 0 23.1 0.0521 3.945446
male 1:WHITE Birmingham 4: 6-13 drinks per week 1:PAST 0: No 1: Yes 0: No 0: No 2:SPRING 88.21429 323.52 0.0431 2.14 27.3 0 0 27.3 0.0431 4.801318
male 1:WHITE Birmingham 3: 3-5 drinks per week 1:PAST 0: No 0: No 0: No 0: No 4:FALL 256.82143 31.95 0.0502 3.62 33.0 0 0 33.0 0.0502 5.229841
male 1:WHITE Birmingham 0:None drinker 1:PAST 0: No 0: No 1: Yes 0: No 2:SPRING 179.57143 156.06 0.0455 1.79 19.5 0 0 19.5 0.0455 4.111526

In [8]:
summary(dat)


   Sex                    GIERACE             SITE    
 male:599   1:WHITE           :520   Birmingham : 75  
            2:AFRICAN AMERICAN: 24   Minneapolis: 91  
            3:ASIAN           : 34   Palo Alto  : 86  
            4:HISPANIC        : 12   Pittsburgh : 92  
            5:OTHER           :  9   Portland   :121  
                                     San Diego  :134  
                                                      
                            TUDRAMT                TURSMOKE     M1ADEPR   
 .                              :  2   0:NO            :230   0: No :546  
 0:None drinker                 :230   1:PAST          :289   1: Yes: 53  
 1: Less than one drink per week: 78   2:CURRENT       :  9               
 2: 1-2drinks per week          : 66   M:Not Applicable: 71               
 3: 3-5 drinks per week         : 90                                      
 4: 6-13 drinks per week        :106                                      
 5: 14 or more drinks per week  : 27                                      
   M1VITMND     M1ANTIB      M1PROBI                       OHSEAS   
 0: No :154   0: No :558   0: No :573   1:WINTER              :  6  
 1: Yes:445   1: Yes: 41   1: Yes: 26   2:SPRING              :211  
                                        3:SUMMER              :220  
                                        4:FALL                :130  
                                        Missing:Not collected : 32  
                                                                    
                                                                    
                   VDstatus                     OHV1D2CT  
                       : 11   0: No                 :  4  
 deficiency            : 40   1: Yes                :563  
 Missing:Not collected : 32   Missing:Not collected : 32  
 sufficiency           :516                               
                                                          
                                                          
                                                          
                   OHVD2CT       Latitude       Longitude           Age       
 0: No                 :119   Min.   :32.72   Min.   :-122.7   Min.   :78.00  
 1: Yes                :448   1st Qu.:33.52   1st Qu.:-122.1   1st Qu.:81.00  
 Missing:Not collected : 32   Median :40.44   Median :-117.2   Median :83.00  
                              Mean   :39.13   Mean   :-105.9   Mean   :84.24  
                              3rd Qu.:44.98   3rd Qu.: -86.8   3rd Qu.:87.00  
                              Max.   :45.52   Max.   : -80.0   Max.   :98.00  
                                                                              
     Height          Weight            BMI           PASCORE      
 Min.   :153.8   Min.   : 51.40   Min.   :17.60   Min.   :  0.00  
 1st Qu.:167.3   1st Qu.: 71.00   1st Qu.:24.51   1st Qu.: 70.14  
 Median :172.1   Median : 79.00   Median :26.72   Median :116.75  
 Mean   :172.1   Mean   : 80.17   Mean   :27.01   Mean   :122.48  
 3rd Qu.:176.7   3rd Qu.: 87.50   3rd Qu.:28.98   3rd Qu.:165.79  
 Max.   :194.7   Max.   :127.60   Max.   :42.93   Max.   :359.14  
                                                                  
     DTVITD            OHV1D3           OHV24D3           OHVD3       
 Min.   :  0.429   Min.   :0.01070   Min.   : 0.300   Min.   :  7.80  
 1st Qu.: 77.963   1st Qu.:0.04410   1st Qu.: 2.175   1st Qu.: 27.40  
 Median :128.125   Median :0.05550   Median : 3.180   Median : 33.65  
 Mean   :164.396   Mean   :0.05777   Mean   : 3.431   Mean   : 35.23  
 3rd Qu.:211.643   3rd Qu.:0.06630   3rd Qu.: 4.235   3rd Qu.: 41.83  
 Max.   :982.830   Max.   :0.15600   Max.   :14.070   Max.   :104.00  
 NA's   :15        NA's   :32        NA's   :32       NA's   :43      
     OHVD2             OHV1D2           OHVDTOT          OHV1DTOT      
 Min.   : 0.0000   Min.   :0.00000   Min.   :  7.80   Min.   :0.01070  
 1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.: 28.10   1st Qu.:0.04425  
 Median : 0.0000   Median :0.00000   Median : 34.15   Median :0.05550  
 Mean   : 0.7691   Mean   :0.00018   Mean   : 36.01   Mean   :0.05795  
 3rd Qu.: 0.0000   3rd Qu.:0.00000   3rd Qu.: 42.30   3rd Qu.:0.06645  
 Max.   :59.0000   Max.   :0.03780   Max.   :104.00   Max.   :0.15600  
 NA's   :32        NA's   :32        NA's   :43       NA's   :32       
 alpha_shannon  
 Min.   :1.459  
 1st Qu.:4.384  
 Median :5.019  
 Mean   :4.904  
 3rd Qu.:5.547  
 Max.   :6.764  
                

In [9]:
dat = dat[complete.cases(dat), ]
print(dim(dat))
alpha = dat$alpha_shannon
dat = dat[, -which(names(dat) %in% 'alpha_shannon')]
dim(dat)


[1] 545  29
  1. 545
  2. 28

In [10]:
mod0 <- rda(alpha ~ 1., dat)  # Model with intercept only
mod1 <- rda(alpha ~ ., dat)  # Model with all explanatory variables

In [11]:
step.res <- ordiR2step(mod0, mod1, perm.max = 1000)


Step: R2.adj= 0 
Call: alpha ~ 1 
 
                  R2.adjusted
<All variables>  6.705342e-02
+ OHV1D3         2.674177e-02
+ OHV1DTOT       2.615615e-02
+ SITE           2.470355e-02
+ GIERACE        2.282282e-02
+ M1ANTIB        1.754900e-02
+ OHV24D3        1.292210e-02
+ M1ADEPR        1.267359e-02
+ Latitude       1.118248e-02
+ TUDRAMT        5.309808e-03
+ Longitude      4.327768e-03
+ OHVD2          2.171294e-03
+ PASCORE        1.996015e-03
+ TURSMOKE       1.639366e-03
+ OHVD3          1.032975e-03
+ OHVD2CT        5.620141e-04
<none>           0.000000e+00
+ Sex            0.000000e+00
+ OHSEAS        -1.990955e-05
+ Height        -3.044395e-04
+ OHVDTOT       -8.380857e-04
+ VDstatus      -8.389714e-04
+ OHV1D2        -1.264224e-03
+ M1VITMND      -1.452819e-03
+ BMI           -1.460865e-03
+ Age           -1.497746e-03
+ OHV1D2CT      -1.722565e-03
+ M1PROBI       -1.803669e-03
+ Weight        -1.837918e-03
+ DTVITD        -1.841588e-03

         Df     AIC      F Pr(>F)   
+ OHV1D3  1 -139.45 15.947  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.02674177 
Call: alpha ~ OHV1D3 
 
                R2.adjusted
<All variables>  0.06705342
+ SITE           0.04849791
+ GIERACE        0.04699867
+ M1ANTIB        0.04093914
+ Latitude       0.03829696
+ M1ADEPR        0.03675633
+ Longitude      0.02990335
+ TUDRAMT        0.02799118
+ OHSEAS         0.02755673
+ Height         0.02705354
<none>           0.02674177
+ Sex            0.02674177
+ TURSMOKE       0.02673742
+ OHVD2          0.02633353
+ OHVD2CT        0.02626271
+ OHVDTOT        0.02623675
+ PASCORE        0.02615071
+ OHV24D3        0.02575473
+ Weight         0.02566085
+ OHVD3          0.02545363
+ M1VITMND       0.02535431
+ OHV1D2CT       0.02503365
+ M1PROBI        0.02503070
+ VDstatus       0.02498326
+ OHV1DTOT       0.02497579
+ OHV1D2         0.02497579
+ DTVITD         0.02497527
+ Age            0.02496951
+ BMI            0.02496549

       Df     AIC      F Pr(>F)  
+ SITE  5 -146.81 3.4831  0.012 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.04849791 
Call: alpha ~ OHV1D3 + SITE 
 
                R2.adjusted
<All variables>  0.06705342
+ GIERACE        0.06563356
+ M1ANTIB        0.05933434
+ M1ADEPR        0.05869710
+ TURSMOKE       0.05023173
+ PASCORE        0.04860019
<none>           0.04849791
+ Sex            0.04849791
+ Latitude       0.04849791
+ Longitude      0.04849791
+ Height         0.04791070
+ OHSEAS         0.04770900
+ OHV24D3        0.04761256
+ TUDRAMT        0.04756302
+ Weight         0.04728769
+ M1VITMND       0.04726216
+ OHV1D2CT       0.04723182
+ OHVDTOT        0.04714798
+ OHVD2          0.04709069
+ OHVD2CT        0.04693233
+ DTVITD         0.04692231
+ OHVD3          0.04691301
+ BMI            0.04679526
+ M1PROBI        0.04675592
+ OHV1D2         0.04675405
+ OHV1DTOT       0.04675405
+ Age            0.04675097
+ VDstatus       0.04672905

          Df     AIC      F Pr(>F)  
+ GIERACE  4 -152.78 3.4666  0.014 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.06563356 
Call: alpha ~ OHV1D3 + SITE + GIERACE 
 
                R2.adjusted
+ M1ADEPR        0.07643586
+ M1ANTIB        0.07626007
+ TURSMOKE       0.06944108
<All variables>  0.06705342
+ PASCORE        0.06644381
+ OHSEAS         0.06570132
<none>           0.06563356
+ Sex            0.06563356
+ Latitude       0.06563356
+ Longitude      0.06563356
+ OHVDTOT        0.06482613
+ OHVD3          0.06440082
+ OHV24D3        0.06439327
+ DTVITD         0.06438085
+ OHVD2          0.06437992
+ OHV1D2CT       0.06416410
+ Age            0.06407867
+ M1VITMND       0.06407478
+ M1PROBI        0.06400581
+ OHVD2CT        0.06399239
+ BMI            0.06390843
+ Weight         0.06390332
+ VDstatus       0.06389693
+ Height         0.06388701
+ OHV1D2         0.06388329
+ OHV1DTOT       0.06388329
+ TUDRAMT        0.06199672


In [13]:
table = step.res$anova
table


R2.adjDfAICFPr(>F)
+ OHV1D30.02674177 1 -139.4461 15.947237 0.002
+ SITE0.04849791 5 -146.8088 3.483144 0.012
+ GIERACE0.06563356 4 -152.7804 3.466640 0.014
<All variables>0.06705342NA NA NA NA

In [ ]:
# Step: R2.adj= 0 
# Call: alpha ~ 1 
 
#                   R2.adjusted
# <All variables>  6.705342e-02
# + OHV1D3         2.674177e-02
# + SITE           2.470355e-02
# + GIERACE        2.282282e-02

In [ ]:
# explain negative adjusted R^2: https://stats.stackexchange.com/questions/183265/what-does-negative-r-squared-mean

In [16]:
table$ES.independent = c(0.06705342, 0.02470355, 0.02282282, 0.06705342)
table$ES.RDA = c(table$R2.adj[1], table$R2.adj[2]-table$R2.adj[1], 
                       table$R2.adj[3]-table$R2.adj[2], table$R2.adj[4]-table$R2.adj[3])
table = table[-4, ]

In [17]:
step.res$call


rda(formula = alpha ~ OHV1D3 + SITE + GIERACE, data = dat)

In [18]:
table


R2.adjDfAICFPr(>F)ES.independentES.RDA
+ OHV1D30.026741771 -139.4461 15.947237 0.002 0.067053420.02674177
+ SITE0.048497915 -146.8088 3.483144 0.012 0.024703550.02175614
+ GIERACE0.065633564 -152.7804 3.466640 0.014 0.022822820.01713566

In [ ]: