In [6]:
#pull in source functions from github
devtools::source_url('https://raw.githubusercontent.com/jsphyg/Machine_Learning_Notebooks/master/myRfunctions.R')
#source("C:\\Work\\myRfunctions.R")
fnRunDate()
fnInstallPackages()
SHA-1 hash of file is 7a7a4f3a5ca0087d14658c26536447ad49ec74ab
'Project last run on Thu Sep 21 9:10:48 AM 2017'
'Package install completed'
In [2]:
data(Soybean)
dataset <- as_tibble(Soybean)
In [3]:
glimpse(dataset)
Observations: 683
Variables: 36
$ Class <fctr> diaporthe-stem-canker, diaporthe-stem-canker, diap...
$ date <fctr> 6, 4, 3, 3, 6, 5, 5, 4, 6, 4, 6, 4, 3, 6, 6, 5, 6,...
$ plant.stand <ord> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ precip <ord> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, ...
$ temp <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, ...
$ hail <fctr> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1,...
$ crop.hist <fctr> 1, 2, 1, 1, 2, 3, 2, 1, 3, 2, 1, 1, 1, 3, 1, 3, 0,...
$ area.dam <fctr> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 2, 3, 3, 3, 2,...
$ sever <fctr> 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,...
$ seed.tmt <fctr> 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,...
$ germ <ord> 0, 1, 2, 1, 2, 1, 0, 2, 1, 2, 0, 1, 0, 0, 1, 2, 0, ...
$ plant.growth <fctr> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ leaves <fctr> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ leaf.halo <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ leaf.marg <fctr> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
$ leaf.size <ord> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
$ leaf.shread <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ leaf.malf <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ leaf.mild <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ stem <fctr> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ lodging <fctr> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,...
$ stem.cankers <fctr> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0,...
$ canker.lesion <fctr> 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3,...
$ fruiting.bodies <fctr> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,...
$ ext.decay <fctr> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,...
$ mycelium <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ int.discolor <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2,...
$ sclerotia <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,...
$ fruit.pods <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ fruit.spots <fctr> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,...
$ seed <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ mold.growth <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ seed.discolor <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ seed.size <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ shriveling <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ roots <fctr> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
In [4]:
psych::describe(dataset,check = T)
vars n mean sd median trimmed mad min max range skew kurtosis se
Class* 1 683 9.295754 5.51115341 8 9.179159 7.4130 1 19 18 0.11302119 -1.3791026 0.210878424
date* 2 682 4.554252 1.69411726 5 4.615385 1.4826 1 7 6 -0.30397011 -0.9045074 0.064871103
plant.stand* 3 647 1.452859 0.49815792 1 1.441233 0.0000 1 2 1 0.18896734 -1.9673249 0.019584609
precip* 4 645 2.596899 0.68614709 3 2.744681 0.0000 1 3 2 -1.41630633 0.5502093 0.027017015
temp* 5 653 2.182236 0.62821435 2 2.227533 0.0000 1 3 2 -0.15829545 -0.5843151 0.024583927
hail* 6 562 1.225979 0.41859776 1 1.157778 0.0000 1 2 1 1.30690508 -0.2925101 0.017657481
crop.hist* 7 667 2.884558 0.97576561 3 2.977570 1.4826 1 4 3 -0.39757148 -0.9187916 0.037781795
area.dam* 8 682 2.580645 1.07437412 2 2.600733 1.4826 1 4 3 0.01799005 -1.2864923 0.041139911
sever* 9 562 1.733096 0.59702831 2 1.691111 0.0000 1 3 2 0.17391297 -0.5647524 0.025184119
seed.tmt* 10 562 1.519573 0.61224099 1 1.446667 0.0000 1 3 2 0.73966698 -0.4396667 0.025825828
germ* 11 571 2.049037 0.79098758 2 2.061269 1.4826 1 3 2 -0.08680952 -1.3998550 0.033101800
plant.growth* 12 667 1.338831 0.47366739 1 1.299065 0.0000 1 2 1 0.67949699 -1.5405868 0.018340474
leaves* 13 683 1.887262 0.31650395 2 1.983547 0.0000 1 2 1 -2.44354029 3.9767180 0.012110687
leaf.halo* 14 599 2.202003 0.94899841 3 2.251559 0.0000 1 3 2 -0.41080342 -1.7648507 0.038775024
leaf.marg* 15 599 1.772955 0.95651425 1 1.717256 0.0000 1 3 2 0.46484621 -1.7465620 0.039082113
leaf.size* 16 599 2.283806 0.61169336 2 2.336798 0.0000 1 3 2 -0.24946067 -0.6293671 0.024993113
leaf.shread* 17 583 1.164666 0.37119689 1 1.081370 0.0000 1 2 1 1.80367508 1.2554060 0.015373404
leaf.malf* 18 599 1.075125 0.26381357 1 1.000000 0.0000 1 2 1 3.21564565 8.3543324 0.010779130
leaf.mild* 19 575 1.104348 0.40411457 1 1.000000 0.0000 1 3 2 3.95290557 14.6848261 0.016852743
stem* 20 667 1.556222 0.49720190 2 1.570093 0.0000 1 2 1 -0.22581409 -1.9519277 0.019251734
lodging* 21 562 1.074733 0.26319445 1 1.000000 0.0000 1 2 1 3.22582942 8.4209689 0.011102188
stem.cankers* 22 645 2.060465 1.35169658 1 1.951644 0.0000 1 4 3 0.60983130 -1.5090610 0.053223001
canker.lesion* 23 645 1.979845 1.08400138 2 1.851064 1.4826 1 4 3 0.51457211 -1.2379837 0.042682512
fruiting.bodies* 24 577 1.180243 0.38472295 1 1.101512 0.0000 1 2 1 1.65939253 0.7549009 0.016016226
ext.decay* 25 645 1.249612 0.47746159 1 1.162476 0.0000 1 3 2 1.69543723 1.9750241 0.018800032
mycelium* 26 645 1.009302 0.09607342 1 1.000000 0.0000 1 2 1 10.19921824 102.1824824 0.003782887
int.discolor* 27 645 1.130233 0.41899848 1 1.000000 0.0000 1 3 2 3.33861193 10.5712527 0.016498049
sclerotia* 28 645 1.031008 0.17347313 1 1.000000 0.0000 1 2 1 5.39870500 27.1881751 0.006830498
fruit.pods* 29 599 1.504174 0.88251272 1 1.282744 0.0000 1 4 3 1.83817833 2.4130176 0.036058492
fruit.spots* 30 577 1.847487 1.17006859 1 1.686825 0.0000 1 4 3 0.94650965 -0.7574031 0.048710593
seed* 31 591 1.194585 0.39621658 1 1.118393 0.0000 1 2 1 1.53904600 0.3692961 0.016298172
mold.growth* 32 591 1.113367 0.31730966 1 1.016913 0.0000 1 2 1 2.43281985 3.9252628 0.013052375
seed.discolor* 33 577 1.110919 0.31430372 1 1.015119 0.0000 1 2 1 2.47154019 4.1156528 0.013084635
seed.size* 34 591 1.099831 0.30002820 1 1.000000 0.0000 1 2 1 2.66303034 5.1003693 0.012341511
shriveling* 35 577 1.065858 0.24824873 1 1.000000 0.0000 1 2 1 3.49157641 10.2088077 0.010334730
roots* 36 652 1.177914 0.43882605 1 1.068966 0.0000 1 3 2 2.45781443 5.4857676 0.017185754
In [8]:
VIM::aggr(dataset, prop = FALSE, combined = TRUE, numbers = TRUE, sortVars = TRUE, sortCombs = TRUE)
Variables sorted by number of missings:
Variable Count
hail 121
sever 121
seed.tmt 121
lodging 121
germ 112
leaf.mild 108
fruiting.bodies 106
fruit.spots 106
seed.discolor 106
shriveling 106
leaf.shread 100
seed 92
mold.growth 92
seed.size 92
leaf.halo 84
leaf.marg 84
leaf.size 84
leaf.malf 84
fruit.pods 84
precip 38
stem.cankers 38
canker.lesion 38
ext.decay 38
mycelium 38
int.discolor 38
sclerotia 38
plant.stand 36
roots 31
temp 30
crop.hist 16
plant.growth 16
stem 16
date 1
area.dam 1
Class 0
leaves 0
In [43]:
dataset$temp <- recode(dataset$temp,
"0 = 'low'; 1 = 'norm'; 2 = 'high'; NA = 'missing'",
levels = c("low", "norm", "high", "missing"))
table(dataset$temp)
low norm high missing
80 374 199 30
In [44]:
dataset$date <- recode(dataset$date,
"0 ='apr';1='may';2='june';3='july';4='aug';5='sept';6='oct';NA = 'missing'",
levels = c("apr", "may", "june", "july", "aug", "sept", "missing"))
table(dataset$date)
apr may june july aug sept missing
26 75 93 118 131 149 1
In [45]:
table(dataset$Class)
2-4-d-injury alternarialeaf-spot
16 91
anthracnose bacterial-blight
44 20
bacterial-pustule brown-spot
20 92
brown-stem-rot charcoal-rot
44 20
cyst-nematode diaporthe-pod-&-stem-blight
14 15
diaporthe-stem-canker downy-mildew
20 20
frog-eye-leaf-spot herbicide-injury
91 8
phyllosticta-leaf-spot phytophthora-rot
20 88
powdery-mildew purple-seed-stain
20 20
rhizoctonia-root-rot
20
In [46]:
# fix the classes to work with r
dataset$Class <- make.names(dataset$Class)
table(dataset$Class)
alternarialeaf.spot anthracnose
91 44
bacterial.blight bacterial.pustule
20 20
brown.spot brown.stem.rot
92 44
charcoal.rot cyst.nematode
20 14
diaporthe.pod...stem.blight diaporthe.stem.canker
15 20
downy.mildew frog.eye.leaf.spot
20 91
herbicide.injury phyllosticta.leaf.spot
8 20
phytophthora.rot powdery.mildew
88 20
purple.seed.stain rhizoctonia.root.rot
20 20
X2.4.d.injury
16
In [7]:
# summarize the class distribution
fnClassDistribution(Class = dataset$Class)
freq percentage
2-4-d-injury 16 2.342606
alternarialeaf-spot 91 13.323572
anthracnose 44 6.442167
bacterial-blight 20 2.928258
bacterial-pustule 20 2.928258
brown-spot 92 13.469985
brown-stem-rot 44 6.442167
charcoal-rot 20 2.928258
cyst-nematode 14 2.049780
diaporthe-pod-&-stem-blight 15 2.196193
diaporthe-stem-canker 20 2.928258
downy-mildew 20 2.928258
frog-eye-leaf-spot 91 13.323572
herbicide-injury 8 1.171303
phyllosticta-leaf-spot 20 2.928258
phytophthora-rot 88 12.884334
powdery-mildew 20 2.928258
purple-seed-stain 20 2.928258
rhizoctonia-root-rot 20 2.928258
In [47]:
CrossTable(x = dataset$Class, y = dataset$temp)
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 683
| dataset$temp
dataset$Class | low | norm | high | missing | Row Total |
----------------------------|-----------|-----------|-----------|-----------|-----------|
alternarialeaf.spot | 0 | 40 | 51 | 0 | 91 |
| 10.659 | 1.939 | 22.613 | 3.997 | |
| 0.000 | 0.440 | 0.560 | 0.000 | 0.133 |
| 0.000 | 0.107 | 0.256 | 0.000 | |
| 0.000 | 0.059 | 0.075 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
anthracnose | 0 | 33 | 11 | 0 | 44 |
| 5.154 | 3.292 | 0.258 | 1.933 | |
| 0.000 | 0.750 | 0.250 | 0.000 | 0.064 |
| 0.000 | 0.088 | 0.055 | 0.000 | |
| 0.000 | 0.048 | 0.016 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
bacterial.blight | 0 | 17 | 3 | 0 | 20 |
| 2.343 | 3.340 | 1.372 | 0.878 | |
| 0.000 | 0.850 | 0.150 | 0.000 | 0.029 |
| 0.000 | 0.045 | 0.015 | 0.000 | |
| 0.000 | 0.025 | 0.004 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
bacterial.pustule | 5 | 11 | 4 | 0 | 20 |
| 3.014 | 0.000 | 0.573 | 0.878 | |
| 0.250 | 0.550 | 0.200 | 0.000 | 0.029 |
| 0.062 | 0.029 | 0.020 | 0.000 | |
| 0.007 | 0.016 | 0.006 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
brown.spot | 0 | 82 | 10 | 0 | 92 |
| 10.776 | 19.849 | 10.536 | 4.041 | |
| 0.000 | 0.891 | 0.109 | 0.000 | 0.135 |
| 0.000 | 0.219 | 0.050 | 0.000 | |
| 0.000 | 0.120 | 0.015 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
brown.stem.rot | 13 | 25 | 6 | 0 | 44 |
| 11.945 | 0.034 | 3.628 | 1.933 | |
| 0.295 | 0.568 | 0.136 | 0.000 | 0.064 |
| 0.163 | 0.067 | 0.030 | 0.000 | |
| 0.019 | 0.037 | 0.009 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
charcoal.rot | 0 | 5 | 15 | 0 | 20 |
| 2.343 | 3.234 | 14.439 | 0.878 | |
| 0.000 | 0.250 | 0.750 | 0.000 | 0.029 |
| 0.000 | 0.013 | 0.075 | 0.000 | |
| 0.000 | 0.007 | 0.022 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
cyst.nematode | 0 | 0 | 0 | 14 | 14 |
| 1.640 | 7.666 | 4.079 | 291.348 | |
| 0.000 | 0.000 | 0.000 | 1.000 | 0.020 |
| 0.000 | 0.000 | 0.000 | 0.467 | |
| 0.000 | 0.000 | 0.000 | 0.020 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
diaporthe.pod...stem.blight | 0 | 0 | 15 | 0 | 15 |
| 1.757 | 8.214 | 25.853 | 0.659 | |
| 0.000 | 0.000 | 1.000 | 0.000 | 0.022 |
| 0.000 | 0.000 | 0.075 | 0.000 | |
| 0.000 | 0.000 | 0.022 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
diaporthe.stem.canker | 0 | 20 | 0 | 0 | 20 |
| 2.343 | 7.476 | 5.827 | 0.878 | |
| 0.000 | 1.000 | 0.000 | 0.000 | 0.029 |
| 0.000 | 0.053 | 0.000 | 0.000 | |
| 0.000 | 0.029 | 0.000 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
downy.mildew | 8 | 9 | 3 | 0 | 20 |
| 13.663 | 0.348 | 1.372 | 0.878 | |
| 0.400 | 0.450 | 0.150 | 0.000 | 0.029 |
| 0.100 | 0.024 | 0.015 | 0.000 | |
| 0.012 | 0.013 | 0.004 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
frog.eye.leaf.spot | 0 | 54 | 37 | 0 | 91 |
| 10.659 | 0.349 | 4.147 | 3.997 | |
| 0.000 | 0.593 | 0.407 | 0.000 | 0.133 |
| 0.000 | 0.144 | 0.186 | 0.000 | |
| 0.000 | 0.079 | 0.054 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
herbicide.injury | 8 | 0 | 0 | 0 | 8 |
| 53.237 | 4.381 | 2.331 | 0.351 | |
| 1.000 | 0.000 | 0.000 | 0.000 | 0.012 |
| 0.100 | 0.000 | 0.000 | 0.000 | |
| 0.012 | 0.000 | 0.000 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
phyllosticta.leaf.spot | 0 | 10 | 10 | 0 | 20 |
| 2.343 | 0.083 | 2.988 | 0.878 | |
| 0.000 | 0.500 | 0.500 | 0.000 | 0.029 |
| 0.000 | 0.027 | 0.050 | 0.000 | |
| 0.000 | 0.015 | 0.015 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
phytophthora.rot | 9 | 51 | 28 | 0 | 88 |
| 0.166 | 0.164 | 0.217 | 3.865 | |
| 0.102 | 0.580 | 0.318 | 0.000 | 0.129 |
| 0.112 | 0.136 | 0.141 | 0.000 | |
| 0.013 | 0.075 | 0.041 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
powdery.mildew | 10 | 10 | 0 | 0 | 20 |
| 25.030 | 0.083 | 5.827 | 0.878 | |
| 0.500 | 0.500 | 0.000 | 0.000 | 0.029 |
| 0.125 | 0.027 | 0.000 | 0.000 | |
| 0.015 | 0.015 | 0.000 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
purple.seed.stain | 7 | 7 | 6 | 0 | 20 |
| 9.259 | 1.426 | 0.005 | 0.878 | |
| 0.350 | 0.350 | 0.300 | 0.000 | 0.029 |
| 0.087 | 0.019 | 0.030 | 0.000 | |
| 0.010 | 0.010 | 0.009 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
rhizoctonia.root.rot | 20 | 0 | 0 | 0 | 20 |
| 133.093 | 10.952 | 5.827 | 0.878 | |
| 1.000 | 0.000 | 0.000 | 0.000 | 0.029 |
| 0.250 | 0.000 | 0.000 | 0.000 | |
| 0.029 | 0.000 | 0.000 | 0.000 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
X2.4.d.injury | 0 | 0 | 0 | 16 | 16 |
| 1.874 | 8.761 | 4.662 | 332.969 | |
| 0.000 | 0.000 | 0.000 | 1.000 | 0.023 |
| 0.000 | 0.000 | 0.000 | 0.533 | |
| 0.000 | 0.000 | 0.000 | 0.023 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
Column Total | 80 | 374 | 199 | 30 | 683 |
| 0.117 | 0.548 | 0.291 | 0.044 | |
----------------------------|-----------|-----------|-----------|-----------|-----------|
In [48]:
# Split out validation dataset
validation_index <- createDataPartition(dataset$Class, p = 0.80, list = FALSE)
validation <- dataset[-validation_index,]
dataset <- dataset[validation_index,]
In [49]:
control <- caret::trainControl(method = "cv", number = 10, sampling = "up", search = "random")
metric <- "Accuracy"
set.seed(13)
xgb <- caret::train(Class ~ ., data = dataset, method = "xgbLinear", metric = metric, trControl = control, na.action = na.pass, tuneLength = 15)
xgb
eXtreme Gradient Boosting
393 samples
35 predictor
19 classes: 'alternarialeaf.spot', 'anthracnose', 'bacterial.blight', 'bacterial.pustule', 'brown.spot', 'brown.stem.rot', 'charcoal.rot', 'cyst.nematode', 'diaporthe.pod...stem.blight', 'diaporthe.stem.canker', 'downy.mildew', 'frog.eye.leaf.spot', 'herbicide.injury', 'phyllosticta.leaf.spot', 'phytophthora.rot', 'powdery.mildew', 'purple.seed.stain', 'rhizoctonia.root.rot', 'X2.4.d.injury'
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 498, 493, 495, 498, 498, 497, ...
Addtional sampling using up-sampling
Resampling results across tuning parameters:
lambda alpha nrounds eta Accuracy Kappa
1.172317e-05 1.714939e-03 63 0.03062265 0.9231204 0.9156211
1.309642e-05 1.117352e-01 48 0.20773979 0.9303423 0.9236081
1.415223e-05 7.872409e-05 7 0.46470473 0.9338484 0.9274387
4.456196e-05 3.425635e-01 38 1.66951512 0.9320893 0.9254201
8.250286e-05 9.011206e-04 81 2.79386232 0.9266348 0.9194156
1.466103e-04 3.638820e-04 54 2.43794014 0.9303073 0.9234851
7.778636e-04 8.384563e-02 48 0.96775876 0.9282312 0.9212006
1.858186e-03 3.622672e-01 52 0.51949944 0.9267625 0.9196219
2.773122e-03 3.862522e-04 93 0.25253718 0.9264722 0.9192495
6.751193e-03 8.651494e-05 67 2.11915569 0.9266964 0.9194559
1.431099e-02 1.247184e-05 76 0.33653508 0.9300831 0.9232105
1.453314e-02 1.617243e-04 94 2.14255902 0.9267313 0.9195835
3.620379e-02 1.412948e-01 4 2.04721068 0.9301759 0.9233562
5.206797e-02 8.388680e-03 2 0.35234523 0.9251661 0.9179438
1.541768e-01 3.660031e-02 95 2.74626992 0.9266348 0.9194286
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were nrounds = 7, lambda =
1.415223e-05, alpha = 7.872409e-05 and eta = 0.4647047.
In [50]:
varImp(xgb)
xgbLinear variable importance
only 20 most important variables shown (out of 65)
Overall
precip.L 100.00
leaf.mild2 94.84
leaf.size.Q 79.86
int.discolor1 78.00
crop.hist1 77.92
leaf.mild1 77.90
int.discolor2 77.89
fruit.pods2 77.89
seed1 66.38
stem.cankers1 64.76
roots1 58.40
shriveling1 54.74
leaf.size.L 49.63
fruit.spots4 48.83
fruiting.bodies1 44.75
fruit.pods3 43.03
canker.lesion2 36.60
canker.lesion3 35.53
fruit.spots2 35.12
leaf.halo2 34.94
In [51]:
#test predicting power on unseen validation set
validation$prediction <- predict(xgb, newdata = validation, na.action = na.pass)
head(validation$prediction)
- diaporthe.stem.canker
- diaporthe.stem.canker
- charcoal.rot
- charcoal.rot
- rhizoctonia.root.rot
- phytophthora.rot
In [52]:
#Check the accuracy with a confusion matrix
confusionMatrix(validation$prediction, validation$Class)
Confusion Matrix and Statistics
Reference
Prediction alternarialeaf.spot anthracnose bacterial.blight
alternarialeaf.spot 15 0 0
anthracnose 0 8 0
bacterial.blight 0 0 4
bacterial.pustule 0 0 0
brown.spot 0 0 0
brown.stem.rot 0 0 0
charcoal.rot 0 0 0
cyst.nematode 0 0 0
diaporthe.pod...stem.blight 0 0 0
diaporthe.stem.canker 0 0 0
downy.mildew 0 0 0
frog.eye.leaf.spot 3 0 0
herbicide.injury 0 0 0
phyllosticta.leaf.spot 0 0 0
phytophthora.rot 0 0 0
powdery.mildew 0 0 0
purple.seed.stain 0 0 0
rhizoctonia.root.rot 0 0 0
X2.4.d.injury 0 0 0
Reference
Prediction bacterial.pustule brown.spot brown.stem.rot
alternarialeaf.spot 0 0 0
anthracnose 0 0 0
bacterial.blight 0 0 0
bacterial.pustule 4 0 0
brown.spot 0 17 0
brown.stem.rot 0 0 8
charcoal.rot 0 0 0
cyst.nematode 0 0 0
diaporthe.pod...stem.blight 0 0 0
diaporthe.stem.canker 0 0 0
downy.mildew 0 0 0
frog.eye.leaf.spot 0 1 0
herbicide.injury 0 0 0
phyllosticta.leaf.spot 0 0 0
phytophthora.rot 0 0 0
powdery.mildew 0 0 0
purple.seed.stain 0 0 0
rhizoctonia.root.rot 0 0 0
X2.4.d.injury 0 0 0
Reference
Prediction charcoal.rot cyst.nematode
alternarialeaf.spot 0 0
anthracnose 0 0
bacterial.blight 0 0
bacterial.pustule 0 0
brown.spot 0 0
brown.stem.rot 0 0
charcoal.rot 4 0
cyst.nematode 0 2
diaporthe.pod...stem.blight 0 0
diaporthe.stem.canker 0 0
downy.mildew 0 0
frog.eye.leaf.spot 0 0
herbicide.injury 0 0
phyllosticta.leaf.spot 0 0
phytophthora.rot 0 0
powdery.mildew 0 0
purple.seed.stain 0 0
rhizoctonia.root.rot 0 0
X2.4.d.injury 0 0
Reference
Prediction diaporthe.pod...stem.blight diaporthe.stem.canker
alternarialeaf.spot 0 0
anthracnose 0 0
bacterial.blight 0 0
bacterial.pustule 0 0
brown.spot 0 0
brown.stem.rot 0 0
charcoal.rot 0 0
cyst.nematode 0 0
diaporthe.pod...stem.blight 3 0
diaporthe.stem.canker 0 4
downy.mildew 0 0
frog.eye.leaf.spot 0 0
herbicide.injury 0 0
phyllosticta.leaf.spot 0 0
phytophthora.rot 0 0
powdery.mildew 0 0
purple.seed.stain 0 0
rhizoctonia.root.rot 0 0
X2.4.d.injury 0 0
Reference
Prediction downy.mildew frog.eye.leaf.spot herbicide.injury
alternarialeaf.spot 0 3 0
anthracnose 0 0 0
bacterial.blight 0 0 0
bacterial.pustule 0 0 0
brown.spot 0 0 0
brown.stem.rot 0 0 0
charcoal.rot 0 0 0
cyst.nematode 0 0 0
diaporthe.pod...stem.blight 0 0 0
diaporthe.stem.canker 0 0 0
downy.mildew 4 0 0
frog.eye.leaf.spot 0 15 0
herbicide.injury 0 0 1
phyllosticta.leaf.spot 0 0 0
phytophthora.rot 0 0 0
powdery.mildew 0 0 0
purple.seed.stain 0 0 0
rhizoctonia.root.rot 0 0 0
X2.4.d.injury 0 0 0
Reference
Prediction phyllosticta.leaf.spot phytophthora.rot
alternarialeaf.spot 0 0
anthracnose 0 0
bacterial.blight 0 0
bacterial.pustule 0 0
brown.spot 0 0
brown.stem.rot 0 0
charcoal.rot 0 0
cyst.nematode 0 0
diaporthe.pod...stem.blight 0 0
diaporthe.stem.canker 0 0
downy.mildew 0 0
frog.eye.leaf.spot 0 0
herbicide.injury 0 0
phyllosticta.leaf.spot 4 0
phytophthora.rot 0 17
powdery.mildew 0 0
purple.seed.stain 0 0
rhizoctonia.root.rot 0 0
X2.4.d.injury 0 0
Reference
Prediction powdery.mildew purple.seed.stain
alternarialeaf.spot 0 0
anthracnose 0 0
bacterial.blight 0 0
bacterial.pustule 0 0
brown.spot 0 0
brown.stem.rot 0 0
charcoal.rot 0 0
cyst.nematode 0 0
diaporthe.pod...stem.blight 0 0
diaporthe.stem.canker 0 0
downy.mildew 0 0
frog.eye.leaf.spot 0 0
herbicide.injury 0 0
phyllosticta.leaf.spot 0 0
phytophthora.rot 0 0
powdery.mildew 4 0
purple.seed.stain 0 4
rhizoctonia.root.rot 0 0
X2.4.d.injury 0 0
Reference
Prediction rhizoctonia.root.rot X2.4.d.injury
alternarialeaf.spot 0 0
anthracnose 0 0
bacterial.blight 0 0
bacterial.pustule 0 0
brown.spot 0 0
brown.stem.rot 0 0
charcoal.rot 0 0
cyst.nematode 0 0
diaporthe.pod...stem.blight 0 0
diaporthe.stem.canker 0 0
downy.mildew 0 0
frog.eye.leaf.spot 0 0
herbicide.injury 0 0
phyllosticta.leaf.spot 0 0
phytophthora.rot 0 0
powdery.mildew 0 0
purple.seed.stain 0 0
rhizoctonia.root.rot 4 0
X2.4.d.injury 0 3
Overall Statistics
Accuracy : 0.947
95% CI : (0.8938, 0.9784)
No Information Rate : 0.1364
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.9418
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: alternarialeaf.spot Class: anthracnose
Sensitivity 0.8333 1.00000
Specificity 0.9737 1.00000
Pos Pred Value 0.8333 1.00000
Neg Pred Value 0.9737 1.00000
Prevalence 0.1364 0.06061
Detection Rate 0.1136 0.06061
Detection Prevalence 0.1364 0.06061
Balanced Accuracy 0.9035 1.00000
Class: bacterial.blight Class: bacterial.pustule
Sensitivity 1.0000 1.0000
Specificity 1.0000 1.0000
Pos Pred Value 1.0000 1.0000
Neg Pred Value 1.0000 1.0000
Prevalence 0.0303 0.0303
Detection Rate 0.0303 0.0303
Detection Prevalence 0.0303 0.0303
Balanced Accuracy 1.0000 1.0000
Class: brown.spot Class: brown.stem.rot
Sensitivity 0.9444 1.00000
Specificity 1.0000 1.00000
Pos Pred Value 1.0000 1.00000
Neg Pred Value 0.9913 1.00000
Prevalence 0.1364 0.06061
Detection Rate 0.1288 0.06061
Detection Prevalence 0.1288 0.06061
Balanced Accuracy 0.9722 1.00000
Class: charcoal.rot Class: cyst.nematode
Sensitivity 1.0000 1.00000
Specificity 1.0000 1.00000
Pos Pred Value 1.0000 1.00000
Neg Pred Value 1.0000 1.00000
Prevalence 0.0303 0.01515
Detection Rate 0.0303 0.01515
Detection Prevalence 0.0303 0.01515
Balanced Accuracy 1.0000 1.00000
Class: diaporthe.pod...stem.blight
Sensitivity 1.00000
Specificity 1.00000
Pos Pred Value 1.00000
Neg Pred Value 1.00000
Prevalence 0.02273
Detection Rate 0.02273
Detection Prevalence 0.02273
Balanced Accuracy 1.00000
Class: diaporthe.stem.canker Class: downy.mildew
Sensitivity 1.0000 1.0000
Specificity 1.0000 1.0000
Pos Pred Value 1.0000 1.0000
Neg Pred Value 1.0000 1.0000
Prevalence 0.0303 0.0303
Detection Rate 0.0303 0.0303
Detection Prevalence 0.0303 0.0303
Balanced Accuracy 1.0000 1.0000
Class: frog.eye.leaf.spot Class: herbicide.injury
Sensitivity 0.8333 1.000000
Specificity 0.9649 1.000000
Pos Pred Value 0.7895 1.000000
Neg Pred Value 0.9735 1.000000
Prevalence 0.1364 0.007576
Detection Rate 0.1136 0.007576
Detection Prevalence 0.1439 0.007576
Balanced Accuracy 0.8991 1.000000
Class: phyllosticta.leaf.spot Class: phytophthora.rot
Sensitivity 1.0000 1.0000
Specificity 1.0000 1.0000
Pos Pred Value 1.0000 1.0000
Neg Pred Value 1.0000 1.0000
Prevalence 0.0303 0.1288
Detection Rate 0.0303 0.1288
Detection Prevalence 0.0303 0.1288
Balanced Accuracy 1.0000 1.0000
Class: powdery.mildew Class: purple.seed.stain
Sensitivity 1.0000 1.0000
Specificity 1.0000 1.0000
Pos Pred Value 1.0000 1.0000
Neg Pred Value 1.0000 1.0000
Prevalence 0.0303 0.0303
Detection Rate 0.0303 0.0303
Detection Prevalence 0.0303 0.0303
Balanced Accuracy 1.0000 1.0000
Class: rhizoctonia.root.rot Class: X2.4.d.injury
Sensitivity 1.0000 1.00000
Specificity 1.0000 1.00000
Pos Pred Value 1.0000 1.00000
Neg Pred Value 1.0000 1.00000
Prevalence 0.0303 0.02273
Detection Rate 0.0303 0.02273
Detection Prevalence 0.0303 0.02273
Balanced Accuracy 1.0000 1.00000
In [ ]:
Content source: jsphyg/Machine_Learning_Notebooks
Similar notebooks: