In [160]:
# first install packages devtools and pacman manually
#pull in source functions from github
devtools::source_url('https://raw.githubusercontent.com/jsphyg/Machine_Learning_Notebooks/master/myRfunctions.R')
#source("C:\\Work\\myRfunctions.R")
fnRunDate()
fnInstallPackages()
SHA-1 hash of file is 7a7a4f3a5ca0087d14658c26536447ad49ec74ab
'Project last run on Thu Sep 21 1:15:10 PM 2017'
'Package install completed'
In [161]:
# import data changing any blanks to NAs
dataset <- read_csv("C:\\Work\\kaggle_digitrecognizer\\train.csv", na = c(""))
test <- read_csv("C:\\Work\\kaggle_digitrecognizer\\test.csv", na = c(""))
head(dataset)
head(test)
Parsed with column specification:
cols(
.default = col_integer()
)
See spec(...) for full column specifications.
Parsed with column specification:
cols(
.default = col_integer()
)
See spec(...) for full column specifications.
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
In [162]:
sample <- read_csv("C:\\Work\\kaggle_digitrecognizer\\sample_submission.csv", na = c(""))
Parsed with column specification:
cols(
ImageId = col_integer(),
Label = col_integer()
)
In [163]:
#glimpse(dataset)
In [164]:
fnClassDistribution(Class = dataset$label)
freq percentage
0 4132 9.838095
1 4684 11.152381
2 4177 9.945238
3 4351 10.359524
4 4072 9.695238
5 3795 9.035714
6 4137 9.850000
7 4401 10.478571
8 4063 9.673810
9 4188 9.971429
In [165]:
psych::describe(dataset, check = T)
vars n mean sd min max range se
label 1 42000 4.4566428571 2.8877301 0 9 9 0.0140906803
pixel0 2 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel1 3 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel2 4 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel3 5 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel4 6 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel5 7 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel6 8 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel7 9 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel8 10 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel9 11 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel10 12 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel11 13 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel12 14 42000 0.0030000000 0.5681202 0 116 116 0.0027721428
pixel13 15 42000 0.0111904762 1.6269266 0 254 254 0.0079385890
pixel14 16 42000 0.0051428571 1.0539721 0 216 216 0.0051428571
pixel15 17 42000 0.0002142857 0.0439155 0 9 9 0.0002142857
pixel16 18 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel17 19 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel18 20 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel19 21 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel20 22 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel21 23 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel22 24 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel23 25 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel24 26 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel25 27 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel26 28 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel27 29 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
pixel28 30 42000 0.0000000000 0.0000000 0 0 0 0.0000000000
... ... ... ... ... ... ... ... ...
pixel754 756 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel755 757 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel756 758 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel757 759 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel758 760 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel759 761 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel760 762 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel761 763 42000 0.006142857 0.9498035 0 177 177 0.004634566
pixel762 764 42000 0.035833333 2.3508590 0 231 231 0.011471017
pixel763 765 42000 0.082357143 3.9342804 0 253 253 0.019197322
pixel764 766 42000 0.114904762 4.5435829 0 254 254 0.022170414
pixel765 767 42000 0.178714286 5.8567724 0 254 254 0.028578123
pixel766 768 42000 0.301452381 7.2197422 0 255 255 0.035228735
pixel767 769 42000 0.413642857 8.9282864 0 255 255 0.043565577
pixel768 770 42000 0.513666667 10.0040685 0 255 255 0.048814856
pixel769 771 42000 0.558833333 10.1295951 0 255 255 0.049427363
pixel770 772 42000 0.677857143 11.2549313 0 255 255 0.054918441
pixel771 773 42000 0.602809524 10.6960301 0 255 255 0.052191283
pixel772 774 42000 0.489238095 9.4800656 0 255 255 0.046257983
pixel773 775 42000 0.340214286 7.9502514 0 255 255 0.038793255
pixel774 776 42000 0.219285714 6.3128897 0 254 254 0.030803748
pixel775 777 42000 0.117095238 4.6338187 0 254 254 0.022610720
pixel776 778 42000 0.059023810 3.2744880 0 253 253 0.015977865
pixel777 779 42000 0.020190476 1.7598700 0 253 253 0.008587286
pixel778 780 42000 0.017238095 1.8944977 0 254 254 0.009244202
pixel779 781 42000 0.002857143 0.4142644 0 62 62 0.002021403
pixel780 782 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel781 783 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel782 784 42000 0.000000000 0.0000000 0 0 0 0.000000000
pixel783 785 42000 0.000000000 0.0000000 0 0 0 0.000000000
In [166]:
# Create a 28*28 matrix with pixel color values
m = matrix(unlist(dataset[4,-1]),nrow = 28,byrow = T)
# Plot that matrix
image(m,col=grey.colors(255))
# Create a 28*28 matrix with pixel color values
m = matrix(unlist(dataset[101,-1]),nrow = 28,byrow = T)
# Plot that matrix
image(m,col=grey.colors(255))
In [167]:
# separate multi-level, categorical response variable ("label") from the remaining predictor variables in the training dataset ("TRAIN")
LABEL <- dataset$label
dataset$label <- NULL
# try making all binary
#dataset[dataset > 0] <- 1
#test[test > 0] <- 1
# find and remove vectors with near-zero variance
nzv <- nearZeroVar(dataset)
str(nzv)
dataset <- dataset[, -nzv]
test <- test[, -nzv]
# re-attach response variable ("LABEL") to training dataset ("TRAIN")
dataset$label <- LABEL
dim(dataset)
dim(test)
int [1:532] 1 2 3 4 5 6 7 8 9 10 ...
- 42000
- 253
- 28000
- 252
In [168]:
dataset$label <- as.factor(dataset$label)
In [169]:
glimpse(dataset)
Observations: 42,000
Variables: 253
$ pixel151 <int> 0, 254, 0, 0, 253, 5, 0, 0, 0, 253, 0, 0, 0, 0, 252, 0, 91...
$ pixel152 <int> 0, 254, 9, 0, 253, 165, 0, 0, 0, 253, 0, 0, 0, 0, 252, 0, ...
$ pixel153 <int> 0, 254, 254, 0, 253, 254, 0, 0, 41, 253, 0, 0, 0, 0, 252, ...
$ pixel154 <int> 0, 254, 254, 0, 253, 179, 0, 0, 149, 253, 0, 0, 169, 0, 25...
$ pixel155 <int> 0, 217, 8, 0, 253, 163, 0, 0, 156, 253, 0, 0, 207, 0, 252,...
$ pixel156 <int> 0, 246, 0, 0, 253, 249, 0, 0, 179, 253, 0, 0, 33, 0, 246, ...
$ pixel157 <int> 0, 151, 0, 9, 253, 244, 0, 0, 254, 124, 0, 0, 0, 0, 209, 2...
$ pixel158 <int> 0, 32, 0, 77, 114, 72, 0, 0, 254, 0, 0, 0, 0, 0, 38, 73, 2...
$ pixel159 <int> 191, 0, 0, 0, 2, 0, 0, 0, 201, 0, 0, 0, 0, 0, 0, 0, 0, 222...
$ pixel178 <int> 0, 254, 0, 0, 253, 0, 0, 130, 0, 202, 0, 0, 0, 0, 233, 0, ...
$ pixel179 <int> 0, 254, 0, 0, 253, 135, 0, 190, 13, 200, 0, 0, 0, 0, 195, ...
$ pixel180 <int> 0, 254, 9, 0, 253, 254, 0, 254, 147, 181, 7, 0, 0, 0, 195,...
$ pixel181 <int> 0, 254, 254, 0, 253, 150, 0, 254, 241, 164, 24, 0, 0, 0, 1...
$ pixel182 <int> 0, 254, 254, 0, 253, 0, 0, 250, 253, 216, 24, 15, 168, 0, ...
$ pixel183 <int> 0, 254, 8, 0, 253, 0, 0, 175, 253, 253, 97, 48, 254, 0, 21...
$ pixel184 <int> 0, 254, 0, 0, 253, 189, 0, 135, 254, 253, 253, 143, 105, 0...
$ pixel185 <int> 0, 254, 0, 27, 253, 254, 0, 96, 253, 211, 253, 186, 0, 0, ...
$ pixel186 <int> 123, 231, 0, 202, 253, 243, 0, 96, 253, 151, 253, 244, 0, ...
$ pixel187 <int> 248, 54, 0, 0, 107, 31, 0, 16, 253, 0, 253, 143, 0, 0, 32,...
$ pixel188 <int> 253, 15, 0, 0, 0, 0, 0, 4, 253, 0, 255, 31, 0, 0, 0, 0, 0,...
$ pixel205 <int> 0, 254, 0, 0, 253, 0, 51, 254, 0, 78, 0, 0, 0, 255, 162, 0...
$ pixel206 <int> 0, 254, 0, 0, 253, 82, 51, 254, 31, 3, 0, 0, 0, 255, 56, 0...
$ pixel207 <int> 0, 254, 0, 0, 253, 248, 21, 248, 224, 0, 30, 0, 0, 191, 0,...
$ pixel208 <int> 0, 254, 9, 0, 253, 209, 41, 222, 253, 0, 186, 0, 0, 191, 0...
$ pixel209 <int> 0, 254, 254, 0, 253, 5, 51, 222, 253, 0, 252, 83, 84, 191,...
$ pixel210 <int> 0, 254, 254, 0, 253, 0, 51, 225, 180, 20, 252, 209, 249, 1...
$ pixel211 <int> 0, 254, 106, 0, 253, 0, 51, 254, 174, 134, 253, 253, 254, ...
$ pixel212 <int> 0, 254, 0, 0, 253, 164, 51, 254, 175, 253, 252, 252, 105, ...
$ pixel213 <int> 80, 254, 0, 27, 253, 236, 113, 254, 174, 253, 252, 252, 0,...
$ pixel214 <int> 247, 254, 0, 254, 253, 254, 193, 254, 174, 224, 252, 252, ...
$ pixel215 <int> 253, 254, 0, 63, 215, 115, 152, 254, 174, 0, 252, 252, 0, ...
$ pixel216 <int> 208, 104, 0, 0, 101, 0, 30, 206, 174, 0, 253, 192, 0, 0, 1...
$ pixel217 <int> 13, 0, 0, 0, 3, 0, 0, 112, 223, 0, 252, 15, 0, 0, 0, 0, 0,...
$ pixel232 <int> 0, 254, 0, 207, 253, 0, 252, 254, 0, 0, 0, 0, 0, 255, 173,...
$ pixel233 <int> 0, 254, 0, 6, 253, 8, 253, 177, 7, 0, 0, 0, 0, 255, 0, 0, ...
$ pixel234 <int> 0, 254, 0, 0, 253, 211, 252, 117, 197, 0, 38, 0, 0, 255, 0...
$ pixel235 <int> 0, 254, 0, 0, 248, 254, 223, 39, 254, 0, 155, 5, 0, 255, 0...
$ pixel236 <int> 0, 254, 9, 0, 161, 58, 243, 0, 253, 0, 252, 166, 0, 255, 0...
$ pixel237 <int> 0, 109, 254, 0, 222, 0, 253, 0, 165, 0, 252, 241, 89, 255,...
$ pixel238 <int> 0, 83, 254, 0, 222, 0, 252, 56, 2, 28, 252, 252, 254, 255,...
$ pixel239 <int> 0, 199, 184, 0, 246, 0, 253, 248, 0, 206, 253, 253, 254, 2...
$ pixel240 <int> 29, 254, 0, 0, 253, 0, 252, 102, 0, 253, 252, 252, 105, 25...
$ pixel241 <int> 207, 254, 0, 27, 253, 33, 253, 48, 0, 253, 252, 170, 0, 25...
$ pixel242 <int> 253, 254, 0, 254, 253, 230, 252, 48, 0, 224, 227, 162, 0, ...
$ pixel243 <int> 235, 254, 0, 65, 253, 212, 233, 103, 0, 0, 79, 252, 0, 128...
$ pixel244 <int> 77, 243, 0, 0, 253, 6, 30, 192, 0, 0, 222, 252, 0, 0, 27, ...
$ pixel245 <int> 0, 85, 0, 0, 39, 0, 0, 254, 12, 0, 252, 113, 0, 0, 0, 0, 0...
$ pixel260 <int> 0, 254, 0, 254, 253, 0, 41, 36, 0, 0, 0, 0, 0, 128, 253, 0...
$ pixel261 <int> 0, 254, 0, 21, 253, 119, 102, 0, 152, 0, 85, 5, 0, 128, 0,...
$ pixel262 <int> 0, 202, 0, 0, 229, 254, 102, 0, 253, 0, 233, 61, 0, 191, 0...
$ pixel263 <int> 0, 147, 0, 0, 77, 156, 102, 0, 254, 0, 252, 234, 0, 255, 0...
$ pixel264 <int> 0, 147, 9, 0, 0, 3, 102, 0, 162, 0, 252, 252, 0, 255, 0, 0...
$ pixel265 <int> 0, 45, 254, 0, 0, 0, 102, 0, 18, 0, 252, 252, 89, 255, 0, ...
$ pixel266 <int> 0, 0, 254, 0, 0, 0, 102, 72, 0, 78, 252, 243, 254, 255, 0,...
$ pixel267 <int> 54, 11, 184, 0, 70, 0, 102, 92, 0, 253, 253, 121, 193, 255...
$ pixel268 <int> 209, 29, 0, 0, 218, 0, 162, 0, 0, 253, 252, 44, 14, 255, 0...
$ pixel269 <int> 253, 200, 0, 20, 253, 18, 254, 0, 0, 253, 252, 2, 0, 255, ...
$ pixel270 <int> 253, 254, 0, 239, 253, 230, 253, 0, 0, 224, 202, 21, 0, 25...
$ pixel271 <int> 88, 254, 0, 65, 253, 254, 142, 0, 0, 0, 11, 245, 0, 191, 2...
$ pixel272 <int> 0, 254, 0, 0, 253, 33, 0, 12, 0, 0, 180, 252, 0, 0, 27, 0,...
$ pixel273 <int> 0, 171, 0, 0, 215, 0, 0, 224, 0, 0, 252, 122, 0, 0, 0, 0, ...
$ pixel288 <int> 0, 254, 0, 254, 253, 10, 0, 0, 0, 0, 43, 0, 0, 0, 252, 0, ...
$ pixel289 <int> 0, 89, 0, 21, 253, 212, 0, 0, 235, 0, 240, 80, 0, 0, 126, ...
$ pixel290 <int> 0, 67, 0, 0, 195, 254, 0, 0, 254, 0, 253, 252, 0, 0, 0, 0,...
$ pixel291 <int> 0, 0, 0, 0, 0, 35, 0, 0, 158, 0, 252, 252, 0, 0, 0, 0, 211...
$ pixel292 <int> 0, 0, 9, 0, 0, 0, 0, 0, 15, 0, 252, 243, 0, 0, 0, 0, 114, ...
$ pixel293 <int> 0, 0, 254, 0, 0, 0, 0, 0, 0, 5, 252, 163, 89, 0, 0, 0, 96,...
$ pixel294 <int> 93, 0, 254, 0, 0, 0, 0, 0, 0, 99, 252, 50, 254, 191, 0, 0,...
$ pixel295 <int> 254, 0, 184, 0, 0, 0, 0, 0, 0, 234, 253, 0, 184, 255, 0, 2...
$ pixel296 <int> 253, 0, 0, 0, 104, 0, 183, 0, 0, 253, 252, 0, 0, 255, 51, ...
$ pixel297 <int> 238, 128, 0, 0, 224, 33, 253, 0, 0, 253, 252, 0, 0, 255, 2...
$ pixel298 <int> 170, 252, 0, 195, 253, 254, 212, 50, 0, 224, 244, 5, 0, 25...
$ pixel299 <int> 17, 254, 0, 65, 253, 254, 20, 139, 0, 0, 126, 101, 0, 128,...
$ pixel300 <int> 0, 254, 0, 0, 253, 33, 0, 240, 0, 0, 201, 88, 0, 0, 27, 0,...
$ pixel301 <int> 0, 212, 0, 0, 253, 0, 0, 254, 0, 0, 252, 8, 0, 0, 0, 0, 0,...
$ pixel316 <int> 0, 254, 0, 254, 253, 116, 0, 0, 74, 0, 212, 105, 0, 0, 195...
$ pixel317 <int> 0, 29, 0, 21, 247, 254, 0, 0, 250, 0, 253, 234, 0, 0, 110,...
$ pixel318 <int> 0, 0, 0, 0, 75, 154, 0, 0, 253, 0, 255, 252, 0, 0, 0, 0, 1...
$ pixel319 <int> 0, 0, 0, 0, 0, 3, 0, 0, 15, 14, 253, 210, 0, 0, 0, 0, 18, ...
$ pixel320 <int> 0, 0, 6, 0, 0, 0, 0, 0, 0, 142, 253, 88, 7, 128, 0, 0, 0, ...
$ pixel321 <int> 23, 0, 185, 0, 0, 0, 0, 0, 0, 220, 253, 0, 204, 255, 0, 0,...
$ pixel322 <int> 210, 0, 254, 0, 0, 0, 0, 0, 0, 219, 232, 0, 254, 255, 0, 5...
$ pixel323 <int> 254, 0, 184, 0, 0, 0, 11, 7, 16, 236, 221, 0, 184, 255, 51...
$ pixel324 <int> 253, 0, 0, 0, 0, 0, 213, 121, 20, 253, 42, 0, 0, 255, 101,...
$ pixel325 <int> 159, 0, 0, 0, 26, 33, 254, 220, 19, 253, 0, 74, 0, 255, 25...
$ pixel326 <int> 0, 83, 0, 195, 200, 254, 91, 254, 0, 240, 104, 199, 0, 191...
$ pixel327 <int> 0, 254, 0, 142, 253, 254, 0, 244, 0, 121, 253, 240, 0, 0, ...
$ pixel328 <int> 0, 254, 0, 0, 253, 33, 0, 194, 0, 7, 255, 43, 0, 0, 12, 0,...
$ pixel329 <int> 0, 254, 0, 0, 253, 0, 0, 15, 0, 0, 253, 0, 0, 0, 0, 0, 0, ...
$ pixel343 <int> 0, 254, 0, 56, 253, 0, 102, 8, 7, 0, 25, 0, 0, 0, 0, 0, 0,...
$ pixel344 <int> 0, 240, 0, 251, 253, 124, 0, 107, 199, 0, 223, 185, 0, 0, ...
$ pixel345 <int> 0, 24, 0, 21, 195, 254, 0, 112, 253, 0, 252, 252, 0, 0, 0,...
$ pixel346 <int> 0, 0, 0, 0, 0, 115, 0, 112, 253, 0, 253, 210, 0, 0, 0, 0, ...
$ pixel347 <int> 0, 0, 0, 0, 0, 0, 0, 112, 0, 24, 252, 21, 0, 128, 7, 0, 0,...
$ pixel348 <int> 16, 0, 0, 0, 0, 0, 0, 87, 0, 253, 252, 0, 0, 255, 29, 0, 0...
$ pixel349 <int> 209, 0, 89, 0, 0, 0, 0, 112, 25, 253, 214, 4, 89, 255, 29,...
$ pixel350 <int> 253, 0, 254, 0, 0, 0, 0, 141, 130, 253, 18, 12, 254, 255, ...
$ pixel351 <int> 254, 0, 184, 0, 0, 0, 51, 218, 235, 253, 0, 41, 184, 255, ...
$ pixel352 <int> 240, 0, 0, 0, 0, 0, 252, 248, 254, 235, 0, 231, 0, 255, 25...
$ pixel353 <int> 81, 0, 0, 0, 0, 160, 172, 177, 247, 233, 34, 249, 0, 128, ...
$ pixel354 <int> 0, 25, 0, 195, 26, 254, 10, 68, 145, 253, 215, 252, 0, 0, ...
$ pixel355 <int> 0, 240, 0, 227, 200, 239, 0, 20, 6, 253, 252, 252, 0, 0, 2...
$ pixel356 <int> 0, 254, 0, 0, 253, 23, 0, 0, 0, 185, 253, 55, 0, 0, 177, 0...
$ pixel371 <int> 0, 254, 0, 0, 253, 0, 102, 77, 20, 0, 0, 0, 0, 0, 0, 0, 0,...
$ pixel372 <int> 0, 186, 0, 222, 253, 203, 0, 221, 253, 0, 99, 242, 0, 0, 0...
$ pixel373 <int> 0, 7, 0, 153, 99, 254, 0, 254, 253, 0, 246, 252, 0, 0, 0, ...
$ pixel374 <int> 0, 0, 0, 5, 0, 35, 0, 254, 177, 0, 253, 218, 0, 0, 126, 0,...
$ pixel375 <int> 0, 0, 0, 0, 0, 0, 0, 254, 100, 8, 252, 154, 0, 64, 165, 0,...
$ pixel376 <int> 27, 0, 4, 0, 0, 0, 0, 254, 219, 150, 252, 154, 13, 255, 25...
$ pixel377 <int> 253, 0, 146, 0, 0, 0, 0, 254, 240, 194, 77, 184, 209, 255,...
$ pixel378 <int> 253, 0, 254, 0, 0, 0, 0, 225, 253, 194, 0, 252, 254, 255, ...
$ pixel379 <int> 254, 0, 184, 0, 0, 0, 214, 104, 253, 194, 7, 253, 178, 255...
$ pixel380 <int> 13, 0, 0, 0, 0, 0, 253, 39, 254, 53, 70, 252, 0, 255, 252,...
$ pixel381 <int> 0, 0, 0, 0, 0, 197, 102, 0, 253, 40, 203, 252, 0, 0, 252, ...
$ pixel382 <int> 0, 0, 0, 120, 0, 254, 0, 0, 253, 97, 252, 248, 0, 0, 252, ...
$ pixel383 <int> 0, 166, 0, 240, 25, 178, 0, 0, 125, 253, 252, 184, 0, 0, 2...
$ pixel384 <int> 0, 254, 0, 13, 231, 0, 0, 0, 0, 253, 173, 22, 0, 0, 252, 0...
$ pixel399 <int> 0, 254, 0, 0, 253, 23, 20, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ pixel400 <int> 0, 254, 0, 67, 253, 239, 0, 10, 193, 0, 0, 209, 0, 0, 0, 0...
$ pixel401 <int> 0, 29, 0, 251, 99, 221, 0, 32, 253, 0, 42, 252, 0, 0, 0, 0...
$ pixel402 <int> 0, 0, 0, 40, 0, 11, 0, 32, 253, 0, 253, 252, 0, 0, 226, 0,...
$ pixel403 <int> 20, 0, 0, 0, 0, 0, 0, 32, 254, 0, 252, 252, 37, 0, 253, 0,...
$ pixel404 <int> 206, 0, 9, 0, 0, 0, 0, 32, 253, 0, 252, 252, 209, 128, 253...
$ pixel405 <int> 254, 0, 254, 0, 0, 0, 0, 130, 253, 0, 236, 252, 254, 255, ...
$ pixel406 <int> 254, 0, 254, 0, 0, 0, 0, 215, 200, 0, 103, 252, 254, 255, ...
$ pixel407 <int> 198, 0, 184, 0, 0, 0, 253, 195, 155, 0, 160, 253, 69, 255,...
$ pixel408 <int> 7, 0, 0, 0, 0, 0, 252, 47, 155, 0, 252, 252, 0, 255, 140, ...
$ pixel409 <int> 0, 0, 0, 0, 0, 198, 102, 0, 238, 0, 252, 252, 0, 255, 140,...
$ pixel410 <int> 0, 0, 0, 94, 0, 255, 0, 0, 253, 122, 218, 196, 0, 191, 140...
$ pixel411 <int> 0, 75, 0, 255, 0, 123, 0, 0, 229, 253, 108, 0, 0, 64, 192,...
$ pixel412 <int> 0, 254, 0, 69, 223, 0, 0, 0, 23, 253, 0, 0, 0, 0, 253, 0, ...
$ pixel427 <int> 0, 254, 0, 0, 253, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20...
$ pixel428 <int> 0, 254, 0, 0, 253, 238, 0, 0, 61, 0, 0, 17, 0, 0, 0, 0, 0,...
$ pixel429 <int> 0, 29, 0, 234, 99, 178, 0, 0, 249, 0, 0, 57, 0, 0, 0, 0, 0...
$ pixel430 <int> 0, 0, 0, 184, 0, 0, 0, 0, 254, 0, 148, 142, 0, 0, 178, 0, ...
$ pixel431 <int> 168, 0, 0, 0, 0, 0, 0, 0, 241, 0, 252, 95, 107, 0, 252, 0,...
$ pixel432 <int> 253, 0, 9, 0, 0, 0, 0, 0, 150, 0, 252, 142, 254, 0, 242, 0...
$ pixel433 <int> 253, 0, 254, 0, 0, 0, 0, 0, 30, 0, 252, 61, 254, 0, 167, 7...
$ pixel434 <int> 196, 0, 254, 0, 0, 0, 62, 6, 0, 0, 252, 81, 254, 255, 106,...
$ pixel435 <int> 7, 0, 184, 0, 0, 0, 254, 111, 0, 0, 253, 253, 184, 255, 18...
$ pixel436 <int> 0, 0, 0, 0, 0, 10, 253, 231, 0, 0, 231, 252, 0, 255, 0, 0,...
$ pixel437 <int> 0, 0, 0, 0, 0, 219, 41, 174, 215, 55, 106, 209, 0, 255, 0,...
$ pixel438 <int> 0, 0, 0, 19, 0, 254, 0, 5, 254, 237, 14, 20, 0, 255, 0, 0,...
$ pixel439 <int> 0, 48, 0, 245, 0, 96, 0, 0, 254, 253, 0, 0, 0, 255, 12, 0,...
$ pixel440 <int> 0, 254, 0, 69, 127, 0, 0, 0, 58, 253, 0, 0, 0, 0, 228, 0, ...
$ pixel441 <int> 0, 254, 0, 0, 253, 0, 0, 0, 0, 170, 0, 0, 0, 0, 252, 0, 0,...
$ pixel455 <int> 0, 254, 0, 0, 253, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24...
$ pixel456 <int> 0, 254, 0, 0, 253, 249, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2...
$ pixel457 <int> 0, 29, 0, 234, 99, 204, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ pixel458 <int> 20, 0, 0, 169, 0, 0, 0, 0, 39, 0, 24, 0, 0, 0, 19, 0, 124,...
$ pixel459 <int> 203, 0, 0, 0, 0, 0, 0, 0, 30, 0, 253, 0, 3, 0, 55, 0, 253,...
$ pixel460 <int> 253, 0, 9, 0, 0, 0, 0, 0, 0, 0, 253, 0, 187, 0, 49, 0, 253...
$ pixel461 <int> 248, 0, 254, 0, 0, 0, 0, 0, 0, 0, 253, 11, 254, 0, 0, 107,...
$ pixel462 <int> 76, 0, 254, 0, 0, 0, 102, 0, 0, 0, 253, 177, 254, 0, 0, 25...
$ pixel463 <int> 0, 0, 184, 0, 0, 0, 253, 0, 0, 0, 255, 255, 134, 128, 0, 1...
$ pixel464 <int> 0, 0, 0, 0, 0, 25, 171, 40, 0, 0, 159, 230, 0, 191, 0, 0, ...
$ pixel465 <int> 0, 0, 0, 0, 0, 235, 0, 228, 214, 130, 7, 86, 0, 255, 0, 0,...
$ pixel466 <int> 0, 0, 0, 3, 0, 254, 0, 205, 253, 253, 0, 0, 0, 255, 0, 0, ...
$ pixel467 <int> 0, 48, 0, 199, 0, 62, 0, 35, 234, 253, 0, 0, 0, 255, 0, 0,...
$ pixel468 <int> 0, 254, 0, 182, 139, 0, 0, 0, 31, 253, 0, 0, 0, 255, 225, ...
$ pixel469 <int> 0, 254, 0, 10, 253, 0, 0, 0, 0, 170, 0, 0, 0, 0, 252, 0, 0...
$ pixel483 <int> 0, 254, 0, 0, 253, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25...
$ pixel484 <int> 0, 254, 0, 0, 253, 243, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1...
$ pixel485 <int> 22, 200, 0, 154, 99, 204, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 4...
$ pixel486 <int> 188, 12, 0, 205, 0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 0, 0, 220...
$ pixel487 <int> 253, 0, 0, 4, 0, 0, 0, 0, 0, 0, 252, 0, 0, 0, 0, 0, 253, 0...
$ pixel488 <int> 245, 0, 9, 0, 0, 0, 0, 0, 0, 0, 240, 12, 185, 0, 0, 0, 253...
$ pixel489 <int> 93, 0, 254, 0, 0, 0, 0, 0, 0, 0, 244, 124, 254, 0, 0, 188,...
$ pixel490 <int> 0, 0, 254, 26, 0, 0, 163, 0, 0, 4, 252, 252, 155, 0, 0, 25...
$ pixel491 <int> 0, 0, 184, 72, 0, 0, 254, 0, 0, 12, 253, 245, 3, 0, 0, 68,...
$ pixel492 <int> 0, 0, 0, 128, 0, 91, 91, 0, 41, 120, 231, 57, 0, 0, 0, 0, ...
$ pixel493 <int> 0, 0, 0, 203, 0, 254, 0, 56, 241, 193, 37, 0, 0, 64, 0, 0,...
$ pixel494 <int> 0, 16, 0, 208, 0, 248, 0, 212, 253, 253, 0, 0, 0, 191, 16,...
$ pixel495 <int> 0, 209, 0, 254, 78, 36, 0, 226, 183, 253, 0, 0, 0, 255, 92...
$ pixel496 <int> 0, 254, 0, 254, 248, 0, 0, 38, 0, 214, 0, 0, 0, 255, 243, ...
$ pixel511 <int> 0, 254, 0, 0, 253, 33, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 2...
$ pixel512 <int> 0, 254, 0, 0, 253, 254, 0, 0, 0, 0, 164, 0, 0, 0, 0, 0, 49...
$ pixel513 <int> 103, 254, 0, 61, 216, 204, 0, 0, 0, 0, 246, 0, 0, 64, 0, 0...
$ pixel514 <int> 253, 202, 0, 254, 34, 0, 0, 0, 0, 0, 253, 0, 0, 128, 0, 0,...
$ pixel515 <int> 253, 66, 0, 129, 0, 0, 0, 0, 0, 0, 187, 0, 0, 128, 0, 0, 2...
$ pixel516 <int> 191, 0, 9, 113, 0, 0, 0, 0, 0, 0, 50, 135, 185, 0, 0, 0, 2...
$ pixel517 <int> 0, 0, 254, 186, 0, 0, 0, 0, 0, 7, 99, 252, 254, 0, 0, 188,...
$ pixel518 <int> 0, 0, 254, 245, 0, 0, 203, 0, 0, 153, 246, 252, 238, 0, 0,...
$ pixel519 <int> 0, 0, 184, 251, 0, 67, 253, 0, 0, 253, 253, 86, 7, 0, 0, 0...
$ pixel520 <int> 0, 0, 0, 189, 0, 241, 50, 0, 201, 253, 252, 0, 0, 0, 0, 0,...
$ pixel521 <int> 0, 21, 0, 75, 0, 254, 0, 0, 253, 253, 69, 0, 0, 0, 16, 0, ...
$ pixel522 <int> 0, 161, 0, 56, 33, 133, 0, 30, 253, 253, 0, 0, 0, 64, 203,...
$ pixel523 <int> 0, 254, 0, 136, 152, 0, 0, 215, 102, 212, 0, 0, 0, 255, 25...
$ pixel524 <int> 0, 254, 0, 254, 253, 0, 0, 188, 0, 30, 0, 0, 0, 255, 252, ...
$ pixel538 <int> 0, 60, 0, 0, 253, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 253...
$ pixel539 <int> 0, 212, 0, 0, 253, 33, 0, 0, 0, 136, 80, 0, 0, 0, 0, 0, 0,...
$ pixel540 <int> 89, 254, 0, 0, 253, 254, 0, 0, 0, 70, 232, 0, 0, 0, 0, 0, ...
$ pixel541 <int> 240, 254, 0, 15, 253, 214, 0, 0, 0, 6, 252, 0, 0, 191, 0, ...
$ pixel542 <int> 253, 254, 0, 216, 140, 7, 0, 0, 0, 0, 203, 0, 0, 255, 0, 0...
$ pixel543 <int> 195, 194, 0, 233, 0, 0, 0, 0, 0, 27, 58, 79, 0, 128, 0, 0,...
$ pixel544 <int> 25, 48, 156, 233, 0, 0, 0, 0, 0, 67, 0, 248, 185, 0, 0, 10...
$ pixel545 <int> 0, 48, 254, 159, 0, 0, 51, 0, 0, 186, 0, 252, 254, 0, 0, 2...
$ pixel546 <int> 0, 34, 254, 104, 0, 50, 253, 0, 0, 253, 135, 233, 254, 0, ...
$ pixel547 <int> 0, 41, 184, 52, 0, 242, 254, 0, 114, 253, 253, 0, 8, 0, 0,...
$ pixel548 <int> 0, 48, 0, 0, 30, 254, 50, 0, 254, 253, 252, 0, 0, 0, 26, 0...
$ pixel549 <int> 0, 209, 0, 0, 139, 194, 0, 0, 253, 253, 121, 0, 0, 0, 207,...
$ pixel550 <int> 0, 254, 0, 0, 234, 24, 0, 0, 154, 234, 0, 0, 0, 64, 253, 0...
$ pixel551 <int> 0, 254, 0, 38, 253, 0, 0, 86, 5, 31, 0, 0, 0, 255, 255, 0,...
$ pixel552 <int> 0, 254, 0, 254, 253, 0, 0, 254, 0, 0, 0, 0, 0, 255, 215, 0...
$ pixel567 <int> 15, 86, 0, 0, 253, 5, 0, 0, 0, 253, 246, 0, 0, 0, 0, 0, 0,...
$ pixel568 <int> 220, 243, 0, 0, 253, 193, 0, 0, 0, 253, 252, 0, 0, 64, 0, ...
$ pixel569 <int> 253, 254, 0, 0, 253, 254, 0, 0, 0, 191, 200, 0, 0, 255, 0,...
$ pixel570 <int> 253, 254, 0, 0, 250, 78, 0, 0, 0, 183, 11, 0, 0, 255, 0, 0...
$ pixel571 <int> 80, 254, 0, 0, 208, 0, 0, 0, 0, 223, 0, 231, 0, 64, 0, 0, ...
$ pixel572 <int> 0, 254, 185, 0, 106, 0, 0, 0, 0, 253, 0, 252, 185, 0, 10, ...
$ pixel573 <int> 0, 254, 255, 0, 106, 19, 51, 0, 0, 253, 0, 202, 254, 0, 85...
$ pixel574 <int> 0, 233, 255, 0, 106, 128, 252, 0, 62, 253, 116, 12, 231, 0...
$ pixel575 <int> 0, 243, 184, 0, 200, 254, 253, 0, 254, 253, 253, 0, 7, 0, ...
$ pixel576 <int> 0, 254, 0, 0, 237, 195, 50, 0, 255, 172, 252, 0, 0, 0, 231...
$ pixel577 <int> 0, 254, 0, 0, 253, 36, 0, 0, 241, 216, 69, 0, 0, 0, 252, 0...
$ pixel578 <int> 0, 254, 0, 0, 253, 0, 0, 0, 30, 112, 0, 0, 0, 191, 252, 0,...
$ pixel579 <int> 0, 254, 0, 18, 253, 0, 0, 6, 0, 0, 0, 0, 0, 255, 168, 0, 2...
$ pixel580 <int> 0, 254, 0, 254, 253, 0, 0, 214, 0, 0, 0, 0, 0, 255, 33, 0,...
$ pixel595 <int> 94, 0, 0, 0, 253, 0, 0, 2, 0, 253, 253, 0, 0, 0, 57, 0, 0,...
$ pixel596 <int> 253, 114, 0, 0, 253, 103, 0, 0, 0, 253, 192, 0, 0, 0, 57, ...
$ pixel597 <int> 253, 254, 0, 0, 253, 254, 0, 0, 0, 253, 11, 20, 0, 255, 57...
$ pixel598 <int> 253, 254, 0, 0, 253, 222, 0, 0, 0, 253, 0, 175, 0, 255, 57...
$ pixel599 <int> 94, 254, 0, 0, 253, 74, 0, 0, 0, 253, 0, 248, 0, 191, 166,...
$ pixel600 <int> 0, 254, 185, 0, 253, 143, 0, 0, 10, 253, 0, 252, 185, 64, ...
$ pixel601 <int> 0, 254, 254, 0, 253, 235, 51, 0, 118, 253, 0, 136, 255, 0,...
$ pixel602 <int> 0, 254, 254, 0, 253, 254, 253, 0, 235, 253, 179, 0, 87, 0,...
$ pixel603 <int> 0, 254, 184, 0, 253, 228, 254, 0, 253, 253, 255, 0, 0, 0, ...
$ pixel604 <int> 0, 254, 0, 0, 253, 83, 50, 0, 249, 47, 253, 0, 0, 64, 239,...
$ pixel605 <int> 0, 254, 0, 0, 253, 0, 0, 0, 103, 25, 69, 0, 0, 191, 195, 0...
$ pixel606 <int> 0, 254, 0, 0, 253, 0, 0, 0, 0, 0, 0, 0, 0, 255, 118, 0, 25...
$ pixel607 <int> 0, 239, 0, 18, 253, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 253...
$ pixel624 <int> 251, 13, 0, 0, 253, 30, 0, 7, 55, 253, 119, 0, 0, 0, 252, ...
$ pixel625 <int> 253, 182, 0, 0, 253, 242, 0, 0, 81, 253, 0, 109, 0, 64, 25...
$ pixel626 <int> 250, 254, 0, 0, 253, 254, 0, 0, 0, 253, 5, 252, 0, 255, 25...
$ pixel627 <int> 131, 254, 0, 0, 253, 254, 0, 0, 102, 244, 47, 252, 0, 255,...
$ pixel628 <int> 0, 254, 185, 0, 253, 254, 0, 0, 211, 152, 47, 159, 173, 25...
$ pixel629 <int> 0, 254, 254, 0, 253, 254, 51, 0, 253, 223, 140, 6, 254, 25...
$ pixel630 <int> 0, 254, 254, 0, 253, 252, 252, 0, 253, 223, 244, 0, 87, 25...
$ pixel631 <int> 0, 254, 184, 0, 253, 84, 213, 0, 253, 109, 253, 0, 0, 255,...
$ pixel632 <int> 0, 254, 0, 0, 253, 0, 10, 0, 135, 4, 252, 0, 0, 255, 65, 0...
$ pixel633 <int> 0, 254, 0, 0, 253, 0, 0, 0, 0, 0, 69, 0, 0, 255, 0, 0, 253...
$ pixel653 <int> 218, 8, 0, 0, 129, 23, 0, 73, 243, 148, 184, 218, 0, 0, 25...
$ pixel654 <int> 95, 76, 0, 0, 208, 64, 0, 12, 234, 78, 191, 252, 0, 0, 127...
$ pixel655 <int> 0, 146, 0, 0, 253, 158, 0, 0, 254, 16, 252, 252, 0, 255, 1...
$ pixel656 <int> 0, 254, 63, 0, 253, 200, 0, 0, 253, 0, 252, 192, 9, 255, 1...
$ pixel657 <int> 0, 255, 254, 0, 253, 174, 152, 0, 253, 12, 252, 141, 254, ...
$ pixel658 <int> 0, 254, 254, 0, 253, 61, 253, 0, 216, 12, 252, 14, 87, 255...
$ pixel659 <int> 0, 255, 62, 0, 159, 0, 82, 0, 117, 0, 253, 0, 0, 255, 0, 0...
$ pixel660 <int> 0, 146, 0, 0, 129, 0, 0, 0, 0, 0, 240, 0, 0, 255, 0, 0, 25...
$ pixel683 <int> 0, 0, 0, 0, 0, 0, 0, 158, 254, 0, 252, 252, 0, 0, 0, 165, ...
$ pixel684 <int> 0, 0, 0, 0, 0, 0, 0, 74, 207, 0, 252, 205, 9, 64, 0, 124, ...
$ pixel685 <int> 0, 0, 0, 0, 0, 0, 233, 64, 126, 0, 252, 74, 254, 255, 0, 9...
$ label <fctr> 1, 0, 1, 4, 0, 0, 7, 3, 5, 3, 8, 9, 1, 3, 3, 1, 2, 0, 7, ...
In [170]:
#set aside a final full set of data to train on before splitting a validation set
final_dataset <- dataset
# split a validation dataset
validation_index <- createDataPartition(dataset$label, p=0.80, list=FALSE)
validation <- dataset[-validation_index,]
dataset <- dataset[validation_index,]
In [171]:
#make a smaller dataset
dataset_slice <- dplyr::select(dataset, everything()) %>% dplyr::slice(., 1:2000)
dim(dataset)
- 33604
- 253
In [172]:
formula <- label ~ .
In [173]:
# Evaluate Algorithms
# 10-fold cross validation with 3 repeats
control <- trainControl(method="cv", number=3
, sampling = "up"
)
metric <- "Accuracy"
In [174]:
# LDA
set.seed(7)
fit.lda <- train(formula, data=dataset_slice, method="lda", metric=metric, trControl=control)
# SVM
set.seed(7)
fit.svm <- train(formula, data=dataset_slice, method="svmRadial", metric=metric, trControl=control)
In [175]:
# Compare results
results <- resamples(list(
LDA = fit.lda,
SVM = fit.svm
))
summary(results)
bwplot(results)
dotplot(results)
Call:
summary.resamples(object = results)
Models: LDA, SVM
Number of resamples: 3
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
LDA 0.7695 0.7781 0.7868 0.7910 0.8018 0.8168 0
SVM 0.9147 0.9153 0.9159 0.9265 0.9324 0.9489 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
LDA 0.7436 0.7533 0.7630 0.7676 0.7796 0.7963 0
SVM 0.9051 0.9058 0.9065 0.9183 0.9249 0.9432 0
In [176]:
fit.lda
Linear Discriminant Analysis
2000 samples
252 predictor
10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
No pre-processing
Resampling: Cross-Validated (3 fold)
Summary of sample sizes: 1332, 1334, 1334
Addtional sampling using up-sampling
Resampling results:
Accuracy Kappa
0.7910216 0.7676065
In [177]:
fit.svm
Support Vector Machines with Radial Basis Function Kernel
2000 samples
252 predictor
10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
No pre-processing
Resampling: Cross-Validated (3 fold)
Summary of sample sizes: 1332, 1334, 1334
Addtional sampling using up-sampling
Resampling results across tuning parameters:
C Accuracy Kappa
0.25 0.8850063 0.8721160
0.50 0.9110098 0.9010340
1.00 0.9265118 0.9182626
Tuning parameter 'sigma' was held constant at a value of 0.002237381
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 0.002237381 and C = 1.
SVM had significantly better results. Tune SVM around its best performing parameter values
In [184]:
# Tune SVM
set.seed(13)
grid <- expand.grid(.sigma=seq(0.001, 0.010, by=0.001), .C=seq(1, 5, by=1))
fit.svm <- train(formula, data=dataset_slice, method="svmRadial", metric=metric, tuneGrid=grid, preProc=c("center","scale","BoxCox"), trControl=control, na.action=na.pass)
print(fit.svm)
plot(fit.svm)
Support Vector Machines with Radial Basis Function Kernel
2000 samples
252 predictor
10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
Pre-processing: centered (252), scaled (252)
Resampling: Cross-Validated (3 fold)
Summary of sample sizes: 1333, 1333, 1334
Addtional sampling using up-sampling prior to pre-processing
Resampling results across tuning parameters:
sigma C Accuracy Kappa
0.001 1 0.8964984 0.8848877
0.001 2 0.9120020 0.9021390
0.001 3 0.9130000 0.9032444
0.001 4 0.9144992 0.9049073
0.001 5 0.9194990 0.9104726
0.002 1 0.9200002 0.9110347
0.002 2 0.9254997 0.9171461
0.002 3 0.9275017 0.9193729
0.002 4 0.9284997 0.9204810
0.002 5 0.9274995 0.9193694
0.003 1 0.9279985 0.9199227
0.003 2 0.9349995 0.9277093
0.003 3 0.9339985 0.9265948
0.003 4 0.9339992 0.9265955
0.003 5 0.9339992 0.9265959
0.004 1 0.9360005 0.9288221
0.004 2 0.9385015 0.9316042
0.004 3 0.9390012 0.9321621
0.004 4 0.9375005 0.9304924
0.004 5 0.9375005 0.9304924
0.005 1 0.9385000 0.9316048
0.005 2 0.9405005 0.9338261
0.005 3 0.9410002 0.9343829
0.005 4 0.9410002 0.9343829
0.005 5 0.9410002 0.9343826
0.006 1 0.9405005 0.9338286
0.006 2 0.9395002 0.9327131
0.006 3 0.9390005 0.9321574
0.006 4 0.9400007 0.9332702
0.006 5 0.9400007 0.9332702
0.007 1 0.9385030 0.9316038
0.007 2 0.9405020 0.9338253
0.007 3 0.9400015 0.9332690
0.007 4 0.9400015 0.9332690
0.007 5 0.9405020 0.9338260
0.008 1 0.9370030 0.9299353
0.008 2 0.9400022 0.9332701
0.008 3 0.9400022 0.9332705
0.008 4 0.9395025 0.9327142
0.008 5 0.9400022 0.9332701
0.009 1 0.9335032 0.9260401
0.009 2 0.9385037 0.9316023
0.009 3 0.9380032 0.9310459
0.009 4 0.9380032 0.9310459
0.009 5 0.9380032 0.9310459
0.010 1 0.9300065 0.9221472
0.010 2 0.9335055 0.9260399
0.010 3 0.9330057 0.9254836
0.010 4 0.9330057 0.9254836
0.010 5 0.9335055 0.9260399
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 0.005 and C = 3.
In [185]:
# Build SVM model at best parameter values
# Give it the full dataset
set.seed(13)
grid <- expand.grid(.sigma=c(0.005), .C=3)
fit.svm <- train(formula, data=dataset, method="svmRadial", metric=metric, tuneGrid=grid, preProc=c("center","scale","BoxCox"), trControl=control, na.action=na.pass)
print(fit.svm)
#plot(fit.svm)
Support Vector Machines with Radial Basis Function Kernel
33604 samples
252 predictor
10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
Pre-processing: centered (252), scaled (252)
Resampling: Cross-Validated (3 fold)
Summary of sample sizes: 22403, 22402, 22403
Addtional sampling using up-sampling prior to pre-processing
Resampling results:
Accuracy Kappa
0.9756279 0.9729113
Tuning parameter 'sigma' was held constant at a value of 0.005
Tuning
parameter 'C' was held constant at a value of 3
In [186]:
# See how it does on the validation set
set.seed(13)
predictions <- predict(fit.svm, newdata=validation)
confusionMatrix(predictions, validation$label)
Confusion Matrix and Statistics
Reference
Prediction 0 1 2 3 4 5 6 7 8 9
0 819 0 3 0 1 1 4 0 0 1
1 0 927 2 1 0 0 0 3 5 1
2 0 5 818 5 1 1 3 6 2 0
3 1 0 2 849 0 11 0 0 5 1
4 1 1 1 0 804 0 1 4 0 8
5 0 0 1 10 0 736 2 1 4 4
6 4 0 0 1 0 3 815 0 3 0
7 0 2 4 1 0 1 0 861 0 5
8 0 0 3 2 1 2 2 1 791 1
9 1 1 1 1 7 4 0 4 2 816
Overall Statistics
Accuracy : 0.9809
95% CI : (0.9778, 0.9838)
No Information Rate : 0.1115
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.9788
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity 0.99153 0.9904 0.97964 0.9759 0.98771 0.96970
Specificity 0.99868 0.9984 0.99696 0.9973 0.99789 0.99712
Pos Pred Value 0.98794 0.9872 0.97265 0.9770 0.98049 0.97098
Neg Pred Value 0.99907 0.9988 0.99775 0.9972 0.99868 0.99699
Prevalence 0.09838 0.1115 0.09945 0.1036 0.09695 0.09040
Detection Rate 0.09755 0.1104 0.09743 0.1011 0.09576 0.08766
Detection Prevalence 0.09874 0.1118 0.10017 0.1035 0.09767 0.09028
Balanced Accuracy 0.99510 0.9944 0.98830 0.9866 0.99280 0.98341
Class: 6 Class: 7 Class: 8 Class: 9
Sensitivity 0.98549 0.9784 0.97414 0.97491
Specificity 0.99855 0.9983 0.99842 0.99722
Pos Pred Value 0.98668 0.9851 0.98506 0.97491
Neg Pred Value 0.99841 0.9975 0.99723 0.99722
Prevalence 0.09850 0.1048 0.09671 0.09969
Detection Rate 0.09707 0.1025 0.09421 0.09719
Detection Prevalence 0.09838 0.1041 0.09564 0.09969
Balanced Accuracy 0.99202 0.9883 0.98628 0.98607
In [187]:
# Build Final SVM model at best parameter values
# Give it the full final dataset
set.seed(13)
grid <- expand.grid(.sigma=c(0.005), .C=3)
fit.svm <- train(formula, data=final_dataset, method="svmRadial", metric=metric, tuneGrid=grid, preProc=c("center","scale","BoxCox"), trControl=control, na.action=na.pass)
print(fit.svm)
#plot(fit.svm)
Support Vector Machines with Radial Basis Function Kernel
42000 samples
252 predictor
10 classes: '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
Pre-processing: centered (252), scaled (252)
Resampling: Cross-Validated (3 fold)
Summary of sample sizes: 28001, 27999, 28000
Addtional sampling using up-sampling prior to pre-processing
Resampling results:
Accuracy Kappa
0.9780953 0.9756537
Tuning parameter 'sigma' was held constant at a value of 0.005
Tuning
parameter 'C' was held constant at a value of 3
In [190]:
# make predictions on the test set for Kaggle submission
test$prediction <- predict(fit.svm, newdata = test, na.action = na.pass)
head(test)
nrow(data.frame(test))
ImageId pixel151 pixel152 pixel153 pixel154 pixel155 pixel156 pixel157 pixel158 pixel159 ... pixel655 pixel656 pixel657 pixel658 pixel659 pixel660 pixel683 pixel684 pixel685 prediction
1 253 253 253 253 253 253 253 48 0 ... 0 0 0 0 0 0 0 0 0 2
2 0 0 0 0 0 0 0 0 0 ... 83 83 27 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 ... 90 0 0 0 0 0 0 0 0 9
4 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 9
5 253 249 134 134 212 254 253 182 0 ... 0 0 0 0 0 0 0 0 0 3
6 0 0 0 0 0 0 0 0 0 ... 2 167 253 111 0 0 88 253 240 7
28000
In [193]:
#bring in the ImageId's to use for submissino
test <- cbind(ImageId = sample$ImageId, test)
head(test)
ImageId ImageId.1 ImageId.2 pixel151 pixel152 pixel153 pixel154 pixel155 pixel156 pixel157 ... pixel655 pixel656 pixel657 pixel658 pixel659 pixel660 pixel683 pixel684 pixel685 prediction
1 1 1 253 253 253 253 253 253 253 ... 0 0 0 0 0 0 0 0 0 2
2 2 2 0 0 0 0 0 0 0 ... 83 83 27 0 0 0 0 0 0 0
3 3 3 0 0 0 0 0 0 0 ... 90 0 0 0 0 0 0 0 0 9
4 4 4 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 9
5 5 5 253 249 134 134 212 254 253 ... 0 0 0 0 0 0 0 0 0 3
6 6 6 0 0 0 0 0 0 0 ... 2 167 253 111 0 0 88 253 240 7
In [197]:
my_solution <- dplyr::select(test, ImageId = ImageId, Label = prediction)
readr::write_csv(x = data.frame(my_solution), path = "C:\\Work\\my_solution.csv")
head(my_solution, n=20)
tail(my_solution, n=20)
ImageId Label
1 2
2 0
3 9
4 9
5 3
6 7
7 0
8 3
9 0
10 3
11 5
12 7
13 4
14 0
15 4
16 3
17 3
18 1
19 9
20 0
ImageId Label
27981 27981 3
27982 27982 1
27983 27983 1
27984 27984 0
27985 27985 4
27986 27986 5
27987 27987 2
27988 27988 2
27989 27989 9
27990 27990 6
27991 27991 7
27992 27992 6
27993 27993 1
27994 27994 9
27995 27995 7
27996 27996 9
27997 27997 7
27998 27998 3
27999 27999 9
28000 28000 2
In [ ]:
Content source: jsphyg/Machine_Learning_Notebooks
Similar notebooks: