In [1]:
library(tidyverse)
library(caret)
library(keras)


Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ---------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats
Loading required package: lattice

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift


In [2]:
# Load the Kaggle MNIST data. read_csv() returns tibbles and infers the
# all-integer pixel columns automatically (see the parse spec printed below).
train <- read_csv(file.path("data", "train.csv"))
test <- read_csv(file.path("data", "test.csv"))


Parsed with column specification:
cols(
  .default = col_integer()
)
See spec(...) for full column specifications.
Parsed with column specification:
cols(
  .default = col_integer()
)
See spec(...) for full column specifications.

In [3]:
# Hold out 10% of the labelled rows as a validation set.
# createDataPartition() stratifies the split on the digit label so both
# partitions keep roughly the same class proportions.
in_train <- createDataPartition(train$label, p = 0.9, list = FALSE)
cv_train <- train[in_train, ]
cv_validation <- train[-in_train, ]

In [4]:
# Separate the pixel predictors from the target label for both partitions.
train_data <- cv_train %>% select(-label)
train_labels <- cv_train %>% pull(label)

validation_data <- cv_validation %>% select(-label)
validation_labels <- cv_validation %>% pull(label)

In [5]:
# data.matrix() converts the pixel tibbles to numeric matrices,
# the input format keras expects.
x_train <- data.matrix(train_data)
x_valid <- data.matrix(validation_data)

In [6]:
# One-hot encode the digit labels (0-9) into 10-column indicator
# matrices, as required by the categorical cross-entropy loss.
y_train <- to_categorical(train_labels, 10)
y_valid <- to_categorical(validation_labels, 10)

In [7]:
# Rescale pixel intensities from [0, 255] to [0, 1].
x_train <- x_train / 255
x_valid <- x_valid / 255

Modeling


In [8]:
# Fully connected network: 784 input pixels -> 256 -> 128 -> 10-way
# softmax, with dropout after each hidden layer to reduce overfitting.
# Note: the keras layer_* calls modify `model` in place.
model <- keras_model_sequential() 
model %>% 
  layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>% 
  layer_dropout(rate = 0.4) %>% 
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 10, activation = 'softmax')

In [9]:
summary(model)


________________________________________________________________________________
Layer (type)                        Output Shape                    Param #     
================================================================================
dense_1 (Dense)                     (None, 256)                     200960      
________________________________________________________________________________
dropout_1 (Dropout)                 (None, 256)                     0           
________________________________________________________________________________
dense_2 (Dense)                     (None, 128)                     32896       
________________________________________________________________________________
dropout_2 (Dropout)                 (None, 128)                     0           
________________________________________________________________________________
dense_3 (Dense)                     (None, 10)                      1290        
================================================================================
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
________________________________________________________________________________

In [10]:
# Configure training: cross-entropy loss, RMSProp optimizer, and
# accuracy as the monitored metric. compile() mutates `model` in place.
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(),
  metrics = c('accuracy')
)

In [11]:
# Train for 30 epochs in mini-batches of 128, evaluating on the
# held-out validation split after each epoch. `history` records the
# per-epoch loss/accuracy for plotting below.
history <- model %>% fit(
    x_train, y_train, 
    epochs = 30, 
    batch_size = 128, 
    validation_data = list(x_valid, y_valid)
)

Model evaluation


In [12]:
plot(history)



In [13]:
# Convert a matrix of class probabilities (one column per digit,
# columns ordered 0, 1, ..., 9) into a vector of predicted labels.
#
# predictions: numeric matrix (or data frame), one row per sample.
# Returns an integer vector with one label per row; ties are broken by
# taking the first (lowest) label, matching max.col's "first" rule.
as_labels <- function(predictions) {
    # Columns are ordered 0..9, so the predicted label is simply the
    # index of the row-wise maximum minus one. This avoids the original
    # tibble/colnames round-trip and works for any number of classes.
    max.col(predictions, ties.method = "first") - 1L
}

In [14]:
# Predict class probabilities on the validation set, convert them to
# digit labels, and cross-tabulate predicted vs. actual labels.
predictions <- model %>% predict(x_valid, batch_size = 128)
pred_labels <- as_labels(predictions)

confusion_matrix <- caret::confusionMatrix(
    table(predicted=pred_labels, actual=validation_labels)
)

In [15]:
confusion_matrix


Confusion Matrix and Statistics

         actual
predicted   0   1   2   3   4   5   6   7   8   9
        0 429   0   1   0   0   0   3   0   1   0
        1   0 458   1   0   1   1   0   2   0   0
        2   2   3 395   4   0   0   0   1   2   1
        3   0   4   1 422   0   7   0   0   1   0
        4   0   0   2   0 400   0   1   0   0   1
        5   0   0   1   7   0 387   1   1   3   1
        6   0   0   0   0   0   2 387   0   1   0
        7   0   0   1   0   1   0   0 434   1   4
        8   0   1   0   2   0   1   0   0 400   1
        9   0   0   0   2   3   2   0   3   0 408

Overall Statistics
                                          
               Accuracy : 0.9812          
                 95% CI : (0.9766, 0.9851)
    No Information Rate : 0.111           
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.9791          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity            0.9954   0.9828  0.98259   0.9657  0.98765  0.96750
Specificity            0.9987   0.9987  0.99658   0.9965  0.99895  0.99631
Pos Pred Value         0.9885   0.9892  0.96814   0.9701  0.99010  0.96509
Neg Pred Value         0.9995   0.9979  0.99815   0.9960  0.99868  0.99658
Prevalence             0.1026   0.1110  0.09574   0.1041  0.09645  0.09526
Detection Rate         0.1022   0.1091  0.09407   0.1005  0.09526  0.09216
Detection Prevalence   0.1034   0.1103  0.09717   0.1036  0.09621  0.09550
Balanced Accuracy      0.9970   0.9907  0.98958   0.9811  0.99330  0.98191
                     Class: 6 Class: 7 Class: 8 Class: 9
Sensitivity           0.98724   0.9841  0.97800  0.98077
Specificity           0.99921   0.9981  0.99868  0.99736
Pos Pred Value        0.99231   0.9841  0.98765  0.97608
Neg Pred Value        0.99869   0.9981  0.99763  0.99788
Prevalence            0.09336   0.1050  0.09740  0.09907
Detection Rate        0.09216   0.1034  0.09526  0.09717
Detection Prevalence  0.09288   0.1050  0.09645  0.09955
Balanced Accuracy     0.99323   0.9911  0.98834  0.98906

Creating the Kaggle submission


In [16]:
# For the Kaggle submission, retrain on the full labelled training set
# (no rows held out for validation).
x_kaggle_train <- data.matrix(select(train, -label))
y_kaggle_train <- train$label

In [17]:
# Same preprocessing as the cross-validation run: scale pixels to
# [0, 1] and one-hot encode the labels.
x_kaggle_train <- x_kaggle_train / 255
y_kaggle_train <- to_categorical(y_kaggle_train, 10)

In [18]:
# The competition test set has no label column; just convert and scale.
kaggle_test <- data.matrix(test)
kaggle_test <- kaggle_test / 255

In [19]:
# Same architecture as the cross-validation model above, rebuilt from
# scratch so training on the full data starts from fresh weights.
model <- keras_model_sequential() 
model %>% 
  layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>% 
  layer_dropout(rate = 0.4) %>% 
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 10, activation = 'softmax')

In [20]:
# Identical compile settings to the cross-validation run.
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(),
  metrics = c('accuracy')
)

In [21]:
# Train on all labelled data; no validation split here, so only
# training metrics are reported.
model %>% fit(
    x_kaggle_train, y_kaggle_train, 
    epochs = 30, 
    batch_size = 128
)

In [22]:
predictions <- model %>% predict(kaggle_test, batch_size = 128)

In [23]:
# Write model predictions to a CSV in Kaggle submission format
# (columns: ImageId, Label).
#
# predictions: matrix of class probabilities, one row per test image,
#   as accepted by as_labels().
# file_name:   path of the CSV file to create.
#
# Returns the submission tibble invisibly, so calls stay silent in a
# notebook but the result can still be captured for inspection.
write_predictions_to_csv <- function(predictions, file_name) {
    pred_labels <- as_labels(predictions)
    # seq_along() is safe for zero-length input; 1:length(x) would
    # yield c(1, 0) and produce a malformed submission.
    submission <- tibble(
        ImageId = seq_along(pred_labels), 
        Label = pred_labels
    )
    write_csv(submission, file_name)
    invisible(submission)
}

In [24]:
write_predictions_to_csv(predictions, "results/simple_nn.csv")

CNN model


In [25]:
# MNIST images are 28x28 grayscale.
img_rows <- 28
img_cols <- 28

In [26]:
input_shape <- c(img_rows, img_cols, 1)

In [27]:
# Convolutional network: two 3x3 conv layers (32 then 64 filters),
# 2x2 max pooling, dropout, then a flattened dense head ending in a
# 10-way softmax. Layers are added to `model` in place.
model <- keras_model_sequential()
model %>%
  layer_conv_2d(filters = 32, kernel_size = c(3,3), activation = 'relu',
                input_shape = input_shape) %>% 
  layer_conv_2d(filters = 64, kernel_size = c(3,3), activation = 'relu') %>% 
  layer_max_pooling_2d(pool_size = c(2, 2)) %>% 
  layer_dropout(rate = 0.25) %>% 
  layer_flatten() %>% 
  layer_dense(units = 128, activation = 'relu') %>% 
  layer_dropout(rate = 0.5) %>% 
  layer_dense(units = 10, activation = 'softmax')

In [28]:
# Same loss and metric as the dense model, but with the Adadelta
# optimizer instead of RMSProp.
model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adadelta(),
  metrics = c('accuracy')
)

In [29]:
x_kaggle_train <- array_reshape(x_kaggle_train, c(nrow(x_kaggle_train), img_rows, img_cols, 1))

In [30]:
# Train the CNN on the full labelled set for 20 epochs.
model %>% fit(
  x_kaggle_train, y_kaggle_train, 
  batch_size = 128,
  epochs = 20
)

In [31]:
kaggle_test <- array_reshape(kaggle_test, c(nrow(kaggle_test), img_rows, img_cols, 1))

In [32]:
predictions <- model %>% predict(kaggle_test, batch_size = 128)

In [33]:
write_predictions_to_csv(predictions, "results/cnn.csv")