Let's Load the Data

from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from keras.models import load_model
import sklearn
import numpy as np

Using TensorFlow backend.

# Load the puzzles and their solutions from sudoku.csv into two integer
# matrices with one row per puzzle and one column per board cell.
# NOTE: the loading logic is adapted from the snippet provided on the Kaggle page.
NUM_PUZZLES = 1000000  # rows pre-allocated; rows the file does not fill stay all-zero
NUM_CELLS = 81

quizzes = np.zeros((NUM_PUZZLES, NUM_CELLS), np.int32)
solutions = np.zeros((NUM_PUZZLES, NUM_CELLS), np.int32)
# The context manager closes the file as soon as parsing is finished
# (the original inline open() never closed its handle).
with open('sudoku.csv', 'r') as csv_file:
    next(csv_file, None)  # the first line is skipped (presumably the CSV header)
    for i, line in enumerate(csv_file):
        quiz, solution = line.strip().split(",")
        # One row assignment per puzzle instead of one element assignment per
        # digit. Each field must contain exactly NUM_CELLS digit characters.
        quizzes[i] = [int(digit) for digit in quiz]
        solutions[i] = [int(digit) for digit in solution]

# The quizzes are the model inputs; the solutions are the targets.
# TODO: convert the targets to categorical (one-hot) outputs, e.g. with
# keras.utils.to_categorical, before training a classification model.

Try changing the data types to float32; it might be faster.

array([8, 6, 4, 3, 7, 1, 2, 5, 9, 3, 2, 5, 8, 4, 9, 7, 6, 1, 9, 7, 1, 2, 6,
       5, 8, 4, 3, 4, 3, 6, 1, 9, 2, 5, 8, 7, 1, 9, 8, 6, 5, 7, 4, 3, 2, 2,
       5, 7, 4, 8, 3, 9, 1, 6, 6, 8, 9, 7, 3, 4, 1, 2, 5, 7, 1, 3, 5, 2, 8,
       6, 9, 4, 5, 4, 2, 9, 1, 6, 3, 7, 8])

from sklearn.model_selection import train_test_split

# Hold out 20% of the puzzles; the fixed seed makes the split reproducible.
quiz_train, quiz_test, output_train, output_test = train_test_split(
    quizzes,
    solutions,
    test_size=0.2,
    random_state=42,
)

Let's Build a Basic MLP

from keras.layers.normalization import BatchNormalization

# Regression-style MLP: 81 inputs (one per cell) -> 81 outputs (one predicted
# value per cell), trained with mean squared error.
mlp = Sequential()
mlp.add(Dense(256, activation='relu', input_shape=(81,)))
mlp.add(Dense(256, activation='relu'))
mlp.add(Dense(256, activation='relu'))
mlp.add(Dense(256, activation='relu'))
mlp.add(Dense(256, activation='relu'))
mlp.add(Dense(81, activation='relu'))

# metrics=['accuracy'] is required so that evaluate() returns [loss, accuracy];
# without it score[1] below cannot be indexed.
mlp.compile(loss='mean_squared_error', optimizer=RMSprop(),
            metrics=['accuracy'])

# Train on the first 500k puzzles and validate on the next 100k.
history = mlp.fit(quizzes[0:500000], solutions[0:500000],
                  batch_size=100, epochs=3, verbose=1,
                  validation_data=(quizzes[500000:600000],
                                   solutions[500000:600000]))
# Score on the held-out slice only; evaluating on the full arrays would mix
# the training rows into the reported "test" numbers.
score = mlp.evaluate(quizzes[500000:600000], solutions[500000:600000],
                     verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Layer (type)                 Output Shape              Param #   
dense_110 (Dense)            (None, 256)               20992     
batch_normalization_87 (Batc (None, 256)               1024      
dense_111 (Dense)            (None, 256)               65792     
batch_normalization_88 (Batc (None, 256)               1024      
dense_112 (Dense)            (None, 256)               65792     
batch_normalization_89 (Batc (None, 256)               1024      
dense_113 (Dense)            (None, 256)               65792     
batch_normalization_90 (Batc (None, 256)               1024      
dense_114 (Dense)            (None, 256)               65792     
batch_normalization_91 (Batc (None, 256)               1024      
dense_115 (Dense)            (None, 81)                20817     
Total params: 310,097.0
Trainable params: 307,537
Non-trainable params: 2,560.0
Train on 1000000 samples, validate on 1000000 samples
Epoch 1/3
1000000/1000000 [==============================] - 296s - loss: 4.7881 - acc: 0.0752 - val_loss: 3.7484 - val_acc: 0.0823
Epoch 2/3
1000000/1000000 [==============================] - 300s - loss: 3.8775 - acc: 0.0870 - val_loss: 3.5816 - val_acc: 0.1059
Epoch 3/3
1000000/1000000 [==============================] - 299s - loss: 3.7716 - acc: 0.0910 - val_loss: 3.4905 - val_acc: 0.0940
 999872/1000000 [============================>.] - ETA: 0sTest loss: 3.49049021709
Test accuracy: 0.093992

Let's Try A Basic CNN

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Conv1D, BatchNormalization

# Conv1D expects (samples, steps, channels): treat each board as an 81-step
# sequence with a single channel.
X_train = quizzes.reshape(quizzes.shape[0], 81, 1)

cnn = Sequential()
cnn.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(81, 1)))
cnn.add(Conv1D(64, kernel_size=3, activation='relu'))
cnn.add(Conv1D(64, kernel_size=3, activation='relu'))
cnn.add(Conv1D(32, kernel_size=3, activation='relu'))
cnn.add(Conv1D(32, kernel_size=3, activation='relu'))
# Collapse the (steps, channels) feature map to one vector per sample so the
# Dense head yields (None, 81), matching the shape of `solutions`. Without it
# Dense is applied per step and the output stays 3-D.
cnn.add(Flatten())
cnn.add(Dense(81, activation='linear'))

# A model must be compiled before fit(). The loss/optimizer/metric mirror the
# MLP cell. NOTE(review): the original compile call is not in this file --
# confirm these are the settings that were actually used.
cnn.compile(loss='mean_squared_error', optimizer=RMSprop(),
            metrics=['accuracy'])

# NOTE(review): validation and evaluation reuse the training data here, so
# the printed "test" numbers are not a held-out estimate.
history2 = cnn.fit(X_train, solutions,
                   validation_data=(X_train, solutions))
score = cnn.evaluate(X_train, solutions, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 1000000 samples, validate on 1000000 samples
Epoch 1/3
1000000/1000000 [==============================] - 3147s - loss: 3.4106 - acc: 0.0889 - val_loss: 3.0947 - val_acc: 0.0986
Epoch 2/3
 215700/1000000 [=====>........................] - ETA: 1726s - loss: 3.1296 - acc: 0.0968
Let's Try a Deeper CNN with Fewer Iterations per Layer

# Deeper 1-D CNN: nine Conv1D layers of 64 filters each, then a linear head
# producing one value per cell.
cnn2 = Sequential()
cnn2.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(81, 1)))
for _ in range(8):
    cnn2.add(Conv1D(64, kernel_size=3, activation='relu'))
# Flatten the (steps, channels) feature map so the Dense head outputs
# (None, 81), matching the shape of `solutions`.
cnn2.add(Flatten())
cnn2.add(Dense(81, activation='linear'))

# A model must be compiled before fit(). The loss/optimizer/metric mirror the
# MLP cell. NOTE(review): the original compile call is not in this file --
# confirm these are the settings that were actually used.
cnn2.compile(loss='mean_squared_error', optimizer=RMSprop(),
             metrics=['accuracy'])

# Train on the first 500k puzzles; validate and score on the next 100k.
history2 = cnn2.fit(X_train[0:500000], solutions[0:500000],
                    validation_data=(X_train[500000:600000],
                                     solutions[500000:600000]))
score2 = cnn2.evaluate(X_train[500000:600000], solutions[500000:600000],
                       verbose=1)
print('Test loss:', score2[0])
print('Test accuracy:', score2[1])

Train on 500000 samples, validate on 100000 samples
Epoch 1/1
500000/500000 [==============================] - 2744s - loss: 3.6823 - acc: 0.0708 - val_loss: 3.1559 - val_acc: 0.1048
100000/100000 [==============================] - 214s   
Test loss: 3.15591818687
Test accuracy: 0.10479

Let's Make an Iterative Solver for the NN

# Use the last puzzle in the dataset, and its solution, as a worked example.
testPuzzle = quizzes[-1]
testSolution = solutions[-1]

# Indices of the cells that are still blank (stored as 0) in the flat board.
zeros = np.nonzero(testPuzzle == 0)[0]

# NOTE: the board must be reshaped before it is fed to a network:
# (1, 81) for the MLP, (1, 81, 1) for the 1-D CNN.

array([3, 9, 8, 4, 7, 1, 6, 2, 5, 1, 2, 6, 3, 8, 5, 4, 7, 9, 7, 4, 5, 6, 2,
       9, 8, 3, 1, 6, 5, 7, 8, 1, 3, 9, 4, 2, 9, 1, 4, 7, 6, 2, 5, 8, 3, 8,
       3, 2, 9, 5, 4, 1, 6, 7, 4, 8, 9, 5, 3, 7, 2, 1, 6, 2, 6, 3, 1, 9, 8,
       7, 5, 4, 5, 7, 1, 2, 4, 6, 3, 9, 8])

# Load the previously trained networks from disk.
nn = load_model('deepSudokuCNN.h5')

mlp = load_model('sudokuMLP.h5')

# predict() expects a batch, so add a leading batch axis: (81,) -> (1, 81).
# testPuzzle itself is left 1-D for later use.
prediction = mlp.predict(testPuzzle.reshape((1,) + testPuzzle.shape))
# Round to the nearest integer *before* casting. Casting the raw floats would
# truncate toward zero (e.g. 5.72 -> 5 instead of 6).
rounded = np.around(prediction)
cast = rounded.astype(int)

(1, 81)
[[  3.5401125    5.72158146   6.50185347   3.8828671    5.95534897
    4.10434341   6.63568544   2.17235756   6.17044353   3.75974274
    4.00889921   6.09584236   2.73304057   8.48139      6.75350094
    4.3255477    4.39978123   4.05042887   6.48901224   3.91274762
    4.63744736   3.53534222   1.71981168   7.48899508   8.07629108
    2.92731833   5.90356302   6.11454678   4.73067474   5.91628075
    8.50193882   3.06285739   3.28868794   3.47838187   4.82240391
    4.70163584   6.5533886    3.95110679   6.22024536   6.34457493
    3.9594121    3.63688636   4.73794365   6.66503668   2.60050392
    6.25598335   3.19452167   1.73619318   8.7619915    3.78815675
    3.27103472   5.5864296    5.69591713   6.37348461   3.84966779
    8.33215809   6.49803162   4.58518553   3.2171905    5.59290028
    4.94527817   2.47479534   5.16980267   2.2210536    3.76610446
    2.83277059   4.46191978  10.05767155   4.87902832   4.42970276
    6.20001841   5.75801277   5.84390402   7.06069756   4.21345949
    1.83711433   4.41840506   5.55848598   2.52247715   9.30567932
array([[ 3,  5,  6,  3,  5,  4,  6,  2,  6,  3,  4,  6,  2,  8,  6,  4,  4,
         4,  6,  3,  4,  3,  1,  7,  8,  2,  5,  6,  4,  5,  8,  3,  3,  3,
         4,  4,  6,  3,  6,  6,  3,  3,  4,  6,  2,  6,  3,  1,  8,  3,  3,
         5,  5,  6,  3,  8,  6,  4,  3,  5,  4,  2,  5,  2,  3,  2,  4, 10,
         4,  4,  6,  5,  5,  7,  4,  1,  4,  5,  2,  9,  3]])

def solve(nn, testBoard, solution, netType):
    """Predict a whole board in one pass and report the per-cell accuracy.

    Args:
        nn: Model object exposing ``predict``; it is called exactly once.
        testBoard: Puzzle as a NumPy array holding 81 cell values.
        solution: Expected values for the 81 cells, indexable by position.
        netType: 1 for the MLP (input reshaped to ``(1, 81)``) or 2 for the
            1-D CNN (input reshaped to ``(1, 81, 1)``).

    Returns:
        The fraction of the 81 cells whose rounded prediction equals the
        corresponding entry of ``solution``. The value is also printed.

    Raises:
        ValueError: If ``netType`` is neither 1 nor 2.
    """
    n_cells = 81
    # Shape the input for the requested network type.
    if netType == 1:
        tensor = testBoard.reshape(1, n_cells)
    elif netType == 2:
        tensor = testBoard.reshape(1, n_cells, 1)
    else:
        # Previously an unknown type fed None to predict() and left
        # `accuracy` unbound; fail with a clear message instead.
        raise ValueError("netType must be 1 (mlp) or 2 (1d cnn), got "
                         + repr(netType))

    prediction = nn.predict(tensor)
    # Round to the nearest integer before casting. Casting the raw floats
    # truncates toward zero, turning e.g. 5.7 into 5 instead of 6.
    digits = np.around(prediction).astype(int)[0]

    expected = np.asarray(solution)
    correct = int(np.count_nonzero(digits[:n_cells] == expected[:n_cells]))
    accuracy = correct / float(n_cells)
    print("The accuracy of the board was: " + str(accuracy))
    return accuracy

# Score the last puzzle with both saved models: the 1-D CNN (netType 2)
# first, then the MLP (netType 1).
solve(nn, testBoard=quizzes[-1], solution=solutions[-1], netType=2)
solve(mlp, testBoard=quizzes[-1], solution=solutions[-1], netType=1)

[3 0 0 4 0 1 6 2 0 1 0 0 0 8 0 4 0 0 0 0 5 0 2 0 8 3 0 0 5 7 8 0 0 0 0 0 0
 0 0 7 0 0 5 0 3 0 0 2 9 0 4 0 0 7 4 8 0 5 3 0 0 1 0 2 0 3 0 9 0 0 0 0 0 7
 0 0 0 6 0 9 0]
[[ 3  6  7  3  7  1  5  2  6  1  3  4  4  8  6  3  5  5  7  4  5  3  2  5
   7  2  5  6  4  8  7  2  3  3  4  3  6  3  4  7  4  4  4  5  2  6  2  1
   8  3  3  5  6  7  4  8  7  3  3  7  4  0  5  2  3  2  3  9  6  5  6  5
   5  6  3  2  2  6  3 10  3]]
The accuracy of the board was: 0.3333333333333333
[[ 3  5  6  3  5  4  6  2  6  3  4  6  2  8  6  4  4  4  6  3  4  3  1  7
   8  2  5  6  4  5  8  3  3  3  4  4  6  3  6  6  3  3  4  6  2  6  3  1
   8  3  3  5  5  6  3  8  6  4  3  5  4  2  5  2  3  2  4 10  4  4  6  5
   5  7  4  1  4  5  2  9  3]]
The accuracy of the board was: 0.2345679012345679
[3 0 0 4 0 1 6 2 0 1 0 0 0 8 0 4 0 0 0 0 5 0 2 0 8 3 0 0 5 7 8 0 0 0 0 0 0
 0 0 7 0 0 5 0 3 0 0 2 9 0 4 0 0 7 4 8 0 5 3 0 0 1 0 2 0 3 0 9 0 0 0 0 0 7
 0 0 0 6 0 9 0]

def iterative(nn, testBoard, solution, netType):
    """Fill the blank cells one at a time, re-predicting after every fill.

    For each blank (0) cell, in index order, the network predicts the whole
    board from the current state and the rounded prediction for that cell is
    written into ``testBoard``. The last prediction made is then compared
    with ``solution``.

    Args:
        nn: Model object exposing ``predict``.
        testBoard: Puzzle as a 1-D NumPy array of 81 cell values. It is
            modified in place, so pass a copy to keep the original puzzle.
        solution: Expected values for the 81 cells, indexable by position.
        netType: 1 for the MLP (input reshaped to ``(1, 81)``) or 2 for the
            1-D CNN (input reshaped to ``(1, 81, 1)``).

    Returns:
        The fraction of the 81 cells for which the last rounded prediction
        equals ``solution``. The value is also printed.

    Raises:
        ValueError: If ``netType`` is neither 1 nor 2.
    """
    n_cells = 81
    if netType == 1:
        input_shape = (1, n_cells)
    elif netType == 2:
        input_shape = (1, n_cells, 1)
    else:
        # Previously an unknown type left `tensor` and `accuracy` unbound.
        raise ValueError("netType must be 1 (mlp) or 2 (1d cnn), got "
                         + repr(netType))

    def predict_digits():
        # Round to the nearest integer before casting. Casting the raw
        # floats truncates toward zero (e.g. 5.7 -> 5 instead of 6).
        prediction = nn.predict(testBoard.reshape(input_shape))
        return np.around(prediction).astype(int)[0]

    blanks = np.where(testBoard == 0)[0]
    digits = None
    for index in blanks:
        # Re-predict from the updated board, then commit only this cell.
        # NOTE: a prediction that rounds to 0 leaves the cell blank.
        digits = predict_digits()
        testBoard[index] = digits[index]
    if digits is None:
        # No blanks at all: still predict once so there is something to
        # score (previously this path raised NameError).
        digits = predict_digits()

    expected = np.asarray(solution)
    correct = int(np.count_nonzero(digits[:n_cells] == expected[:n_cells]))
    accuracy = correct / float(n_cells)
    print("The accuracy of the board was: " + str(accuracy))
    return accuracy

# iterative() writes into the board it is given, so hand it a copy and keep
# the dataset row untouched.
iterative(mlp, quizzes[-1].copy(), solutions[-1], 1)

[3 0 0 4 0 1 6 2 0 1 0 0 0 8 0 4 0 0 0 0 5 0 2 0 8 3 0 0 5 7 8 0 0 0 0 0 0
 0 0 7 0 0 5 0 3 0 0 2 9 0 4 0 0 7 4 8 0 5 3 0 0 1 0 2 0 3 0 9 0 0 0 0 0 7
 0 0 0 6 0 9 0]
The accuracy of the board was: 0.32098765432098764
[3 0 0 4 0 1 6 2 0 1 0 0 0 8 0 4 0 0 0 0 5 0 2 0 8 3 0 0 5 7 8 0 0 0 0 0 0
 0 0 7 0 0 5 0 3 0 0 2 9 0 4 0 0 7 4 8 0 5 3 0 0 1 0 2 0 3 0 9 0 0 0 0 0 7
 0 0 0 6 0 9 0]

array([3, 6, 7, 4, 7, 1, 6, 2, 6, 1, 3, 5, 4, 8, 7, 4, 6, 6, 7, 4, 5, 3, 2,
       7, 8, 3, 5, 7, 5, 7, 8, 1, 3, 4, 5, 5, 5, 4, 3, 7, 6, 4, 5, 6, 3, 4,
       3, 2, 9, 4, 4, 6, 7, 7, 4, 8, 6, 5, 3, 6, 4, 1, 5, 2, 4, 3, 3, 9, 5,
       6, 7, 5, 5, 7, 4, 2, 4, 6, 5, 9, 2])

array([3, 0, 0, 4, 0, 1, 6, 2, 0, 1, 0, 0, 0, 8, 0, 4, 0, 0, 0, 0, 5, 0, 2,
       0, 8, 3, 0, 0, 5, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 5, 0, 3, 0,
       0, 2, 9, 0, 4, 0, 0, 7, 4, 8, 0, 5, 3, 0, 0, 1, 0, 2, 0, 3, 0, 9, 0,
       0, 0, 0, 0, 7, 0, 0, 0, 6, 0, 9, 0])

# Classification-style MLP with an 810-way softmax head.
# NOTE(review): 810 is presumably 81 cells x 10 one-hot classes (digits 0-9);
# the 729 mentioned originally would be 81 cells x 9 digits. Confirm against
# how y_train / y_test are encoded.
mlp2 = Sequential()
mlp2.add(Dense(128, activation='relu', input_shape=(81,)))
mlp2.add(Dense(128, activation='relu'))
mlp2.add(Dense(128, activation='relu'))
mlp2.add(Dense(128, activation='relu'))
mlp2.add(Dense(128, activation='relu'))
# Keras 2 spelling: `units` replaces the deprecated `output_dim` keyword.
mlp2.add(Dense(units=810, activation='softmax'))

# metrics=['accuracy'] is required so that evaluate() returns [loss, accuracy];
# without it score[1] below cannot be indexed.
mlp2.compile(loss='categorical_crossentropy', optimizer=RMSprop(),
             metrics=['accuracy'])

# NOTE(review): y_train and y_test are not defined anywhere in this file.
# Presumably they are the one-hot encoded solutions for rows 0-500k and
# 500k-600k respectively -- confirm before running.
history = mlp2.fit(quizzes[0:500000], y_train, batch_size=100, epochs=3,
                   verbose=1,
                   validation_data=(quizzes[500000:600000], y_test))
score = mlp2.evaluate(quizzes[500000:600000], y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Layer (type)                 Output Shape              Param #   
dense_43 (Dense)             (None, 128)               10496     
batch_normalization_36 (Batc (None, 128)               512       
dense_44 (Dense)             (None, 128)               16512     
batch_normalization_37 (Batc (None, 128)               512       
dense_45 (Dense)             (None, 128)               16512     
batch_normalization_38 (Batc (None, 128)               512       
dense_46 (Dense)             (None, 128)               16512     
batch_normalization_39 (Batc (None, 128)               512       
dense_47 (Dense)             (None, 128)               16512     
batch_normalization_40 (Batc (None, 128)               512       
dense_48 (Dense)             (None, 810)               104490    
Total params: 183,594.0
Trainable params: 182,314
Non-trainable params: 1,280.0
C:\Users\Pawn\Anaconda3\lib\site-packages\ipykernel\__main__.py:13: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(activation="softmax", units=810)`
