Data from: the MNIST dataset
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
import pandas as pd
import sklearn
import os
import requests
from tqdm import tqdm_notebook
import tarfile
I've packaged the dataset as a gzipped tar file. The next cell downloads it.
In [ ]:
def download_file(url, file):
    # Stream the response so we can show progress while downloading.
    r = requests.get(url, stream=True)
    # Total size in bytes (0 if the server does not report it).
    total_size = int(r.headers.get('content-length', 0))
    block_size = 1024
    wrote = 0
    with open(file, 'wb') as f:
        for data in tqdm_notebook(r.iter_content(block_size),
                                  total=int(np.ceil(total_size / block_size)),
                                  unit='KB', unit_scale=True):
            wrote += len(data)
            f.write(data)
    if total_size != 0 and wrote != total_size:
        print("ERROR, something went wrong")
url = "https://github.com/chi-hung/PythonTutorial/raw/master/datasets/mnist.tar.gz"
file = "mnist.tar.gz"
print('Retrieving the MNIST dataset...')
download_file(url,file)
print('Extracting the MNIST dataset...')
tar = tarfile.open(file)
tar.extractall()
tar.close()
print('Completed fetching the MNIST dataset.')
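If you re-run this notebook, there is no need to fetch the archive again. A small convenience sketch (it assumes that an already-present file is complete):
In [ ]:
# Skip the download when the archive already exists on disk.
# Caveat: a partially downloaded file would also be skipped.
if not os.path.exists(file):
    download_file(url, file)
else:
    print('{} already exists, skipping download.'.format(file))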
Ten folders of images (one folder per digit class, 0-9) will be extracted from the downloaded tar file.
In [ ]:
def filePathsGen(rootPath):
    paths = []
    dirs = []
    for dirPath, dirNames, fileNames in os.walk(rootPath):
        for fileName in fileNames:
            fullPath = os.path.join(dirPath, fileName)
            # the folder name relative to rootPath is the class label
            paths.append((int(dirPath[len(rootPath):]), fullPath))
        dirs.append(dirNames)
    return dirs, paths
In [ ]:
dirs,paths=filePathsGen('mnist/') # load the image paths
dfPath=pd.DataFrame(paths,columns=['class','path']) # save image paths as a Pandas DataFrame
dfPath.head(5) # see the first 5 paths of the DataFrame
In [ ]:
dfCountPerClass=dfPath.groupby('class').count()
dfCountPerClass.rename(columns={'path':'amount of figures'},inplace=True)
dfCountPerClass.plot(kind='bar',rot=0)
In [ ]:
train = dfPath.sample(frac=0.7)  # sample 70% of the data as the train dataset
test = dfPath.drop(train.index)  # the remaining 30% becomes the test dataset
# take 50% of the test dataset as the validation dataset
val = test.sample(frac=1/2)
test = test.drop(val.index)

# check the sizes of the train, val and test datasets
print('number of all figures   = {:10}.'.format(len(dfPath)))
print('number of train figures = {:10}.'.format(len(train)))
print('number of val figures   = {:10}.'.format(len(val)))
print('number of test figures  = {:10}.'.format(len(test)))

# take a look: plot 3 figures from the train dataset
for j in range(3):
    img = plt.imread(train['path'].iloc[j])
    plt.imshow(img, cmap="gray")
    plt.axis("off")
    plt.show()
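Note that sample(frac=0.7) draws a different random split on every run. For a reproducible split, you can fix the seed (a sketch; the value random_state=0 is an arbitrary choice):
In [ ]:
# Reproducible variant of the split above: fixing random_state
# makes sample() return the same rows on every run.
train = dfPath.sample(frac=0.7, random_state=0)
test = dfPath.drop(train.index)
val = test.sample(frac=1/2, random_state=0)
test = test.drop(val.index)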
In [ ]:
def dataLoad(dfPath):
    paths = dfPath['path'].values
    x = np.zeros((len(paths), 28, 28), dtype=np.float32)
    for j in range(len(paths)):
        x[j, :, :] = plt.imread(paths[j])/255
    y = dfPath['class'].values
    return x, y
In [ ]:
train_x,train_y=dataLoad(train)
val_x,val_y=dataLoad(val)
test_x,test_y=dataLoad(test)
Remark: loading all images to RAM might take a while.
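If the dataset were too large for RAM, you could read batches from disk on demand instead. A minimal generator sketch (a hypothetical helper, not used in the rest of this notebook):
In [ ]:
def batchGen(dfPath, batch_size=128):
    # Yield (x, y) batches read from disk on demand, so that only
    # one batch resides in RAM at a time.
    while True:
        batch = dfPath.sample(n=batch_size)
        x = np.zeros((batch_size, 28, 28), dtype=np.float32)
        for j, p in enumerate(batch['path'].values):
            x[j, :, :] = plt.imread(p)/255
        yield x, batch['class'].values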
In [ ]:
print("tensor shapes:\n")
print('train:',train_x.shape,train_y.shape)
print('val :',val_x.shape,val_y.shape)
print('test :',test_x.shape,test_y.shape)
In [ ]:
from keras.models import Sequential
from keras.layers import Dense,Flatten
from keras.optimizers import SGD
One-hot encoding the labels:
In [ ]:
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder()
# fit the encoder on the training labels only, then reuse it
# to transform the validation and test labels
train_y_onehot = np.float32(enc.fit_transform(train_y.reshape(-1, 1)).toarray())
val_y_onehot = np.float32(enc.transform(val_y.reshape(-1, 1)).toarray())
test_y_onehot = np.float32(enc.transform(test_y.reshape(-1, 1)).toarray())
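Keras also ships a utility that does the same thing in one line per split:
In [ ]:
# Equivalent one-hot encoding via Keras' built-in utility.
from keras.utils import to_categorical
train_y_onehot = to_categorical(train_y, num_classes=10)
val_y_onehot = to_categorical(val_y, num_classes=10)
test_y_onehot = to_categorical(test_y, num_classes=10)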
Construct the model:
In [ ]:
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(10, activation='softmax'))

# pass the SGD instance itself; the string 'sgd' would silently
# ignore the custom learning rate
sgd = SGD(lr=0.2, momentum=0.0, decay=0.0)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
More details about the constructed model:
In [ ]:
model.summary()
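The parameter count is easy to verify by hand: Flatten turns each 28×28 image into a 784-dimensional vector, and the dense layer maps it to 10 classes, so the model has 784 × 10 weights plus 10 biases, i.e. 7,850 trainable parameters.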
Train the model:
In [ ]:
hist = model.fit(train_x, train_y_onehot,
                 epochs=20, batch_size=128,
                 validation_data=(val_x, val_y_onehot))
See how the accuracy climbs during training:
In [ ]:
plt.plot(hist.history['acc'],ms=5,marker='o',label='accuracy')
plt.plot(hist.history['val_acc'],ms=5,marker='o',label='val accuracy')
plt.legend()
plt.show()
Now, you'll probably want to evaluate or save the trained model.
In [ ]:
# calculate loss & accuracy (evaluated on the test dataset)
score = model.evaluate(test_x, test_y_onehot, batch_size=128)
print("LOSS (evaluated on the test dataset)= {}".format(score[0]))
print("ACCURACY (evaluated on the test dataset)= {}".format(score[1]))
Save model architecture & weights:
In [ ]:
import json
with open('first_try.json', 'w') as jsOut:
    json.dump(model.to_json(), jsOut)
model.save_weights('first_try.h5')
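Alternatively, Keras can store the architecture and the weights together in a single HDF5 file (the file name below is just an example):
In [ ]:
# One file holding both the architecture and the weights.
model.save('first_try_full.h5')
# It can later be restored in a single call:
# from keras.models import load_model
# model_new = load_model('first_try_full.h5')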
Load the saved model architecture & weights:
In [ ]:
from keras.models import model_from_json
In [ ]:
with open('first_try.json', 'r') as jsIn:
    model_architecture = json.load(jsIn)
model_new = model_from_json(model_architecture)
model_new.load_weights('first_try.h5')
In [ ]:
model_new.summary()
Output the classification report (see if the trained model works well on the test data):
In [ ]:
pred_y=model.predict(test_x).argmax(axis=1)
from sklearn.metrics import classification_report
print( classification_report(test_y,pred_y) )
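A confusion matrix often makes the per-class errors easier to spot. A short sketch, reusing the seaborn import from the top of the notebook:
In [ ]:
# Visualize per-class errors as a heatmap.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(test_y, pred_y)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('predicted class')
plt.ylabel('true class')
plt.show()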
Reshape the tensors (this step is necessary because a CNN expects a 4D input tensor: samples × height × width × channels):
In [ ]:
# add a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1),
# matching Keras' default channels_last data format
train_x = np.expand_dims(train_x, axis=-1)
val_x = np.expand_dims(val_x, axis=-1)
test_x = np.expand_dims(test_x, axis=-1)
Create the model:
In [ ]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD

in_shape = (28, 28, 1)  # channels_last: height, width, channels

# ========== BEGIN TO CREATE THE MODEL ==========
model = Sequential()
# feature extraction (2 conv layers)
model.add(Conv2D(32, (3, 3),
                 activation='relu',
                 input_shape=in_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
# classification (2 dense layers)
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
# ========== COMPLETED THE MODEL CREATION ========

# Compile the model before training.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01, momentum=0.1),
              metrics=['accuracy'])
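To follow the tensor shapes through the network: a 28×28×1 input becomes 26×26×32 after the first 3×3 convolution, 24×24×64 after the second, 12×12×64 after 2×2 max-pooling, and a 9,216-dimensional vector after Flatten, which the two dense layers then reduce to 128 features and finally to 10 class scores.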
Train the model:
In [ ]:
%%time
hist = model.fit(train_x, train_y_onehot,
                 epochs=20,
                 batch_size=32,
                 validation_data=(val_x, val_y_onehot))
See how the accuracy climbs during training:
In [ ]:
plt.plot(hist.history['acc'],ms=5,marker='o',label='accuracy')
plt.plot(hist.history['val_acc'],ms=5,marker='o',label='val accuracy')
plt.legend()
plt.show()
Output the classification report (see if the trained model works well on the test data):
In [ ]:
pred_y=model.predict(test_x).argmax(axis=1)
from sklearn.metrics import classification_report
print( classification_report(test_y,pred_y) )
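To see where the CNN still fails, you can plot a few of the misclassified digits (a small sketch):
In [ ]:
# Show the first few test digits the model got wrong.
wrong = np.where(pred_y != test_y)[0]
for j in wrong[:3]:
    plt.imshow(test_x[j, :, :, 0], cmap='gray')
    plt.title('true: {}, predicted: {}'.format(test_y[j], pred_y[j]))
    plt.axis('off')
    plt.show()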