In [2]:
import watermark
%watermark -v -m -p pandas
import utils
url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
filename = utils.download_file(url)
print(filename)
In [1]:
import gzip
import struct

# Open the gzip-compressed archive in binary mode and pull its entire
# decompressed payload into memory as a single bytes object.
with gzip.open('train-images-idx3-ubyte.gz', mode='rb') as f:
    file_content = f.read()
In [38]:
import numpy as np
import pandas

# Not used within this cell; kept so that any other cell referencing `df`
# still finds it defined.
df = pandas.DataFrame()

with gzip.open('train-images-idx3-ubyte.gz', 'rb') as f:
    # Header: four big-endian 32-bit integers
    # (magic number, image count, rows per image, columns per image).
    magic_num, nimages, nrows, ncols = struct.unpack(
        '>iiii', f.read(16))
    print("Magic number\t\t %d"%magic_num)
    print("Number of Images\t %d"%nimages)
    print("Image size\t\t %dx%d"%(nrows, ncols))

    # 2051 (0x00000803) identifies an IDX file of unsigned bytes with three
    # dimensions; anything else means this is not an image archive.
    if magic_num != 2051:
        raise ValueError(
            "unexpected IDX magic number %d (expected 2051)" % magic_num)

    # Read every pixel in one call instead of unpacking image by image.
    npixels = nrows * ncols
    pixel_bytes = f.read(nimages * npixels)

if len(pixel_bytes) != nimages * npixels:
    raise ValueError(
        "truncated file: expected %d pixel bytes, got %d"
        % (nimages * npixels, len(pixel_bytes)))

# One row per image, one unsigned byte per pixel. frombuffer yields a
# read-only view over the bytes object, so copy to get a writable array
# like the one np.empty used to provide.
nparr = np.frombuffer(pixel_bytes, dtype=np.dtype('B')).reshape(
    nimages, npixels).copy()
In [47]:
import matplotlib.pyplot as plt
%matplotlib inline
def display_image(img, width=28, height=28):
plt.figure()
fig = plt.imshow(img.reshape(width,height))
fig.set_cmap('gray_r')
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
# test:
display_image(nparr[45,:])
In [ ]: