In [2]:
import watermark
# Register the %watermark line magic the documented way before using it, so the
# cell works on a freshly started kernel.
%load_ext watermark
# Record interpreter, machine and package versions for reproducibility; list the
# third-party packages this notebook actually relies on.
%watermark -v -m -p numpy,pandas,matplotlib


# Fetch the gzip-compressed MNIST training-image archive into the working directory.
# NOTE(review): `utils` is a project-local module that is not shown here; judging by
# the captured output, download_file() reports progress and returns the local file
# name -- confirm against utils.py.
import utils
url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
filename = utils.download_file(url)
print(filename)


Downloading: http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz Bytes: 9912422
         9912422   [100.00%]
train-images-idx3-ubyte.gz

In [1]:
import gzip
import struct

# Decompress the whole archive into memory as a single bytes object.
# NOTE(review): `file_content` is not referenced by any other cell visible here (the
# parsing cell re-opens the archive itself) -- confirm whether this read is still needed.
with gzip.open('train-images-idx3-ubyte.gz', 'rb') as f:
    file_content = f.read()


In [38]:
import numpy as np
import pandas

# NOTE(review): `df` is never populated in the cells visible here; it is kept only so
# the notebook namespace stays unchanged -- remove if nothing else uses it.
df = pandas.DataFrame()

# Parse the IDX3 image archive: a 16-byte header of four big-endian 32-bit integers
# (magic number, image count, rows, columns) followed by one unsigned byte per pixel.
with gzip.open('train-images-idx3-ubyte.gz', 'rb') as f:
    # read the header info
    header = f.read(16)
    if len(header) != 16:
        raise ValueError("Truncated IDX header: expected 16 bytes, got %d" % len(header))
    magic_num, nimages, nrows, ncols = struct.unpack('>iiii', header)
    if magic_num != 2051:
        # 2051 identifies an unsigned-byte, 3-dimensional IDX file; anything else
        # means the remaining header fields would be misinterpreted.
        raise ValueError("Unexpected magic number %d (expected 2051)" % magic_num)

    print("Magic number\t\t %d"%magic_num)
    print("Number of Images\t %d"%nimages)
    print("Image size\t\t %dx%d"%(nrows, ncols))

    # Read every pixel in a single call instead of unpacking image by image
    # through Python tuples and lists.
    npixels = nrows*ncols
    raw = f.read(nimages*npixels)

if len(raw) != nimages*npixels:
    raise ValueError("Truncated pixel data: expected %d bytes, got %d"
                     % (nimages*npixels, len(raw)))

# One row per image, one column per pixel. np.frombuffer yields a read-only view
# of `raw`, so copy to get a writable array like the one np.empty used to provide.
nparr = np.frombuffer(raw, dtype=np.dtype('B')).reshape(nimages, npixels).copy()


Magic number		 2051
Number of Images	 60000
Image size		 28x28

In [47]:
import matplotlib.pyplot as plt
%matplotlib inline

def display_image(img, width=28, height=28):
    """Render a flattened grayscale image in a new matplotlib figure.

    Parameters
    ----------
    img : array-like with a ``reshape`` method
        Flat pixel buffer holding ``width * height`` values in row-major order.
    width : int, optional
        Number of pixel columns (default 28).
    height : int, optional
        Number of pixel rows (default 28).

    Returns
    -------
    None
        The figure is drawn as a side effect; nothing is returned so that a
        trailing call in a notebook cell does not echo an object repr.
    """
    plt.figure()
    # imshow expects (rows, columns), i.e. (height, width); reshaping to
    # (width, height) would scramble any non-square image.
    image = plt.imshow(img.reshape(height, width), cmap='gray_r')
    # Hide ticks and tick labels on both axes; pixel coordinates add nothing here.
    image.axes.get_xaxis().set_visible(False)
    image.axes.get_yaxis().set_visible(False)
    
# Smoke test: render the image stored in row 45 of nparr.
display_image(nparr[45,:])



In [ ]: