In [2]:
# import the mnist class
from mnist import MNIST
# init with the 'data' dir
mndata = MNIST('./data')
# Load data
mndata.load_training()
mndata.load_testing()
# The number of pixels per side of all images
img_side = 28
# Each input is a raw vector.
# The number of units of the network
# corresponds to the number of input elements
n_mnist_pixels = img_side*img_side
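A quick sanity check can confirm that each raw image matches this size (a minimal sketch, assuming the cells above have been run; in the python-mnist package load_training() also returns the images and labels it just loaded):
In [ ]:
# Capture the raw training data returned by load_training()
images, labels = mndata.load_training()
# Each raw image should be a flat vector of n_mnist_pixels values
print len(images), "training images of", len(images[0]), "pixels"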
Then you find a function to plot MNIST images in a row of subplots:
In [3]:
# Set the maximum number of plots to be printed in a row
windows = 8

# A custom plot that uses imshow to draw a matrix
# x : array        The matrix to be plotted
# fig : figure     The figure object to draw on
# window : int     The current subplot position (1-based)
# windows : int    Number of subplots in the row
def plot_img(x, fig, window, windows=windows):
    ax = fig.add_subplot(1, windows, window)
    ax.imshow(x, interpolation='none',
              aspect='auto', cmap=cm.Greys)
    ax.set_axis_off()
    fig.canvas.draw()
and a function to reshape raw MNIST data into square matrices:
In [4]:
#-------------------------------------------------------------
# Transform a raw input vector into an image matrix
# x : array          The raw input vector
# return : array     A square img_side x img_side matrix
def to_mat(x):
    return x.reshape(img_side, img_side)
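As a quick check of the two helpers above, the following sketch (assuming the pylab namespace used throughout these cells is loaded) reshapes a few raw training images and draws them in a single row:
In [ ]:
# Load the raw training images (load_training() returns images and labels)
images, labels = mndata.load_training()
# Plot the first 'windows' digits in a row of subplots
fig = figure(figsize=(12, 2))
for w in xrange(windows):
    plot_img(to_mat(array(images[w])), fig, w + 1)
show()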
Finally you have some general-purpose functions.
A list of transfer functions:
In [5]:
#-------------------------------------------------------------
# Add a bias unit to the input
def biased(x):
    return hstack([1, x])

#-------------------------------------------------------------
# Step function
# return: 1 if x > 0
#         0 otherwise
def step(x):
    return 1.0*(x > 0)

#-------------------------------------------------------------
# Sigmoid function
# t : float    temperature
def sigmfun(x, t=1.0):
    return 1.0/(1.0 + exp(-x/t))

# Sigmoid derivative (y is the sigmoid output)
def sigmder(y):
    return y*(1 - y)

#-------------------------------------------------------------
# Hyperbolic tangent function
# th : float       threshold
# alpha : float    amplitude
def tanhfun(x, th=0.0, alpha=1.0):
    return tanh(alpha*(x - th))

# Hyperbolic tangent derivative (y is the tanh output)
def tanhder(y):
    return 1 - y**2
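A small numerical check (a sketch, again assuming the pylab namespace) shows how the derivatives are meant to be used: they take the output y of the corresponding transfer function, not the raw input:
In [ ]:
x = linspace(-2, 2, 5)
y = sigmfun(x)
# sigmder expects the sigmoid output y, i.e. it computes y*(1 - y)
print y, sigmder(y)
# The same convention holds for tanhfun/tanhder
z = tanhfun(x)
print z, tanhder(z)
print step(x), biased(x)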
A function to build a dataset of 2D points generated from two lists of centroids, one for patterns belonging to the class and one for patterns not belonging to it.
In [6]:
#-------------------------------------------------------------
# Create an array with 2-dimensional patterns belonging to two categories
# n_patterns : int              Number of patterns
# std_deviation : float         Standard deviation of the noise
# centroids1 : list of          Points from which the patterns
#   2-element vectors           belonging to the class are generated
# centroids2 : list of          Points from which the patterns
#   2-element vectors           not belonging to the class are generated
# returns : array               Each row contains a pattern as its first
#                               two elements and the category (belonging/
#                               not belonging) as its third element
def build_dataset(n_patterns=100,
                  std_deviation=0.2,
                  centroids1=[[-1.2, 1.8], [-1.8, 1.2]],
                  centroids2=[[-0.2, 0.4], [-0.8, -0.2]]):

    # Decide which group each pattern comes from:
    # the first half gets category 0 (not belonging),
    # the second half category 1 (belonging)
    categories = arange(n_patterns)/(n_patterns/2)

    # Each row of this array will contain a 2-element-wide
    # input pattern plus an integer defining to which category
    # the pattern belongs
    data = zeros([n_patterns, 3])

    # Iterate over the patterns to generate
    for t in xrange(n_patterns):
        # Pick a random centroid from the appropriate list
        if categories[t] > 0:
            index = int(rand()*len(centroids1))
            pattern = array(centroids1[index])
        else:
            index = int(rand()*len(centroids2))
            pattern = array(centroids2[index])
        # Add noise to each element of the centroid
        pattern += std_deviation*randn(2)
        # Fill up data
        data[t, :] = hstack([pattern, categories[t]])

    return data
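A usage sketch (assuming the pylab namespace): build a small dataset and scatter-plot the two categories, using the third column to split the rows:
In [ ]:
data = build_dataset(n_patterns=200)
# Rows with a 1 in the third column belong to the class
inside = data[data[:, 2] > 0]
outside = data[data[:, 2] == 0]
figure()
scatter(inside[:, 0], inside[:, 1], color='red', label='belonging')
scatter(outside[:, 0], outside[:, 1], color='blue', label='not belonging')
legend()
show()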
The next cell is just for styling:
In [1]:
from IPython.core.display import HTML
def css_styling():
    styles = open("../style/ipybn.css", "r").read()
    return HTML(styles)
css_styling()
Out[1]: