This notebook shows the results of shape classification.

  • labels:
    • circle --> 0
    • rectangle --> 1
    • triangle --> 2

In [1]:
import numpy as np
import os, sys
import matplotlib.pyplot as plt
from pylab import *
import glob
import collections
import random
import math
from PIL import Image, ImageDraw
%matplotlib inline


# Relative path from the notebook's start directory to the Caffe root;
# a later cell passes it to os.chdir so workspace-relative paths resolve.
caffe_root = '../../../'
# NOTE(review): caffe_root is not added to sys.path in the visible cells, so this
# import presumably relies on pycaffe already being importable — confirm.
import caffe
from caffe import layers as L, params as P

In [2]:
## define workspace params
# Directory, relative to the Caffe root, that holds this experiment's
# prototxt files, trained model and data (see the path cells below).
workspace='examples/mywork/shape/'

In [3]:
## params setting
# Presumably the number of images in each dataset split — confirm against the data.
# Numval and Numtest are passed to calcu_ave_acc as the number of samples to
# evaluate; Numtrain is not used in the visible cells.
Numtrain = 6000
Numval = 1000
Numtest = 2000

analysis function definition


In [4]:
def calcu_loss_acc(net, batch_size = 1, Numval = 0):
    ''' calculate the average loss and accuracy of dataset (default: validation dataset)

        Runs Numval // batch_size forward passes and averages the per-batch
        'loss' and 'accuracy' outputs. Samples left over when Numval is not a
        multiple of batch_size are not evaluated.

        input  net(object whose forward() returns a dict with 'loss' and
                   'accuracy' entries, e.g. a Caffe net)
               batch_size(samples consumed per forward pass, default = 1)
               Numval(number of samples to evaluate, default = 0)

        return avg_loss,
               avg_accuracy
               (both are nan when no batch is evaluated)
    '''
    # Floor division keeps the batch count an integer on Python 2 and Python 3.
    num_batches = int(Numval // batch_size)
    if num_batches <= 0:
        # Nothing to average: return nan explicitly instead of taking the mean
        # of an empty list (which yields nan plus a RuntimeWarning).
        nan = float('nan')
        return nan, nan

    _loss = []
    _accuracy = []
    for _ in range(num_batches):
        rs = net.forward()
        _loss.append(rs['loss'].tolist())
        _accuracy.append(rs['accuracy'].tolist())

    # np.mean is used explicitly rather than relying on `from pylab import *`.
    avg_loss = np.mean(_loss)
    avg_accuracy = np.mean(_accuracy)

    return avg_loss, avg_accuracy

In [5]:
def calcu_ave_acc(model_def, model_weights, Numdata):
    net = None
    caffe.set_mode_gpu()
    net = caffe.Net(model_def,      # defines the structure of the model
                    model_weights,  # contains the trained weights
                    caffe.TEST)     # use test mode (e.g., don't perform dropout)
    
    
    batch_size = net.blobs['data'].num
    tlab_result = np.array([])
    ground_truths = np.array([])
    for i in range(Numdata/batch_size):
        rs = net.forward()
        tlab_result = np.append(tlab_result, rs['prob'].argmax(1))
        ground_truths = np.append(ground_truths, net.blobs['label'].data)
    
    ave_acc = float(sum(tlab_result == ground_truths))/float(len(tlab_result))
    print "average accuracy: ", ave_acc
    return ave_acc, tlab_result, ground_truths

In [6]:
## calculate per-class precision and recall from tp, fp, fn
def acc_prec_recall(tlab_result, ground_truths):
    ''' per-class precision and recall of predicted labels against ground truth

        Classes are the sorted union of the labels that occur in either input,
        so the returned lists always have one entry per class in ascending
        label order, even for a class that is never predicted.

        input  tlab_result(predicted labels, 1-D sequence or array)
               ground_truths(true labels, same length as tlab_result)

        return precision(list of float, tp / (tp + fp) per class,
                         0.0 when the class is never predicted),
               recall(list of float, tp / (tp + fn) per class,
                      0.0 when the class never occurs in ground_truths)

        raise  AssertionError when the two inputs differ in length
    '''
    predicted = np.asarray(tlab_result).ravel()
    truth = np.asarray(ground_truths).ravel()
    if len(predicted) != len(truth):
        # Raised explicitly so the check is not stripped when running with -O.
        raise AssertionError('The length of predicted results and ground truth labels are not match!')

    # Use both inputs: iterating only over the predicted labels would drop a
    # class that is never predicted and shift every later class by one position.
    types = sorted(set(predicted.tolist()) | set(truth.tolist()))

    precision = []
    recall = []

    for _type in types:
        is_pred = predicted == _type
        is_true = truth == _type

        # Boolean masks replace per-index list membership tests (O(n) instead of O(n^2)).
        tp = float(np.sum(is_pred & is_true))
        fp = float(np.sum(is_pred & ~is_true))
        fn = float(np.sum(~is_pred & is_true))

        precision.append(tp/(tp+fp) if tp+fp > 0 else 0.0)
        recall.append(tp/(tp+fn) if tp+fn > 0 else 0.0)

    return precision, recall

In [7]:
## change the work root !!!
# Switch the process working directory to caffe_root so that the
# workspace-relative paths used below resolve.
# caffe_root is a relative path ('../../../'), so running this cell a second
# time in the same kernel moves up three more directory levels.
os.chdir(caffe_root)
print "the work root now is: ", os.getcwd()


the work root now is:  /media/jupiter/mengnan/caffe-master

3 types of shapes

  • average accuracy: validation

In [8]:
## params setting
# File names below are relative to `workspace`.
val_net_path = "train_val_5layers_bn_shape.prototxt"
test_net_path = "train_test_5layers_bn_shape.prototxt"
deploy_net_path = "deploy_5layers_bn_shape.prototxt"
caffemodel_path = "model/5layers_bn_iter_1200.caffemodel"
# Pass the components separately so os.path.join inserts the separator itself;
# concatenating first only worked because `workspace` ends with '/'.
model_val = os.path.join(workspace, val_net_path)
model_test = os.path.join(workspace, test_net_path)
model_deploy = os.path.join(workspace, deploy_net_path)
model_weights = os.path.join(workspace, caffemodel_path)

In [9]:
## calculation val
# Evaluate the validation net definition with the trained weights over Numval samples.
# tlab_result and ground_truths are reused by the precision/recall cell below.
ave_acc, tlab_result, ground_truths = calcu_ave_acc(model_val, model_weights, Numval)
print 'The val images accuracy is: ',ave_acc


average accuracy:  0.947
The val images accuracy is:  0.947
  • precision and recall of each shape type: validation

In [10]:
# Class names in label order (0: circle, 1: rectangle, 2: triangle).
cell_types = ['circle', 'rectangle', 'triangle']

# zip pairs names with scores by position, so this assumes acc_prec_recall
# returns exactly one score per class in label order (0, 1, 2).
precision, recall = acc_prec_recall(tlab_result, ground_truths)
print 'precision: ', zip(cell_types, precision)
print 'recall:    ', zip(cell_types, recall)


precision:  [('circle', 0.9749216300940439), ('rectangle', 0.9721362229102167), ('triangle', 0.8994413407821229)]
recall:     [('circle', 0.886039886039886), ('rectangle', 0.9843260188087775), ('triangle', 0.9757575757575757)]
  • average accuracy: test

In [11]:
## calculation test
# Evaluate the test net definition with the same weights over Numtest samples.
# This overwrites ave_acc, tlab_result and ground_truths from the validation run.
ave_acc, tlab_result, ground_truths = calcu_ave_acc(model_test, model_weights, Numtest)
print 'The test images accuracy is: ',ave_acc


average accuracy:  0.935
The test images accuracy is:  0.935

In [12]:
# Class names in label order (0: circle, 1: rectangle, 2: triangle).
cell_types = ['circle', 'rectangle', 'triangle']

# Uses the test-set predictions produced by the cell above.
# zip pairs names with scores by position, so this assumes acc_prec_recall
# returns exactly one score per class in label order (0, 1, 2).
precision, recall = acc_prec_recall(tlab_result, ground_truths)
print 'precision: ', zip(cell_types, precision)
print 'recall:    ', zip(cell_types, recall)


precision:  [('circle', 0.9668874172185431), ('rectangle', 0.966412213740458), ('triangle', 0.8812415654520918)]
recall:     [('circle', 0.8755622188905547), ('rectangle', 0.9518796992481203), ('triangle', 0.9775449101796407)]

now we test the network with a generated shape

  • The following part shows the network performance for classifying a randomly generated shape. We first generate three types of shapes (circle, rectangle, triangle) and then classify them individually and get the result.

  • remember our label assignments are:

    1. circle --> 0
    2. rectangle --> 1
    3. triangle --> 2

random shape generation functions definition


In [13]:
def draw_triangle(img, cxy, r, fill, fuzzy=0):
    """Draw a filled triangle on ``img`` around the centre point ``cxy``.

    The apex is placed ``r`` above the centre; the two base vertices sit
    ``floor(r*cos(pi/6))`` to either side and ``floor(r*sin(pi/6))`` below it.

    img   -- image handed to ImageDraw.Draw; it is drawn on in place
    cxy   -- (x, y) centre of the triangle
    r     -- distance from the centre to the apex
    fill  -- fill value passed to polygon()
    fuzzy -- when > 0, every vertex coordinate is multiplied by its own random
             factor drawn from uniform(fuzzy, 1.0) and floored
    """
    centre_x, centre_y = cxy[0], cxy[1]
    half_base = math.floor(r*math.cos(math.pi/6))
    drop = math.floor(r*math.sin(math.pi/6))

    # apex first, then right and left base vertices
    vertices = [
        (centre_x, centre_y - r),
        (centre_x + half_base, centre_y + drop),
        (centre_x - half_base, centre_y + drop),
    ]
    if fuzzy > 0:
        # One random factor per coordinate, drawn in x-then-y order per vertex.
        vertices = [
            (math.floor(vx * random.uniform(fuzzy,1.0)),
             math.floor(vy * random.uniform(fuzzy,1.0)))
            for vx, vy in vertices
        ]

    ImageDraw.Draw(img).polygon(vertices, fill, outline=None)

In [14]:
def draw_circle(img, cxy, r, fill, bb=False, fuzzy=0):
    """Draw a filled ellipse, or its bounding rectangle, on ``img``.

    The shape is defined by the box with corners (cx-r, cy-r) and (cx+r, cy+r).

    img   -- image handed to ImageDraw.Draw; it is drawn on in place
    cxy   -- (x, y) centre of the box
    r     -- half the side length of the box
    fill  -- fill value passed to the drawing call
    bb    -- True draws the box itself (rectangle), False the ellipse inside it
    fuzzy -- when > 0, each of the four box coordinates is multiplied by its
             own random factor drawn from uniform(fuzzy, 1.0) and floored
    """
    canvas = ImageDraw.Draw(img)
    # box order: left, top, right, bottom
    box = [cxy[0]-r, cxy[1]-r, cxy[0]+r, cxy[1]+r]
    if fuzzy > 0:
        box = [math.floor(edge * random.uniform(fuzzy,1.0)) for edge in box]

    if not bb:
        canvas.ellipse(box,fill,outline=None)
    else:
        canvas.rectangle(box,fill,outline=None)
    del canvas

In [15]:
def BgColor():
    """Return a background grey level picked at random from three fixed values."""
    # 225 (labelled "white" by the author), 150 (grey), 80 (dark grey)
    palette = (225, 150, 80)
    return random.choice(palette)

def FgColor():
    """Return a random foreground grey level in the inclusive range 0..250."""
    return random.randint(0,250)

In [16]:
def GenShapeFA(shape, crmin, crmax, n, size, clipok, fuzzy=0):
    """Generate ``n`` random single-shape greyscale images and return the last one.

    Each image is a ``size`` x ``size`` mode 'L' image with a random background
    level (BgColor) and one shape drawn in a random foreground level (FgColor)
    at a random centre and radius. Earlier images in the loop are discarded.

    shape  -- "rectangle", "circle", "triangle" or "arc"
              (NOTE(review): draw_arc is not defined in the visible cells, so
              "arc" only works if it is provided elsewhere — confirm)
    crmin  -- minimum radius
    crmax  -- maximum radius
    n      -- number of images to generate (must be >= 1)
    size   -- side length of the square image in pixels
    clipok -- True lets the centre fall anywhere in the image, so the shape may
              be clipped; False limits the centre offset and caps the radius at
              the distance from the centre to the nearest image edge
    fuzzy  -- forwarded to the draw_* helper

    Raises ValueError for an unknown ``shape`` (previously a blank image was
    returned silently) and for ``n`` < 1 (previously an unbound-variable error).
    """
    if shape not in ("rectangle", "circle", "triangle", "arc"):
        raise ValueError("unknown shape: %r" % (shape,))
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))

    # Floor division keeps the centre an integer for random.randint on
    # Python 2 and Python 3.
    half = size // 2
    for i in range(0,n):
        bgcolor = BgColor()
        img = Image.new('L', (size,size), bgcolor)
        if clipok:
            cdelta = half
        else:
            cdelta = half - crmax
        cx = half + random.randint(-cdelta,cdelta)
        cy = half + random.randint(-cdelta,cdelta)
        if clipok:
            # this line may clip
            r = random.randint(crmin,crmax)
        else:
            # r must never exceed the distance to the nearest image edge
            maxr_noclip = min([cx, size-cx, cy, size-cy])
            rmax = min([crmax, maxr_noclip])
            rmin = min([crmin, rmax])
            r = random.randint(rmin,rmax)

        fgcolor = FgColor()
        if shape == "rectangle":
            # the rectangle is the bounding box of the circle helper
            draw_circle(img, (cx, cy), r, fgcolor, bb=True, fuzzy=fuzzy)
        elif shape == "circle":
            draw_circle(img, (cx, cy), r, fgcolor, bb=False, fuzzy=fuzzy)
        elif shape == "triangle":
            draw_triangle(img, (cx, cy), r, fgcolor, fuzzy=fuzzy)
        else:  # "arc"
            draw_arc(img, (cx, cy), r, fgcolor, fuzzy=fuzzy)

    return img

parameters setting

  • #### parameters

In [27]:
# side length in pixels of the square image requested from GenShapeFA
size=305
# passed as GenShapeFA's clipok flag; False selects the branch that caps the
# radius at the distance from the centre to the nearest image edge
clipok=False
    
# shapes are defined relative to a bounding/inscribing circle
# crmin / crmax: requested minimum and maximum radius
crmin=40
crmax=70
# number of images GenShapeFA generates per call (it returns only the last one)
nrcircle=1
# lower bound of the random factor applied to shape coordinates (upper bound is 1.0)
fuzziness=0.8

# class names indexed by label: 0 circle, 1 rectangle, 2 triangle
classes = ['circle', 'rectangle', 'triangle']
  • #### load model

In [18]:
# Drop any net held from an earlier run of this cell before building a new one.
# NOTE(review): presumably meant to release the old net's memory first — confirm.
net = None
# run Caffe on the GPU
caffe.set_mode_gpu()

# This module-level `net` is the one classify_shape reads.
net = caffe.Net(model_deploy,   # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

In [19]:
# set the input shape
# NOTE(review): 305 repeats the value of `size` from the parameters cell;
# presumably it must match the input size the network was trained with — confirm.
net.blobs['data'].reshape(1,        # batch size
                          3,         # 3-channel (BGR) images
                          305, 305)  # image size is 305x305
  • #### generate random shape

    1. we first generate the shape and
    2. then change the format to fit the caffe requirement
      • format: float32
      • color channel: BGR

In [23]:
# Generate one random rectangle (GenShapeFA creates a mode 'L' PIL image).
img = GenShapeFA("rectangle", crmin, crmax, nrcircle, size, clipok, fuzzy=fuzziness)
# PIL image -> array
img = np.asarray(img)
# repeat the single grey channel three times along a new leading axis
img = np.tile(img, (3,1,1))
# convert to float32 and divide by 255
img = img.astype('float32')/255
# move the channel axis last; the printed shape below is (305, 305, 3)
img = img.transpose(1,2,0)
plt.imshow(img)
plt.axis("off")
print img.shape


(305, 305, 3)
  • build the Caffe input transformer (mean subtraction, rescaling, RGB → BGR channel swap)

In [24]:
# load the mean for subtraction
mu = np.load(os.path.join(workspace, 'data/shape_mean.npy'))
# assumes mu is stored as (channels, height, width) — TODO confirm
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print 'mean-subtracted values:', zip('BGR', mu)

# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
# The generated images repeat one grey channel three times and the recorded
# per-channel means are identical, so the swap does not change the values here.
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR


mean-subtracted values: [('B', 158.98750144903136), ('G', 158.98750144903136), ('R', 158.98750144903136)]

In [25]:
# Apply the transformer configured above (transpose, mean, raw scale, channel swap)
# to the generated image.
transformed_image = transformer.preprocess('data', img)

In [28]:
# copy the image data into the memory allocated for the net
net.blobs['data'].data[...] = transformed_image

### perform classification
output = net.forward()

output_prob = output['prob'][0]  # the output probability vector for the first image in the batch

# `classes` is indexed by label, so the argmax index maps directly to a class name
print 'predicted class is:', classes[output_prob.argmax()]


predicted class is: rectangle

Next, we wrap the whole process into functions to make it clearer


In [30]:
def generate_shape(choice):
    """Generate one random shape image, display it, and return it as an array.

    choice -- shape name forwarded to GenShapeFA

    The remaining generator settings (crmin, crmax, nrcircle, size, clipok,
    fuzziness) are read from the module-level parameters. The returned array is
    float32 with the grey channel repeated three times along the last axis and
    pixel values divided by 255.
    """
    pil_img = GenShapeFA(choice, crmin, crmax, nrcircle, size, clipok, fuzzy=fuzziness)
    gray = np.asarray(pil_img)
    # repeat the grey channel three times along a new leading axis, then scale
    channels_first = np.tile(gray, (3,1,1)).astype('float32')/255
    # move the channel axis last for plt.imshow
    channels_last = channels_first.transpose(1,2,0)

    plt.imshow(channels_last)
    plt.axis("off")

    return channels_last

In [32]:
def classify_shape(transformer, img): 
    transformed_image = transformer.preprocess('data', img)
    # copy the image data into the memory allocated for the net
    net.blobs['data'].data[...] = transformed_image

    ### perform classification
    output = net.forward()

    output_prob = output['prob'][0]  # the output probability vector for the first image in the batch

    print 'predicted class is:', classes[output_prob.argmax()]
    
    return

see the classification performance !!!

  • circle

In [39]:
# Generate and display one random circle, then print the network's prediction for it.
img = generate_shape("circle")
classify_shape(transformer, img)


predicted class is: circle

In [40]:
# Second random circle: generate, display, and print the prediction.
img = generate_shape("circle")
classify_shape(transformer, img)


predicted class is: circle

In [41]:
# Third random circle: generate, display, and print the prediction.
img = generate_shape("circle")
classify_shape(transformer, img)


predicted class is: circle
  • rectangle

In [42]:
# Generate and display one random rectangle, then print the network's prediction for it.
img = generate_shape("rectangle")
classify_shape(transformer, img)


predicted class is: rectangle

In [43]:
# Second random rectangle: generate, display, and print the prediction.
img = generate_shape("rectangle")
classify_shape(transformer, img)


predicted class is: rectangle

In [45]:
# Third random rectangle: generate, display, and print the prediction.
img = generate_shape("rectangle")
classify_shape(transformer, img)


predicted class is: rectangle
  • triangle

In [46]:
# Generate and display one random triangle, then print the network's prediction for it.
img = generate_shape("triangle")
classify_shape(transformer, img)


predicted class is: triangle

In [47]:
# Second random triangle: generate, display, and print the prediction.
img = generate_shape("triangle")
classify_shape(transformer, img)


predicted class is: triangle

In [48]:
# Third random triangle: generate, display, and print the prediction.
img = generate_shape("triangle")
classify_shape(transformer, img)


predicted class is: triangle

wrong classification examples

  • Sometimes the model outputs the wrong answer, since the overall accuracy does not reach 100 percent, as the examples below show. And actually each w

In [54]:
# Triangle example; the recorded run predicted "circle".
# No random seed is set in the visible cells, so a re-run draws a different image.
img = generate_shape("triangle")
classify_shape(transformer, img)


predicted class is: circle

In [57]:
# Another triangle example; the recorded run predicted "circle".
# No random seed is set in the visible cells, so a re-run draws a different image.
img = generate_shape("triangle")
classify_shape(transformer, img)


predicted class is: circle

In [78]:
# Rectangle example; the recorded run predicted "circle".
# No random seed is set in the visible cells, so a re-run draws a different image.
img = generate_shape("rectangle")
classify_shape(transformer, img)


predicted class is: circle

In [97]:
# Circle example; the recorded run predicted "triangle".
# No random seed is set in the visible cells, so a re-run draws a different image.
img = generate_shape("circle")
classify_shape(transformer, img)


predicted class is: triangle