In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Move the working directory up one level so the relative dataset/data paths below resolve;
# the recorded output shows the resulting directory.
# NOTE(review): the path is relative, so re-running this cell moves up another level.
%cd ../


E:\ProjectWorkplace\SpatialPyramidMatchExp

In [2]:
from pprint import pprint
import os
from os import path as ospath
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np

from cvtools import io
from cvtools import spm

In [3]:
# Dataset locations, given relative to the current working directory.
train_data_path = 'dataset/training'
test_data_path = 'dataset/testing'
# Directory that holds the saved SPM model files (used as path_prefix further down).
save_data_dir = 'data'

In [4]:
# Load the training data
# presumably get_images_name yields image file paths found under the directory — verify in cvtools.io
train_data = tuple(io.get_images_name(train_data_path,recursive=True))
train_images = io.load_image2ndarray(train_data)
# presumably the class label is parsed out of each file name — verify in cvtools.io
train_labels = io.get_image_label_in_filename(train_data)

In [5]:
# Directory that get_spm_model loads cached models from and saves new ones to.
path_prefix = save_data_dir
filename_fmt = 'spm_%s_%d.pkl'# first field: the SVM kernel used; second field: the training-set size
# Load a cached SPM model, or build one when no cached file exists
def get_spm_model(svm_kernel,filename = 'spm.pkl'):
    """Return the SPM model for ``svm_kernel``, reusing a saved copy when one exists.

    The model file is looked up as ``filename`` inside the module-level
    ``path_prefix`` directory. If that file exists it is loaded through
    ``io.load_data``; otherwise a new ``spm.SpatialPyramidMatch`` is built from
    the module-level ``train_images`` / ``train_labels`` and stored through
    ``io.save_data``.

    Args:
        svm_kernel: Kernel name forwarded to ``spm.SpatialPyramidMatch``.
        filename: Name of the model file inside ``path_prefix``.

    Returns:
        The loaded or freshly built model object.
    """
    cache_file = ospath.join(path_prefix, filename)

    # Cache miss: build the model, persist it, and hand it back.
    if not ospath.exists(cache_file):
        print("Training SPM Model of %s ...." % (svm_kernel,))
        fresh_model = spm.SpatialPyramidMatch(train_images, train_labels, svm_kernel=svm_kernel)
        saved_to = io.save_data(fresh_model, path_prefix=path_prefix, filename=filename)
        print("SPM Model Save Path:", saved_to)
        return fresh_model

    # Cache hit: reuse the stored model.
    print("SPM Model of %s is existed,loading ..." % (svm_kernel,))
    cached_model = io.load_data(path_prefix=path_prefix, filename=filename)
    print("loading done!")
    return cached_model

In [6]:
# Performance evaluation
def classification(spm_model,test_data,test_images,test_labels,show_all=False):
    """Predict labels for the test images and print the model's accuracy.

    Args:
        spm_model: Object providing ``predict_images(images)``, ``get_label(answer)``
            and an ``svm_kernel`` attribute.
        test_data: One identifier per test image (the notebook passes file names),
            aligned with ``test_labels``. Only read when ``show_all`` is true.
        test_images: Images handed to ``spm_model.predict_images``.
        test_labels: Expected label for each test image.
        show_all: When true, additionally print one line per misclassified
            image: its identifier, the expected label and the predicted label.

    Returns:
        None. All results are written to stdout.
    """
    answer = spm_model.predict_images(test_images)
    output_labels = [spm_model.get_label(ans) for ans in answer]

    total = len(test_labels)
    correct = sum(1 for target, output in zip(test_labels, output_labels) if target == output)
    # An empty test set would otherwise raise ZeroDivisionError; report 0% instead.
    accuracy = correct / total if total else 0.0
    print("Kernel: %12s\taccuracy:%.2f%%(%d/%d)" % (spm_model.svm_kernel,accuracy*100,correct,total))

    if show_all:
        for path, target, output in zip(test_data, test_labels, output_labels):
            if target != output:
                # Print only the offending sample, not the whole collections.
                print(path, target, output)

In [7]:
# Build (or load) one SPM model for each SVM kernel
# Filled in by run_and_get_time: kernel name -> SPM model.
spm_models = {}
svm_kernel = ('linear','poly','rbf','sigmoid','precomputed')
# Number of training images; embedded in the cached model's file name.
train_data_size = len(train_images)
def run_and_get_time(kernel,train_data_size):
    """Load or train the SPM model for ``kernel`` and store it in ``spm_models``.

    Kept as a separate function so the whole step can be timed with ``%time``.

    Args:
        kernel: SVM kernel name; also the key under which the model is stored.
        train_data_size: Training-set size, used to fill in ``filename_fmt``.
    """
    # Derive the cache file name from the arguments instead of relying on a
    # module-level `filename` that only exists while the calling loop is running.
    model_filename = filename_fmt % (kernel, train_data_size)
    spm_models[kernel] = get_spm_model(kernel, model_filename)
    
# Time the load-or-train step for every kernel. The cached model's file name is
# rebuilt on each iteration from the kernel name and the training-set size.
for kernel in svm_kernel:
    filename = filename_fmt % (kernel,train_data_size)
    %time run_and_get_time(kernel,train_data_size)


SPM Model of linear is existed,loading ...
loading done!
Wall time: 6 ms
Training SPM Model of poly ....
SPM Model Save Path: data\spm_poly_18.pkl
Wall time: 27.9 s
Training SPM Model of rbf ....
SPM Model Save Path: data\spm_rbf_18.pkl
Wall time: 28 s
Training SPM Model of sigmoid ....
SPM Model Save Path: data\spm_sigmoid_18.pkl
Wall time: 27 s
Training SPM Model of precomputed ....
SPM Model Save Path: data\spm_precomputed_18.pkl
Wall time: 26.9 s

In [8]:
# Load the test data (same three steps as for the training data)
test_data = tuple(io.get_images_name(test_data_path,recursive=True))
test_images = io.load_image2ndarray(test_data)
# presumably the class label is parsed out of each file name — verify in cvtools.io
test_labels = io.get_image_label_in_filename(test_data)

In [9]:
# Evaluate every model on the test set
print("testing ...")
# The dict key `kernel` is not used in the loop body; classification prints the
# model's own svm_kernel attribute.
for kernel,model in spm_models.items():
    classification(model,test_data,test_images,test_labels,show_all=False)


testing ...
Kernel:       linear	accuracy:29.17%(35/120)
Kernel:         poly	accuracy:16.67%(20/120)
Kernel:          rbf	accuracy:32.50%(39/120)
Kernel:      sigmoid	accuracy:30.00%(36/120)
Kernel:  precomputed	accuracy:32.50%(39/120)