In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Move the working directory up one level. The change is relative, so re-running
# this cell without restarting the kernel moves up one more level each time.
%cd ../
In [2]:
from pprint import pprint
import os
from os import path as ospath
from functools import reduce
import matplotlib.pyplot as plt
import numpy as np
from cvtools import io
from cvtools import spm
In [3]:
# Dataset locations and the output directory. All three are relative paths, so
# they depend on the working directory selected by `%cd ../` in the first cell.
train_data_path = 'dataset/training'
test_data_path = 'dataset/testing'
save_data_dir = 'data'
In [4]:
# Load the training data: list the image files under train_data_path
# (recursively), then obtain the images and their labels from that file list.
# NOTE(review): the cvtools.io helpers are not visible here; judging by their
# names they return ndarray images and filename-derived labels -- confirm.
train_data = tuple(io.get_images_name(train_data_path,recursive=True))
train_images = io.load_image2ndarray(train_data)
train_labels = io.get_image_label_in_filename(train_data)
In [5]:
# Directory in which get_spm_model looks for, and stores, serialized models.
path_prefix = save_data_dir
filename_fmt = 'spm_%s_%d.pkl'  # first field: kernel name; second field: training-set size
# Load a previously saved SPM model, or train and save a new one.
def get_spm_model(svm_kernel,filename = 'spm.pkl'):
    """Return an SPM model for ``svm_kernel``, reusing a saved copy when one exists.

    The model file is looked up as ``path_prefix/filename``. If that file
    exists, the model is read back with ``io.load_data``; otherwise a new
    ``spm.SpatialPyramidMatch`` is built from the notebook-level
    ``train_images`` / ``train_labels`` and written out with ``io.save_data``.

    Args:
        svm_kernel: Kernel name forwarded to ``spm.SpatialPyramidMatch``.
        filename: Name of the model file inside ``path_prefix``.

    Returns:
        The loaded or freshly trained model object.
    """
    cache_file = ospath.join(path_prefix, filename)

    if not ospath.exists(cache_file):
        # No saved model yet: train one and persist it for later runs.
        print("Training SPM Model of %s ...." % (svm_kernel,))
        trained = spm.SpatialPyramidMatch(train_images, train_labels, svm_kernel=svm_kernel)
        saved_to = io.save_data(trained, path_prefix=path_prefix, filename=filename)
        print("SPM Model Save Path:", saved_to)
        return trained

    # NOTE(review): the '.pkl' suffix suggests pickle-based storage -- only load
    # model files from a trusted location; confirm against cvtools.io.
    print("SPM Model of %s is existed,loading ..." % (svm_kernel,))
    cached = io.load_data(path_prefix=path_prefix, filename=filename)
    print("loading done!")
    return cached
In [6]:
# Performance evaluation helper.
def classification(spm_model,test_data,test_images,test_labels,show_all=False):
    """Predict labels for ``test_images`` and print the resulting accuracy.

    Args:
        spm_model: Object exposing ``predict_images(images)``,
            ``get_label(prediction)`` and a ``svm_kernel`` attribute.
        test_data: Per-image identifiers (file paths in this notebook), used
            only when reporting misclassified samples.
        test_images: Images passed to ``spm_model.predict_images``.
        test_labels: Expected label for each image, in the same order.
        show_all: When true, also print one ``path target output`` line for
            every misclassified sample.

    Returns:
        None. Results are written to standard output.
    """
    answer = spm_model.predict_images(test_images)
    output_labels = [spm_model.get_label(ans) for ans in answer]

    total = len(test_labels)
    correct = sum(1 for target, output in zip(test_labels, output_labels) if target == output)
    # An empty test set is reported as 0% rather than raising ZeroDivisionError.
    accuracy = correct / total if total else 0.0
    print("Kernel: %12s\taccuracy:%.2f%%(%d/%d)" % (spm_model.svm_kernel, accuracy * 100, correct, total))

    if show_all:
        for path, target, output in zip(test_data, test_labels, output_labels):
            if target != output:
                # Report only the offending sample, not the whole collections.
                print(path, target, output)
In [7]:
# 使用不同的核函数构造模型
spm_models = {}
svm_kernel = ('linear','poly','rbf','sigmoid','precomputed')
train_data_size = len(train_images)
def run_and_get_time(kernel,train_data_size):
spm_models[kernel] = get_spm_model(kernel,filename)
for kernel in svm_kernel:
filename = filename_fmt % (kernel,train_data_size)
%time run_and_get_time(kernel,train_data_size)
In [8]:
# Load the test data: list the image files under test_data_path (recursively),
# then obtain the images and their labels from that file list.
# NOTE(review): the cvtools.io helpers are not visible here; judging by their
# names they return ndarray images and filename-derived labels -- confirm.
test_data = tuple(io.get_images_name(test_data_path,recursive=True))
test_images = io.load_image2ndarray(test_data)
test_labels = io.get_image_label_in_filename(test_data)
In [9]:
# Evaluate every model in spm_models on the test set.
print("testing ...")
for kernel in spm_models:
    model = spm_models[kernel]
    classification(model, test_data, test_images, test_labels, show_all=False)