Prepare


In [106]:
import numpy as np
import cv2
from skimage.feature import local_binary_pattern
from algae_core import cmap
import pickle
import os, fnmatch

#lbp
radius = 3
cmax=640
rmax=480
hws=3
def get_image_pair(file_pair):
    """ """
    oim=cv2.imread(file_pair['raw'])#BGR
    im=cv2.resize(oim, (cmax,rmax),interpolation = cv2.INTER_CUBIC) 
       
    ocl=cv2.imread(file_pair['label'],0)
    cl=cv2.resize(ocl, (cmax,rmax),interpolation = cv2.INTER_NEAREST )
    #print cl.shape
    if cl is None:# not found
         cl=np.zeros( (im.shape[0],im.shape[1]),dtype=np.uint8 )
    
    print "raw: %s, label: %s"%(file_pair['raw'], file_pair['label'])
    
    im_grey=cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern( im_grey, 8 * radius, radius, 'uniform')
    #cv2.imshow("lbp", lbp)
    return cl,im,lbp

def show_overlay(cl,im):
    ol=im.copy() 
    for r in xrange(rmax):
        for c in xrange(cmax):
            if cl[r,c]:
                ol[r,c] = cmap[cl[r,c]-1]
    cv2.imshow("overlay", ol)

def get_xy(train_pairs):
    xrmax= ( (rmax-2*hws)*(cmax-2*hws)*len(train_pairs) )
    xcmax= 3+(2*hws)**2    
    x=np.zeros((xrmax,xcmax), dtype=np.uint8)
    
    yrmax= ( (rmax-2*hws)*(cmax-2*hws)*len(train_pairs) )
    y=np.zeros((yrmax), dtype=np.uint8)
    k=0
    for count,i in enumerate(train_pairs):    
        #print "------- processed %d from %d"%(count,len(train_pairs))
        cl,im,lbp=get_image_pair(i)
        #show_overlay(cl,im)
        if im.shape[0]!=rmax or im.shape[1]!=cmax or lbp.shape[0]!=rmax or lbp.shape[1]!=cmax:
            print "Error: image pair has missmatch size."
        for r in xrange(hws,lbp.shape[0]-hws):
            for c in xrange(hws,lbp.shape[1]-hws):
                xrow=np.concatenate([im[r,c],
                        lbp[r-hws:r+hws,c-hws:c+hws].reshape(1,-1)[0],])
                x[k,:]=xrow
                y[k]=cl[r,c]
                k+=1
    return x,y

In [107]:
def preprocess(dataset_path):
    raw_files=fnmatch.filter(os.listdir(dataset_path), '*.jpg')
    raw_list=[i[:-4] for i in raw_files]

    label_files=fnmatch.filter(os.listdir(dataset_path), '*.png')
    label_list={i[:-4]:None for i in label_files}

    train_pairs=[]
    for count,i in enumerate(raw_list):
        if i in label_list:
            train_pairs.append({
                'raw': os.path.join(dataset_path,i)+'.jpg',
                'label': os.path.join(dataset_path,i)+'.png',
            })

    with open(os.path.join(dataset_path,'train_pairs.obj'),'wb') as f:
        pickle.dump(train_pairs, f, pickle.HIGHEST_PROTOCOL)

    for e,i in enumerate(train_pairs):
        x,y = get_xy([i])
        with open(os.path.join(dataset_path,'%03d.xy'%e),'wb') as f:
            pickle.dump({'x':x, 'y':y }, f, pickle.HIGHEST_PROTOCOL)

Train


In [84]:
import numpy as np
import pickle
import os
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from datetime import datetime
from sklearn.utils import shuffle

dataset_path=os.path.join('storage','training')
preprocess(dataset_path)

xy_files=fnmatch.filter(os.listdir(dataset_path), '*.xy')
xlist=[]
ylist=[]
for i in xy_files:
    with open(os.path.join(dataset_path,i),'rb') as f:
        xy = pickle.load(f)
        xlist.append( xy['x'] )
        ylist.append( xy['y'] )
        
x=np.concatenate(xlist, axis=0)
y=np.concatenate(ylist)

x, y = shuffle(x, y, random_state=datetime.now().second)

print "x.shape: %s, y.shape: %s"%(x.shape, y.shape)


x.shape: (15025800L, 39L), y.shape: (15025800L,)

In [85]:
clf_et = ExtraTreesClassifier(n_estimators=1, max_depth=30, n_jobs=-1,
    min_samples_split=1, random_state=datetime.now().second, 
    max_features=None, verbose=True)
scores = cross_val_score(clf_et, x, y, cv=5)
print scores
print "--Extra Tree: %s"%scores.mean()
print "--Training"
t=clf_et.fit( x, y)
fname=datetime.now().strftime("%m%d%H%M%S")+'.tree'
with open(os.path.join(dataset_path,fname),'wb') as f:
    pickle.dump(t,f, pickle.HIGHEST_PROTOCOL)

print '--Finished'


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  9.5min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  9.5min finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  9.3min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  9.3min finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  8.6min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  8.6min finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  8.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  8.7min finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  8.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  8.7min finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s finished
[ 0.96780008  0.96829656  0.96791086  0.96852778  0.96813743]
--Extra Tree: 0.968134542009
--Training
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 12.6min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 12.6min finished
--Finished

Combine tree


In [108]:
def combine_trees(et_a, et_b):
    et_a.estimators_ += et_b.estimators_
    et_a.n_estimators = len(et_a.estimators_)
    return et_a

In [109]:
t_list=fnmatch.filter(os.listdir(dataset_path), '*.tree')
tree_list=[]
for i in t_list:
    with open(os.path.join(dataset_path, i ),'rb') as f:
        tree_list.append(pickle.load(f))
forest = reduce(combine_trees, tree_list)
with open(os.path.join(dataset_path,'forest.pic'),'wb') as f:
    pickle.dump(forest,f, pickle.HIGHEST_PROTOCOL)

Predict


In [111]:
%matplotlib inline
import numpy as np
import os
from matplotlib import pyplot as plt
import pickle

dataset_path=os.path.join('storage','training')
with open(os.path.join(dataset_path,'forest.pic'),'rb') as f:
    forest = pickle.load(f)
    
dataset_path=os.path.join('storage','evaluation')
preprocess(dataset_path)

cmap=np.array([  
    ( 0  , 255, 255,  ),
    ( 14 , 127, 255,  ),
    ( 44 , 160, 44 ,  ),
    ( 40 , 39 , 214,  ),
    ( 0  , 0  , 255,  ),
    ( 0  , 255, 0  ,  ),
    ( 194, 119, 227,  ),
    ( 255, 0  , 0  ,  ),
    ( 34 , 189, 188,  ),
    ( 207, 190, 23 ,  ),])

with open(os.path.join(dataset_path,'train_pairs.obj'),'rb') as f:
    train_pairs = pickle.load(f)

#i=train_pairs[1]

In [112]:
plt.rcParams['figure.figsize'] = (8.0, 6.0)
for i in train_pairs:
    x,y=get_xy( [i] )
    oim=cv2.imread(i['raw'])#BGR
    im=cv2.resize(oim, (cmax,rmax),interpolation = cv2.INTER_CUBIC) 
    cl_p=forest.predict( x )
    cl_p.resize((rmax-2*hws,cmax-2*hws))
    ol=im.copy()
    for r in xrange(rmax-2*hws):
        for c in xrange(cmax-2*hws):
            predicting_class=cl_p[r,c]
            if predicting_class>0:
                ol[r+hws,c+hws] = cmap[ (predicting_class+9)%10 ]
    ol_rgb = cv2.cvtColor(ol, cv2.COLOR_BGR2RGB)
    plt.imshow(ol_rgb)
    plt.show()


[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished