Import


In [419]:
%matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import cv2
from skimage.feature import local_binary_pattern
#from algae_core import cmap
import pickle
import os, fnmatch
from scipy import ndimage as ndi
from skimage.util import img_as_float
from skimage.filters import gabor_kernel


#lbp
radius = 5
cmax=640
rmax=480
hws= 15

def get_image_pair(file_pair):
    """ """
    oim=cv2.imread(file_pair['raw'])#BGR
    im=cv2.resize(oim, (cmax,rmax),interpolation = cv2.INTER_CUBIC) 
       
    ocl=cv2.imread(file_pair['label'],0)
    cl=cv2.resize(ocl, (cmax,rmax),interpolation = cv2.INTER_NEAREST )
    #print cl.shape
    if cl is None:# not found
         cl=np.zeros( (im.shape[0],im.shape[1]),dtype=np.uint8 )
    
    print "raw: %s, label: %s"%(file_pair['raw'], file_pair['label'])
    
    im_grey=img_as_float( cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) )
    #lbp = local_binary_pattern( im_grey, 8 * radius, radius, 'uniform')
    #cv2.imshow("lbp", lbp)
    return cl,im,im_grey#,lbp

def show_overlay(cl,im):
    ol=im.copy() 
    for r in xrange(rmax):
        for c in xrange(cmax):
            if cl[r,c]:
                ol[r,c] = cmap[cl[r,c]-1]
    cv2.imshow("overlay", ol)

kernels = []
for theta in range(8):
    theta = theta / 4. * np.pi
    for sigma in (1, 3, 5 , 7):
        for frequency in (0.05, 0.25, 0.50, 0.75):
            kernel = np.real(gabor_kernel(frequency, theta=theta,
                                          sigma_x=sigma, sigma_y=sigma))
            kernels.append(kernel)
    
def cf(image, kernels):
    ft = np.zeros((len(kernels), image.shape[0], image.shape[1]), dtype=np.double)
    for k, kernel in enumerate(kernels):
        ft[k,:,:] = ndi.convolve(image, kernel, mode='wrap')
    return ft

def get_xy(train_pairs):
    m=len(kernels)
    n=rmax*cmax
    x=np.zeros((n,m), dtype=np.float32)
    y=np.zeros((n), dtype=np.uint8)
    for count,i in enumerate(train_pairs):    
        #print "------- processed %d from %d"%(count,len(train_pairs))
        cl,im,im_grey=get_image_pair(i)
        ft=cf(im_grey,kernels)
        if im.shape[0]!=rmax or im.shape[1]!=cmax:
            print "Error: image pair has missmatch size."
        a=ft.reshape(m,-1,1)[:,:,0]
        x[count*n:(count+1)*n, :]=a.T
        y[count*n:(count+1)*n]=cl.reshape(1,-1)
        print "get_xy::y.max(): %s"%y.max()
    return x,y

def preprocess(dataset_path):
    raw_files=fnmatch.filter(os.listdir(dataset_path), '*.jpg')
    raw_list=[i[:-4] for i in raw_files]

    label_files=fnmatch.filter(os.listdir(dataset_path), '*.png')
    label_list={i[:-4]:None for i in label_files}

    train_pairs=[]
    for count,i in enumerate(raw_list):
        if i in label_list:
            train_pairs.append({
                'raw': os.path.join(dataset_path,i)+'.jpg',
                'label': os.path.join(dataset_path,i)+'.png',
            })

    with open(os.path.join(dataset_path,'train_pairs.obj'),'wb') as f:
        pickle.dump(train_pairs, f, pickle.HIGHEST_PROTOCOL)

    for e,i in enumerate(train_pairs):
        x,y = get_xy([i])
        print "preprocess::y.max(): %s"%y.max()
        with open(os.path.join(dataset_path,'%03d.xy'%e),'wb') as f:
            pickle.dump({'x':x, 'y':y }, f, pickle.HIGHEST_PROTOCOL)


Using matplotlib backend: Qt4Agg

1. Prepare


In [420]:
## loading xy
import numpy as np
import pickle
import os
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from datetime import datetime
from sklearn.utils import shuffle

dataset_path=os.path.join('storage','training')
preprocess(dataset_path)

xy_files=fnmatch.filter(os.listdir(dataset_path), '*.xy')
xlist=[]
ylist=[]
for i in xy_files:
    with open(os.path.join(dataset_path,i),'rb') as f:
        xy = pickle.load(f)
        xlist.append( xy['x'] )
        ylist.append( xy['y'] )


raw: storage\training\1.jpg, label: storage\training\1.png
get_xy::y.max(): 1
preprocess::y.max(): 1
raw: storage\training\2.jpg, label: storage\training\2.png
get_xy::y.max(): 1
preprocess::y.max(): 1

1.1 Sampling


In [431]:
x=np.concatenate(xlist, axis=0)
y=np.concatenate(ylist)

x, y = shuffle(x, y, random_state=datetime.now().second)
n=int(x.shape[0]*0.3)
x=x[:n,:]
y=y[:n]
print "x.shape: %s, y.shape: %s"%(x.shape, y.shape)


x.shape: (184320L, 128L), y.shape: (184320L,)

2. Train


In [432]:
clf_et = ExtraTreesClassifier(n_estimators=1, max_depth=30, n_jobs=-1,
    min_samples_split=1, random_state=datetime.now().second, 
    max_features=None, verbose=True)

#scores = cross_val_score(clf_et, x, y, cv=5)
#print scores
#print "--Extra Tree: %s"%scores.mean()

print "--Training"
t=clf_et.fit( x, y)
fname=datetime.now().strftime("%m%d%H%M%S")+'.tree'
with open(os.path.join(dataset_path,fname),'wb') as f:
    pickle.dump(t,f, pickle.HIGHEST_PROTOCOL)

print '--Finished'


--Training
--Finished
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   10.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   10.3s finished

3. Combine tree


In [433]:
import os, fnmatch
import pickle

dataset_path=os.path.join('storage','training')
def combine_trees(et_a, et_b):
    et_a.estimators_ += et_b.estimators_
    et_a.n_estimators = len(et_a.estimators_)
    return et_a

t_list=fnmatch.filter(os.listdir(dataset_path), '*.tree')
tree_list=[]
for i in t_list:
    with open(os.path.join(dataset_path, i ),'rb') as f:
        tree_list.append(pickle.load(f))
forest = reduce(combine_trees, tree_list)
with open(os.path.join(dataset_path,'forest.pic'),'wb') as f:
    pickle.dump(forest,f, pickle.HIGHEST_PROTOCOL)
    
print "combining %s"%t_list
print "to generate %s"%fnmatch.filter(os.listdir(dataset_path), '*.pic')


combining ['0307012723.tree']
to generate ['forest.pic']

4. Predict


In [434]:
import numpy as np
import os
import pickle

dataset_path=os.path.join('storage','training')
with open(os.path.join(dataset_path,'forest.pic'),'rb') as f:
    forest = pickle.load(f)
    
dataset_path=os.path.join('storage','evaluation')
preprocess(dataset_path)

cmap=np.array([  
    ( 0  , 255, 255,  ),
    ( 14 , 127, 255,  ),
    ( 44 , 160, 44 ,  ),
    ( 40 , 39 , 214,  ),
    ( 0  , 0  , 255,  ),
    ( 0  , 255, 0  ,  ),
    ( 194, 119, 227,  ),
    ( 255, 0  , 0  ,  ),
    ( 34 , 189, 188,  ),
    ( 207, 190, 23 ,  ),])

with open(os.path.join(dataset_path,'train_pairs.obj'),'rb') as f:
    train_pairs = pickle.load(f)

#i=train_pairs[1]


raw: storage\evaluation\3_predict.jpg, label: storage\evaluation\3_predict.png
get_xy::y.max(): 1
preprocess::y.max(): 1

In [572]:
plt.rcParams['figure.figsize'] = (12.0, 9.0)
for i in train_pairs:
    x,y=get_xy( [i] )
    oim=cv2.imread(i['raw'])#BGR
    im=cv2.resize(oim, (cmax,rmax),interpolation = cv2.INTER_CUBIC) 
    cl_p=forest.predict( x )
    cl_p.resize((rmax,cmax))
    ol=im.copy()
    for r in xrange(rmax):
        for c in xrange(cmax):
            predicting_class=cl_p[r,c]
            if predicting_class>0:
                ol[r,c] = cmap[ predicting_class-1 ]
    ol_rgb = cv2.cvtColor(ol, cv2.COLOR_BGR2RGB)
    plt.imshow(ol_rgb)
    plt.show()


raw: storage\evaluation\3_predict.jpg, label: storage\evaluation\3_predict.png
get_xy::y.max(): 1
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished

In [573]:
from skimage import filters
a=filters.gaussian_filter(cl_p.astype(float), sigma=0.3*radius)
plt.imshow(a)


Out[573]:
<matplotlib.image.AxesImage at 0x51490a58>

In [574]:
from skimage.feature import blob_doh
blobs = blob_doh(a, max_sigma=3*radius, threshold=.001)

In [575]:
from skimage.draw import circle_perimeter_aa
ol2=cv2.cvtColor(im.copy(), cv2.COLOR_BGR2RGB)
for b in blobs:
    rr, cc, val = circle_perimeter_aa(b[0] , b[1] , b[2] ,shape=(rmax,cmax))
    ol2[rr, cc, 1] = val * 255       
plt.imshow(ol2)


Out[575]:
<matplotlib.image.AxesImage at 0x5256d588>

In [ ]: