In [2]:
from keras.layers import Convolution2D, MaxPooling2D, Flatten
from keras.layers.core import Dense, Dropout, Activation
from keras.callbacks import History, Callback
from keras.models import model_from_config
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import np_utils
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt   # plt is used for all plots below
import numpy as np                # np is used throughout
import theano
import keras
import pickle
In [3]:
def cat2lab(cat):
    '''Convert a binary one-hot matrix back to an integer label vector.
    #Args:
        cat : binary categorical variable of shape (n_samples, 2)
    '''
    return np.array([0 if s[0] else 1 for s in cat])
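A quick sanity check of the helper on a hypothetical toy array (cat2lab simply inverts a binary one-hot encoding):

# [1, 0] encodes class 0, [0, 1] encodes class 1
cat2lab(np.array([[1., 0.], [0., 1.], [0., 1.]]))   # -> array([0, 1, 1])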
In [4]:
# load the pickled labels and images (binary mode is required by pickle)
lab_pickle = open('d://labels_new.p', 'rb')
img_pickle = open('d://images_new.p', 'rb')
labels = np.array(pickle.load(lab_pickle))
imgs = np.array(pickle.load(img_pickle))
In [5]:
reg_imgs = imgs / 255.0   # scale pixels to [0, 1]; 255.0 avoids integer division under Python 2
cat_labels = np_utils.to_categorical(labels, nb_classes=2)
In [6]:
reg_imgs_2d = []
for img in reg_imgs:
    reg_imgs_2d.append(np.reshape(img, (50, 50)))
reg_imgs_2d = np.array(reg_imgs_2d)
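The loop above can also be written as a single vectorized reshape; a sketch assuming reg_imgs has shape (n_samples, 2500):

# one-line equivalent of the loop above
reg_imgs_2d = reg_imgs.reshape(-1, 50, 50)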
In [7]:
# reshape to shape (1, 50, 50) for the CNN (Theano dim ordering: channels first)
reg_imgs_3d = []
for img in reg_imgs:
    reg_imgs_3d.append(np.reshape(img, (1, 50, 50)))
reg_imgs_3d = np.array(reg_imgs_3d)
In [8]:
x_tr1, x_te1, y_tr1, y_te1 = train_test_split(reg_imgs, cat_labels, test_size=0.2, random_state=123)
x_tr2, x_te2, y_tr2, y_te2 = train_test_split(reg_imgs_2d, cat_labels, test_size=0.2, random_state=123)
x_tr3, x_te3, y_tr3, y_te3 = train_test_split(reg_imgs_3d, cat_labels, test_size=0.2, random_state=123)
x_trn1, x_ten1, y_trn1, y_ten1 = train_test_split(reg_imgs, labels, test_size=0.2, random_state=123)
In [9]:
model1 = Sequential()
model1.add(Dense(2500, input_dim=2500, init='uniform'))
model1.add(Activation('relu'))
model1.add(Dense(2, activation='softmax'))
model1.compile(loss='categorical_crossentropy',
               optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.1, nesterov=True))
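As a check on the size of this fully connected net, the parameter count that model1.summary() should report works out as:

# hidden layer: 2500 inputs * 2500 units + 2500 biases = 6,252,500
# output layer: 2500 inputs * 2 units    + 2 biases    =     5,002
# total                                                = 6,257,502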
In [10]:
%time hist1 = model1.fit(x_tr1, y_tr1, nb_epoch=300, validation_split=0.2, batch_size=50, show_accuracy=True, verbose=0)
In [11]:
model1.summary()
In [12]:
plt.plot(hist1.history['acc'], label='Accuracy on training set')
plt.plot(hist1.history['loss'], label='Loss on training set')
plt.plot(hist1.history['val_acc'], '--', label='Accuracy on validation set')
plt.plot(hist1.history['val_loss'], '--', label='Loss on validation set')
plt.grid(False)   # note: plt.grid('off') would turn the grid ON, since a non-empty string is truthy
plt.legend()
Out[12]:
In [13]:
model1.evaluate(x_te1, y_te1, batch_size=50, show_accuracy=True)
Out[13]:
In [14]:
y_pred1 = model1.predict_classes(x_te1, batch_size=20)
y_pred1
Out[14]:
In [15]:
y_true1 = cat2lab(y_te1)   # integer labels for the test set (kept separate from the y_ten1 split in In [8])
y_true1
Out[15]:
In [16]:
print(classification_report(y_true1, y_pred1))
In [17]:
model2 = Sequential()
model2.add(Convolution2D(10, 10, 10, border_mode='same', input_shape=(1, 50, 50)))
model2.add(Activation('relu'))
# model2.add(Convolution2D(50, 5, 5, init='uniform'))
# model2.add(Activation('relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Dropout(0.5))
model2.add(Convolution2D(10, 10, 10, init='uniform', border_mode='same'))
model2.add(Activation('relu'))
# model2.add(Convolution2D(100, 5, 5, init='uniform'))
# model2.add(Activation('relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Dropout(0.3))
model2.add(Flatten())
model2.add(Dense(1250, init='uniform'))
model2.add(Activation('relu'))
model2.add(Dense(2, activation='softmax'))
model2.compile(loss='categorical_crossentropy',
               optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.5, nesterov=True))
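Tracing the feature-map shapes through model2 (Theano dim ordering, channels first) shows what Flatten() feeds into the Dense layer:

# input                 : (1, 50, 50)
# Convolution2D, 'same' : (10, 50, 50)
# MaxPooling2D (2, 2)   : (10, 25, 25)
# Convolution2D, 'same' : (10, 25, 25)
# MaxPooling2D (2, 2)   : (10, 12, 12)    # 25 // 2 = 12
# Flatten               : 10 * 12 * 12 = 1440 -> Dense(1250) -> Dense(2)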
In [18]:
%time hist2 = model2.fit(x_tr3, y_tr3, nb_epoch=300, batch_size=50, validation_split=0.2, show_accuracy=True, shuffle=True, verbose=1)
In [19]:
plt.figure(figsize=(25, 15))
plt.plot(hist2.history['acc'], label='acc')
plt.plot(hist2.history['loss'], label='loss')
plt.plot(hist2.history['val_acc'], '--', label='val_acc')
plt.plot(hist2.history['val_loss'], '--', label='val_loss')
plt.legend()
plt.ylim(0, max(hist2.history['acc']) + 0.05)
plt.grid(False)   # as above, plt.grid('off') would enable the grid
In [20]:
model2.summary()
In [21]:
model2.evaluate(x_te3, y_te3, batch_size=50, show_accuracy=True, verbose=1)
Out[21]:
In [22]:
def plot_wegh(model):
    '''
    Plot the filters of the first convolution layer.
    #Args
        model : fitted model
    '''
    wegh_arr = model.get_weights()
    num = len(wegh_arr[0])
    side = int(np.sqrt(num))
    if side * side == num:      # perfect square -> square grid
        row, col = side, side   # np.sqrt returns a float, so type(...) is int was always False
    else:
        col = int(num / 2)
        row = int(num / col)
    fig, axes = plt.subplots(row, col, subplot_kw={'xticks': [], 'yticks': []})
    plt.subplots_adjust(hspace=0.02, wspace=0.05)
    for i, ax in zip(range(num), axes.flat):
        ax.imshow(wegh_arr[0][i][0])
        ax.grid(False)
    plt.show()
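The indexing wegh_arr[0][i][0] relies on the Theano weight layout for Convolution2D, where the kernel tensor has shape (nb_filters, stack_size, rows, cols); a quick way to confirm this on the fitted model:

# expected here: (10, 1, 10, 10) = (filters, input channels, rows, cols)
print(model2.get_weights()[0].shape)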
In [23]:
plot_wegh(model2)
In [24]:
y_pred2 = model2.predict_classes(x_te3)
y_pred2
Out[24]:
In [25]:
y_true2 = cat2lab(y_te3)   # integer test labels for the CNN split
y_true2
Out[25]:
In [26]:
print(classification_report(y_true2, y_pred2))
In [31]:
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
In [32]:
cv = StratifiedKFold(labels,n_folds=10,shuffle=True)
In [33]:
params = {'C': [1e1, 1e2, 1e3, 1e4, 1e5],
          'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01]}
In [34]:
clf_grid = GridSearchCV(SVC(kernel='rbf'), params, cv=cv)
In [35]:
model3 = clf_grid.fit(reg_imgs,labels)
In [36]:
model3.best_score_ , model3.best_params_
Out[36]:
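The grid covers 5 values of C times 5 of gamma = 25 candidates, each scored with 10-fold CV (250 SVC fits in total). With this older sklearn.grid_search API, the per-candidate results can be inspected through grid_scores_, e.g.:

# list all 25 candidates, best mean CV score first
for s in sorted(model3.grid_scores_, key=lambda s: -s.mean_validation_score):
    print(s)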
In [37]:
# manual demonstration of what the GridSearchCV above does for the best parameters
svc_rslt = []
for x, y in cv:
    clf = SVC(kernel='rbf', C=10.0, gamma=0.005)
    clf.fit(reg_imgs[x], labels[x])
    svc_rslt.append(clf.score(reg_imgs[y], labels[y]))
svc_rslt = np.array(svc_rslt)
svc_rslt
Out[37]:
In [38]:
print('cross-validated SVC score is', svc_rslt.mean())
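The same estimate can be obtained in one line with cross_val_score (already imported above); a sketch using the same cv splits:

# one-line equivalent of the manual fold loop
scores = cross_val_score(SVC(kernel='rbf', C=10.0, gamma=0.005), reg_imgs, labels, cv=cv)
print(scores.mean())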
In [39]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
In [40]:
ens1 = RandomForestClassifier(n_estimators=250, max_depth=None, verbose=1)
ens2 = AdaBoostClassifier(SVC(kernel='rbf', gamma=0.005, C=10.0),
                          algorithm='SAMME',
                          n_estimators=100,
                          learning_rate=0.01)
ens3 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=None),
                          algorithm='SAMME',
                          n_estimators=100,
                          learning_rate=0.01)
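Note: AdaBoost with an SVC base estimator needs the discrete SAMME algorithm here, since SVC does not expose predict_proba (required by the default SAMME.R) unless it is constructed with probability=True.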
In [41]:
ens1.fit(x_trn1, y_trn1)
ens2.fit(x_trn1, y_trn1)
ens3.fit(x_trn1, y_trn1)
Out[41]:
In [42]:
ens1.score(x_ten1, y_ten1)
Out[42]:
In [43]:
ens2.score(x_ten1, y_ten1)
Out[43]:
In [44]:
ens3.score(x_ten1, y_ten1)
Out[44]: