In [66]:
%%bash
# Render this notebook to HTML, then move the result into ../notebook_htmls/ as the "_v1" snapshot.
# The `&&` means the move only happens when nbconvert exits successfully.
jupyter nbconvert --to html Training_Network_to_Idenfitying_HandPicked_Classes.ipynb && mv Training_Network_to_Idenfitying_HandPicked_Classes.html ../notebook_htmls/Training_Network_to_Idenfitying_HandPicked_Classes_v1.html
Input:
Output:
In [3]:
# standard library
import glob
import ntpath
import os
import sys
sys.path.append(os.getcwd()+'/../')
# sklearn
import sklearn
import sklearn.model_selection
import sklearn.svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
# other
import numpy as np
import pandas as pd
# plotting
import seaborn as sns
import matplotlib.pyplot as plt
# NOTE(review): the preview cells call `image.load_img(...)`, but `image` is never
# imported in this notebook; presumably `keras.preprocessing.image` - confirm and import it here.
%matplotlib inline
In [3]:
def perf_measure(y_actual, y_hat):
    """Count the confusion-matrix cells of a binary (0/1) prediction.

    Parameters
    ----------
    y_actual : sequence
        Ground-truth labels.
    y_hat : sequence
        Predicted labels, same length as ``y_actual``.

    Returns
    -------
    tuple
        ``(TP, FP, TN, FN)`` as plain integers.  A prediction of 1 counts as a
        true positive when the truth is also 1 and as a false positive
        otherwise; a prediction of 0 counts as a true negative when the truth
        is also 0 and as a false negative otherwise.  Predictions that are
        neither 0 nor 1 are not counted.

    Raises
    ------
    ValueError
        If the two sequences differ in length.
    """
    if len(y_actual) != len(y_hat):
        raise ValueError(
            'y_actual and y_hat must have the same length '
            '({0} != {1})'.format(len(y_actual), len(y_hat)))

    TP = FP = TN = FN = 0
    # Iterate over the values pairwise instead of indexing with ``[i]``:
    # on a pandas Series ``[i]`` is a label lookup, not a positional one.
    for actual, predicted in zip(y_actual, y_hat):
        if predicted == 1:
            if actual == 1:
                TP += 1
            else:
                FP += 1
        elif predicted == 0:
            if actual == 0:
                TN += 1
            else:
                FN += 1
    return (TP, FP, TN, FN)
In [4]:
# Folder holding the hand-picked target-class images.
# Keep the trailing slash: '*' is appended directly to this string when globbing.
target_img_folder ='../data_img_classes/class_red_pink/'
In [5]:
# Folders that supply the non-target (label 0) images.
non_target_img_folders = ['../original_img/']

# Target images and their bare file names.
target_img_paths = glob.glob(target_img_folder + '*')
target_img_paths_stemless = [ntpath.basename(t) for t in target_img_paths]

# Every image in the non-target folders whose file name is not also a target.
non_target_img_paths = [
    img_path
    for non_target_folder in non_target_img_folders
    for img_path in glob.glob(non_target_folder + '*')
    if ntpath.basename(img_path) not in target_img_paths_stemless
]

# One row per image: targets first (label 1.0), then non-targets (label 0.0).
img_paths = np.append(target_img_paths, non_target_img_paths)
labels = np.concatenate((np.ones(len(target_img_paths)),
                         np.zeros(len(non_target_img_paths))))
df = pd.DataFrame({'img_path': img_paths, 'label': labels},
                  columns=['img_path', 'label'])
df['img_name'] = df['img_path'].map(ntpath.basename)  # file name without folder
df['label'] = df['label'].astype(float)  # numeric label
In [7]:
# Preview the first rows of the image/label table.
df.head()
Out[7]:
In [ ]:
# Load the pre-computed network features, one row per image.
# create_image_class_dataframe reads two columns from this table: `img_name`
# and `nn_features` (a stringified list such as "[0.1, 0.2, ...]").
img_feature_df = pd.read_csv('../data_nn_features/img_features_all.csv',index_col=0)
img_feature_df.head()
In [13]:
# get target and non-target lists
def _parse_nn_features(raw):
    """Turn a stringified list such as '[0.1, 0.2]' into a list of floats."""
    return [float(value)
            for value in raw.replace(']', '').replace('[', '').split(',')]


def create_image_class_dataframe(target_img_folder, non_target_folders=None,
                                 feature_df=None):
    """Build the labelled feature table for one hand-picked image class.

    Images found in ``target_img_folder`` get label 1.0.  Images found in the
    non-target folders get label 0.0, unless their file name also appears in
    the target folder.  Each image is then joined to its network features.

    Parameters
    ----------
    target_img_folder : str
        Folder (with trailing slash) containing the target-class images.
    non_target_folders : list of str, optional
        Folders (with trailing slash) supplying the non-target images.
        Defaults to the notebook-level ``non_target_img_folders``.
    feature_df : pandas.DataFrame, optional
        Table with an ``img_name`` column and an ``nn_features`` column holding
        a stringified list of numbers.  Defaults to the notebook-level
        ``img_feature_df``.  If an image name occurs more than once, the first
        occurrence is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``img_path``, ``label``, ``img_name``, ``x0`` ... ``x{n-1}``,
        one row per image, targets first.  Feature columns are floats.

    Raises
    ------
    ValueError
        If no image is found at all.
    KeyError
        If an image has no entry in ``feature_df``.
    AssertionError
        If the same image name occurs more than once among the collected images.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if non_target_folders is None:
        non_target_folders = non_target_img_folders
    if feature_df is None:
        feature_df = img_feature_df

    # Sorted so the row order does not depend on the file system's listing order.
    target_paths = sorted(glob.glob(target_img_folder + '*'))
    target_names = set(ntpath.basename(path) for path in target_paths)
    non_target_paths = [
        path
        for folder in non_target_folders
        for path in sorted(glob.glob(folder + '*'))
        if ntpath.basename(path) not in target_names  # targets are not non-targets
    ]
    img_paths = target_paths + non_target_paths
    if not img_paths:
        raise ValueError(
            'no images found under {0!r} or {1!r}'.format(
                target_img_folder, list(non_target_folders)))

    labelled = pd.DataFrame(
        {'img_path': img_paths,
         'label': [1.0] * len(target_paths) + [0.0] * len(non_target_paths)},
        columns=['img_path', 'label'])
    labelled['img_name'] = labelled['img_path'].apply(ntpath.basename)

    # Make sure there is only one instance per image, otherwise the merge
    # below would multiply rows.
    names = list(labelled['img_name'])
    assert len(set(names)) == len(names), 'duplicate image names in the image folders'

    # First stored feature string per image name.
    raw_by_name = {}
    for name, raw in zip(feature_df['img_name'], feature_df['nn_features']):
        raw_by_name.setdefault(name, raw)
    missing = [name for name in names if name not in raw_by_name]
    if missing:
        raise KeyError(
            'no nn_features stored for {0} image(s), e.g. {1}'.format(
                len(missing), missing[:5]))

    # Parse everything once and stack in a single step; always 2-D, even for one image.
    X = np.array([_parse_nn_features(raw_by_name[name]) for name in names],
                 dtype=float)
    xcolumns = ['x' + str(i) for i in range(X.shape[1])]
    X_df = pd.DataFrame(X, columns=xcolumns)
    X_df['img_name'] = names

    # merge together
    return labelled.merge(X_df, on='img_name')
In [14]:
# remove some non-targets to make dataset smaller #
# i_class0 = np.where(df.label==0.0)[0]
# i_class0_remove = np.random.choice(i_class0,int(np.round(len(i_class0)/1.1)))
# df_smaller = df.drop(i_class0_remove)
#df_smaller.to_csv('test.csv')
In [15]:
# image folder for the striped class; rebinds the notebook-wide `df` to the
# table returned by create_image_class_dataframe.
# NOTE(review): 'horiztonal' looks like a misspelling of 'horizontal' - check the
# actual directory name on disk before changing it.
target_img_folder ='../data_img_classes/class_horiztonal_striped/'
df = create_image_class_dataframe(target_img_folder)
df.head()
Out[15]:
In [31]:
# Show the first five rows' images side by side (targets come first in `df`).
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path = df['img_path'].iloc[i]  # positional, independent of the index labels
    # NOTE(review): `image` is not imported anywhere in the visible notebook;
    # presumably keras.preprocessing.image - confirm and import it in the imports cell.
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    # Positional argument: the `b=` keyword was renamed in newer Matplotlib releases.
    plt.grid(False)
In [35]:
# Split the feature matrix into stratified train/test sets.
# Feature columns are named x0, x1, ...; collect them from the frame instead of
# hard-coding how many there are (the old literal 2024 silently dropped any
# columns beyond that index).
xcolumns = [c for c in df.columns if str(c).startswith('x') and str(c)[1:].isdigit()]
X = df.loc[:, xcolumns].values.astype('float')
y = df.loc[:, 'label'].values.astype('float')
# Fixed seed so a re-run reproduces the same split.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, stratify=y, test_size=.33, random_state=0)
# .format() belongs on the string, not on print()'s return value.
print(' training shape {0} \n testing shape {1}'.format(X_train.shape, X_test.shape))
print('\n target/non-target \n (train) {0}\\{1} \n (test) {2}\\{3}'.format(
    y_train.sum(), (1-y_train).sum(), y_test.sum(), (1-y_test).sum()))
In [52]:
# classifiers: L1-regularised logistic regression and a linear-kernel SVM,
# both with the same regularisation constant C.
C = 1.0
# liblinear was the default solver in older scikit-learn releases and supports
# the L1 penalty; the current default solver rejects penalty='l1'.
clf_LR = LogisticRegression(C=C, penalty='l1', solver='liblinear', tol=0.01)
clf_svm = sklearn.svm.SVC(C=C, kernel='linear')
In [53]:
# Fit both classifiers on the training split.
clf_LR.fit(X_train, y_train)
clf_svm.fit(X_train, y_train)
Out[53]:
In [44]:
# Scatter the fitted logistic-regression weight of every network feature.
coef = clf_LR.coef_[0]
plt.figure(figsize=(12, 3))
sns.set_style('white')
feature_index = np.arange(coef.shape[0])
plt.scatter(feature_index, coef)
plt.ylabel('LogReg coefficient')
plt.xlabel('nnet feature')
sns.despine()
In [54]:
# Confusion counts of the logistic regression on the held-out split.
y_pred = clf_LR.predict(X_test)
(TP, FP, TN, FN) = perf_measure(y_test, y_pred)
# .format() belongs on the string, not on print()'s return value.
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP, FP, TN, FN))
In [46]:
# Confusion counts of the linear SVM on the held-out split.
y_pred = clf_svm.predict(X_test)
(TP, FP, TN, FN) = perf_measure(y_test, y_pred)
# .format() belongs on the string, not on print()'s return value.
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP, FP, TN, FN))
In [48]:
# from sklearn.model_selection import StratifiedKFold
# skf = StratifiedKFold(n_splits=5,shuffle=True)
# for train, test in skf.split(X, y):
# #print("%s %s" % (train, test))
# C=1.0
# clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
# clf_LR.fit(X[train], y[train])
# y_pred = clf_LR.predict(X[test])
# (TP,FP,TN,FN) =perf_measure(y[test],y_pred)
# print('\nTruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}').format(TP,FP,TN,FN)
In [49]:
# Permutation test: is the 5-fold cross-validated accuracy on (X, y) better
# than what the same model reaches on label-shuffled data?
# liblinear supports the L1 penalty; the current default solver rejects it.
clf_LR = LogisticRegression(C=C, penalty='l1', solver='liblinear', tol=0.01)
# Seeded shuffle so the folds are the same on every run.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)
In [56]:
#
In [50]:
# Accuracies from the label-shuffled runs, with the real score as a vertical line.
plt.hist(permutation_scores)
plt.axvline(score)
plt.xlabel('accuracy')
sns.despine()
print(pvalue)
In [58]:
# image folder for the red/pink class; rebinds the notebook-wide `df` to the
# table returned by create_image_class_dataframe.
target_img_folder ='../data_img_classes/class_red_pink/'
df = create_image_class_dataframe(target_img_folder)
df.head()
Out[58]:
In [60]:
# Show five images side by side, starting from the second row of `df`.
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path = df['img_path'].iloc[i+1]  # positional; row 0 is deliberately skipped as before
    # NOTE(review): `image` is not imported anywhere in the visible notebook;
    # presumably keras.preprocessing.image - confirm and import it in the imports cell.
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    # Positional argument: the `b=` keyword was renamed in newer Matplotlib releases.
    plt.grid(False)
In [62]:
# split data
# Feature columns are named x0, x1, ...; collect them from the frame instead of
# hard-coding how many there are (the old literal 2024 silently dropped any
# columns beyond that index).
xcolumns = [c for c in df.columns if str(c).startswith('x') and str(c)[1:].isdigit()]
X = df.loc[:, xcolumns].values.astype('float')
y = df.loc[:, 'label'].values.astype('float')
# Fixed seed so a re-run reproduces the same split.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, stratify=y, test_size=.33, random_state=0)
# .format() belongs on the string, not on print()'s return value.
print(' training shape {0} \n testing shape {1}'.format(X_train.shape, X_test.shape))
print('\n target/non-target \n (train) {0}\\{1} \n (test) {2}\\{3}'.format(
    y_train.sum(), (1-y_train).sum(), y_test.sum(), (1-y_test).sum()))
In [63]:
# Train the linear SVM on the current training split
clf_svm.fit(X_train, y_train)
# test: confusion counts on the held-out split
y_pred = clf_svm.predict(X_test)
(TP, FP, TN, FN) = perf_measure(y_test, y_pred)
# .format() belongs on the string, not on print()'s return value.
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP, FP, TN, FN))
In [64]:
# Permutation test: is the 5-fold cross-validated accuracy on (X, y) better
# than what the same model reaches on label-shuffled data?
# liblinear supports the L1 penalty; the current default solver rejects it.
clf_LR = LogisticRegression(C=C, penalty='l1', solver='liblinear', tol=0.01)
# Seeded shuffle so the folds are the same on every run.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)
In [65]:
# Accuracies from the label-shuffled runs, with the real score as a vertical line.
# NOTE(review): the scores come from cross-validation over all of (X, y), not from
# the held-out test split - confirm the title wording.
plt.hist(permutation_scores)
plt.axvline(score)
plt.xlabel('accuracy')
plt.title('permutation test on test set classification')
sns.despine()
print(pvalue)
In [1]:
%%bash
# Render this notebook to HTML and move the result into ../notebook_htmls/ as the "_v2" snapshot
# (the move only runs when nbconvert succeeds), then copy the notebook itself into ../notebook_versions/.
jupyter nbconvert --to html Training_Network_to_Idenfitying_HandPicked_Classes.ipynb && mv Training_Network_to_Idenfitying_HandPicked_Classes.html ../notebook_htmls/Training_Network_to_Idenfitying_HandPicked_Classes_v2.html
cp Training_Network_to_Idenfitying_HandPicked_Classes.ipynb ../notebook_versions/Training_Network_to_Idenfitying_HandPicked_Classes_v2.ipynb
In [ ]: