In [66]:
%%bash 
jupyter nbconvert --to html Training_Network_to_Idenfitying_HandPicked_Classes.ipynb && mv Training_Network_to_Idenfitying_HandPicked_Classes.html ../notebook_htmls/Training_Network_to_Idenfitying_HandPicked_Classes_v1.html


[NbConvertApp] Converting notebook Training_Network_to_Idenfitying_HandPicked_Classes.ipynb to html
[NbConvertApp] Writing 640614 bytes to Training_Network_to_Idenfitying_HandPicked_Classes.html

Overview: Training a Network for Useful Features

Input:

  • a set of images that match along some interpretable feature (e.g. striped dresses)
  • a larger set of images that don't match
  • a set of neural network features per image

Output:

  • estimated weights over those neural network features for predicting the interpretable feature class (saved out for reuse)
    • regularized logistic regression or other classifiers
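
A minimal sketch of that output step (the matrix X, labels y, the helper name, and the output path are illustrative placeholders, not objects defined in this notebook):

import numpy as np
from sklearn.linear_model import LogisticRegression

def fit_and_save_feature_weights(X, y, out_path='class_feature_weights.npy', C=1.0):
    # X: (n_images, n_features) network activations; y: 1 = matches the hand-picked class, 0 = does not
    clf = LogisticRegression(C=C, penalty='l1', solver='liblinear', tol=0.01)
    clf.fit(X, y)
    np.save(out_path, clf.coef_[0, :])  # one weight per network feature
    return clf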

In [3]:
import sys 
import os
sys.path.append(os.getcwd()+'/../')

# sklearn
import sklearn
import sklearn.svm  # used below as sklearn.svm.SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, permutation_test_score

# other
import numpy as np
import glob
import pandas as pd
import ntpath
from keras.preprocessing import image  # assumed source of image.load_img used for the previews below

# plotting
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
def perf_measure(y_actual, y_hat):
    """Count true/false positives/negatives for binary labels (1 = target, 0 = non-target)."""
    TP = FP = TN = FN = 0
    for actual, pred in zip(y_actual, y_hat):
        if pred == 1 and actual == 1:
            TP += 1
        elif pred == 1 and actual != 1:
            FP += 1
        elif pred == 0 and actual == 0:
            TN += 1
        elif pred == 0 and actual != 0:
            FN += 1

    return(TP, FP, TN, FN)
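
The same four counts can be read off sklearn's confusion matrix; a quick cross-check of perf_measure on toy labels (the example arrays are made up for illustration):

from sklearn.metrics import confusion_matrix

y_actual = [1, 0, 1, 0, 0]
y_hat    = [1, 0, 0, 1, 0]
tn, fp, fn, tp = confusion_matrix(y_actual, y_hat, labels=[0, 1]).ravel()
print(tp, fp, tn, fn)  # (1, 1, 2, 1), same as perf_measure(y_actual, y_hat)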

Predicting Own Labels from Selected Images

  • within a folder, identify class 1 (target) and class 0 (non-target) images
  • split into train and test sets
  • build a matrix of images × features, with a class label per image
  • fit a logistic regression (or other classifier)
  • assess the fit on the test set
  • html report (sample images used to define the class; top and bottom predictions from the test set; see the sketch below)
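
The ranking step at the end of this list is not implemented in the cells below; a minimal sketch, assuming a fitted classifier clf that supports predict_proba and a list of test-image paths (both names are placeholders):

def top_and_bottom_predictions(clf, X_test, test_img_paths, k=5):
    proba = clf.predict_proba(X_test)[:, 1]                # P(image belongs to the target class)
    order = proba.argsort()                                 # ascending by probability
    bottom = [test_img_paths[i] for i in order[:k]]         # least target-like test images
    top = [test_img_paths[i] for i in order[-k:][::-1]]     # most target-like test images
    return top, bottom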

In [4]:
# image folder 
target_img_folder ='../data_img_classes/class_red_pink/'

In [5]:
# all the image folders
non_target_img_folders = ['../original_img/']

target_img_paths=glob.glob(target_img_folder+'*')
target_img_paths_stemless = [ntpath.basename(t) for t in target_img_paths]
non_target_img_paths =[]
for non_target_folder in non_target_img_folders:
    for img_path in glob.glob(non_target_folder+'*'):
        if ntpath.basename(img_path) not in target_img_paths_stemless: # remove targets from non-target list
            non_target_img_paths.append(img_path)

# create data frame with image name and label
img_paths = np.append(target_img_paths,non_target_img_paths)
labels = np.append(np.ones(len(target_img_paths)),np.zeros(len(non_target_img_paths)))
df = pd.DataFrame(data=np.vstack((img_paths,labels)).T,columns=['img_path','label']) 
df['img_name'] = df['img_path'].apply(lambda x: ntpath.basename(x)) # add image name
df['label'] = df['label'].apply(lambda x: float(x)) # vstack stored labels as strings; cast back to float

In [7]:
df.head()


Out[7]:
img_path label img_name
0 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-6214020T0805-5.jpg
1 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-621402220501-5.jpg
2 ../data_img_classes/class_red_pink/ANGELCITIZ-... 1.0 ANGELCITIZ-621308290602-5.jpg
3 ../data_img_classes/class_red_pink/Bai-B520N01... 1.0 Bai-B520N015-5.jpg
4 ../data_img_classes/class_red_pink/BAIYI-B1008... 1.0 BAIYI-B1008N289-5.jpg

In [ ]:
# load up features per image
img_feature_df = pd.read_csv('../data_nn_features/img_features_all.csv',index_col=0)
img_feature_df.head()
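
The nn_features column stores each activation vector as a bracketed, comma-separated string; create_image_class_dataframe below strips the brackets and splits on commas. An equivalent parse for a single row, shown only as a sketch (ast is not otherwise used in this notebook):

import ast

def parse_feature_string(s):
    # turn a string like "[0.08, 0.52, ...]" into a 1-D float array
    return np.array(ast.literal_eval(s), dtype=float)

# e.g. parse_feature_string(img_feature_df['nn_features'].iloc[0])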

In [13]:
# build the labeled (target / non-target) feature dataframe for one class folder

def create_image_class_dataframe(target_img_folder):

    # get target and non-target image lists
    target_img_paths = glob.glob(target_img_folder+'*')
    target_img_paths_stemless = [ntpath.basename(t) for t in target_img_paths]
    non_target_img_paths = []
    for non_target_folder in non_target_img_folders:
        for img_path in glob.glob(non_target_folder+'*'):
            if ntpath.basename(img_path) not in target_img_paths_stemless: # remove targets from non-target list
                non_target_img_paths.append(img_path)

    # data frame with image path, label and name
    img_paths = np.append(target_img_paths, non_target_img_paths)
    labels = np.append(np.ones(len(target_img_paths)), np.zeros(len(non_target_img_paths)))
    df = pd.DataFrame(data=np.vstack((img_paths, labels)).T, columns=['img_path', 'label'])
    df['img_name'] = df['img_path'].apply(lambda x: ntpath.basename(x))
    df['label'] = df['label'].apply(lambda x: float(x))

    # create feature matrix out of the loaded-up features (nn_features is a stringified list per image)
    for i, row in df.iterrows():
        features = img_feature_df.loc[img_feature_df.img_name==row['img_name'],'nn_features'].as_matrix()[0].replace(']','').replace('[','').split(',')
        features = [np.float(f) for f in features]
        lab = row['img_name']
        if i==0:
            X = features
            labs = lab
        else:
            X = np.vstack((X,features))
            labs = np.append(labs,lab)

    xcolumns = ['x'+str(i) for i in np.arange(X.shape[1])]
    X_df = pd.DataFrame(np.hstack((labs[:,np.newaxis],X)),columns=['img_name']+xcolumns)

    # merge image paths/labels with their feature columns
    df = df.merge(X_df,on='img_name')

    # make sure there is only one instance per image in the dataframe
    counts = df.img_name.value_counts()
    assert (counts == 1).all()

    return(df)

In [14]:
# remove some non-targets to make dataset smaller #
# i_class0 = np.where(df.label==0.0)[0]
# i_class0_remove = np.random.choice(i_class0,int(np.round(len(i_class0)/1.1)))
# df_smaller = df.drop(i_class0_remove)
#df_smaller.to_csv('test.csv')

Horizontal Striped Data


In [15]:
# image folder 
target_img_folder ='../data_img_classes/class_horiztonal_striped/'
df = create_image_class_dataframe(target_img_folder)
df.head()


Out[15]:
img_path label img_name x0 x1 x2 x3 x4 x5 x6 ... x2038 x2039 x2040 x2041 x2042 x2043 x2044 x2045 x2046 x2047
0 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U125256-39-5.jpg 0.080648147 0.0092789298 0.0014473638 0.79745281 0.1980352 0.0092308726 1.1527375 ... 0.010522826 0.52378851 0.0 4.1638546 0.0 0.0023334951 1.5376362 0.41636777 0.0 0.37979186
1 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U125267-79-5.jpg 0.012774257 1.0192471 0.13633534 0.7930606 0.41112542 0.0 1.4454148 ... 0.36394235 0.0 0.035090849 2.28038 0.12486018 0.0 1.5214183 0.35948351 0.037030876 0.047698129
2 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U127278-03-5.jpg 0.2103394 0.44533923 0.23877689 2.1717458 0.040404715 0.0 0.19045945 ... 2.9992921 0.0041331076 0.054148678 2.9987047 0.0011503234 0.0 0.84170794 0.56640506 0.079589754 0.015616337
3 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U127278-13-5.jpg 0.095383428 0.87436837 0.075488105 0.60814637 0.10280731 0.052728202 0.30020541 ... 0.48274636 0.24174443 0.079249993 2.4447916 0.21308827 0.035023067 0.06211203 0.52482486 0.10131172 0.0
4 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODAJ-U125406-09-5.jpg 0.49232581 0.055619191 0.043276276 2.4512403 0.21075039 0.0 0.2511763 ... 0.89770401 0.12855974 0.0 2.7500679 0.44566065 0.0 1.6538981 3.4805861 0.05550551 1.0497173

5 rows × 2051 columns


In [31]:
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path= df['img_path'][i]
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    plt.grid(b=False)


target class

In [35]:
# use the first 2024 of the 2048 feature columns (x0 ... x2023)
xcolumns=['x'+str(i) for i in np.arange(2024)]
X = df.loc[:,xcolumns].as_matrix().astype('float')
y = df.loc[:,'label'].as_matrix().astype('float')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,stratify=y,test_size=.33)
print(' training shape {0} \n testing shape {1}'.format(X_train.shape,X_test.shape))
print('\n target/non-target \n (train) {0}\{1} \n (test) {2}\{3}'.format(y_train.sum(),(1-y_train).sum(),y_test.sum(),(1-y_test).sum()))


 training shape (338, 2024) 
 testing shape (167, 2024)

 target/non-target 
 (train) 11.0\327.0 
 (test) 6.0\161.0

In [52]:
# classifiers 
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_svm = sklearn.svm.SVC(C=C,kernel='linear')

In [53]:
clf_LR.fit(X_train, y_train)
clf_svm.fit(X_train, y_train)


Out[53]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [44]:
coef = clf_LR.coef_[0,:]
plt.figure(figsize=(12,3))
sns.set_style('white')
plt.scatter(np.arange(len(coef)),coef)
plt.xlabel('nnet feature')
plt.ylabel('LogReg coefficient')
sns.despine()
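
Since the goal stated in the overview is to save out the feature weights that predict the class, it can help to list which network features the L1 penalty actually kept. A small sketch (not in the original notebook) using the clf_LR fitted above:

coef = clf_LR.coef_[0, :]
kept = np.flatnonzero(coef)                        # L1 zeroes out most coefficients
top = kept[np.argsort(-np.abs(coef[kept]))][:10]   # ten largest-magnitude weights
for i in top:
    print('x{0}: {1:+.3f}'.format(i, coef[i]))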



In [54]:
y_pred = clf_LR.predict(X_test)

(TP, FP, TN, FN) = perf_measure(y_test, y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP, FP, TN, FN))


TruePos:1
FalsePos:0
TrueNeg:161
FalseNeg:5

In [46]:
y_pred = clf_svm.predict(X_test)

(TP, FP, TN, FN) = perf_measure(y_test, y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP, FP, TN, FN))


TruePos:2
FalsePos:0
TrueNeg:161
FalseNeg:4
  • neither the SVM nor the logistic regression is doing well here: each recovers only 1 or 2 of the 6 target images in the test set (a class-weighting sketch follows below)
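
One plausible reason is the heavy class imbalance (11 target vs 327 non-target images in training). A hedged sketch of one common adjustment that is not used in this notebook, re-weighting the classes inversely to their frequency:

# not used above: counter the 11-vs-327 imbalance by re-weighting the classes
clf_LR_bal = LogisticRegression(C=C, penalty='l1', tol=0.01, class_weight='balanced')
clf_svm_bal = sklearn.svm.SVC(C=C, kernel='linear', class_weight='balanced')
clf_LR_bal.fit(X_train, y_train)
print(perf_measure(y_test, clf_LR_bal.predict(X_test)))  # (TP, FP, TN, FN)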

In [48]:
# from sklearn.model_selection import StratifiedKFold
# skf = StratifiedKFold(n_splits=5,shuffle=True)
# for train, test in skf.split(X, y):
#     #print("%s %s" % (train, test))
#     C=1.0
#     clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
#     clf_LR.fit(X[train], y[train])
#     y_pred = clf_LR.predict(X[test])
#     (TP,FP,TN,FN) =perf_measure(y[test],y_pred)
#     print('\nTruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}').format(TP,FP,TN,FN)

In [49]:
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
skf = StratifiedKFold(n_splits=5,shuffle=True)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)
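
For reference, permutation_test_score refits the classifier on label-shuffled copies of y (100 permutations here, each scored with the same stratified 5-fold CV) and compares the true cross-validated accuracy with that null distribution; the reported p-value is (C + 1) / (n_permutations + 1), where C is the number of permutations scoring at least as well as the unshuffled labels.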

In [56]:
#

In [50]:
plt.hist(permutation_scores)
plt.axvline(score)
sns.despine()
plt.xlabel('accuracy')
print(pvalue)


0.00990099009901
  • the achieved accuracy is above chance (as determined by permutation testing): none of the 100 label permutations scored as well as the true labels, so the p-value is at its floor of 1/101 ≈ 0.0099

Red / Pink Data


In [58]:
# image folder 
target_img_folder ='../data_img_classes/class_red_pink/'
df = create_image_class_dataframe(target_img_folder)
df.head()


Out[58]:
img_path label img_name x0 x1 x2 x3 x4 x5 x6 ... x2038 x2039 x2040 x2041 x2042 x2043 x2044 x2045 x2046 x2047
0 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-6214020T0805-5.jpg 0.27172932 0.54065263 1.2518882 0.71433866 0.0 0.223846 0.208391 ... 3.1341801 0.078170836 0.19200282 1.3976613 0.01351728 0.0097718844 0.92253286 0.46201733 2.3602607 0.12272973
1 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-621402220501-5.jpg 0.15732542 0.85577351 0.13256542 1.6754812 0.14064166 0.72851104 0.15100212 ... 0.61663407 0.321567 0.13439243 1.693658 0.022544336 0.03912805 0.11787287 0.29207376 0.59155571 0.38405305
2 ../data_img_classes/class_red_pink/ANGELCITIZ-... 1.0 ANGELCITIZ-621308290602-5.jpg 0.41256633 0.38114852 0.18842269 1.5292635 0.85203356 0.27785954 0.18870671 ... 0.18259989 0.49224538 0.46193609 3.8138292 0.19726405 0.097800381 0.22442091 1.3731562 0.31209072 0.75006706
3 ../data_img_classes/class_red_pink/Bai-B520N01... 1.0 Bai-B520N015-5.jpg 0.039242335 0.36203983 0.0042010327 0.37699968 0.46601561 0.0 0.46742466 ... 0.15885612 0.13920899 0.16824563 3.6293392 0.10784438 0.0039167427 0.33787274 2.1860485 0.28497639 0.93075883
4 ../data_img_classes/class_red_pink/BAIYI-B1008... 1.0 BAIYI-B1008N289-5.jpg 0.43477067 0.33263576 0.0 0.40581283 0.14094441 0.017958783 0.63126558 ... 0.70636362 0.19716582 0.12621519 4.4085803 0.11626053 0.0 0.27553368 1.5186014 0.13309085 1.9339614

5 rows × 2051 columns


In [60]:
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path= df['img_path'][i+1]
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    plt.grid(b=False)


target class

In [62]:
# split data 
xcolumns=['x'+str(i) for i in np.arange(2024)]
X = df.loc[:,xcolumns].as_matrix().astype('float')
y = df.loc[:,'label'].as_matrix().astype('float')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,stratify=y,test_size=.33)
print(' training shape {0} \n testing shape {1}'.format(X_train.shape,X_test.shape))
print('\n target/non-target \n (train) {0}\{1} \n (test) {2}\{3}'.format(y_train.sum(),(1-y_train).sum(),y_test.sum(),(1-y_test).sum()))


 training shape (338, 2024) 
 testing shape (167, 2024)

 target/non-target 
 (train) 41.0\297.0 
 (test) 21.0\146.0

In [63]:
# Train
clf_svm.fit(X_train, y_train)

# test 
y_pred = clf_svm.predict(X_test)
(TP, FP, TN, FN) = perf_measure(y_test, y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP, FP, TN, FN))


TruePos:14
FalsePos:6
TrueNeg:140
FalseNeg:7
  • classification performance is much better on this dataset: precision 14/20 = 0.70 and recall 14/21 ≈ 0.67 on the test set

In [64]:
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
skf = StratifiedKFold(n_splits=5,shuffle=True)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)

In [65]:
plt.hist(permutation_scores)
plt.axvline(score)
sns.despine()
plt.xlabel('accuracy')
plt.title('permutation test on test set classification')
print(pvalue)


0.00990099009901

In [1]:
%%bash 
jupyter nbconvert --to html Training_Network_to_Idenfitying_HandPicked_Classes.ipynb && mv Training_Network_to_Idenfitying_HandPicked_Classes.html ../notebook_htmls/Training_Network_to_Idenfitying_HandPicked_Classes_v2.html
cp Training_Network_to_Idenfitying_HandPicked_Classes.ipynb ../notebook_versions/Training_Network_to_Idenfitying_HandPicked_Classes_v2.ipynb


[NbConvertApp] Converting notebook Training_Network_to_Idenfitying_HandPicked_Classes.ipynb to html
[NbConvertApp] Writing 640629 bytes to Training_Network_to_Idenfitying_HandPicked_Classes.html

In [ ]: