Overview: Training a Network for Useful Features

We provide:

  • a set of images that match along some interpretable feature (e.g. striped dress)
  • a larger set of images that don't match

The code:

  • extracts neural network features from a pretrained ResNet50
  • estimates weights on those neural network features to predict the interpretable feature class
    • with cross-validation
    • using regularized logistic regression
    • or other classifiers

Evaluation:

  • save out the learned weights so they can define a new feature (new feature = w * original features); see the sketch right after this list
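
A minimal sketch of that projection, assuming w is a learned coefficient vector (such as the one saved at the end of this notebook) and x is one image's 2048-dim ResNet50 avg_pool feature vector; the stand-in x below is illustrative only:

import numpy as np

w = np.loadtxt('../data_nn_features/class_weights_LR_redpink.txt')  # learned class weights (saved below)
x = np.random.rand(2048)   # stand-in for one image's avg_pool feature vector
new_feature = np.dot(w, x) # scalar score for the interpretable class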

In [30]:
import sys 
import os
sys.path.append(os.getcwd()+'/../')

# our lib
from lib.resnet50 import ResNet50
from lib.imagenet_utils import preprocess_input, decode_predictions

#keras 
from keras.preprocessing import image
from keras.models import Model

# sklearn
import sklearn
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, StratifiedKFold, permutation_test_score

# other
import numpy as np
import glob
import pandas as pd
import ntpath

# plotting
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [31]:
def preprocess_img(img_path):
    # load the image at the ResNet50 input size, add a batch dimension,
    # and apply the ImageNet preprocessing expected by the network
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # shape: (1, 224, 224, 3)
    x = preprocess_input(x)
    return(x,img)

In [32]:
def perf_measure(y_actual, y_hat):
    # count true/false positives and negatives for binary 0/1 labels
    TP = 0
    FP = 0
    TN = 0
    FN = 0

    for i in range(len(y_hat)):
        if y_actual[i]==y_hat[i]==1:
            TP += 1
        elif (y_hat[i]==1) and (y_actual[i]!=y_hat[i]):
            FP += 1
        elif y_actual[i]==y_hat[i]==0:
            TN += 1
        elif (y_hat[i]==0) and (y_actual[i]!=y_hat[i]):
            FN += 1

    return(TP, FP, TN, FN)
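
As a sanity check (not part of the pipeline here), the same four counts can be read off sklearn's built-in confusion matrix; for binary 0/1 labels, ravel() returns them in (TN, FP, FN, TP) order:

from sklearn.metrics import confusion_matrix

# equivalent counts, e.g. for the y_test / y_pred arrays used later in this notebook
TN, FP, FN, TP = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()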

Extract NN Features


In [33]:
# instantiate the model
base_model = ResNet50(include_top=False, weights='imagenet') #this will pull the weights from the folder 

# cut the model at the global average-pooling layer: predict() then returns
# the 2048-dim 'avg_pool' activations instead of ImageNet class scores
model = Model(input=base_model.input, output=base_model.get_layer('avg_pool').output)


[stderr: repeated Keras 1-to-2 deprecation warnings from lib/resnet50.py (legacy `Convolution2D` and `merge` calls); output truncated by the limit_output extension at 10000 characters]

In [34]:
#img_paths = glob.glob('../img/baiyi/*')
# 
img_paths = glob.glob('../original_img/*')
img_paths[0:3]


Out[34]:
['../original_img/ANGEL-325500130901-5.jpg',
 '../original_img/ANGEL-621400250101-5.jpg',
 '../original_img/ANGEL-621401661304-5.jpg']

In [35]:
# single image (e.g. the first one found above)
x,img = preprocess_img(img_paths[0]) # preprocess
model_output = model.predict(x)[0,0,0,:]

In [37]:
len(model_output)


Out[37]:
2048

In [38]:
# create dataframe with all image features
# (each 2048-dim vector is stored as a stringified list and parsed back into floats when loaded later)
img_feature_df = pd.DataFrame()
for i,img_path in enumerate(img_paths):
    x,img = preprocess_img(img_path) # preprocess
    model_output = model.predict(x)[0,0,0,:]
    img_feature_df.loc[i,'img_path']=img_path
    img_feature_df.loc[i,'nn_features']=str(list(model_output))


[KeyboardInterrupt: the feature-extraction loop was interrupted manually during model.predict]

In [ ]:
img_feature_df['img_name'] = img_feature_df['img_path'].apply(lambda x: ntpath.basename(x))

In [ ]:
img_feature_df.head()

In [ ]:
img_feature_df.to_csv('../data_nn_features/img_features_all.csv')

Predicting Own Labels from Selected Images

  • within a folder, assign class 1 (target) and class 0 (non-target)
  • split into train and test sets
  • build a matrix of images x features, with a class label per image
  • fit a logistic regression (or another classifier)
  • assess the fit on the test set
  • write an html report (sample images used to define the class; top and bottom predictions from the test set); a sketch of the ranking step follows this list
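
The html report itself is not produced in this notebook; here is a minimal sketch of the ranking step, assuming a fitted classifier clf_LR with predict_proba and a hypothetical df_test dataframe carrying the img_path of each test row (the split below works on bare arrays, so the paths would need to be kept alongside X_test):

import numpy as np

# rank test images by predicted probability of the target class
proba = clf_LR.predict_proba(X_test)[:, 1]
order = np.argsort(proba)

top_img_paths    = df_test['img_path'].values[order[::-1][:5]]  # most target-like
bottom_img_paths = df_test['img_path'].values[order[:5]]        # least target-like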

In [4]:
# get target and non-target lists

def create_image_class_dataframe(target_img_folder):


    # folders containing the candidate non-target (class 0) images
    non_target_img_folders = ['../original_img/']

    
    target_img_paths=glob.glob(target_img_folder+'*')
    target_img_paths_stemless = [ntpath.basename(t) for t in target_img_paths]
    non_target_img_paths =[]
    for non_target_folder in non_target_img_folders:
        for img_path in glob.glob(non_target_folder+'*'):
            if ntpath.basename(img_path) not in target_img_paths_stemless: # remove targets from non-target list
                non_target_img_paths.append(img_path)

    # create data frame with image name and label
    img_paths = np.append(target_img_paths,non_target_img_paths)
    labels = np.append(np.ones(len(target_img_paths)),np.zeros(len(non_target_img_paths)))
    df = pd.DataFrame(data=np.vstack((img_paths,labels)).T,columns=['img_path','label']) 
    df['img_name'] = df['img_path'].apply(lambda x: ntpath.basename(x)) # add image name
    df['label'] = df['label'].apply(lambda x: float(x)) # add label 

    # load up features per image
    img_feature_df = pd.read_csv('../data_nn_features/img_features_all.csv',index_col=0)
    img_feature_df.head()


    # create the feature matrix from the loaded features
    # (nn_features was stored as a stringified list, so parse it back into floats)
    for i,row in df.iterrows():
        features = img_feature_df.loc[img_feature_df.img_name==row['img_name'],'nn_features'].as_matrix()[0].replace(']','').replace('[','').split(',')
        features = [np.float(f) for f in features]
        lab = row['img_name']
        if i==0:
            X = features
            labs = lab
        else:
            X = np.vstack((X,features))
            labs = np.append(labs,lab)

    xcolumns = ['x'+str(i) for i in np.arange(X.shape[1])]
    X_df = pd.DataFrame(np.hstack((labs[:,np.newaxis],X)),columns=['img_name']+xcolumns)

    # merge together 
    df = df.merge(X_df,on='img_name')
    
    # make sure each image appears exactly once in the dataframe
    lens = np.array([])
    for img_name in df.img_name.unique():
        lens = np.append(lens,len(df.loc[df.img_name==img_name]))

    assert np.all(lens == 1)
    
    return(df)

In [5]:
# remove some non-targets to make dataset smaller #
# i_class0 = np.where(df.label==0.0)[0]
# i_class0_remove = np.random.choice(i_class0,int(np.round(len(i_class0)/1.1)))
# df_smaller = df.drop(i_class0_remove)
#df_smaller.to_csv('test.csv')

Horizontal Striped Data


In [15]:
# image folder 
target_img_folder ='../data_img_classes/class_horiztonal_striped/'
df = create_image_class_dataframe(target_img_folder)
df.head()


Out[15]:
img_path label img_name x0 x1 x2 x3 x4 x5 x6 ... x2038 x2039 x2040 x2041 x2042 x2043 x2044 x2045 x2046 x2047
0 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U125256-39-5.jpg 0.080648147 0.0092789298 0.0014473638 0.79745281 0.1980352 0.0092308726 1.1527375 ... 0.010522826 0.52378851 0.0 4.1638546 0.0 0.0023334951 1.5376362 0.41636777 0.0 0.37979186
1 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U125267-79-5.jpg 0.012774257 1.0192471 0.13633534 0.7930606 0.41112542 0.0 1.4454148 ... 0.36394235 0.0 0.035090849 2.28038 0.12486018 0.0 1.5214183 0.35948351 0.037030876 0.047698129
2 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U127278-03-5.jpg 0.2103394 0.44533923 0.23877689 2.1717458 0.040404715 0.0 0.19045945 ... 2.9992921 0.0041331076 0.054148678 2.9987047 0.0011503234 0.0 0.84170794 0.56640506 0.079589754 0.015616337
3 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U127278-13-5.jpg 0.095383428 0.87436837 0.075488105 0.60814637 0.10280731 0.052728202 0.30020541 ... 0.48274636 0.24174443 0.079249993 2.4447916 0.21308827 0.035023067 0.06211203 0.52482486 0.10131172 0.0
4 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODAJ-U125406-09-5.jpg 0.49232581 0.055619191 0.043276276 2.4512403 0.21075039 0.0 0.2511763 ... 0.89770401 0.12855974 0.0 2.7500679 0.44566065 0.0 1.6538981 3.4805861 0.05550551 1.0497173

5 rows × 2051 columns


In [31]:
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path= df['img_path'][i]
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    plt.grid(b=False)


target class

In [35]:
xcolumns=['x'+str(i) for i in np.arange(2048)]
X = df.loc[:,xcolumns].as_matrix().astype('float')
y= df.loc[:,'label'].as_matrix().astype('float')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,stratify=y,test_size=.33)
print(' training shape {0} \n testing shape {1}'.format(X_train.shape,X_test.shape))
print('\n target/non-target \n (train) {0}\{1} \n (test) {2}\{3}'.format(y_train.sum(),(1-y_train).sum(),y_test.sum(),(1-y_test).sum()))


 training shape (338, 2024) 
 testing shape (167, 2024)

 target/non-target 
 (train) 11.0\327.0 
 (test) 6.0\161.0

In [52]:
# classifiers 
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_svm = sklearn.svm.SVC(C=C,kernel='linear')

In [53]:
clf_LR.fit(X_train, y_train)
clf_svm.fit(X_train, y_train)


Out[53]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [44]:
coef = clf_LR.coef_[0,:]
plt.figure(figsize=(12,3))
sns.set_style('white')
plt.scatter(np.arange(len(coef)),coef)
plt.xlabel('nnet feature')
plt.ylabel('LogReg coefficient')
sns.despine()



In [40]:
#len(coef)

In [54]:
y_pred = clf_LR.predict(X_test)

(TP,FP,TN,FN) =perf_measure(y_test,y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP,FP,TN,FN))


TruePos:1
FalsePos:0
TrueNeg:161
FalseNeg:5

In [46]:
y_pred = clf_svm.predict(X_test)

(TP,FP,TN,FN) =perf_measure(y_test,y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP,FP,TN,FN))


TruePos:2
FalsePos:0
TrueNeg:161
FalseNeg:4
  • neither the SVM nor the logistic regression is doing well on this class; the target class is also heavily outnumbered in training, and one possible mitigation is sketched below
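
One untested mitigation for this imbalance (11 targets vs. 327 non-targets in the training split) is to reweight the classes; sklearn's LogisticRegression exposes this via class_weight. A sketch, not run in this notebook:

# upweight the rare target class inversely to its frequency
clf_LR_balanced = LogisticRegression(C=1.0, penalty='l1', tol=0.01,
                                     class_weight='balanced')
clf_LR_balanced.fit(X_train, y_train)
y_pred_balanced = clf_LR_balanced.predict(X_test)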

In [48]:
# from sklearn.model_selection import StratifiedKFold
# skf = StratifiedKFold(n_splits=5,shuffle=True)
# for train, test in skf.split(X, y):
#     #print("%s %s" % (train, test))
#     C=1.0
#     clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
#     clf_LR.fit(X[train], y[train])
#     y_pred = clf_LR.predict(X[test])
#     (TP,FP,TN,FN) =perf_measure(y[test],y_pred)
#     print('\nTruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}').format(TP,FP,TN,FN)

In [49]:
# permutation test: compare the cross-validated accuracy against the accuracies
# obtained when the labels are randomly shuffled
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
skf = StratifiedKFold(n_splits=5,shuffle=True)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)

In [56]:
#

In [50]:
plt.hist(permutation_scores)
plt.axvline(score)
sns.despine()
plt.xlabel('accuracy')
print(pvalue)


0.00990099009901
  • the cross-validated accuracy is above chance (permutation test, p ≈ 0.0099)

Red / Pink Data


In [41]:
# image folder 
target_img_folder ='../data_img_classes/class_red_pink/'
df = create_image_class_dataframe(target_img_folder)
df.head()


Out[41]:
img_path label img_name x0 x1 x2 x3 x4 x5 x6 ... x2038 x2039 x2040 x2041 x2042 x2043 x2044 x2045 x2046 x2047
0 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-6214020T0805-5.jpg 0.27172932 0.54065263 1.2518882 0.71433866 0.0 0.223846 0.208391 ... 3.1341801 0.078170836 0.19200282 1.3976613 0.01351728 0.0097718844 0.92253286 0.46201733 2.3602607 0.12272973
1 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-621402220501-5.jpg 0.15732542 0.85577351 0.13256542 1.6754812 0.14064166 0.72851104 0.15100212 ... 0.61663407 0.321567 0.13439243 1.693658 0.022544336 0.03912805 0.11787287 0.29207376 0.59155571 0.38405305
2 ../data_img_classes/class_red_pink/ANGELCITIZ-... 1.0 ANGELCITIZ-621308290602-5.jpg 0.41256633 0.38114852 0.18842269 1.5292635 0.85203356 0.27785954 0.18870671 ... 0.18259989 0.49224538 0.46193609 3.8138292 0.19726405 0.097800381 0.22442091 1.3731562 0.31209072 0.75006706
3 ../data_img_classes/class_red_pink/Bai-B520N01... 1.0 Bai-B520N015-5.jpg 0.039242335 0.36203983 0.0042010327 0.37699968 0.46601561 0.0 0.46742466 ... 0.15885612 0.13920899 0.16824563 3.6293392 0.10784438 0.0039167427 0.33787274 2.1860485 0.28497639 0.93075883
4 ../data_img_classes/class_red_pink/BAIYI-B1008... 1.0 BAIYI-B1008N289-5.jpg 0.43477067 0.33263576 0.0 0.40581283 0.14094441 0.017958783 0.63126558 ... 0.70636362 0.19716582 0.12621519 4.4085803 0.11626053 0.0 0.27553368 1.5186014 0.13309085 1.9339614

5 rows × 2051 columns


In [43]:
df.columns.values[-1]


Out[43]:
'x2047'

In [44]:
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path= df['img_path'][i+1]
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    plt.grid(b=False)


target class

Split Set Assessment


In [45]:
# split data 
xcolumns=['x'+str(i) for i in np.arange(2048)]
X = df.loc[:,xcolumns].as_matrix().astype('float')
y= df.loc[:,'label'].as_matrix().astype('float')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,stratify=y,test_size=.33)
print(' training shape {0} \n testing shape {1}'.format(X_train.shape,X_test.shape))
print('\n target/non-target \n (train) {0}\{1} \n (test) {2}\{3}'.format(y_train.sum(),(1-y_train).sum(),y_test.sum(),(1-y_test).sum()))


 training shape (338, 2048) 
 testing shape (167, 2048)

 target/non-target 
 (train) 41.0\297.0 
 (test) 21.0\146.0

In [46]:
# Train
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_LR.fit(X_train, y_train)

# test 
y_pred = clf_LR.predict(X_test)
(TP,FP,TN,FN) =perf_measure(y_test,y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP,FP,TN,FN))


TruePos:13
FalsePos:4
TrueNeg:142
FalseNeg:8
  • classification performance is much better on this dataset (41 training targets here vs. 11 for the striped class)

Permutation Assessment


In [47]:
from sklearn.model_selection import StratifiedKFold
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
skf = StratifiedKFold(n_splits=5,shuffle=True)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)

In [48]:
plt.hist(permutation_scores)
plt.axvline(score)
sns.despine()
plt.xlabel('accuracy')
plt.title('permutation test on test set classification')
print(pvalue)


0.00990099009901

Re-train on whole dataset


In [49]:
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_LR.fit(X, y)


Out[49]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.01,
          verbose=0, warm_start=False)

In [50]:
coef = clf_LR.coef_[0,:]
plt.figure(figsize=(12,3))
sns.set_style('white')
plt.scatter(np.arange(len(coef)),coef)
plt.xlabel('nnet feature')
plt.ylabel('LogReg coefficient')
sns.despine()



In [51]:
len(coef)


Out[51]:
2048

Save out


In [52]:
np.savetxt('../data_nn_features/class_weights_LR_redpink.txt',coef)
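
Downstream code can recover these weights with np.loadtxt and take their dot product with an image's 2048-dim avg_pool feature vector to obtain a scalar red/pink score, as described in the Overview (and sketched near the top of this notebook).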

Save


In [28]:
%%bash
jupyter nbconvert --to html Training_Network_to_Idenfitying_HandPicked_Classes.ipynb && mv Training_Network_to_Idenfitying_HandPicked_Classes.html ../notebook_htmls/Training_Network_to_Idenfitying_HandPicked_Classes_v2.html
cp Training_Network_to_Idenfitying_HandPicked_Classes.ipynb ../notebook_versions/Training_Network_to_Idenfitying_HandPicked_Classes_v2.ipynb


[NbConvertApp] Converting notebook Training_Network_to_Idenfitying_HandPicked_Classes.ipynb to html
[NbConvertApp] Writing 663582 bytes to Training_Network_to_Idenfitying_HandPicked_Classes.html