Overview: Training a Network for Useful Features

We provide:

  • a set of images that match along some interpretable feature (e.g. striped dress)
  • a larger set of images that don't match

The code:

  • extracts neural network features from a pretrained ResNet50
  • estimates weights on those neural network features to predict the interpretable feature class
    • with cross-validation
    • using regularized logistic regression
    • or other classifiers

Evaluation:

  • save out the learned weights so they can define a new feature (new feature = w * original features); see the sketch right after this list
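
A minimal sketch of that projection, assuming w is a learned coefficient vector (such as the one saved at the end of this notebook) and x is one image's 2048-dim ResNet50 avg_pool feature vector; the stand-in x below is illustrative only:

import numpy as np

w = np.loadtxt('../data_nn_features/class_weights_LR_redpink.txt')  # learned class weights (saved below)
x = np.random.rand(2048)   # stand-in for one image's avg_pool feature vector
new_feature = np.dot(w, x) # scalar score for the interpretable class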

In [30]:
import sys 
import os
sys.path.append(os.getcwd()+'/../')

# our lib
from lib.resnet50 import ResNet50
from lib.imagenet_utils import preprocess_input, decode_predictions

#keras 
from keras.preprocessing import image
from keras.models import Model

# sklearn
import sklearn
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, StratifiedKFold, permutation_test_score

# other
import numpy as np
import glob
import pandas as pd
import ntpath

# plotting
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [31]:
def preprocess_img(img_path):
    # load the image at the ResNet50 input size, add a batch dimension,
    # and apply the ImageNet preprocessing expected by the network
    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # shape: (1, 224, 224, 3)
    x = preprocess_input(x)
    return(x,img)

In [32]:
def perf_measure(y_actual, y_hat):
    # count true/false positives and negatives for binary 0/1 labels
    TP = 0
    FP = 0
    TN = 0
    FN = 0

    for i in range(len(y_hat)):
        if y_actual[i]==y_hat[i]==1:
            TP += 1
        elif (y_hat[i]==1) and (y_actual[i]!=y_hat[i]):
            FP += 1
        elif y_actual[i]==y_hat[i]==0:
            TN += 1
        elif (y_hat[i]==0) and (y_actual[i]!=y_hat[i]):
            FN += 1

    return(TP, FP, TN, FN)
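
As a sanity check (not part of the pipeline here), the same four counts can be read off sklearn's built-in confusion matrix; for binary 0/1 labels, ravel() returns them in (TN, FP, FN, TP) order:

from sklearn.metrics import confusion_matrix

# equivalent counts, e.g. for the y_test / y_pred arrays used later in this notebook
TN, FP, FN, TP = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()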

Extract NN Features


In [33]:
# instantiate the model
base_model = ResNet50(include_top=False, weights='imagenet') #this will pull the weights from the folder 

# cut the model at the global average-pooling layer: predict() then returns
# the 2048-dim 'avg_pool' activations instead of ImageNet class scores
model = Model(input=base_model.input, output=base_model.get_layer('avg_pool').output)


[stderr: repeated Keras 1-to-2 deprecation warnings from lib/resnet50.py (legacy `Convolution2D` and `merge` calls); output truncated by the limit_output extension at 10000 characters]

In [34]:
#img_paths = glob.glob('../img/baiyi/*')
# 
img_paths = glob.glob('../original_img/*')
img_paths[0:3]


Out[34]:
['../original_img/ANGEL-325500130901-5.jpg',
 '../original_img/ANGEL-621400250101-5.jpg',
 '../original_img/ANGEL-621401661304-5.jpg']

In [35]:
# single image (e.g. the first one found above)
x,img = preprocess_img(img_paths[0]) # preprocess
model_output = model.predict(x)[0,0,0,:]

In [37]:
len(model_output)


Out[37]:
2048

In [38]:
# create dataframe with all image features
# (each 2048-dim vector is stored as a stringified list and parsed back into floats when loaded later)
img_feature_df = pd.DataFrame()
for i,img_path in enumerate(img_paths):
    x,img = preprocess_img(img_path) # preprocess
    model_output = model.predict(x)[0,0,0,:]
    img_feature_df.loc[i,'img_path']=img_path
    img_feature_df.loc[i,'nn_features']=str(list(model_output))


[KeyboardInterrupt: the feature-extraction loop was interrupted manually during model.predict]

In [ ]:
img_feature_df['img_name'] = img_feature_df['img_path'].apply(lambda x: ntpath.basename(x))

In [ ]:
img_feature_df.head()

In [ ]:
img_feature_df.to_csv('../data_nn_features/img_features_all.csv')

Predicting Own Labels from Selected Images

  • within a folder, assign class 1 (target) and class 0 (non-target)
  • split into train and test sets
  • build a matrix of images x features, with a class label per image
  • fit a logistic regression (or another classifier)
  • assess the fit on the test set
  • write an html report (sample images used to define the class; top and bottom predictions from the test set); a sketch of the ranking step follows this list
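
The html report itself is not produced in this notebook; here is a minimal sketch of the ranking step, assuming a fitted classifier clf_LR with predict_proba and a hypothetical df_test dataframe carrying the img_path of each test row (the split below works on bare arrays, so the paths would need to be kept alongside X_test):

import numpy as np

# rank test images by predicted probability of the target class
proba = clf_LR.predict_proba(X_test)[:, 1]
order = np.argsort(proba)

top_img_paths    = df_test['img_path'].values[order[::-1][:5]]  # most target-like
bottom_img_paths = df_test['img_path'].values[order[:5]]        # least target-like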

In [4]:
# get target and non-target lists

def create_image_class_dataframe(target_img_folder):


    # folders containing the candidate non-target (class 0) images
    non_target_img_folders = ['../original_img/']

    
    target_img_paths=glob.glob(target_img_folder+'*')
    target_img_paths_stemless = [ntpath.basename(t) for t in target_img_paths]
    non_target_img_paths =[]
    for non_target_folder in non_target_img_folders:
        for img_path in glob.glob(non_target_folder+'*'):
            if ntpath.basename(img_path) not in target_img_paths_stemless: # remove targets from non-target list
                non_target_img_paths.append(img_path)

    # create data frame with image name and label
    img_paths = np.append(target_img_paths,non_target_img_paths)
    labels = np.append(np.ones(len(target_img_paths)),np.zeros(len(non_target_img_paths)))
    df = pd.DataFrame(data=np.vstack((img_paths,labels)).T,columns=['img_path','label']) 
    df['img_name'] = df['img_path'].apply(lambda x: ntpath.basename(x)) # add image name
    df['label'] = df['label'].apply(lambda x: float(x)) # add label 

    # load up features per image
    img_feature_df = pd.read_csv('../data_nn_features/img_features_all.csv',index_col=0)
    img_feature_df.head()


    # create the feature matrix from the loaded features
    # (nn_features was stored as a stringified list, so parse it back into floats)
    for i,row in df.iterrows():
        features = img_feature_df.loc[img_feature_df.img_name==row['img_name'],'nn_features'].as_matrix()[0].replace(']','').replace('[','').split(',')
        features = [np.float(f) for f in features]
        lab = row['img_name']
        if i==0:
            X = features
            labs = lab
        else:
            X = np.vstack((X,features))
            labs = np.append(labs,lab)

    xcolumns = ['x'+str(i) for i in np.arange(X.shape[1])]
    X_df = pd.DataFrame(np.hstack((labs[:,np.newaxis],X)),columns=['img_name']+xcolumns)

    # merge together 
    df = df.merge(X_df,on='img_name')
    
    # make sure each image appears exactly once in the dataframe
    lens = np.array([])
    for img_name in df.img_name.unique():
        lens = np.append(lens,len(df.loc[df.img_name==img_name]))

    assert np.all(lens == 1)
    
    return(df)

In [5]:
# remove some non-targets to make dataset smaller #
# i_class0 = np.where(df.label==0.0)[0]
# i_class0_remove = np.random.choice(i_class0,int(np.round(len(i_class0)/1.1)))
# df_smaller = df.drop(i_class0_remove)
#df_smaller.to_csv('test.csv')

Horizontal Striped Data


In [15]:
# image folder 
target_img_folder ='../data_img_classes/class_horiztonal_striped/'
df = create_image_class_dataframe(target_img_folder)
df.head()


Out[15]:
img_path label img_name x0 x1 x2 x3 x4 x5 x6 ... x2038 x2039 x2040 x2041 x2042 x2043 x2044 x2045 x2046 x2047
0 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U125256-39-5.jpg 0.080648147 0.0092789298 0.0014473638 0.79745281 0.1980352 0.0092308726 1.1527375 ... 0.010522826 0.52378851 0.0 4.1638546 0.0 0.0023334951 1.5376362 0.41636777 0.0 0.37979186
1 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U125267-79-5.jpg 0.012774257 1.0192471 0.13633534 0.7930606 0.41112542 0.0 1.4454148 ... 0.36394235 0.0 0.035090849 2.28038 0.12486018 0.0 1.5214183 0.35948351 0.037030876 0.047698129
2 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U127278-03-5.jpg 0.2103394 0.44533923 0.23877689 2.1717458 0.040404715 0.0 0.19045945 ... 2.9992921 0.0041331076 0.054148678 2.9987047 0.0011503234 0.0 0.84170794 0.56640506 0.079589754 0.015616337
3 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODA-U127278-13-5.jpg 0.095383428 0.87436837 0.075488105 0.60814637 0.10280731 0.052728202 0.30020541 ... 0.48274636 0.24174443 0.079249993 2.4447916 0.21308827 0.035023067 0.06211203 0.52482486 0.10131172 0.0
4 ../data_img_classes/class_horiztonal_striped/E... 1.0 EUROMODAJ-U125406-09-5.jpg 0.49232581 0.055619191 0.043276276 2.4512403 0.21075039 0.0 0.2511763 ... 0.89770401 0.12855974 0.0 2.7500679 0.44566065 0.0 1.6538981 3.4805861 0.05550551 1.0497173

5 rows × 2051 columns


In [31]:
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path= df['img_path'][i]
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    plt.grid(b=False)


target class

In [35]:
xcolumns=['x'+str(i) for i in np.arange(2048)]
X = df.loc[:,xcolumns].as_matrix().astype('float')
y= df.loc[:,'label'].as_matrix().astype('float')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,stratify=y,test_size=.33)
print(' training shape {0} \n testing shape {1}'.format(X_train.shape,X_test.shape))
print('\n target/non-target \n (train) {0}\{1} \n (test) {2}\{3}'.format(y_train.sum(),(1-y_train).sum(),y_test.sum(),(1-y_test).sum()))


 training shape (338, 2024) 
 testing shape (167, 2024)

 target/non-target 
 (train) 11.0\327.0 
 (test) 6.0\161.0

In [52]:
# classifiers 
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_svm = sklearn.svm.SVC(C=C,kernel='linear')

In [53]:
clf_LR.fit(X_train, y_train)
clf_svm.fit(X_train, y_train)


Out[53]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [44]:
coef = clf_LR.coef_[0,:]
plt.figure(figsize=(12,3))
sns.set_style('white')
plt.scatter(np.arange(len(coef)),coef)
plt.xlabel('nnet feature')
plt.ylabel('LogReg coefficient')
sns.despine()



In [40]:
#len(coef)

In [54]:
y_pred = clf_LR.predict(X_test)

(TP,FP,TN,FN) =perf_measure(y_test,y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP,FP,TN,FN))


TruePos:1
FalsePos:0
TrueNeg:161
FalseNeg:5

In [46]:
y_pred = clf_svm.predict(X_test)

(TP,FP,TN,FN) =perf_measure(y_test,y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP,FP,TN,FN))


TruePos:2
FalsePos:0
TrueNeg:161
FalseNeg:4
  • neither the SVM nor the logistic regression is doing well on this class; the target class is also heavily outnumbered in training, and one possible mitigation is sketched below
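
One untested mitigation for this imbalance (11 targets vs. 327 non-targets in the training split) is to reweight the classes; sklearn's LogisticRegression exposes this via class_weight. A sketch, not run in this notebook:

# upweight the rare target class inversely to its frequency
clf_LR_balanced = LogisticRegression(C=1.0, penalty='l1', tol=0.01,
                                     class_weight='balanced')
clf_LR_balanced.fit(X_train, y_train)
y_pred_balanced = clf_LR_balanced.predict(X_test)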

In [48]:
# from sklearn.model_selection import StratifiedKFold
# skf = StratifiedKFold(n_splits=5,shuffle=True)
# for train, test in skf.split(X, y):
#     #print("%s %s" % (train, test))
#     C=1.0
#     clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
#     clf_LR.fit(X[train], y[train])
#     y_pred = clf_LR.predict(X[test])
#     (TP,FP,TN,FN) =perf_measure(y[test],y_pred)
#     print('\nTruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}').format(TP,FP,TN,FN)

In [49]:
# permutation test: compare the cross-validated accuracy against the accuracies
# obtained when the labels are randomly shuffled
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
skf = StratifiedKFold(n_splits=5,shuffle=True)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)

In [56]:
#

In [50]:
plt.hist(permutation_scores)
plt.axvline(score)
sns.despine()
plt.xlabel('accuracy')
print(pvalue)


0.00990099009901
  • the cross-validated accuracy is above chance (permutation test, p ≈ 0.0099)

Red / Pink Data


In [41]:
# image folder 
target_img_folder ='../data_img_classes/class_red_pink/'
df = create_image_class_dataframe(target_img_folder)
df.head()


Out[41]:
img_path label img_name x0 x1 x2 x3 x4 x5 x6 ... x2038 x2039 x2040 x2041 x2042 x2043 x2044 x2045 x2046 x2047
0 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-6214020T0805-5.jpg 0.27172932 0.54065263 1.2518882 0.71433866 0.0 0.223846 0.208391 ... 3.1341801 0.078170836 0.19200282 1.3976613 0.01351728 0.0097718844 0.92253286 0.46201733 2.3602607 0.12272973
1 ../data_img_classes/class_red_pink/ANGEL-62140... 1.0 ANGEL-621402220501-5.jpg 0.15732542 0.85577351 0.13256542 1.6754812 0.14064166 0.72851104 0.15100212 ... 0.61663407 0.321567 0.13439243 1.693658 0.022544336 0.03912805 0.11787287 0.29207376 0.59155571 0.38405305
2 ../data_img_classes/class_red_pink/ANGELCITIZ-... 1.0 ANGELCITIZ-621308290602-5.jpg 0.41256633 0.38114852 0.18842269 1.5292635 0.85203356 0.27785954 0.18870671 ... 0.18259989 0.49224538 0.46193609 3.8138292 0.19726405 0.097800381 0.22442091 1.3731562 0.31209072 0.75006706
3 ../data_img_classes/class_red_pink/Bai-B520N01... 1.0 Bai-B520N015-5.jpg 0.039242335 0.36203983 0.0042010327 0.37699968 0.46601561 0.0 0.46742466 ... 0.15885612 0.13920899 0.16824563 3.6293392 0.10784438 0.0039167427 0.33787274 2.1860485 0.28497639 0.93075883
4 ../data_img_classes/class_red_pink/BAIYI-B1008... 1.0 BAIYI-B1008N289-5.jpg 0.43477067 0.33263576 0.0 0.40581283 0.14094441 0.017958783 0.63126558 ... 0.70636362 0.19716582 0.12621519 4.4085803 0.11626053 0.0 0.27553368 1.5186014 0.13309085 1.9339614

5 rows × 2051 columns


In [43]:
df.columns.values[-1]


Out[43]:
'x2047'

In [44]:
print('target class')
plt.figure(figsize=(12,3))
for i in range(5):
    img_path= df['img_path'][i+1]
    img = image.load_img(img_path, target_size=(224, 224))
    plt.subplot(1,5,i+1)
    plt.imshow(img)
    plt.grid(b=False)


target class

Split Set Assessment


In [45]:
# split data 
xcolumns=['x'+str(i) for i in np.arange(2048)]
X = df.loc[:,xcolumns].as_matrix().astype('float')
y= df.loc[:,'label'].as_matrix().astype('float')
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,y,stratify=y,test_size=.33)
print(' training shape {0} \n testing shape {1}'.format(X_train.shape,X_test.shape))
print('\n target/non-target \n (train) {0}\{1} \n (test) {2}\{3}'.format(y_train.sum(),(1-y_train).sum(),y_test.sum(),(1-y_test).sum()))


 training shape (338, 2048) 
 testing shape (167, 2048)

 target/non-target 
 (train) 41.0\297.0 
 (test) 21.0\146.0

In [46]:
# Train
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_LR.fit(X_train, y_train)

# test 
y_pred = clf_LR.predict(X_test)
(TP,FP,TN,FN) =perf_measure(y_test,y_pred)
print('TruePos:{0}\nFalsePos:{1}\nTrueNeg:{2}\nFalseNeg:{3}'.format(TP,FP,TN,FN))


TruePos:13
FalsePos:4
TrueNeg:142
FalseNeg:8
  • classification performance is much better on this dataset (41 training targets here vs. 11 for the striped class)

Permutation Assessment


In [47]:
from sklearn.model_selection import StratifiedKFold
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
skf = StratifiedKFold(n_splits=5,shuffle=True)
score, permutation_scores, pvalue = permutation_test_score(
    clf_LR, X, y, scoring="accuracy", cv=skf, n_permutations=100)

In [48]:
plt.hist(permutation_scores)
plt.axvline(score)
sns.despine()
plt.xlabel('accuracy')
plt.title('permutation test on test set classification')
print(pvalue)


0.00990099009901

Re-train on whole dataset


In [49]:
C = 1.0
clf_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)
clf_LR.fit(X, y)


Out[49]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.01,
          verbose=0, warm_start=False)

In [50]:
coef = clf_LR.coef_[0,:]
plt.figure(figsize=(12,3))
sns.set_style('white')
plt.scatter(np.arange(len(coef)),coef)
plt.xlabel('nnet feature')
plt.ylabel('LogReg coefficient')
sns.despine()



In [51]:
len(coef)


Out[51]:
2048

Save out


In [52]:
np.savetxt('../data_nn_features/class_weights_LR_redpink.txt',coef)
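
Downstream code can recover these weights with np.loadtxt and take their dot product with an image's 2048-dim avg_pool feature vector to obtain a scalar red/pink score, as described in the Overview (and sketched near the top of this notebook).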

Save


In [28]:
%%bash
jupyter nbconvert --to html Training_Network_to_Idenfitying_HandPicked_Classes.ipynb && mv Training_Network_to_Idenfitying_HandPicked_Classes.html ../notebook_htmls/Training_Network_to_Idenfitying_HandPicked_Classes_v2.html
cp Training_Network_to_Idenfitying_HandPicked_Classes.ipynb ../notebook_versions/Training_Network_to_Idenfitying_HandPicked_Classes_v2.ipynb


[NbConvertApp] Converting notebook Training_Network_to_Idenfitying_HandPicked_Classes.ipynb to html
[NbConvertApp] Writing 663582 bytes to Training_Network_to_Idenfitying_HandPicked_Classes.html