In [16]:

    
import skimage.measure as sk
from skimage import io, color
import matplotlib.pyplot as plt



In [26]:

    
path = "/Users/sreejithmenon/Dropbox/Social_Media_Wildlife_Census/All_Zebra_Count_Images/"

img1 = io.imread(path+"167.jpeg")
img2 = io.imread(path+"168.jpeg")



In [27]:

    
# STRUCTURAL SIMILARITY; higher means similar 

IMG1 = color.rgb2gray(img1)
IMG2 = color.rgb2gray(img2)
print(sk.compare_ssim(IMG1, IMG2))









    



0.628246714933



In [28]:

    
## SQUARED ERRORS; lower means similar

# difference in colored images
print(sk.simple_metrics.compare_nrmse(img1, img2))
print(sk.simple_metrics.compare_mse(img1, img2))

# difference in gray scale images
print(sk.simple_metrics.compare_nrmse(IMG1, IMG2))
print(sk.simple_metrics.compare_mse(IMG1, IMG2))









    



0.197093739088
900.735216392
0.186315448481
0.0128908777034



In [ ]:

    
fig = plt.figure("Images")

ax = fig.add_subplot(1,2,1)
plt.imshow(img1, cmap=plt.cm.gray)

ax = fig.add_subplot(1,2,2)
plt.imshow(img2, cmap=plt.cm.gray)
plt.show()



In [36]:

    
import PopulationEstimatorAPI as PE, ClassiferHelperAPI as CH

regrArgs = {'linear' : {'fit_intercept' : True},
            'ridge' : {'fit_intercept' : True},
            'lasso' : {'fit_intercept' : True},
            'elastic_net' : {'fit_intercept' : True},
            'svr' : {'fit_intercept' : True},
            'dtree_regressor' : {'fit_intercept' : True}}



In [38]:

    
train_fl = "../data/BeautyFtrVector_GZC_Expt2.csv"
test_fl = "../data/GZC_exifs_beauty_full.csv"

methObj,predResults = CH.trainTestRgrs(train_fl,
                                test_fl,
                                'linear',
                                'beauty',
                                infoGainFl="../data/infoGainsExpt2.csv",
                                methArgs = regrArgs
                                )









    



Number of outliers identified: 0
6524 6524



In [40]:

    
predResults['1'], predResults['2']









    Out[40]:





(46.30627117346981, 45.92882939519987)

Adding logic to filter out human images

The idea is that, human images are far more easily shared than animal images, in total there are ~459 images and it could bring in a greater accuracy or in turn a lower number of images that are required for convergence.



In [13]:

    
import pandas as pd, numpy as np
import ClassifierCapsuleClass as ClfClass, ClassiferHelperAPI as CH
clfArgs = {'dummy' : {'strategy' : 'most_frequent'},
            'bayesian' : {'fit_prior' : True},
            'logistic' : {'penalty' : 'l2'},
            'svm' : {'kernel' : 'rbf','probability' : True},
            'dtree' : {'criterion' : 'entropy'},
            'random_forests' : {'n_estimators' : 10 },
            'ada_boost' : {'n_estimators' : 50 }}

methodName = 'logistic'



In [15]:

    
train_data_fl = "../data/BeautyFtrVector_GZC_Expt2.csv"
train_x = pd.DataFrame.from_csv(train_data_fl)
        
train_x = train_x[(train_x['Proportion'] >= 80.0) | (train_x['Proportion'] <= 20.0)]
train_x['TARGET'] = np.where(train_x['Proportion'] >= 80.0, 1, 0)

train_y = train_x['TARGET']
train_x.drop(['Proportion','TARGET'],1,inplace=True)        
clf = CH.getLearningAlgo(methodName,clfArgs.get(methodName,None))
lObj = ClfClass.ClassifierCapsule(clf,methodName,0.0,train_x,train_y,None,None)



In [16]:

    
test_data_fl = "../data/GZC_exifs_beauty_full.csv"
testDf = pd.DataFrame.from_csv(test_data_fl)

testDataFeatures = testDf[lObj.train_x.columns]



In [22]:

    
with open("../data/HumanImagesException.csv", "r") as HImgs:
    h_img_list = HImgs.read().split("\n")

h_img_list = list(map(int, h_img_list))



In [25]:

    
len(set(testDataFeatures.index) - set(h_img_list))









    Out[25]:





6491



In [31]:

    
count = 0
for i in h_img_list:
    if i in testDataFeatures.index:
        count += 1
print(count)



In [32]:

    
len(testDf)









    Out[32]:





6524



In [33]:

    
testDataFeatures.index = set(testDataFeatures.index) - set(h_img_list)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-33-6279b272a156> in <module>()
----> 1 testDataFeatures.index = set(testDataFeatures.index) - set(h_img_list)

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/pandas/core/generic.py in __setattr__(self, name, value)
   2683         try:
   2684             object.__getattribute__(self, name)
-> 2685             return object.__setattr__(self, name, value)
   2686         except AttributeError:
   2687             pass

pandas/src/properties.pyx in pandas.lib.AxisProperty.__set__ (pandas/lib.c:44748)()

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/pandas/core/generic.py in _set_axis(self, axis, labels)
    426 
    427     def _set_axis(self, axis, labels):
--> 428         self._data.set_axis(axis, labels)
    429         self._clear_item_cache()
    430 

/Users/sreejithmenon/anaconda/lib/python3.5/site-packages/pandas/core/internals.py in set_axis(self, axis, new_labels)
   2633             raise ValueError('Length mismatch: Expected axis has %d elements, '
   2634                              'new values have %d elements' %
-> 2635                              (old_len, new_len))
   2636 
   2637         self.axes[axis] = new_labels

ValueError: Length mismatch: Expected axis has 6524 elements, new values have 6491 elements



In [ ]:

    
obj