In [15]:

    
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
import itertools
import boto3
%matplotlib inline



In [2]:

    
labels = pd.read_csv("s3://dogfaces/tensor_model/output_labels_20170907.txt", names=["dog_breed"])
labels.head()









    Out[2]:






  
    
      
      dog_breed
    
  
  
    
      0
      rottweiler
    
    
      1
      bull mastiff
    
    
      2
      french bulldog
    
    
      3
      cairn
    
    
      4
      yorkshire terrier

Get confusion matrix



In [3]:

    
df_conf = pd.read_csv("s3://dogfaces/tensor_model/test_result_20170907.txt", sep=" ", names=["image_name", "pred", "actual"])



In [4]:

    
df_conf.tail()









    Out[4]:






  
    
      
      image_name
      pred
      actual
    
  
  
    
      953
      train_images/West_Highland_white_terrier/n0209...
      52
      52
    
    
      954
      train_images/West_Highland_white_terrier/n0209...
      52
      52
    
    
      955
      train_images/West_Highland_white_terrier/n0209...
      52
      52
    
    
      956
      train_images/West_Highland_white_terrier/n0209...
      52
      52
    
    
      957
      train_images/West_Highland_white_terrier/n0209...
      52
      52



In [5]:

    
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    '''
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    '''

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')



In [6]:

    
y_pred = df_conf['pred']
y_true = df_conf['actual']



In [7]:

    
# Compute confusion matrix
cnf_matrix = confusion_matrix(y_true, y_pred)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
#plt.figure(figsize=(10,10))
#plot_confusion_matrix(cnf_matrix, classes=labels.dog_breed,
#                      title='Confusion matrix, without normalization')
#plt.show()

#Plot normalized confusion matrix
plt.figure(figsize=(20,10))
plot_confusion_matrix(cnf_matrix, classes=labels.dog_breed, normalize=True,
                      title='Normalized confusion matrix')

plt.show()









    



Normalized confusion matrix
[[ 1.    0.    0.   ...,  0.    0.    0.  ]
 [ 0.    1.    0.   ...,  0.    0.    0.  ]
 [ 0.    0.    0.94 ...,  0.    0.    0.  ]
 ..., 
 [ 0.    0.    0.   ...,  1.    0.    0.  ]
 [ 0.    0.    0.   ...,  0.    0.85  0.  ]
 [ 0.    0.    0.   ...,  0.    0.    1.  ]]

Examine model images



In [8]:

    
breed_names = labels.dog_breed.values



In [9]:

    
df_revpics = pd.read_csv("s3://dogfaces/reviews/labeled_pictures.csv")
df_revpics.head()









    Out[9]:






  
    
      
      pic_names
      probability
    
  
  
    
      0
      0090-en_us_1395941_photo.jpg
      [0.995804, 0.00103013, 8.67468e-06, 1.25309e-0...
    
    
      1
      0090-en_us_2278266_photo.jpg
      [0.000183522, 0.00523787, 0.0364081, 0.0026475...
    
    
      2
      0090-en_us_1719425_photo.jpg
      [0.0147026, 0.000886808, 0.00103998, 0.0013103...
    
    
      3
      0090-en_us_1892461_photo.jpg
      [0.000591641, 0.00258964, 0.000426093, 0.00229...
    
    
      4
      0090-en_us_304189_photo.jpg
      [0.0014609, 0.00287083, 0.00435399, 0.00102825...



In [10]:

    
def prob2num(row):
    return map(lambda x:float(x), row.lstrip('[').rstrip(']').split(','))
def getTopbreed(prob,k, thres=0.1):
    prob_num = prob2num(prob)
    prob_index = np.argsort(prob_num)[-k]
    pred = breed_names[prob_index]
    prob_digi = prob_num[prob_index]
    if prob_digi > thres:
        return pred
    else:
        return 'unknown'



In [11]:

    
df_a = pd.DataFrame()



In [12]:

    
df_a['top1'] = df_revpics['probability'].apply(lambda row:getTopbreed(row, 1))



In [13]:

    
df_a['top1'].head()









    Out[13]:





0                   rottweiler
1    american pit bull terrier
2           miniature pinscher
3    american pit bull terrier
4                     malinois
Name: top1, dtype: object



In [14]:

    
df_a['top1'].value_counts()









    Out[14]:





unknown                        421
american pit bull terrier      192
labrador retriever             173
german shepherd                137
golden retriever               105
beagle                          97
chihuahua                       85
havanese                        84
american bulldog                76
toy poodle                      73
miniature pinscher              73
shih tzu                        69
whippet                         66
french bulldog                  65
doberman                        56
shiba inu                       53
yorkshire terrier               51
rottweiler                      50
boxer                           48
great dane                      44
malinois                        43
border collie                   42
german shorthaired              32
rhodesian ridgeback             30
west highland white terrier     27
miniature schnauzer             25
siberian husky                  25
miniature poodle                22
vizsla                          21
soft coated wheaten terrier     21
collie                          21
pug                             21
bull mastiff                    20
pomeranian                      20
newfoundland                    19
scottish terrier                18
bernese mountain dog            17
chesapeake bay retriever        16
english springer                16
weimaraner                      15
shetland sheepdog               15
standard poodle                 15
papillon                        14
great pyrenees                  14
cairn                           14
wheaten terrier                 13
basset                          10
samoyed                          9
malamute                         9
bloodhound                       7
tibetan mastiff                  5
english foxhound                 3
chow                             2
Name: top1, dtype: int64



In [96]:

    
df_a['top2'] = df_revpics['probability'].apply(lambda row:getTopbreed(row, 2, thres=0.05))



In [97]:

    
df_a['top2'].value_counts()









    Out[97]:





unknown                        664
american pit bull terrier      131
american bulldog               120
miniature pinscher             107
chihuahua                       96
labrador retriever              96
havanese                        82
beagle                          80
shih tzu                        72
malinois                        62
whippet                         60
toy poodle                      59
shiba inu                       59
collie                          53
german shepherd                 46
boxer                           46
doberman                        45
golden retriever                41
chesapeake bay retriever        41
scottish terrier                38
miniature poodle                37
wheaten terrier                 36
standard poodle                 34
great dane                      33
german shorthaired              32
soft coated wheaten terrier     30
rhodesian ridgeback             25
french bulldog                  23
miniature schnauzer             22
yorkshire terrier               21
newfoundland                    21
bull mastiff                    20
siberian husky                  20
pug                             20
pomeranian                      20
papillon                        17
tibetan mastiff                 17
vizsla                          17
rottweiler                      17
shetland sheepdog               17
malamute                        16
basset                          15
english springer                15
border collie                   14
weimaraner                      14
west highland white terrier     11
english foxhound                11
cairn                           10
bernese mountain dog             9
bloodhound                       8
great pyrenees                   7
basset hound                     7
samoyed                          3
chow                             2
Name: top2, dtype: int64



In [99]:

    
df_a['top3'] = df_revpics['probability'].apply(lambda row:getTopbreed(row, 3, thres=0.05))



In [100]:

    
df_a['top3'].value_counts()









    Out[100]:





unknown                        1351
american pit bull terrier        75
chihuahua                        64
miniature pinscher               63
beagle                           57
american bulldog                 57
havanese                         57
labrador retriever               55
whippet                          54
shiba inu                        48
shih tzu                         40
boxer                            37
toy poodle                       30
french bulldog                   29
chesapeake bay retriever         28
german shepherd                  27
collie                           27
doberman                         26
malinois                         25
golden retriever                 24
wheaten terrier                  23
soft coated wheaten terrier      23
standard poodle                  22
rottweiler                       22
great dane                       21
yorkshire terrier                21
siberian husky                   20
border collie                    20
rhodesian ridgeback              18
newfoundland                     17
miniature poodle                 16
west highland white terrier      16
cairn                            14
german shorthaired               14
basset                           14
bull mastiff                     13
english springer                 13
shetland sheepdog                13
pug                              13
scottish terrier                 13
vizsla                           12
miniature schnauzer              11
pomeranian                       10
basset hound                     10
malamute                          9
great pyrenees                    8
english foxhound                  8
tibetan mastiff                   7
bernese mountain dog              6
papillon                          5
chow                              5
samoyed                           4
bloodhound                        4
Name: top3, dtype: int64



In [101]:

    
df_a.shape









    Out[101]:





(2619, 3)



In [102]:

    
df_calc = pd.concat([df_revpics['pic_names'], df_a], axis=1)



In [103]:

    
df_calc[df_calc['top1']=='unknown'].sample(10)









    Out[103]:






  
    
      
      pic_names
      top1
      top2
      top3
    
  
  
    
      1479
      0090-en_us_2409909_photo.jpg
      unknown
      collie
      unknown
    
    
      722
      0090-en_us_2393501_photo.jpg
      unknown
      chihuahua
      french bulldog
    
    
      2322
      0090-en_us_968270_photo.jpg
      unknown
      unknown
      unknown
    
    
      2425
      0090-en_us_269147_photo.jpg
      unknown
      unknown
      unknown
    
    
      1923
      0090-en_us_963563_photo.jpg
      unknown
      tibetan mastiff
      unknown
    
    
      2010
      0090-en_us_2379105_photo.jpg
      unknown
      unknown
      unknown
    
    
      1804
      0090-en_us_943117_photo.jpg
      unknown
      unknown
      unknown
    
    
      325
      0090-en_us_2404039_photo.jpg
      unknown
      collie
      shih tzu
    
    
      411
      0090-en_us_906430_photo.jpg
      unknown
      miniature pinscher
      chihuahua
    
    
      1978
      0090-en_us_924606_photo.jpg
      unknown
      unknown
      unknown

Display images



In [52]:

    
s3 = boto3.client("s3")



In [53]:

    
def getReviewPicFromS3(pic_name, s3):
    pic_Key = "reviews/review_pics/"+pic_name
    pic_object = s3.get_object(Bucket='dogfaces', Key=pic_Key)
    pic_content = pic_object['Body'].read()
    return pic_content



In [61]:

    
test_name = "0090-en_us_1428965_photo.jpg"
pic_to_show = getReviewPicFromS3(test_name, s3)
with open('photo.jpg', 'wb') as f:
    f.write(pic_to_show)



In [146]:

    
# plot classified pictures
samples = df_calc[df_calc['top1']!='unknown'].sample(9)
image_names = samples.pic_names.values
breeds_1 = samples.top1.values
breeds_2 = samples.top2.values
fig, axes = plt.subplots(3,3, figsize=(10,10))
for img_name,breed1,breed2, ax in zip(image_names, breeds_1,breeds_2, axes.flatten()):
    pic_to_show = getReviewPicFromS3(img_name, s3)
    with open('photo.jpg', 'wb') as f:
        f.write(pic_to_show)
    ax.imshow(plt.imread('photo.jpg'))
    ax.axis("off")
    ax.set_title(breed1+"\n"+breed2)
plt.tight_layout()
plt.show()

Examine user information



In [58]:

    
df_users = pd.read_csv("s3://dogfaces/reviews/reviews.csv")



In [60]:

    
len(df_users['user_name'].unique())









    Out[60]:





32702



In [61]:

    
df_users.shape









    Out[61]:





(61202, 7)



In [62]:

    
df_users['user_name'].value_counts()









    Out[62]:





Lisa             86
Sandy            68
Chris            67
Debbie           67
Linda            63
Bella            63
Mary             63
Cindy            63
Sarah            61
Kathy            61
Kate             61
Karen            61
Katie            58
Dogmom           58
Daisy            56
Barb             55
Jenn             54
Kelly            53
Carol            53
Jess             53
Amanda           51
Lori             49
Laura            49
Nancy            48
Doglover         47
Michelle         46
Tina             45
Donna            45
Ashley           45
Beth             45
                 ..
Kongpiggy         1
BChaney           1
RHONCOS           1
DalMom            1
babbleball        1
AbbieRose         1
rosie16           1
Ilene             1
Vcolo             1
Dynamite          1
Mysticks1         1
upjumplick        1
alisonp           1
scarletaurora     1
CourtMcKenz       1
Jasmine474        1
NotClassy         1
Macelle           1
mom22             1
Phreddie          1
mom24             1
Rhonda54          1
PitPal            1
neecnrs           1
wesmorkids        1
brwnneyedgrl      1
AB23              1
BJan              1
msternod          1
Bob61             1
Name: user_name, dtype: int64



In [80]:

    
a= [1,2,3]
zip(a,a,a)









    Out[80]:





[(1, 1, 1), (2, 2, 2), (3, 3, 3)]



In [ ]:

	dog_breed
0	rottweiler
1	bull mastiff
2	french bulldog
3	cairn
4	yorkshire terrier

	image_name	pred	actual
953	train_images/West_Highland_white_terrier/n0209...	52	52
954	train_images/West_Highland_white_terrier/n0209...	52	52
955	train_images/West_Highland_white_terrier/n0209...	52	52
956	train_images/West_Highland_white_terrier/n0209...	52	52
957	train_images/West_Highland_white_terrier/n0209...	52	52

	pic_names	probability
0	0090-en_us_1395941_photo.jpg	[0.995804, 0.00103013, 8.67468e-06, 1.25309e-0...
1	0090-en_us_2278266_photo.jpg	[0.000183522, 0.00523787, 0.0364081, 0.0026475...
2	0090-en_us_1719425_photo.jpg	[0.0147026, 0.000886808, 0.00103998, 0.0013103...
3	0090-en_us_1892461_photo.jpg	[0.000591641, 0.00258964, 0.000426093, 0.00229...
4	0090-en_us_304189_photo.jpg	[0.0014609, 0.00287083, 0.00435399, 0.00102825...

	pic_names	top1	top2	top3
1479	0090-en_us_2409909_photo.jpg	unknown	collie	unknown
722	0090-en_us_2393501_photo.jpg	unknown	chihuahua	french bulldog
2322	0090-en_us_968270_photo.jpg	unknown	unknown	unknown
2425	0090-en_us_269147_photo.jpg	unknown	unknown	unknown
1923	0090-en_us_963563_photo.jpg	unknown	tibetan mastiff	unknown
2010	0090-en_us_2379105_photo.jpg	unknown	unknown	unknown
1804	0090-en_us_943117_photo.jpg	unknown	unknown	unknown
325	0090-en_us_2404039_photo.jpg	unknown	collie	shih tzu
411	0090-en_us_906430_photo.jpg	unknown	miniature pinscher	chihuahua
1978	0090-en_us_924606_photo.jpg	unknown	unknown	unknown