In [1]:
from data import num_sift_kp, lazy_df
# Load the feature table via the project's lazy_df() helper.
df = lazy_df()
print('num_sift_kp=' + str(num_sift_kp))
# Function-call form so the cell parses on Python 3 as well as Python 2
# (a single parenthesised expression prints identically under the Python 2
# print statement).
print(df.dtypes)


num_sift_kp=30
name                      object
class                   category
sift_kp_descriptors       object
red_histogram             object
green_histogram           object
blue_histogram            object
hue_histogram             object
saturation_histogram      object
value_histogram           object
dtype: object

In [2]:
# Group image file names by class id: {class_id: set of names}.
# The categorical `class` column is cast to int8 so the keys are plain
# integer ids.
cid_filename = {}
for class_id, file_name in zip(df['class'].astype('int8'), df['name']):
    # setdefault creates the empty set the first time a class id is seen.
    cid_filename.setdefault(class_id, set()).add(file_name)
cid_filename


Out[2]:
{0: {'image001.JPG', 'image002.JPG', 'image003.JPG', 'image004.JPG'},
 1: {'image006.JPG', 'image007.JPG', 'image008.JPG', 'image009.JPG'},
 2: {'image011.JPG', 'image012.JPG', 'image013.JPG', 'image014.JPG'},
 3: {'image016.JPG', 'image017.JPG', 'image018.JPG', 'image019.JPG'},
 4: {'image021.JPG', 'image022.JPG', 'image023.JPG', 'image024.JPG'},
 5: {'image031.JPG', 'image032.JPG', 'image033.JPG', 'image034.JPG'},
 6: {'image036.JPG', 'image037.JPG', 'image038.JPG', 'image039.JPG'},
 7: {'image041.JPG', 'image042.JPG', 'image043.JPG', 'image044.JPG'},
 8: {'image046.JPG', 'image047.JPG', 'image048.JPG', 'image049.JPG'},
 9: {'image051.JPG', 'image052.JPG', 'image053.JPG', 'image054.JPG'},
 10: {'image056.JPG', 'image057.JPG', 'image058.JPG', 'image059.JPG'},
 11: {'image061.JPG', 'image062.JPG', 'image063.JPG', 'image064.JPG'},
 12: {'image066.JPG', 'image067.JPG', 'image068.JPG', 'image069.JPG'},
 13: {'image071.JPG', 'image072.JPG', 'image073.JPG', 'image074.JPG'},
 14: {'image076.JPG', 'image077.JPG', 'image078.JPG', 'image079.JPG'},
 15: {'image081.JPG', 'image082.JPG', 'image083.JPG', 'image084.JPG'},
 16: {'image086.JPG', 'image087.JPG', 'image088.JPG', 'image089.JPG'},
 17: {'image091.JPG', 'image092.JPG', 'image093.JPG', 'image094.JPG'},
 18: {'image096.JPG', 'image097.JPG', 'image098.JPG', 'image099.JPG'},
 19: {'image101.JPG', 'image102.JPG', 'image103.JPG', 'image104.JPG'},
 20: {'image106.JPG', 'image107.JPG', 'image108.JPG', 'image109.JPG'},
 21: {'image111.JPG', 'image112.JPG', 'image113.JPG', 'image114.JPG'},
 22: {'image116.JPG', 'image117.JPG', 'image118.JPG', 'image119.JPG'},
 23: {'image121.JPG', 'image122.JPG', 'image123.JPG', 'image124.JPG'},
 24: {'image126.JPG', 'image127.JPG', 'image128.JPG', 'image129.JPG'},
 25: {'image131.JPG', 'image132.JPG', 'image133.JPG', 'image134.JPG'},
 26: {'image136.JPG', 'image137.JPG', 'image138.JPG', 'image139.JPG'},
 27: {'image141.JPG', 'image142.JPG', 'image143.JPG', 'image144.JPG'},
 28: {'image151.JPG', 'image152.JPG', 'image153.JPG', 'image154.JPG'},
 29: {'image156.JPG', 'image157.JPG', 'image158.JPG', 'image159.JPG'},
 30: {'image161.JPG', 'image162.JPG', 'image163.JPG', 'image164.JPG'},
 31: {'image166.JPG', 'image167.JPG', 'image168.JPG', 'image169.JPG'},
 32: {'image171.JPG', 'image172.JPG', 'image173.JPG', 'image174.JPG'},
 33: {'image176.JPG', 'image177.JPG', 'image178.JPG', 'image179.JPG'},
 34: {'image181.JPG', 'image182.JPG', 'image183.JPG', 'image184.JPG'},
 35: {'image186.JPG', 'image187.JPG', 'image188.JPG', 'image189.JPG'},
 36: {'image191.JPG', 'image192.JPG', 'image193.JPG', 'image194.JPG'},
 37: {'image196.JPG', 'image197.JPG', 'image198.JPG', 'image199.JPG'},
 38: {'image201.JPG', 'image202.JPG', 'image203.JPG', 'image204.JPG'},
 39: {'image206.JPG', 'image207.JPG', 'image208.JPG', 'image209.JPG'},
 40: {'image211.JPG', 'image212.JPG', 'image213.JPG', 'image214.JPG'},
 41: {'image217.JPG', 'image218.JPG', 'image219.JPG', 'image220.JPG'},
 42: {'image222.JPG', 'image223.JPG', 'image224.JPG', 'image225.JPG'},
 43: {'image227.JPG', 'image228.JPG', 'image229.JPG', 'image230.JPG'},
 44: {'image232.JPG', 'image233.JPG', 'image234.JPG', 'image235.JPG'},
 45: {'image237.JPG', 'image238.JPG', 'image239.JPG', 'image240.JPG'},
 46: {'image242.JPG', 'image243.JPG', 'image244.JPG', 'image245.JPG'},
 47: {'image247.JPG', 'image248.JPG', 'image249.JPG', 'image250.JPG'},
 48: {'image257.JPG', 'image258.JPG', 'image259.JPG', 'image260.JPG'},
 49: {'image262.JPG', 'image263.JPG', 'image264.JPG', 'image265.JPG'}}

In [3]:
import numpy as np
from collections import Counter
# Tally (correct, incorrect) id pairs recorded in the misclassification log.
# Each log line has the form "[c1 c2 ...] -> [i1 i2 ...]"; the two
# space-separated lists are paired element-wise.
pair_counts = Counter()
with open('sift_incorrect_log.txt', mode='r') as h:
    for line in h:
        # Iterating a file yields lines with their trailing newline still
        # attached. Strip it first, otherwise the empty-record comparison
        # below can never match and the [1:-1] slice on the right-hand list
        # removes the newline instead of the closing bracket.
        line = line.strip()
        if not line or line == '[] -> []':
            continue  # blank line or a record with no errors
        correct, incorrect = line.split(' -> ')
        # [1:-1] drops the surrounding square brackets before parsing.
        correct = np.fromstring(correct[1:-1], dtype=np.uint8, sep=' ')
        incorrect = np.fromstring(incorrect[1:-1], dtype=np.uint8, sep=' ')
        pair_counts.update(zip(correct, incorrect))
pair_counts.most_common(20)  # 20 most common errors and their counts


Out[3]:
[((16, 37), 326),
 ((2, 39), 235),
 ((11, 46), 230),
 ((28, 31), 209),
 ((23, 8), 202),
 ((39, 37), 202),
 ((23, 36), 201),
 ((37, 16), 197),
 ((33, 46), 191),
 ((22, 38), 184),
 ((40, 16), 166),
 ((41, 16), 122),
 ((37, 39), 121),
 ((2, 4), 121),
 ((23, 25), 118),
 ((2, 21), 111),
 ((39, 31), 111),
 ((16, 24), 95),
 ((2, 28), 87),
 ((40, 24), 84)]

In [4]:
import matplotlib.pyplot as plt
%matplotlib inline
# Split the frequency-sorted (pair, count) items into two parallel tuples;
# `pairs` and `counts` are reused by the next cell.
pairs, counts = zip(*pair_counts.most_common())
# One bar per (correct, incorrect) pair, most frequent first.
fig, ax = plt.subplots(figsize=(14, 7))
bar_positions = np.arange(len(counts))
ax.bar(bar_positions, counts, color="green", alpha=0.75)
ax.grid()



In [5]:
# Sum the error counts per first element of each pair, i.e. per `correct`
# id of the (correct, incorrect) pairs tallied from the log.
from_counts = dict()
# Unpack each pair directly rather than subscripting zip(*pairs)[0]: that
# form only works where zip() returns a list (Python 2) and needlessly
# transposes every pair just to read the first component.
for (correct_id, _incorrect_id), n_errors in zip(pairs, counts):
    from_counts[correct_id] = from_counts.get(correct_id, 0) + n_errors
# Most error-prone ids first.
sorted(from_counts.items(), key=lambda x: x[1], reverse=True)


Out[5]:
[(2, 821),
 (23, 571),
 (16, 562),
 (39, 487),
 (37, 318),
 (33, 277),
 (11, 272),
 (40, 250),
 (28, 209),
 (22, 196),
 (41, 152),
 (0, 3)]

In [ ]: