In [8]:
import ibeis
import six
import vtool
import utool
import numpy as np
import numpy.linalg as npl  # NOQA
import pandas as pd

# Display configuration for interactive inspection.
np.set_printoptions(precision=2)
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 10)
# BUG FIX: the option key was misspelled 'isplay.notebook_repr_html',
# which makes pandas raise OptionError instead of enabling HTML reprs.
pd.set_option('display.notebook_repr_html', True)
# Make sure the PZ_MTEST test database exists locally before anything runs.
ibeis.ensure_pz_mtest()


have mtest_dir='F:/data/work/PZ_MTEST'

In [9]:
def make_annot_df(ibs):
    """Pack per-annotation keypoints and descriptor vectors into one
    DataFrame indexed by annotation id (aid).

    Columns: 'vecs' (descriptor arrays), 'kpts' (keypoint arrays).
    """
    valid_aids = ibs.get_valid_aids()
    kpts_col = pd.DataFrame(ibs.get_annot_kpts(valid_aids),
                            index=valid_aids, columns=['kpts'])
    vecs_col = pd.DataFrame(ibs.get_annot_desc(valid_aids),
                            index=valid_aids, columns=['vecs'])
    # Column order is 'vecs' then 'kpts', matching downstream access.
    annots_df = pd.concat([vecs_col, kpts_col], axis=1)
    return annots_df

In [10]:
def learn_visual_words(annots_df, train_aids, nCentroids):
    """Learn a visual-word vocabulary from the training annotations.

    Args:
        annots_df: DataFrame with a 'vecs' column holding one descriptor
            array per annotation (as built by make_annot_df).
        train_aids: annotation ids whose descriptors form the training set.
        nCentroids (int): number of visual words (cluster centers) to learn.

    Returns:
        word centroid array from precompute_akmeans (per the log output,
        shape (nCentroids, 128) here).
    """
    from vtool import clustering2 as clustertool
    clustertool.rrr()  # hot-reload the clustering module during development
    # FIX: DataFrame.as_matrix() was deprecated and later removed from
    # pandas; .values returns the same object-array of per-annot matrices.
    vecs_list = annots_df['vecs'][train_aids].values
    # Stack all annotations' descriptors into one (nDesc, 128) training matrix.
    train_vecs = np.vstack(vecs_list)
    print('Training %d word vocabulary with %d annots and %d descriptors' %
          (nCentroids, len(train_aids), len(train_vecs)))
    # Cached approximate k-means; reuses previous centroids when available.
    words = clustertool.precompute_akmeans(train_vecs, nCentroids, max_iters=100)
    return words

In [14]:
def index_data_annots(annots_df, daids, words):
    """Build an InvertedIndex over the database annotations' descriptors.

    Args:
        annots_df: DataFrame with a 'vecs' column of per-annot descriptors.
        daids: database annotation ids to index.
        words: visual-word centroids from learn_visual_words.

    Returns:
        InvertedIndex wrapping the word flann, stacked descriptors, and
        the word -> descriptor-index assignments.
    """
    from vtool import nearest_neighbors as nntool
    vecs_list = annots_df['vecs'][daids]
    flann_params = {}
    # CONSISTENCY: use the nntool alias imported above rather than the
    # fully-qualified vtool.nearest_neighbors path.
    wordflann = nntool.flann_cache(words, flann_params=flann_params)
    # Stack per-annot descriptors; idx2_ax/idx2_fx map each stacked row
    # back to its (annotation, feature) origin.
    idx2_vec, idx2_ax, idx2_fx = nntool.invertable_stack(vecs_list, daids)
    ax2_aid = np.array(daids)
    wx2_idxs = inverted_assignments(wordflann, idx2_vec)
    print('Vectors per word')
    # ROBUSTNESS: materialize map() so this also works on Python 3,
    # where map returns a lazy iterator.
    print(utool.stats_str(list(map(len, wx2_idxs))))
    invindex = InvertedIndex(words, wordflann, idx2_vec, idx2_ax, idx2_fx, ax2_aid)
    return invindex


def inverted_assignments(wordflann, idx2_vec):
    """Assign each descriptor to its nearest visual word and invert the
    mapping, grouping descriptor indexes by their assigned word."""
    # 1-nearest-neighbor lookup against the word centroids.
    idx2_wx, _idx2_wdist = wordflann.nn_index(idx2_vec, 1)
    all_idxs = list(range(len(idx2_wx)))
    # TODO: replace with pandas groupby
    grouped = utool.group_items(all_idxs, idx2_wx.tolist())
    return grouped.values()


@six.add_metaclass(utool.ReloadingMetaclass)
class InvertedIndex(object):
    """Visual vocabulary plus the inverted word -> descriptor-index map.

    Follows the codebase convention of naming the instance parameter
    'invindex' rather than 'self'.
    """
    def __init__(invindex, words, wordflann, idx2_vec, idx2_ax, idx2_fx, ax2_aid):
        # Vocabulary and its nearest-neighbor search structure.
        invindex.words = words
        invindex.wordflann = wordflann
        # Stacked descriptors and their back-references: idx2_ax gives the
        # annot index, idx2_fx the feature index, for each stacked row.
        invindex.idx2_vec = idx2_vec
        invindex.idx2_ax = idx2_ax
        invindex.idx2_fx = idx2_fx
        invindex.ax2_aid = ax2_aid
        # Word -> descriptor-index assignments, computed eagerly.
        invindex.wx2_idxs = invindex.inverted_assignments(idx2_vec)

    def inverted_assignments(invindex, idx2_vec):
        # Delegate to the module-level helper using this index's flann.
        return inverted_assignments(invindex.wordflann, idx2_vec)

In [12]:
def main():
    """Open PZ_MTEST, learn a small vocabulary, and index the database set.

    Returns locals() so the caller can hoist every variable into the
    notebook namespace via utool.execstr_dict — for that reason the local
    variable names below are part of this function's contract.
    """
    ibs = ibeis.opendb('PZ_MTEST')
    annots_df = make_annot_df(ibs)
    valid_aids = annots_df.index
    # Training set: every other aid among the first 20.
    train_aids = valid_aids[0:20:2]
    # Database set: odd aids from index 3 on.
    daids  = valid_aids[3::2]
    # Search set: all even-indexed aids.
    qaids = valid_aids[0::2]
    nCentroids = 10
    words = learn_visual_words(annots_df, train_aids, nCentroids)
    invindex = index_data_annots(annots_df, daids, words)
    return locals()

In [15]:
# Run the pipeline, then hoist every variable from main()'s local scope
# into the notebook's global namespace for interactive inspection.
main_locals = main()
# execstr_dict builds an exec-able string of assignments from the dict
# (e.g. "ibs = main_locals['ibs']"), so exec() injects them here.
main_execstr = utool.execstr_dict(main_locals, 'main_locals')
exec(main_execstr)


[ibs._init_dirs] ibs.dbdir = 'F:\\data\\work\\PZ_MTEST'
[ensure_correct_version] Database version: u'1.0.1' | Expected version: '1.0.1' 
[ibs] building default config
RELOAD: [clustering2] __name__=vtool.clustering2
Training 10 word vocabulary with 10 annots and 10676 descriptors
[akmeans] pre_akmeans()
Warning: no cache dir specified
[cache] * load_cPkl('akmeans_centroids_FLANN()_DPTS((10676,128)%0dytztceyvefc3w).cPkl', data)
[akmeans.precompute] load successful
Warning: no cache dir specified
...flann_cache cfgstr = '_FLANN()_DPTS((10,128)a1+9sr9@npqz+38u)': 
...flann cache hit
{'max': 6739.0, 'min': 4796.0, 'mean': 5892.3, 'std': 585.318, 'nMin': 1, 'nMax': 1, 'shape': (10,)}