In [1]:
import ibeis
import vtool
import utool
import numpy as np
import numpy.linalg as npl
np.set_printoptions(precision=2)

In [2]:
ibeis.ensure_pz_mtest()
ibs = ibeis.opendb('PZ_MTEST')


have mtest_dir='F:/data/work/PZ_MTEST'
[ibs._init_dirs] ibs.dbdir = 'F:\\data\\work\\PZ_MTEST'
[ensure_correct_version] Database version: u'1.0.1' | Expected version: '1.0.1' 
[ibs] building default config

In [3]:
taids = ibs.get_valid_aids()
tvecs_list = ibs.get_annot_desc(taids)  # one (num_feats, 128) descriptor array per annotation
tvec_list = np.vstack(tvecs_list)       # stack all descriptors into a single array
print(len(tvecs_list))
print(len(tvec_list))


119
120590
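
The 119 annotation arrays stack into 120590 rows, which loses the mapping from stacked rows back to annotations. A minimal sketch of recovering it (the helper names below are my own additions, not part of this session):

# Hypothetical bookkeeping: which annotation did each stacked row come from?
tvec_counts = np.array([len(vecs) for vecs in tvecs_list])
tvec_offsets = np.cumsum(tvec_counts)                  # end offset of each annotation's block
row_to_aid = np.repeat(np.array(taids), tvec_counts)   # annotation id for every stacked row
assert len(row_to_aid) == len(tvec_list)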

In [4]:
#import pyflann
#flann = pyflann.FLANN()
#help(flann.kmeans)
#centroids = flann.kmeans(tvec_list, num_clusters=1000, max_iterations=100)

In [5]:
labels, words = vtool.clustering.precompute_akmeans(tvec_list, 1000, 30, cache_dir='.')


[akmeans] pre_akmeans()
[cache] * load_cPkl('akmeans_clusters_FLANN()_DPTS((120590,128)0&q+ina20q33dfii).cPkl', data)
[cache] * load_cPkl('akmeans_datax2cl_FLANN()_DPTS((120590,128)0&q+ina20q33dfii).cPkl', data)
[akmeans.precompute] load successful
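
The cache hit above means the clusters were loaded from disk rather than recomputed. If the descriptors change, the cache can be bypassed with the force_recomp flag from the signature shown in In [8] below (a sketch, not run in this session):

# Recompute the clusters from scratch instead of loading the cached result
labels, words = vtool.clustering.precompute_akmeans(
    tvec_list, 1000, 30, cache_dir='.', force_recomp=True)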

In [6]:
print(words.shape)


(1000, 128)

In [7]:
help(vtool.nearest_neighbors.flann_cache)


Help on function flann_cache in module vtool.nearest_neighbors:

flann_cache(dpts, cache_dir=None, cfgstr='', flann_params=None, use_cache=True, save=True, use_params_hash=True, use_data_hash=True)
    Tries to load a cached flann index before doing anything
    from vtool.nn


In [8]:
help(vtool.clustering.precompute_akmeans)


Help on function precompute_akmeans in module vtool.clustering:

precompute_akmeans(data, num_clusters, max_iters=5, flann_params={}, cache_dir=None, force_recomp=False, use_data_hash=True, cfgstr='', refine=False, akmeans_cfgstr=None)
    precompute approximate kmeans with builtin caching


In [9]:
centroid_flann = vtool.nearest_neighbors.flann_cache(words, cache_dir='.', flann_params={})


...flann_cache cfgstr = '_FLANN()_DPTS((1000,128)f6rp+s93u4pl72o7)': 
...flann cache hit

In [10]:
indexes, dists = centroid_flann.nn_index(tvec_list, 1)  # assign each descriptor to its nearest word
print(indexes.shape)


(120590,)
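
With one word assignment per descriptor, a bag-of-words count per word is a one-liner (a sketch; the np.bincount call is my addition, not from this session):

# Count how many database descriptors were assigned to each of the 1000 words
word_hist = np.bincount(indexes.astype(np.int64), minlength=len(words))
print(word_hist.shape)   # (1000,)
print(word_hist.sum())   # 120590, one assignment per descriptor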

In [11]:
wx2_tvec = utool.group_items(tvec_list.tolist(), indexes.tolist())  # group descriptors by assigned word

In [12]:
word_index = list(wx2_tvec.keys())[0]  # inspect the first word
vecs = np.array(wx2_tvec[word_index], dtype=np.float64)
word = np.array(words[word_index], dtype=np.float64)
residuals = np.array([word - vec for vec in vecs])  # residual of each descriptor from its word
residuals_n = vtool.linalg.normalize_rows(residuals)
#print(vecs)
#print(residuals)
print(residuals_n)
print(residuals_n.shape)
print((residuals_n ** 2).sum(-1))  # sanity check: every row has unit norm
#print(word)


[[ 0.14  0.06  0.04 ..., -0.01  0.06  0.06]
 [ 0.15  0.06  0.04 ...,  0.03  0.01 -0.09]
 [ 0.15  0.06  0.03 ...,  0.02 -0.18  0.02]
 ..., 
 [-0.25 -0.15  0.03 ...,  0.    0.02  0.06]
 [-0.3   0.07  0.04 ..., -0.01  0.02 -0.05]
 [-0.25  0.03  0.03 ...,  0.03  0.07  0.05]]
(93, 128)
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.]
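
The normalized residuals for one word can be collapsed into a single VLAD-style signature by summing and renormalizing (a sketch of the standard aggregation technique, not code from this session):

# Aggregate this word's residuals into one unit-length vector (VLAD-style)
rvec_agg = residuals_n.sum(axis=0)
rvec_agg = rvec_agg / npl.norm(rvec_agg)  # renormalize the aggregate
print(rvec_agg.shape)  # (128,)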

In [13]:
# Compare each residual vec to all others in this word index
rvecs = residuals_n
similarity_matrix = rvecs.dot(rvecs.T)  # cosine similarity, since rows are unit norm
print(similarity_matrix)
print(similarity_matrix.shape)


[[ 1.    0.24 -0.12 ..., -0.18 -0.03 -0.1 ]
 [ 0.24  1.   -0.01 ..., -0.35  0.4  -0.1 ]
 [-0.12 -0.01  1.   ..., -0.01 -0.29  0.01]
 ..., 
 [-0.18 -0.35 -0.01 ...,  1.   -0.06 -0.03]
 [-0.03  0.4  -0.29 ..., -0.06  1.    0.38]
 [-0.1  -0.1   0.01 ..., -0.03  0.38  1.  ]]
(93, 93)
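
Off-diagonal entries measure how alike two descriptors assigned to the same word are; values near 1 flag near-duplicate ("bursty") features. A sketch for pulling those pairs out (the 0.9 threshold is an arbitrary choice of mine):

# Find pairs of residuals in this word that are nearly identical
thresh = 0.9
ii, jj = np.where(np.triu(similarity_matrix, k=1) > thresh)  # strict upper triangle skips the diagonal
print(list(zip(ii.tolist(), jj.tolist())))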
