In [1]:
import collections as cl

import faiss
import numpy as np
import torch as th

from misc import load_sift, save_sift

Load the query and class vectors extracted from fastText


In [30]:
# xq holds one query (example) vector per row; xb holds the class vectors.
# xb is stored transposed and C-contiguous so that `xq @ xb` directly yields a
# (queries x classes) score matrix in the matmul benchmark.
xq = load_sift('../data/siftLSHTC/predictions.hid.fvecs', dtype=np.float32)
xb = load_sift('../data/siftLSHTC/predictions.wo.fvecs', dtype=np.float32)
xb = np.ascontiguousarray(xb.T)

n, d, c = xq.shape[0], xq.shape[1], xb.shape[1]

# Fail early, with a readable message, if the two files disagree on the
# vector dimensionality (the matmul below would otherwise fail later).
assert xb.shape[0] == d, f"query dim {d} != class-vector dim {xb.shape[0]}"

print(f"Loaded dataset of {n:_}, {d:_}-dimensional queries (examples)")
print(f"The dataset contains {c:_} classes, and more than one class can be positive")


Loaded dataset of 566_010, 256-dimensionsl queries (examples)
The dataset contains 163_679 classes, and more than one class can be positive

Load ground-truth labels


In [3]:
# Each line of the labels file lists the whitespace-separated integer class ids
# that are positive for the query at the same position. They are kept as sets
# so that the membership tests in the evaluation cells are cheap.
# The `with` block guarantees the file handle is closed once parsing is done.
with open('../data/siftLSHTC/predictions.labels.txt') as labels_file:
    gt = [{int(y) for y in line.split()} for line in labels_file]

Evaluate matmul approach

Compute scores


In [4]:
%%time

BATCH_SIZE = 1024
K = 1

tq = th.from_numpy(xq).cuda()
tb = th.from_numpy(xb).cuda()
ti = th.cuda.LongTensor(tq.shape[0], K)

start_idx = 0
while start_idx < tq.shape[0]:
    stop_idx = min(start_idx + BATCH_SIZE, tq.shape[0])
    
    scores = tq[start_idx:stop_idx, :] @ tb
    D, I = th.topk(scores, K)
    
    ti[start_idx:stop_idx, :] = I    
    
    start_idx = stop_idx
    
ti = ti.cpu()    
th.cuda.synchronize()


CPU times: user 36.8 s, sys: 14.9 s, total: 51.7 s
Wall time: 51.7 s

Evaluate p@1


In [5]:
# Precision@1: the fraction of queries whose best-scoring class is one of the
# query's ground-truth labels.
top1 = ti.cpu().numpy()

hits = 0
for row_no, labels in enumerate(top1):
    if int(labels[0]) in gt[row_no]:
        hits += 1

p1 = hits / len(gt)

print(f'Precision @ 1: {p1}')


Precision @ 1: 0.30991325241603507

Evaluate GPU-Flat

Setup


In [6]:
%%time

# xb was transposed at load time for the matmul benchmark. The index below
# takes xb.shape[1] as its dimensionality, so flip xb back to one class vector
# per row. The shape comparison only works because there are more classes than
# dimensions; it also makes re-running this cell a no-op.
if xb.shape[1] > xb.shape[0]:
    xb = np.ascontiguousarray(xb.T)

# `res` must stay referenced for as long as `index` is used — NOTE(review):
# per the faiss GPU docs the resources object owns the GPU scratch memory; confirm.
res = faiss.StandardGpuResources()
flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = 0

# Flat inner-product index on GPU 0, filled with every class vector.
index = faiss.GpuIndexFlatIP(res, xb.shape[1], flat_config)
index.add(xb)

# Presumably waits for the pending work on device 0's default stream so that
# %%time includes the upload — TODO confirm.
res.syncDefaultStream(0)


CPU times: user 220 ms, sys: 68 ms, total: 288 ms
Wall time: 285 ms

Warmup


In [7]:
# Untimed warm-up search: the result is discarded, so one-off start-up costs
# do not end up in the timed cell that follows.
_ = index.search(xq, 1)

In [8]:
%%time

# Timed top-1 search over all queries: D receives the scores, I the matching
# row indices into xb (i.e. class ids).
D, I = index.search(xq, 1)


CPU times: user 20.3 s, sys: 1.13 s, total: 21.5 s
Wall time: 21.5 s

Evaluate


In [9]:
# Precision@1: the fraction of queries whose top-ranked class is one of the
# query's ground-truth labels.
# Assumes I is the 2-D (queries x k) label array returned by index.search.
# Taking column 0 explicitly avoids calling int() on a length-1 array row
# (deprecated since NumPy 1.25, and an error as soon as k > 1) and matches the
# `item[0]` indexing used in the matmul evaluation.
assert len(I) == len(gt), "number of search results and ground-truth rows differ"

hits = sum(int(label) in gt[i] for i, label in enumerate(I[:, 0]))
p1 = hits / len(gt)

print(f'Precision @ 1: {p1}')


Precision @ 1: 0.30991325241603507

Evaluate GPU-Fast

Setup


In [10]:
%%time

if xb.shape[1] > xb.shape[0]:
    xb = np.ascontiguousarray(xb.T)

d = xb.shape[1]
    
res = faiss.StandardGpuResources()
flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = 0
co = faiss.GpuClonerOptions()

index = faiss.index_factory(d, "IVF16384,Flat", faiss.METRIC_INNER_PRODUCT)
index = faiss.index_cpu_to_gpu(res, 0, index, co)

index.train(xb)
index.add(xb)
res.syncDefaultStream(0)


CPU times: user 14 s, sys: 1.26 s, total: 15.3 s
Wall time: 7.68 s

Warmup


In [11]:
# Untimed warm-up search: the result is discarded, so one-off start-up costs
# do not end up in the timed cell that follows.
_ = index.search(xq, 1)

In [15]:
%%time

# Probe 32 inverted lists per query, then run the timed top-1 search.
# NOTE(review): the CPU index further down sets `index.nprobe` instead; newer
# faiss releases may have replaced setNumProbes with that attribute on GPU
# indexes too — TODO confirm against the installed faiss version.
# NOTE(review): the warm-up cell above ran before this setter, presumably with
# the default probe count — verify that is intended.
index.setNumProbes(32)
D, I = index.search(xq, 1)


CPU times: user 2.99 s, sys: 188 ms, total: 3.18 s
Wall time: 3.2 s

Evaluate


In [16]:
# Precision@1: the fraction of queries whose top-ranked class is one of the
# query's ground-truth labels.
# Assumes I is the 2-D (queries x k) label array returned by index.search.
# Taking column 0 explicitly avoids calling int() on a length-1 array row
# (deprecated since NumPy 1.25, and an error as soon as k > 1) and matches the
# `item[0]` indexing used in the matmul evaluation.
assert len(I) == len(gt), "number of search results and ground-truth rows differ"

hits = sum(int(label) in gt[i] for i, label in enumerate(I[:, 0]))
p1 = hits / len(gt)

print(f'Precision @ 1: {p1}')


Precision @ 1: 0.3039946290701578

Evaluate CPU Fast

Setup


In [17]:
%%time

# Make sure xb holds one class vector per row (it is transposed at load time
# for the matmul benchmark); a no-op if an earlier cell already flipped it.
if xb.shape[1] > xb.shape[0]:
    xb = np.ascontiguousarray(xb.T)

d = xb.shape[1]

# Same factory string and metric as the GPU-Fast section, but the index is
# left on the CPU (no index_cpu_to_gpu call) for comparison.
index = faiss.index_factory(d, "IVF16384,Flat", faiss.METRIC_INNER_PRODUCT)

# Train on the class vectors themselves, then insert them.
index.train(xb)
index.add(xb)


CPU times: user 3min 17s, sys: 8.62 s, total: 3min 25s
Wall time: 51.8 s

In [25]:
%%time

# Use the same probe count (32) as the GPU-Fast run so the two timings are
# comparable, then run the timed top-1 search.
index.nprobe = 32
D, I = index.search(xq, 1)


CPU times: user 1min 30s, sys: 3.43 s, total: 1min 34s
Wall time: 23.5 s

Evaluate


In [26]:
# Precision@1: the fraction of queries whose top-ranked class is one of the
# query's ground-truth labels.
# Assumes I is the 2-D (queries x k) label array returned by index.search.
# Taking column 0 explicitly avoids calling int() on a length-1 array row
# (deprecated since NumPy 1.25, and an error as soon as k > 1) and matches the
# `item[0]` indexing used in the matmul evaluation.
assert len(I) == len(gt), "number of search results and ground-truth rows differ"

hits = sum(int(label) in gt[i] for i, label in enumerate(I[:, 0]))
p1 = hits / len(gt)

print(f'Precision @ 1: {p1}')


Precision @ 1: 0.3041077012773626

In [31]:
# NOTE(review): leftover debugging cell; its recorded output exposes an
# absolute local path. Consider removing it before sharing the notebook.
pwd


Out[31]:
'/home/elan/Mine/university/inzynierka/mips/tests'