In [1]:
import pickle
import faiss
In [2]:
def load_data(path='movies.pickle'):
    """Load and return the object stored in a pickle file.

    Args:
        path: Location of the pickle file to read. Defaults to
            ``'movies.pickle'`` (resolved against the current working
            directory), which is the file this notebook originally
            hard-coded, so existing ``load_data()`` calls behave as before.

    Returns:
        The object that was serialized into the file, unchanged.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        pickle.UnpicklingError: If the file content is not a valid pickle
            (other exceptions are possible depending on the payload).
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only point this at pickle files from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)
# Load the pickled object once and keep it in the notebook-level name `data`.
# NOTE(review): the commented-out cells further down index it as
# data["vector"] and data["name"], so it is presumably a mapping or
# DataFrame-like object with those keys — confirm against movies.pickle.
data = load_data()
# Bare last expression: Jupyter renders the whole loaded object as the cell output.
data
Out[2]:
In [3]:
# class FalconIndex():
# def __init__(self, vectors, labels):
# self.dimention = vectors.shape[1]
# self.vectors = vectors.astype('float32')
# self.labels = labels
# def build(self, number_of_partition=8, search_in_x_partitions=2, subvector_size=8):
# quantizer = faiss.IndexFlatL2(self.dimention)
# self.index = faiss.IndexIVFPQ(quantizer, self.dimention, number_of_partition, search_in_x_partitions, subvector_size)
# self.index.train(self.vectors)
# self.index.add(self.vectors)
# def query(self, vectors, k=10):
# distances, indices = self.index.search(vectors, k)
# return [self.labels[i] for i in indices[0]]
# # https://github.com/erikbern/ann-benchmarks/commit/ecc56def165234fbec830fd1eed44396a1a52c49
# https://github.com/nmslib/nmslib/tree/master/python_bindings
In [4]:
# index = FalconIndex(data["vector"], data["name"])
# index.build()
In [5]:
# movie_vector, movie_name = data['vector'][90:91], data['name'][90]
# similar_movie_names = '\n* '.join(index.query(movie_vector))
# print(f"The most similar movies to {movie_name} are:\n* {similar_movie_names}")
In [ ]: