In [1]:
import pickle
import faiss
In [3]:
def load_data(path='movies.pickle'):
    """Load and return the object pickled in the file at ``path``.

    Args:
        path: Location of the pickle file. Defaults to ``'movies.pickle'``
            (resolved against the current working directory), which is the
            file this notebook has always read.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only point this at pickle files that come from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)
# Load the dataset once into `data`; the cells below read data["vector"] and data["name"].
data = load_data()
# Bare expression so the notebook renders the loaded object as this cell's output.
data
Out[3]:
In [5]:
class IVPQIndex():
    """Approximate nearest-neighbour lookup over labelled vectors using ``faiss.IndexIVFPQ``.

    Typical use: construct with the vectors and their labels, call ``build()``
    once, then call ``query()`` to get the labels of the nearest stored vectors.
    """

    def __init__(self, vectors, labels):
        """Keep a float32 copy of ``vectors`` together with their ``labels``.

        Args:
            vectors: Array exposing ``.shape`` and ``.astype``; ``shape[1]`` is
                used as the vector dimensionality, so it must be 2-D.
            labels: Indexable container; ``labels[i]`` is what ``query`` returns
                for the i-th stored vector.
        """
        # The attribute keeps its original (misspelled) name so existing readers
        # of ``.dimention`` keep working.
        self.dimention = vectors.shape[1]
        # faiss operates on float32 data, hence the cast.
        self.vectors = vectors.astype('float32')
        self.labels = labels
        # Populated by build(); None lets query() report a clear error before that.
        self.index = None

    def build(self, number_of_partition=8, search_in_x_partitions=2, subvector_size=8):
        """Create, train and fill the faiss index from the stored vectors.

        The three arguments are forwarded positionally to
        ``faiss.IndexIVFPQ(quantizer, d, nlist, M, nbits)``:

        Args:
            number_of_partition: Passed as ``nlist``, the number of inverted-list
                cells the vectors are partitioned into.
            search_in_x_partitions: Passed as ``M``, the number of sub-quantizers
                each vector is split into (the dimensionality must be divisible
                by it).
            subvector_size: Passed as ``nbits``, the number of bits used to encode
                each sub-vector.

        NOTE(review): the last two parameter names do not match what faiss does
        with the values. In particular nothing here sets ``index.nprobe``, so the
        number of cells visited per query stays at the faiss default. The call is
        left as it was so existing results do not change; confirm the intended
        mapping before renaming or rewiring.
        """
        # Coarse quantizer that assigns vectors to cells using exact L2 distance.
        quantizer = faiss.IndexFlatL2(self.dimention)
        self.index = faiss.IndexIVFPQ(quantizer,
                                      self.dimention,
                                      number_of_partition,
                                      search_in_x_partitions,
                                      subvector_size)
        # The index has to be trained before vectors can be added to it.
        self.index.train(self.vectors)
        self.index.add(self.vectors)

    def query(self, vectors, k=10):
        """Return the labels of up to ``k`` stored vectors nearest to the query.

        Args:
            vectors: The query, either a single 1-D vector or a 2-D array with
                one query per row. Only the first row's results are returned.
            k: Maximum number of neighbours to return.

        Returns:
            List of labels ordered as faiss returned them. It can hold fewer than
            ``k`` entries when the visited cells contain fewer than ``k`` vectors.

        Raises:
            RuntimeError: If ``build()`` has not been called yet.
        """
        # Local import: the notebook's import cell does not bring numpy into scope.
        import numpy as np

        if getattr(self, 'index', None) is None:
            raise RuntimeError("IVPQIndex.build() must be called before query()")

        # Same float32 conversion __init__ applies to the stored vectors; a lone
        # 1-D vector is promoted to a one-row batch.
        queries = np.ascontiguousarray(np.atleast_2d(vectors), dtype='float32')
        _, indices = self.index.search(queries, k)
        # faiss pads missing results with -1; indexing labels with -1 would
        # silently return the last label, so those slots are dropped.
        return [self.labels[i] for i in indices[0] if i >= 0]
In [13]:
# Pair every movie vector with its title, then train and populate the faiss
# index using build()'s default settings.
index = IVPQIndex(vectors=data["vector"], labels=data["name"])
index.build()
In [14]:
# Position of the query movie inside data['vector'] / data['name'].
movie_index = 90
# A length-1 slice rather than a single element, so the query stays a batch of
# one row — presumably shape (1, dim), since IVPQIndex reads vectors.shape[1]; TODO confirm.
movie_vector = data['vector'][movie_index:movie_index+1]
# NOTE(review): "simillar" in the message below is a typo for "similar"; it is
# left untouched here because the string is runtime output.
print(f"The most simillar movies to {data['name'][movie_index]} are:")
# Last expression of the cell: the labels returned for the query are rendered as its output.
index.query(movie_vector)
Out[14]: