In [6]:
import pickle
import nmslib

In [7]:
def load_data():
    with open('movies.pickle', 'rb') as f:
        data = pickle.load(f)
    return data

data = load_data()
data


Out[7]:
{'name': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'vector': array([[-0.01780608, -0.14265831,  0.10308606, ...,  0.09659795,
         -0.17529577, -0.03061521],
        [-0.03357764,  0.16418771,  0.21801303, ...,  0.16502103,
         -0.09166156,  0.05047869],
        [-0.2761452 , -0.01991325, -0.04969981, ...,  0.0258275 ,
         -0.08328608, -0.0152858 ],
        ...,
        [ 0.05142734, -0.01683608, -0.20441587, ...,  0.00045828,
          0.14679626,  0.2462584 ],
        [ 0.04491899, -0.02819411, -0.09472758, ..., -0.02152078,
          0.16223577,  0.19897607],
        [ 0.02531924,  0.03099714,  0.06437534, ..., -0.07260127,
          0.0467432 ,  0.07893164]], dtype=float32)}

In [17]:
class NMSLIBIndex():
    def __init__(self, vectors, labels):
        self.dimention = vectors.shape[1]
        self.vectors = vectors.astype('float32')
        self.labels = labels

    def build(self):
        self.index = nmslib.init(method='hnsw', space='cosinesimil')
        self.index.addDataPointBatch(self.vectors)
        self.index.createIndex({'post': 2})
        
    def query(self, vector, k=10):
        indices = self.index.knnQuery(vector, k=k)
        return [self.labels[i] for i in indices[0]]

In [18]:
index = NMSLIBIndex(data["vector"], data["name"])
index.build()

In [20]:
movie_vector, movie_name = data['vector'][90], data['name'][90]
simlar_movies_names = '\n* '.join(index.query(movie_vector))
print(f"The most similar movies to {movie_name} are:\n* {simlar_movies_names}")


The most similar movies to Nightmare Before Christmas, The (1993) are:
* Nightmare Before Christmas, The (1993)
* Beauty and the Beast (1991)
* Fantasia (1940)
* Heavy Metal (1981)
* Aladdin (1992)
* Snow White and the Seven Dwarfs (1937)
* Batman (1989)
* James and the Giant Peach (1996)
* Blade Runner (1982)
* Aliens (1986)