In [1]:
from tensorglue.engine import RecommenderData
from tensorglue.tools.movielens import get_movielens_data
from tensorglue.tools.printing import print_frames

Get data


In [2]:
# Load the MovieLens data; with get_genres=True the result is unpacked into
# two frames: the ratings (ml_data) and the movie/genre table (ml_genres).
ml_data, ml_genres = get_movielens_data(get_genres=True)
# If you have a local copy of the MovieLens data, you can load it instead with:
# get_movielens_data(local_file="full_path_to_local_file", get_genres=True)

In [3]:
# Preview the first 10 rows of each frame to check the column layout.
print_frames((ml_data.head(10), ml_genres.head(10)))


Out[3]:
userid movieid rating
0 1 122 5
1 1 185 5
2 1 231 5
3 1 292 5
4 1 316 5
5 1 329 5
6 1 355 5
7 1 356 5
8 1 362 5
9 1 364 5
movieid movienm genreid
0 1 Toy Story (1995) Adventure
1 1 Toy Story (1995) Animation
2 1 Toy Story (1995) Children
3 1 Toy Story (1995) Comedy
4 1 Toy Story (1995) Fantasy
5 2 Jumanji (1995) Adventure
6 2 Jumanji (1995) Children
7 2 Jumanji (1995) Fantasy
8 3 Grumpier Old Men (1995) Comedy
9 3 Grumpier Old Men (1995) Romance

Models with no context

PureSVD


In [4]:
# Wrap the ratings frame for the context-free models. The three strings are
# column names of ml_data (userid / movieid / rating, as in the printed preview).
simple_model = RecommenderData(ml_data, 'userid', 'movieid', 'rating')

In [5]:
# NOTE(review): _prepare_data is underscore-prefixed, i.e. private by Python
# convention -- check whether RecommenderData exposes a public equivalent.
# The recorded run logs "Unseen items found in the test set. Dropping...", so
# a test set is involved at this step; eval_num=3 presumably sets the number
# of held-out items per user -- TODO confirm.
simple_model._prepare_data(eval_num=3)


Unseen items found in the test set. Dropping...

In [6]:
# Train the 'svd' model (the PureSVD baseline of this section).
simple_model.train_model('svd')

In [7]:
# Evaluate the model trained in the previous cell; the call returns an integer.
# NOTE(review): the metric behind this number is not shown in the notebook --
# presumably a count of correct recommendations; confirm.
simple_model.evaluate()


Out[7]:
8284

Item-to-item


In [8]:
# Retrain the same context-free data object with the 'i2i' (item-to-item) model.
simple_model.train_model('i2i')

In [9]:
# Evaluate the item-to-item model; the integer result is on the same scale as
# the PureSVD evaluation, since both come from simple_model.evaluate().
# NOTE(review): the metric behind this number is not shown here -- confirm.
simple_model.evaluate()


Out[9]:
8882

Models with context

Contextualized SVD


In [8]:
# Build the context-aware dataset: same ratings frame and column names as the
# context-free model, plus the genre table as context. The movie title column
# ('movienm') is dropped, so only 'movieid' and 'genreid' are passed as context.
# `axis=1` is given by keyword: the positional form drop('movienm', 1) is
# deprecated since pandas 1.3 and raises TypeError from pandas 2.0.
context_model = RecommenderData(ml_data, 'userid', 'movieid', 'rating',
                                context_data=ml_genres.drop('movienm', axis=1))

In [9]:
# Choose how context is arranged before the data is prepared. With
# `fields.values`, the following _prepare_data call reports
# "Summarizing contextual values" in the recorded run.
context_model.arrange_by = context_model.fields.values

In [10]:
# NOTE(review): _prepare_data is underscore-prefixed, i.e. private by Python
# convention -- check whether RecommenderData exposes a public equivalent.
# eval_num=3 matches the value used for the context-free model;
# its exact meaning is not shown here -- TODO confirm.
context_model._prepare_data(eval_num=3)


Unseen items found in the test set. Dropping...
Summarizing contextual values

In [11]:
# Train the 'svd' model on the context-aware data object (the
# "Contextualized SVD" variant of this section).
context_model.train_model('svd')

In [12]:
# Evaluate the contextualized SVD model; the call returns an integer.
# NOTE(review): the metric behind this number is not shown here -- confirm
# it is comparable with the context-free results before comparing them.
context_model.evaluate()


Out[12]:
11419

Tensor Factorization


In [13]:
# Switch the arrangement to `fields.contextid` for the tensor model. With this
# setting, the following _prepare_data call reports
# "Maximizing contextual values" in the recorded run.
context_model.arrange_by = context_model.fields.contextid

In [14]:
# Re-run data preparation after changing arrange_by.
# NOTE(review): _prepare_data is underscore-prefixed, i.e. private by Python
# convention -- check whether RecommenderData exposes a public equivalent.
context_model._prepare_data(eval_num=3)


Unseen items found in the test set. Dropping...
Maximizing contextual values

In [15]:
# Train the 'tensor' (tensor factorization) model. Training is iterative: the
# recorded log counts steps "of 25" and stops early at step 4 with
# "Core is no longer growing."
context_model.train_model('tensor')


Step 1 of 25
growth of the core: 1.000000
Step 2 of 25
growth of the core: 0.121917
Step 3 of 25
growth of the core: 0.005939
Step 4 of 25
growth of the core: 0.000797
Core is no longer growing.
Done

In [16]:
# Evaluate the tensor factorization model; the call returns an integer.
# NOTE(review): the metric behind this number is not shown here -- confirm.
context_model.evaluate()


Out[16]:
13014