Comparison of two algorithms

In this notebook, we will see how to compare the prediction accuracy of two algorithms.


In [1]:
from __future__ import (absolute_import, division, print_function,             
                        unicode_literals)                                      
import pickle
import os

import pandas as pd

from surprise import SVD
from surprise import KNNBasic
from surprise import Dataset                                                     
from surprise import Reader                                                      
from surprise import dump
from surprise.accuracy import rmse

In [2]:
# We will train and test on the u1.base and u1.test files of the movielens-100k dataset.
# If you haven't already downloaded the movielens-100k dataset, you can do it manually
# or by running:

# Dataset.load_builtin('ml-100k')

# Now, let's load the dataset
train_file = os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u1.base'
test_file = os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u1.test'
data = Dataset.load_from_folds([(train_file, test_file)], Reader('ml-100k'))

                
# We'll use the well-known SVD algorithm and a basic nearest neighbors approach.
algo_svd = SVD()                                                       
algo_knn = KNNBasic()

for trainset, testset in data.folds(): 
    algo_svd.fit(trainset)                             
    predictions_svd = algo_svd.test(testset)
    
    algo_knn.fit(trainset)
    predictions_knn = algo_knn.test(testset)
    
    rmse(predictions_svd)
    rmse(predictions_knn)                                                                           
    
    dump.dump('./dump_SVD', predictions_svd, algo_svd)
    dump.dump('./dump_KNN', predictions_knn, algo_knn)


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9500
RMSE: 0.9889

In [4]:
# The dumps have been saved and we can now use them whenever we want.

predictions_svd, algo_svd = dump.load('./dump_SVD')
predictions_knn, algo_knn = dump.load('./dump_KNN')

df_svd = pd.DataFrame(predictions_svd, columns=['uid', 'iid', 'rui', 'est', 'details'])    
df_knn = pd.DataFrame(predictions_knn, columns=['uid', 'iid', 'rui', 'est', 'details'])    

df_svd['err'] = abs(df_svd.est - df_svd.rui)
df_knn['err'] = abs(df_knn.est - df_knn.rui)

We now have two dataframes with all the predictions for each algorithm. The cool thing is that, as both algorithms have been tested on the same testset, the indexes of the two dataframes are the same!
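
Since both prediction lists come from the same test set in the same order, a quick sanity check can confirm the alignment (a minimal sketch, not part of the original notebook; it assumes the two dataframes defined above):

# The uid, iid and rui columns should be identical row for row in both dataframes
assert df_svd[['uid', 'iid', 'rui']].equals(df_knn[['uid', 'iid', 'rui']])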


In [5]:
df_svd.head()


Out[5]:
uid iid rui est details err
0 1 6 5.0 3.300098 {'was_impossible': False} 1.699902
1 1 10 3.0 3.673812 {'was_impossible': False} 0.673812
2 1 12 5.0 4.746471 {'was_impossible': False} 0.253529
3 1 14 5.0 4.031972 {'was_impossible': False} 0.968028
4 1 17 3.0 2.744526 {'was_impossible': False} 0.255474

In [6]:
df_knn.head()


Out[6]:
uid iid rui est details err
0 1 6 5.0 3.468613 {'was_impossible': False, 'actual_k': 20} 1.531387
1 1 10 3.0 3.866290 {'was_impossible': False, 'actual_k': 40} 0.866290
2 1 12 5.0 4.538194 {'was_impossible': False, 'actual_k': 40} 0.461806
3 1 14 5.0 4.235741 {'was_impossible': False, 'actual_k': 40} 0.764259
4 1 17 3.0 3.228002 {'was_impossible': False, 'actual_k': 40} 0.228002

In [7]:
# Let's check how good the KNN predictions are when SVD has a huge error:
df_knn[df_svd.err >= 3.5]


Out[7]:
uid iid rui est details err
4198 89 221 1.0 3.766186 {'was_impossible': False, 'actual_k': 40} 2.766186
7385 167 48 1.0 4.259556 {'was_impossible': False, 'actual_k': 40} 3.259556
7390 167 169 1.0 4.664991 {'was_impossible': False, 'actual_k': 40} 3.664991
13972 295 183 1.0 4.202611 {'was_impossible': False, 'actual_k': 40} 3.202611
15306 312 265 1.0 4.131875 {'was_impossible': False, 'actual_k': 40} 3.131875
19140 405 575 5.0 2.410506 {'was_impossible': False, 'actual_k': 36} 2.589494

In [8]:
# Well... Not much better.
# Now, let's look at the predictions of SVD on the 10 worst predictions for KNN
df_svd.iloc[df_knn.sort_values(by='err')[-10:].index]


Out[8]:
uid iid rui est details err
9406 208 302 1.0 4.326863 {'was_impossible': False} 3.326863
19089 405 169 1.0 3.039247 {'was_impossible': False} 2.039247
19785 436 132 1.0 4.243595 {'was_impossible': False} 3.243595
157 2 315 1.0 4.260016 {'was_impossible': False} 3.260016
8503 193 56 1.0 4.129742 {'was_impossible': False} 3.129742
5531 113 976 5.0 3.320028 {'was_impossible': False} 1.679972
7917 181 408 1.0 2.962557 {'was_impossible': False} 1.962557
7390 167 169 1.0 4.720954 {'was_impossible': False} 3.720954
7412 167 1306 5.0 3.350088 {'was_impossible': False} 1.649912
5553 114 1104 5.0 3.070772 {'was_impossible': False} 1.929228

In [9]:
# How different are the predictions from both algorithms?
# Let's count the number of predictions for each rating value

import matplotlib.pyplot as plt
import matplotlib
%matplotlib notebook
matplotlib.style.use('ggplot')

figure, (ax1, ax2) = plt.subplots(1, 2)

df_svd.est.plot(kind='hist', title='SVD', ax=ax1)
df_knn.est.plot(kind='hist', title='KNN', ax=ax2)

# As expected, one of the drawbacks of the NN algorithms is that their predictions are often
# quite concentrated around the mean. The SVD algorithm seems more comfortable predicting extreme rating values.


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff756a46ef0>
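
To put a rough number on this concentration effect, we could compare the spread of the estimated ratings (a small sketch using the same dataframes as above; the 4.5 cutoff for "extreme" predictions is an arbitrary choice, not from the original notebook):

# Standard deviation of the estimates, and share of predictions above 4.5
print(df_svd.est.std(), df_knn.est.std())
print((df_svd.est > 4.5).mean(), (df_knn.est > 4.5).mean())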

In [11]:
# Question: when a user has rated only a small number of items (fewer than 10), which algorithm
# gives the better predictions on average?

def get_Iu(uid):
    """Return the number of items rated by given user
    
    Args:
        uid: The raw id of the user.
    Returns:
        The number of items rated by the user.
    """
    
    try:
        return len(trainset.ur[trainset.to_inner_uid(uid)])
    except ValueError:  # user was not part of the trainset
        return 0
    
df_knn['Iu'] = df_knn.uid.apply(get_Iu)
df_svd['Iu'] = df_svd.uid.apply(get_Iu)

df_knn[df_knn.Iu < 10].err.mean(), df_svd[df_svd.Iu < 10].err.mean()


Out[11]:
(1.0382962702232326, 1.0130235152149263)
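
To see whether this gap persists as users rate more items, we could bin the predictions by Iu (a sketch only; the bin edges are arbitrary assumptions, not part of the original notebook):

# Mean error per bucket of number of rated items, for each algorithm
bins = [0, 10, 50, 200, 1000]
print(df_svd.groupby(pd.cut(df_svd.Iu, bins)).err.mean())
print(df_knn.groupby(pd.cut(df_knn.Iu, bins)).err.mean())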