notebook.community

Edit and run



In [14]:

    
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
import pandas as pd
import scipy
import csv

matplotlib.style.use('ggplot')
pd.options.display.float_format = '{:20,.2f}'.format
pd.set_option('display.max_columns', 50)



In [4]:

    
test_rating = pd.io.parsers.read_csv('raw/rating_test.csv')



In [5]:

    
content = pd.io.parsers.read_csv('content_recommendations.csv')
content['recs'] = content['recs'].apply(lambda s: map(lambda _s: _s.replace("'","").strip(), s[1:-1].split(',')) )



In [81]:

    
content_no_rating = pd.io.parsers.read_csv('content_recommendations_no_rating.csv')
content_no_rating['recs'] = content_no_rating['recs'].apply(lambda s: map(lambda _s: _s.replace("'","").strip(), s[1:-1].split(',')) )



In [6]:

    
collaborative = pd.io.parsers.read_csv('collaborative_recommendations.csv')
collaborative['recs'] = collaborative['recs'].apply(lambda s: map(lambda _s: _s.replace("'","").strip(), s[1:-1].split(',')) )
collaborative['size'] = collaborative['recs'].apply(lambda l: len(l))
collaborative = collaborative.loc[collaborative['size'] == 10]



In [117]:

    
user_ids = list(collaborative['user_id'].unique())
user_results = pd.DataFrame()

for user_id in user_ids:
    # Get the results of the collaborative filtering recommendations
    recommendations_cf = collaborative[collaborative['user_id']==user_id]['recs'].tolist()[0]
    recommendations_cf = set(map(lambda x: int(x), recommendations_cf))
    
    # print(recommendations_cf)
    # Get the results of the content based recommendations
    recommendations_cb = content[content['user_id']==user_id]['recs'].tolist()[0]
    recommendations_cb = set(map(lambda x: int(x), recommendations_cb))
    
    recommendations_cb_no_rating = content_no_rating[content_no_rating['user_id']==user_id]['recs'].tolist()[0]
    recommendations_cb_no_rating = set(map(lambda x: int(x), recommendations_cb_no_rating))
    
    # Get the test sample for the user
    watched_animes = test_rating['anime_id'][test_rating['user_id'] == user_id].astype(int).tolist()
    watched_animes = set(watched_animes)
    
    
    union = recommendations_cf.union(recommendations_cb)
    inter = recommendations_cf.intersection(recommendations_cb)
    
    # Get the denominator for the collaborative filtering and content based recommendations
    if len(watched_animes) > 10:
        denom = 10
    else:
        denom = len(watched_animes)
    
    if user_id == 32:
        print watched_animes
        print sorted(recommendations_cb)
        print sorted(recommendations_cf)
    
    # Get the 4 results for each user: number of matches for cf, cb, union and intersection
    res_cf = len(watched_animes.intersection(recommendations_cf)) #* 1.0 / denom
    res_cb = len(recommendations_cb.intersection(watched_animes)) #* 1.0 / denom
    res_cb_no_rating = len(recommendations_cb_no_rating.intersection(watched_animes)) #* 1.0 / denom
    res_union = len(union.intersection(watched_animes)) * 1.0 / min(len(union), len(watched_animes))
    if min(len(inter), len(watched_animes)) != 0:  
        res_intersection = len(inter.intersection(watched_animes)) * 1.0 / min(len(inter), len(watched_animes))
    else: 
        res_intersection = 0.0
        
    user_results = user_results.append({
        'user_id': user_id, 
        'res_cb': res_cb, 
        'res_cb_no_rating': res_cb_no_rating,  
        'res_cf':res_cf, 
        'res_union':res_union, 
        'res_inter':res_intersection, 
        'denom':denom, 
        'watched_animes': len(watched_animes)
    }, ignore_index=True)









    



set([120, 249, 317, 1535])
[249, 2707, 4997, 5231, 12175, 15059, 15783, 20045, 21595, 31289]
[121, 164, 199, 430, 512, 513, 528, 1117, 1118, 1943]



In [109]:

    
test_rating['anime_id'][test_rating['user_id'] == 377]









    Out[109]:





[11013, 11111]



In [114]:

    
user_results.loc[user_results['res_union'] > 0.3]









    Out[114]:






  
    
      
      user_id
      res_cb
      res_cf
      res_union
      denom
      res_cb_no_rating
      res_inter
      watched_animes
    
  
  
    
      29
      87.00
      0.00
      4.00
      0.33
      10.00
      0.00
      0.00
      12.00
    
    
      70
      210.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      75.00
    
    
      78
      240.00
      0.00
      3.00
      0.33
      9.00
      0.00
      0.00
      9.00
    
    
      93
      282.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      53.00
    
    
      98
      300.00
      1.00
      6.00
      0.35
      10.00
      1.00
      0.00
      22.00
    
    
      108
      321.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      58.00
    
    
      134
      376.00
      0.00
      2.00
      0.67
      3.00
      0.00
      0.00
      3.00
    
    
      135
      377.00
      0.00
      2.00
      1.00
      2.00
      0.00
      0.00
      2.00
    
    
      142
      391.00
      0.00
      2.00
      0.50
      4.00
      0.00
      0.00
      4.00
    
    
      150
      410.00
      0.00
      2.00
      0.40
      5.00
      0.00
      0.00
      5.00
    
    
      160
      428.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      48.00
    
    
      175
      475.00
      0.00
      8.00
      0.40
      10.00
      0.00
      0.00
      26.00
    
    
      179
      487.00
      0.00
      2.00
      0.50
      4.00
      0.00
      0.00
      4.00
    
    
      192
      513.00
      0.00
      2.00
      0.50
      4.00
      0.00
      0.00
      4.00
    
    
      223
      593.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      50.00
    
    
      257
      660.00
      0.00
      8.00
      0.40
      10.00
      0.00
      0.00
      56.00
    
    
      272
      701.00
      0.00
      3.00
      0.33
      9.00
      0.00
      0.00
      9.00
    
    
      288
      748.00
      1.00
      4.00
      0.31
      10.00
      1.00
      1.00
      13.00
    
    
      291
      755.00
      0.00
      2.00
      0.33
      6.00
      0.00
      0.00
      6.00
    
    
      304
      786.00
      1.00
      8.00
      0.45
      10.00
      1.00
      0.00
      100.00
    
    
      314
      809.00
      0.00
      1.00
      0.33
      3.00
      0.00
      0.00
      3.00
    
    
      323
      836.00
      0.00
      1.00
      0.33
      3.00
      0.00
      0.00
      3.00
    
    
      336
      866.00
      0.00
      3.00
      0.33
      9.00
      0.00
      0.00
      9.00
    
    
      354
      917.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      101.00
    
    
      397
      1,019.00
      0.00
      8.00
      0.40
      10.00
      0.00
      0.00
      115.00
    
    
      399
      1,027.00
      0.00
      10.00
      0.50
      10.00
      0.00
      0.00
      99.00
    
    
      424
      1,074.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      55.00
    
    
      433
      1,103.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      97.00
    
    
      443
      1,123.00
      0.00
      3.00
      0.38
      8.00
      0.00
      0.00
      8.00
    
    
      460
      1,176.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      122.00
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      536
      1,336.00
      0.00
      3.00
      0.60
      5.00
      0.00
      0.00
      5.00
    
    
      538
      1,338.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      117.00
    
    
      541
      1,342.00
      0.00
      1.00
      0.33
      3.00
      0.00
      0.00
      3.00
    
    
      569
      1,417.00
      0.00
      2.00
      0.50
      4.00
      0.00
      0.00
      4.00
    
    
      572
      1,424.00
      0.00
      1.00
      0.33
      3.00
      0.00
      0.00
      3.00
    
    
      581
      1,439.00
      0.00
      1.00
      0.33
      3.00
      0.00
      0.00
      3.00
    
    
      585
      1,450.00
      0.00
      7.00
      0.35
      10.00
      0.00
      0.00
      78.00
    
    
      593
      1,474.00
      0.00
      3.00
      0.33
      9.00
      0.00
      0.00
      9.00
    
    
      614
      1,522.00
      1.00
      6.00
      0.32
      10.00
      1.00
      1.00
      128.00
    
    
      637
      1,589.00
      1.00
      7.00
      0.40
      10.00
      1.00
      0.00
      54.00
    
    
      651
      1,610.00
      0.00
      2.00
      0.67
      3.00
      0.00
      0.00
      3.00
    
    
      682
      1,676.00
      0.00
      3.00
      0.38
      8.00
      0.00
      0.00
      8.00
    
    
      683
      1,678.00
      0.00
      2.00
      0.33
      6.00
      0.00
      0.00
      6.00
    
    
      694
      1,705.00
      0.00
      8.00
      0.40
      10.00
      0.00
      0.00
      51.00
    
    
      713
      1,757.00
      0.00
      4.00
      0.31
      10.00
      0.00
      0.00
      13.00
    
    
      758
      1,870.00
      0.00
      8.00
      0.40
      10.00
      0.00
      0.00
      124.00
    
    
      764
      1,882.00
      0.00
      2.00
      0.40
      5.00
      0.00
      0.00
      5.00
    
    
      771
      1,902.00
      0.00
      2.00
      0.40
      5.00
      0.00
      0.00
      5.00
    
    
      782
      1,927.00
      0.00
      3.00
      0.33
      9.00
      0.00
      0.00
      9.00
    
    
      822
      2,012.00
      1.00
      6.00
      0.35
      10.00
      0.00
      0.00
      39.00
    
    
      846
      2,076.00
      0.00
      4.00
      0.50
      8.00
      0.00
      0.00
      8.00
    
    
      854
      2,095.00
      0.00
      4.00
      0.40
      10.00
      0.00
      0.00
      10.00
    
    
      865
      2,128.00
      0.00
      4.00
      0.44
      9.00
      0.00
      0.00
      9.00
    
    
      876
      2,164.00
      1.00
      3.00
      0.40
      10.00
      1.00
      0.00
      10.00
    
    
      883
      2,187.00
      0.00
      2.00
      0.40
      5.00
      0.00
      0.00
      5.00
    
    
      902
      2,226.00
      0.00
      4.00
      0.36
      10.00
      0.00
      0.00
      11.00
    
    
      912
      2,256.00
      1.00
      6.00
      0.35
      10.00
      1.00
      0.00
      62.00
    
    
      922
      2,286.00
      0.00
      3.00
      0.33
      9.00
      0.00
      0.00
      9.00
    
    
      938
      2,325.00
      0.00
      9.00
      0.45
      10.00
      0.00
      0.00
      81.00
    
    
      945
      2,344.00
      0.00
      5.00
      0.38
      10.00
      0.00
      0.00
      13.00
    
  

68 rows × 8 columns



In [57]:

    
user_results['res_union'].sum() / len(user_results['res_union'])









    Out[57]:





0.14005513690721089



In [125]:

    
user_results.describe()









    Out[125]:






  
    
      
      denom
      res_cb
      res_cb_no_rating
      res_cf
      res_inter
      res_union
      user_id
      watched_animes
    
  
  
    
      count
      955.00
      955.00
      955.00
      955.00
      955.00
      955.00
      955.00
      955.00
    
    
      mean
      9.36
      0.07
      0.04
      2.12
      0.01
      0.14
      1,197.75
      27.01
    
    
      std
      1.65
      0.27
      0.22
      1.76
      0.09
      0.11
      666.22
      22.14
    
    
      min
      2.00
      0.00
      0.00
      0.00
      0.00
      0.00
      5.00
      2.00
    
    
      25%
      10.00
      0.00
      0.00
      1.00
      0.00
      0.05
      629.00
      12.00
    
    
      50%
      10.00
      0.00
      0.00
      2.00
      0.00
      0.12
      1,210.00
      20.00
    
    
      75%
      10.00
      0.00
      0.00
      3.00
      0.00
      0.20
      1,760.00
      35.00
    
    
      max
      10.00
      3.00
      3.00
      10.00
      1.00
      1.00
      2,368.00
      129.00



In [134]:

    
data=user_results['res_cf'].sort_values().plot(kind="hist", bins=10, label="Collaborative")
#y,binEdges=np.histogram(data,bins=10)
#bincenters = 0.5*(binEdges[1:]+binEdges[:-1])
#plt.plot(bincenters,y,'-')

data=user_results['res_cb'].sort_values().plot(kind="hist", bins=10, label="Content", figsize=(10,7))
#y,binEdges=np.histogram(data,bins=10)
#bincenters = 0.5*(binEdges[1:]+binEdges[:-1])
#plt.plot(bincenters,y,'-')

plt.xlabel('number of accurate recommendations')
plt.ylabel('number of users')
plt.legend()
plt.xticks(np.arange(0,10))
plt.show()



In [141]:

    
data=user_results['res_union'].sort_values()
y,binEdges=np.histogram(data,bins=10)
bincenters = 0.5*(binEdges[1:]+binEdges[:-1])
plt.figure(figsize=(10,7))
plt.plot(bincenters,y,'-')

plt.xlabel('recommendation accuracy')
plt.ylabel('number of users')
plt.show()

	user_id	res_cb	res_cf	res_union	denom	res_cb_no_rating	res_inter	watched_animes
29	87.00	0.00	4.00	0.33	10.00	0.00	0.00	12.00
70	210.00	0.00	7.00	0.35	10.00	0.00	0.00	75.00
78	240.00	0.00	3.00	0.33	9.00	0.00	0.00	9.00
93	282.00	0.00	7.00	0.35	10.00	0.00	0.00	53.00
98	300.00	1.00	6.00	0.35	10.00	1.00	0.00	22.00
108	321.00	0.00	7.00	0.35	10.00	0.00	0.00	58.00
134	376.00	0.00	2.00	0.67	3.00	0.00	0.00	3.00
135	377.00	0.00	2.00	1.00	2.00	0.00	0.00	2.00
142	391.00	0.00	2.00	0.50	4.00	0.00	0.00	4.00
150	410.00	0.00	2.00	0.40	5.00	0.00	0.00	5.00
160	428.00	0.00	7.00	0.35	10.00	0.00	0.00	48.00
175	475.00	0.00	8.00	0.40	10.00	0.00	0.00	26.00
179	487.00	0.00	2.00	0.50	4.00	0.00	0.00	4.00
192	513.00	0.00	2.00	0.50	4.00	0.00	0.00	4.00
223	593.00	0.00	7.00	0.35	10.00	0.00	0.00	50.00
257	660.00	0.00	8.00	0.40	10.00	0.00	0.00	56.00
272	701.00	0.00	3.00	0.33	9.00	0.00	0.00	9.00
288	748.00	1.00	4.00	0.31	10.00	1.00	1.00	13.00
291	755.00	0.00	2.00	0.33	6.00	0.00	0.00	6.00
304	786.00	1.00	8.00	0.45	10.00	1.00	0.00	100.00
314	809.00	0.00	1.00	0.33	3.00	0.00	0.00	3.00
323	836.00	0.00	1.00	0.33	3.00	0.00	0.00	3.00
336	866.00	0.00	3.00	0.33	9.00	0.00	0.00	9.00
354	917.00	0.00	7.00	0.35	10.00	0.00	0.00	101.00
397	1,019.00	0.00	8.00	0.40	10.00	0.00	0.00	115.00
399	1,027.00	0.00	10.00	0.50	10.00	0.00	0.00	99.00
424	1,074.00	0.00	7.00	0.35	10.00	0.00	0.00	55.00
433	1,103.00	0.00	7.00	0.35	10.00	0.00	0.00	97.00
443	1,123.00	0.00	3.00	0.38	8.00	0.00	0.00	8.00
460	1,176.00	0.00	7.00	0.35	10.00	0.00	0.00	122.00
...	...	...	...	...	...	...	...	...
536	1,336.00	0.00	3.00	0.60	5.00	0.00	0.00	5.00
538	1,338.00	0.00	7.00	0.35	10.00	0.00	0.00	117.00
541	1,342.00	0.00	1.00	0.33	3.00	0.00	0.00	3.00
569	1,417.00	0.00	2.00	0.50	4.00	0.00	0.00	4.00
572	1,424.00	0.00	1.00	0.33	3.00	0.00	0.00	3.00
581	1,439.00	0.00	1.00	0.33	3.00	0.00	0.00	3.00
585	1,450.00	0.00	7.00	0.35	10.00	0.00	0.00	78.00
593	1,474.00	0.00	3.00	0.33	9.00	0.00	0.00	9.00
614	1,522.00	1.00	6.00	0.32	10.00	1.00	1.00	128.00
637	1,589.00	1.00	7.00	0.40	10.00	1.00	0.00	54.00
651	1,610.00	0.00	2.00	0.67	3.00	0.00	0.00	3.00
682	1,676.00	0.00	3.00	0.38	8.00	0.00	0.00	8.00
683	1,678.00	0.00	2.00	0.33	6.00	0.00	0.00	6.00
694	1,705.00	0.00	8.00	0.40	10.00	0.00	0.00	51.00
713	1,757.00	0.00	4.00	0.31	10.00	0.00	0.00	13.00
758	1,870.00	0.00	8.00	0.40	10.00	0.00	0.00	124.00
764	1,882.00	0.00	2.00	0.40	5.00	0.00	0.00	5.00
771	1,902.00	0.00	2.00	0.40	5.00	0.00	0.00	5.00
782	1,927.00	0.00	3.00	0.33	9.00	0.00	0.00	9.00
822	2,012.00	1.00	6.00	0.35	10.00	0.00	0.00	39.00
846	2,076.00	0.00	4.00	0.50	8.00	0.00	0.00	8.00
854	2,095.00	0.00	4.00	0.40	10.00	0.00	0.00	10.00
865	2,128.00	0.00	4.00	0.44	9.00	0.00	0.00	9.00
876	2,164.00	1.00	3.00	0.40	10.00	1.00	0.00	10.00
883	2,187.00	0.00	2.00	0.40	5.00	0.00	0.00	5.00
902	2,226.00	0.00	4.00	0.36	10.00	0.00	0.00	11.00
912	2,256.00	1.00	6.00	0.35	10.00	1.00	0.00	62.00
922	2,286.00	0.00	3.00	0.33	9.00	0.00	0.00	9.00
938	2,325.00	0.00	9.00	0.45	10.00	0.00	0.00	81.00
945	2,344.00	0.00	5.00	0.38	10.00	0.00	0.00	13.00

	denom	res_cb	res_cb_no_rating	res_cf	res_inter	res_union	user_id	watched_animes
count	955.00	955.00	955.00	955.00	955.00	955.00	955.00	955.00
mean	9.36	0.07	0.04	2.12	0.01	0.14	1,197.75	27.01
std	1.65	0.27	0.22	1.76	0.09	0.11	666.22	22.14
min	2.00	0.00	0.00	0.00	0.00	0.00	5.00	2.00
25%	10.00	0.00	0.00	1.00	0.00	0.05	629.00	12.00
50%	10.00	0.00	0.00	2.00	0.00	0.12	1,210.00	20.00
75%	10.00	0.00	0.00	3.00	0.00	0.20	1,760.00	35.00
max	10.00	3.00	3.00	10.00	1.00	1.00	2,368.00	129.00