In [1]:
def create_R(ratingsstr = "../data/ml-latest-small/ratings.csv"):
    
    import numpy as np
    import pandas as pd
    
    ratings = pd.read_csv(ratingsstr)
    
    uniqueRatings = np.unique(ratings['movieId'])
    ratings['TrueMovieId'] = ratings['movieId'].map(lambda i: np.argmin(abs(uniqueRatings - i)))
    R = np.zeros([len(np.unique(ratings['userId'])),len(uniqueRatings)])
    R_dict = {"Users": np.empty([0]), "Movies": np.empty([0]), "Ratings": np.empty([0])}
    
    ratingsnp = np.asarray(ratings)
    
    for i in range(len(ratings)):
        R[ratingsnp[i,0]-1, ratingsnp[i,-1]] = ratingsnp[i,2]
        R_dict["Users"] = np.append(R_dict["Users"],ratingsnp[i,0]-1)
        R_dict["Movies"] = np.append(R_dict["Movies"],ratingsnp[i,-1])
        R_dict["Ratings"] = np.append(R_dict["Ratings"],ratingsnp[i,2])
    
    return R, R_dict

In [2]:
R, rd = create_R()


0           30
1          833
2          859
3          906
4          931
5         1017
6         1041
7         1047
8         1083
9         1087
10        1111
11        1140
12        1515
13        1665
14        1708
15        1743
16        1815
17        1962
18        2380
19        2925
20           9
21          16
22          37
23          45
24          48
25          49
26          58
27         100
28         123
29         129
          ... 
99974     3233
99975     3419
99976     3420
99977     3800
99978     3803
99979     3810
99980     3845
99981     3854
99982     3869
99983     3871
99984     3882
99985     3999
99986     4051
99987     4081
99988     4098
99989     4135
99990     4147
99991     4255
99992     4321
99993     4367
99994     4391
99995     4412
99996     4413
99997     4417
99998     4507
99999     4545
100000    4546
100001    4597
100002    4610
100003    4696
Name: TrueMovieId, dtype: int64
/home/marc/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:18: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

In [4]:
R.shape


Out[4]:
(671, 9125)

In [ ]: