BUILDING A RECOMMENDER SYSTEM ON USER-USER COLLABORATIVE FILTERING (MOVIELENS DATASET)

We will load the data sets firsts.



In [17]:

    
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import math

#column headers for the dataset
data_cols = ['user id','movie id','rating','timestamp']
item_cols = ['movie id','movie title','release date','video release date','IMDb URL','unknown','Action',
'Adventure','Animation','Childrens','Comedy','Crime','Documentary','Drama','Fantasy','Film-Noir','Horror',
'Musical','Mystery','Romance ','Sci-Fi','Thriller','War' ,'Western']
user_cols = ['user id','age','gender','occupation','zip code']

#importing the data files onto dataframes
data_df = pd.read_csv('ml-100k/u.data', sep='\t', names=data_cols, encoding='latin-1')
item_df = pd.read_csv('ml-100k/u.item', sep='|', names=item_cols, encoding='latin-1')
user_df = pd.read_csv('ml-100k/u.user', sep='|', names=user_cols, encoding='latin-1')

#dropping unecessary columns
#Voting Timestamp - Removed
data_df.drop(data_df.columns[[3]], axis = 1, inplace = True)
#Movie Title, Video Release Date and IMDB URL - Removed
item_df.drop(item_df.columns[[1,3,4]], axis = 1, inplace = True)
#Occupation and Zip Code - Removed
user_df.drop(user_df.columns[[3,4]], axis = 1, inplace = True)



In [18]:

    
print(data_df.head())









    



   user id  movie id  rating
0      196       242       3
1      186       302       3
2       22       377       1
3      244        51       2
4      166       346       1



In [19]:

    
print(item_df.head())









    



   movie id release date  unknown  Action  Adventure  Animation  Childrens  \
0         1  01-Jan-1995        0       0          0          1          1   
1         2  01-Jan-1995        0       1          1          0          0   
2         3  01-Jan-1995        0       0          0          0          0   
3         4  01-Jan-1995        0       1          0          0          0   
4         5  01-Jan-1995        0       0          0          0          0   

   Comedy  Crime  Documentary   ...     Fantasy  Film-Noir  Horror  Musical  \
0       1      0            0   ...           0          0       0        0   
1       0      0            0   ...           0          0       0        0   
2       0      0            0   ...           0          0       0        0   
3       1      0            0   ...           0          0       0        0   
4       0      1            0   ...           0          0       0        0   

   Mystery  Romance   Sci-Fi  Thriller  War  Western  
0        0         0       0         0    0        0  
1        0         0       0         1    0        0  
2        0         0       0         1    0        0  
3        0         0       0         0    0        0  
4        0         0       0         1    0        0  

[5 rows x 21 columns]



In [20]:

    
#Ajust release date to get only the year
item_df['release date'] = pd.to_datetime(item_df['release date'], errors='coerce').dt.year



In [21]:

    
print(item_df.head())









    



   movie id  release date  unknown  Action  Adventure  Animation  Childrens  \
0         1        1995.0        0       0          0          1          1   
1         2        1995.0        0       1          1          0          0   
2         3        1995.0        0       0          0          0          0   
3         4        1995.0        0       1          0          0          0   
4         5        1995.0        0       0          0          0          0   

   Comedy  Crime  Documentary   ...     Fantasy  Film-Noir  Horror  Musical  \
0       1      0            0   ...           0          0       0        0   
1       0      0            0   ...           0          0       0        0   
2       0      0            0   ...           0          0       0        0   
3       1      0            0   ...           0          0       0        0   
4       0      1            0   ...           0          0       0        0   

   Mystery  Romance   Sci-Fi  Thriller  War  Western  
0        0         0       0         0    0        0  
1        0         0       0         1    0        0  
2        0         0       0         1    0        0  
3        0         0       0         0    0        0  
4        0         0       0         1    0        0  

[5 rows x 21 columns]



In [22]:

    
print(user_df.head())









    



   user id  age gender
0        1   24      M
1        2   53      F
2        3   23      M
3        4   24      M
4        5   33      F



In [23]:

    
#Convert Gender column to numeric
user_df['gender'].replace('F', 1,inplace=True)
user_df['gender'].replace('M', 2,inplace=True)



In [24]:

    
#Adjust columns replacing NaN with the mean
meanYear = int(round(item_df['release date'].mean()))
print(meanYear)



In [25]:

    
item_df['release date'] = item_df['release date'].fillna(meanYear)



In [26]:

    
print(item_df['release date'].hasnans)









    



False



In [27]:

    
#merge it all
data_item = pd.merge(data_df, item_df, left_on = "movie id", right_on = "movie id")
data_item_user = pd.merge(data_item, user_df, left_on = "user id", right_on = "user id")
dataset = data_item_user



In [28]:

    
print(dataset.head())









    



   user id  movie id  rating  release date  unknown  Action  Adventure  \
0      196       242       3        1997.0        0       0          0   
1      196       257       2        1997.0        0       1          1   
2      196       111       4        1996.0        0       0          0   
3      196        25       4        1996.0        0       0          0   
4      196       382       4        1994.0        0       0          0   

   Animation  Childrens  Comedy   ...    Horror  Musical  Mystery  Romance   \
0          0          0       1   ...         0        0        0         0   
1          0          0       1   ...         0        0        0         0   
2          0          0       1   ...         0        0        0         1   
3          0          0       1   ...         0        0        0         0   
4          0          0       1   ...         0        0        0         0   

   Sci-Fi  Thriller  War  Western  age  gender  
0       0         0    0        0   49       2  
1       1         0    0        0   49       2  
2       0         0    0        0   49       2  
3       0         0    0        0   49       2  
4       0         0    0        0   49       2  

[5 rows x 25 columns]



In [29]:

    
# Data distribution
display(dataset.describe())









    







  
    
      
      user id
      movie id
      rating
      release date
      unknown
      Action
      Adventure
      Animation
      Childrens
      Comedy
      ...
      Horror
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
      age
      gender
    
  
  
    
      count
      100000.00000
      100000.000000
      100000.000000
      100000.000000
      100000.0000
      100000.000000
      100000.000000
      100000.000000
      100000.000000
      100000.000000
      ...
      100000.000000
      100000.000000
      100000.000000
      100000.000000
      100000.00000
      100000.00000
      100000.000000
      100000.000000
      100000.000000
      100000.000000
    
    
      mean
      462.48475
      425.530130
      3.529860
      1987.956310
      0.0001
      0.255890
      0.137530
      0.036050
      0.071820
      0.298320
      ...
      0.053170
      0.049540
      0.052450
      0.194610
      0.12730
      0.21872
      0.093980
      0.018540
      32.969850
      1.742600
    
    
      std
      266.61442
      330.798356
      1.125674
      14.154889
      0.0100
      0.436362
      0.344408
      0.186416
      0.258191
      0.457523
      ...
      0.224373
      0.216994
      0.222934
      0.395902
      0.33331
      0.41338
      0.291802
      0.134894
      11.562623
      0.437204
    
    
      min
      1.00000
      1.000000
      1.000000
      1922.000000
      0.0000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.00000
      0.00000
      0.000000
      0.000000
      7.000000
      1.000000
    
    
      25%
      254.00000
      175.000000
      3.000000
      1986.000000
      0.0000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.00000
      0.00000
      0.000000
      0.000000
      24.000000
      1.000000
    
    
      50%
      447.00000
      322.000000
      4.000000
      1994.000000
      0.0000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.00000
      0.00000
      0.000000
      0.000000
      30.000000
      2.000000
    
    
      75%
      682.00000
      631.000000
      4.000000
      1996.000000
      0.0000
      1.000000
      0.000000
      0.000000
      0.000000
      1.000000
      ...
      0.000000
      0.000000
      0.000000
      0.000000
      0.00000
      0.00000
      0.000000
      0.000000
      40.000000
      2.000000
    
    
      max
      943.00000
      1682.000000
      5.000000
      1998.000000
      1.0000
      1.000000
      1.000000
      1.000000
      1.000000
      1.000000
      ...
      1.000000
      1.000000
      1.000000
      1.000000
      1.00000
      1.00000
      1.000000
      1.000000
      73.000000
      2.000000
    
  

8 rows × 25 columns



In [30]:

    
# Show the current Dataset Structure
from IPython.display import display
display(dataset)









    







  
    
      
      user id
      movie id
      rating
      release date
      unknown
      Action
      Adventure
      Animation
      Childrens
      Comedy
      ...
      Horror
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
      age
      gender
    
  
  
    
      0
      196
      242
      3
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      1
      196
      257
      2
      1997.0
      0
      1
      1
      0
      0
      1
      ...
      0
      0
      0
      0
      1
      0
      0
      0
      49
      2
    
    
      2
      196
      111
      4
      1996.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      3
      196
      25
      4
      1996.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      4
      196
      382
      4
      1994.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      5
      196
      202
      3
      1993.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      6
      196
      153
      5
      1988.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      7
      196
      286
      5
      1996.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      1
      0
      0
      1
      0
      49
      2
    
    
      8
      196
      66
      3
      1995.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      9
      196
      845
      4
      1996.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      10
      196
      173
      2
      1987.0
      0
      1
      1
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      11
      196
      238
      4
      1987.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      12
      196
      94
      3
      1990.0
      0
      0
      0
      0
      1
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      13
      196
      762
      3
      1996.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      14
      196
      381
      4
      1994.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      15
      196
      306
      4
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      16
      196
      8
      5
      1995.0
      0
      0
      0
      0
      1
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      17
      196
      70
      3
      1994.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      18
      196
      655
      5
      1986.0
      0
      0
      1
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      19
      196
      13
      2
      1995.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      20
      196
      692
      5
      1995.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      21
      196
      1022
      4
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      22
      196
      287
      3
      1996.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      23
      196
      269
      3
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      24
      196
      285
      5
      1996.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      25
      196
      110
      1
      1995.0
      0
      1
      1
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      1
      0
      49
      2
    
    
      26
      196
      251
      3
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      27
      196
      393
      4
      1993.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      28
      196
      663
      5
      1979.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      49
      2
    
    
      29
      196
      580
      2
      1995.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      49
      2
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      99970
      598
      898
      4
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      40
      1
    
    
      99971
      598
      243
      2
      1997.0
      0
      0
      0
      0
      1
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      40
      1
    
    
      99972
      598
      308
      4
      1997.0
      0
      0
      0
      0
      1
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      40
      1
    
    
      99973
      598
      312
      5
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      1
      0
      0
      0
      0
      0
      40
      1
    
    
      99974
      598
      313
      5
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      40
      1
    
    
      99975
      598
      260
      3
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      1
      0
      1
      1
      0
      0
      40
      1
    
    
      99976
      598
      895
      2
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      1
      0
      0
      40
      1
    
    
      99977
      598
      691
      2
      1998.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      1
      1
      0
      0
      40
      1
    
    
      99978
      598
      349
      4
      1998.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      1
      0
      0
      40
      1
    
    
      99979
      598
      538
      4
      1997.0
      0
      0
      0
      1
      1
      0
      ...
      0
      1
      0
      0
      0
      0
      0
      0
      40
      1
    
    
      99980
      873
      294
      4
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99981
      873
      328
      4
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      1
      1
      0
      1
      0
      0
      48
      1
    
    
      99982
      873
      307
      3
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      1
      0
      0
      1
      0
      0
      48
      1
    
    
      99983
      873
      750
      3
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99984
      873
      258
      3
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      1
      0
      0
      0
      48
      1
    
    
      99985
      873
      339
      3
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99986
      873
      321
      1
      1996.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99987
      873
      879
      2
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      1
      1
      0
      48
      1
    
    
      99988
      873
      286
      2
      1996.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      1
      0
      0
      1
      0
      48
      1
    
    
      99989
      873
      259
      1
      1997.0
      0
      0
      0
      0
      1
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99990
      873
      289
      2
      1996.0
      0
      0
      0
      0
      0
      0
      ...
      0
      1
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99991
      873
      292
      5
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99992
      873
      269
      2
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      48
      1
    
    
      99993
      873
      875
      1
      1997.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      48
      1
    
    
      99994
      873
      300
      4
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      1
      0
      0
      48
      1
    
    
      99995
      873
      313
      5
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      1
      0
      0
      0
      0
      48
      1
    
    
      99996
      873
      326
      4
      1997.0
      0
      1
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      1
      0
      48
      1
    
    
      99997
      873
      348
      3
      1998.0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      1
      0
      0
      48
      1
    
    
      99998
      873
      358
      2
      1997.0
      0
      1
      1
      0
      0
      0
      ...
      0
      0
      0
      0
      1
      1
      0
      0
      48
      1
    
    
      99999
      873
      342
      4
      1997.0
      0
      0
      0
      0
      0
      1
      ...
      0
      0
      1
      0
      0
      0
      0
      0
      48
      1
    
  

100000 rows × 25 columns



In [ ]:

    
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Calculate Feature Relevance to the Dataset
for name, values in dataset.iteritems():
    # Clone Dataset for Feature Relevance calculation
    backupData = dataset.copy()
    # Clone the Column to be predicted
    y = backupData[name].copy()
    # Drop column, that will be used for prediction
    X = backupData.drop(name, 1)
    # Split Data for Model calibration
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=72)
    regressor = DecisionTreeRegressor(random_state=72)
    regressor.fit(X_train, y_train)
    score = regressor.score(X_test, y_test)
    print('Genre {} score = {}').format(name, score)
    # User Id and Movie Id have a weak relation within the other features, but



In [ ]:

    
# Based on Feature Relevance we are going to 
# 1st Remove the user id from the dataset
smartdata = dataset.copy()
smartdata.drop(smartdata.columns[[0]], axis = 1, inplace = True)
print(smartdata.head())



In [ ]:

    
# 2nd Lets translate the Ratingo into a more discrete evalution (like and dislike)
# 1 - 2.9 : DILIKE
# 3 - 5 : LIKE

for name, values in smartdata['rating'].iteritems():
    print(values)



In [32]:

    
# Produce a scatter matrix for each pair of features in the data 
pd.plotting.scatter_matrix(dataset, alpha = 0.3, figsize = (14,8), diagonal = 'kde');



In [ ]:

    
# Feature Scaling
# Scale the data using the natural logarithm 
log_data = np.log(dataset)
# Scale the sample data using the natural logarithm
log_samples = np.log(samples)
# Produce a scatter matrix for each pair of newly-transformed features
pd.scatter_matrix(log_data, alpha = 0.3, figsize = (14,8), diagonal = 'kde');



In [ ]:

    
item_list = (((pd.merge(item,data).sort_values(by = 'movie id')).groupby('movie title')))['movie id', 'movie title', 'rating']
item_list = item_list.mean()
item_list['movie title'] = item_list.index
item_list = item_list.as_matrix()



In [ ]:

    
recommendation_list = []
for i in recommendation:
    recommendation_list.append(item_list[i-1])
    
recommendation = (pd.DataFrame(recommendation_list,columns = ['movie id','mean rating' ,'movie title'])).sort_values(by = 'mean rating', ascending = False)
print(recommendation[['mean rating','movie title']])



In [ ]:

	user id	movie id	rating	release date	unknown	Action	Adventure	Animation	Childrens	Comedy	...	Horror	Musical	Mystery	Romance	Sci-Fi	Thriller	War	Western	age	gender
count	100000.00000	100000.000000	100000.000000	100000.000000	100000.0000	100000.000000	100000.000000	100000.000000	100000.000000	100000.000000	...	100000.000000	100000.000000	100000.000000	100000.000000	100000.00000	100000.00000	100000.000000	100000.000000	100000.000000	100000.000000
mean	462.48475	425.530130	3.529860	1987.956310	0.0001	0.255890	0.137530	0.036050	0.071820	0.298320	...	0.053170	0.049540	0.052450	0.194610	0.12730	0.21872	0.093980	0.018540	32.969850	1.742600
std	266.61442	330.798356	1.125674	14.154889	0.0100	0.436362	0.344408	0.186416	0.258191	0.457523	...	0.224373	0.216994	0.222934	0.395902	0.33331	0.41338	0.291802	0.134894	11.562623	0.437204
min	1.00000	1.000000	1.000000	1922.000000	0.0000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.00000	0.00000	0.000000	0.000000	7.000000	1.000000
25%	254.00000	175.000000	3.000000	1986.000000	0.0000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.00000	0.00000	0.000000	0.000000	24.000000	1.000000
50%	447.00000	322.000000	4.000000	1994.000000	0.0000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.00000	0.00000	0.000000	0.000000	30.000000	2.000000
75%	682.00000	631.000000	4.000000	1996.000000	0.0000	1.000000	0.000000	0.000000	0.000000	1.000000	...	0.000000	0.000000	0.000000	0.000000	0.00000	0.00000	0.000000	0.000000	40.000000	2.000000
max	943.00000	1682.000000	5.000000	1998.000000	1.0000	1.000000	1.000000	1.000000	1.000000	1.000000	...	1.000000	1.000000	1.000000	1.000000	1.00000	1.00000	1.000000	1.000000	73.000000	2.000000

	user id	movie id	rating	release date	unknown	Action	Adventure	Animation	Childrens	Comedy	...	Horror	Musical	Mystery	Romance	Sci-Fi	Thriller	War	Western	age	gender
0	196	242	3	1997.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
1	196	257	2	1997.0	0	1	1	0	0	1	...	0	0	0	0	1	0	0	0	49	2
2	196	111	4	1996.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
3	196	25	4	1996.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
4	196	382	4	1994.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
5	196	202	3	1993.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
6	196	153	5	1988.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
7	196	286	5	1996.0	0	0	0	0	0	0	...	0	0	0	1	0	0	1	0	49	2
8	196	66	3	1995.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
9	196	845	4	1996.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
10	196	173	2	1987.0	0	1	1	0	0	1	...	0	0	0	1	0	0	0	0	49	2
11	196	238	4	1987.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
12	196	94	3	1990.0	0	0	0	0	1	1	...	0	0	0	0	0	0	0	0	49	2
13	196	762	3	1996.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	49	2
14	196	381	4	1994.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
15	196	306	4	1997.0	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	49	2
16	196	8	5	1995.0	0	0	0	0	1	1	...	0	0	0	0	0	0	0	0	49	2
17	196	70	3	1994.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
18	196	655	5	1986.0	0	0	1	0	0	1	...	0	0	0	0	0	0	0	0	49	2
19	196	13	2	1995.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
20	196	692	5	1995.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
21	196	1022	4	1997.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	49	2
22	196	287	3	1996.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	49	2
23	196	269	3	1997.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
24	196	285	5	1996.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	49	2
25	196	110	1	1995.0	0	1	1	0	0	1	...	0	0	0	0	0	0	1	0	49	2
26	196	251	3	1997.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
27	196	393	4	1993.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
28	196	663	5	1979.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	49	2
29	196	580	2	1995.0	0	0	0	0	0	1	...	0	0	0	1	0	0	0	0	49	2
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
99970	598	898	4	1997.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	40	1
99971	598	243	2	1997.0	0	0	0	0	1	1	...	0	0	0	0	0	0	0	0	40	1
99972	598	308	4	1997.0	0	0	0	0	1	0	...	0	0	0	0	0	0	0	0	40	1
99973	598	312	5	1997.0	0	0	0	0	0	1	...	0	0	1	0	0	0	0	0	40	1
99974	598	313	5	1997.0	0	1	0	0	0	0	...	0	0	0	1	0	0	0	0	40	1
99975	598	260	3	1997.0	0	1	0	0	0	0	...	0	0	1	0	1	1	0	0	40	1
99976	598	895	2	1997.0	0	0	0	0	0	0	...	1	0	0	0	0	1	0	0	40	1
99977	598	691	2	1998.0	0	0	0	0	0	0	...	0	0	0	0	1	1	0	0	40	1
99978	598	349	4	1998.0	0	1	0	0	0	0	...	0	0	0	0	0	1	0	0	40	1
99979	598	538	4	1997.0	0	0	0	1	1	0	...	0	1	0	0	0	0	0	0	40	1
99980	873	294	4	1997.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	48	1
99981	873	328	4	1997.0	0	1	0	0	0	0	...	0	0	1	1	0	1	0	0	48	1
99982	873	307	3	1997.0	0	0	0	0	0	0	...	1	0	1	0	0	1	0	0	48	1
99983	873	750	3	1997.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	48	1
99984	873	258	3	1997.0	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	48	1
99985	873	339	3	1997.0	0	1	0	0	0	0	...	0	0	0	0	0	0	0	0	48	1
99986	873	321	1	1996.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	48	1
99987	873	879	2	1997.0	0	1	0	0	0	0	...	0	0	0	0	0	1	1	0	48	1
99988	873	286	2	1996.0	0	0	0	0	0	0	...	0	0	0	1	0	0	1	0	48	1
99989	873	259	1	1997.0	0	0	0	0	1	1	...	0	0	0	0	0	0	0	0	48	1
99990	873	289	2	1996.0	0	0	0	0	0	0	...	0	1	0	0	0	0	0	0	48	1
99991	873	292	5	1997.0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	48	1
99992	873	269	2	1997.0	0	0	0	0	0	1	...	0	0	0	0	0	0	0	0	48	1
99993	873	875	1	1997.0	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	48	1
99994	873	300	4	1997.0	0	1	0	0	0	0	...	0	0	0	0	0	1	0	0	48	1
99995	873	313	5	1997.0	0	1	0	0	0	0	...	0	0	0	1	0	0	0	0	48	1
99996	873	326	4	1997.0	0	1	0	0	0	0	...	0	0	0	0	0	0	1	0	48	1
99997	873	348	3	1998.0	0	0	0	0	0	0	...	0	0	0	0	0	1	0	0	48	1
99998	873	358	2	1997.0	0	1	1	0	0	0	...	0	0	0	0	1	1	0	0	48	1
99999	873	342	4	1997.0	0	0	0	0	0	1	...	0	0	1	0	0	0	0	0	48	1