In [2]:
# Third-party stack for the anime-recommendation EDA below.
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
import pandas as pd
import scipy
import csv

from sklearn import preprocessing
from sklearn.neighbors import NearestNeighbors

# Plot/display configuration for the whole notebook.
matplotlib.style.use('ggplot')
pd.options.display.float_format = '{:20,.2f}'.format  # thousands separators, 2 decimals
pd.set_option('display.max_columns', 50)

In [3]:
data_rating = pd.io.parsers.read_csv('raw/rating.csv')
#data_rating = data_rating.loc[data_rating['user_id'] != 48766]
data_anime = pd.io.parsers.read_csv('raw/anime.csv')

train_rating = pd.io.parsers.read_csv('omer/rating_train.csv')
test_rating = pd.io.parsers.read_csv('omer/rating_test.csv')
final_profiles = pd.io.parsers.read_csv('raw/user_profiles_final.csv')

print "Animes: "
print data_anime.describe()

print "\nRatings: "
print data_rating.describe()


Animes: 
                  anime_id               rating              members
count            12,294.00            12,064.00            12,294.00
mean             14,058.22                 6.47            18,071.34
std              11,455.29                 1.03            54,820.68
min                   1.00                 1.67                 5.00
25%               3,484.25                 5.88               225.00
50%              10,260.50                 6.57             1,550.00
75%              24,794.50                 7.18             9,437.00
max              34,527.00                10.00         1,013,917.00

Ratings: 
                   user_id             anime_id               rating
count         7,813,737.00         7,813,737.00         7,813,737.00
mean             36,727.96             8,909.07                 6.14
std              20,997.95             8,883.95                 3.73
min                   1.00                 1.00                -1.00
25%              18,974.00             1,240.00                 6.00
50%              36,791.00             6,213.00                 7.00
75%              54,757.00            14,093.00                 9.00
max              73,516.00            34,519.00                10.00

In [241]:
# Distribution of per-anime average ratings.
data_anime['rating'].plot.hist(bins=40, figsize=(15, 4))
plt.show()



In [3]:
data_rating.groupby('user_id').size().to_frame().sort_values(by=0).plot(kind="density", logx=True, figsize=(15,4))
print data_rating.groupby('user_id').size().to_frame().sort_values(by=0).quantile(0.01)
plt.show()


0                   1.00
Name: 0.01, dtype: float64

In [4]:
print 'Anime rating:'
print data_anime['rating'].describe(include='all')

for t in ['TV', 'OVA', 'Movie', 'Special', 'ONA', 'Music']:
    print '\nRating: ' + t
    print data_anime.loc[data_anime['type'] == t]['rating'].describe(include='all')
    data_anime.loc[data_anime['type'] == t]['rating'].plot(kind="density", figsize=(15,8))

L=plt.legend()
for i, t in enumerate(['TV', 'OVA', 'Movie', 'Special', 'ONA', 'Music']):
    L.get_texts()[i].set_text(t)

plt.show()


Anime rating:
count              12,064.00
mean                    6.47
std                     1.03
min                     1.67
25%                     5.88
50%                     6.57
75%                     7.18
max                    10.00
Name: rating, dtype: float64

Rating: TV
count               3,671.00
mean                    6.90
std                     0.86
min                     2.67
25%                     6.44
50%                     6.94
75%                     7.46
max                     9.60
Name: rating, dtype: float64

Rating: OVA
count               3,285.00
mean                    6.38
std                     0.86
min                     2.00
25%                     5.85
50%                     6.38
75%                     6.92
max                     9.25
Name: rating, dtype: float64

Rating: Movie
count               2,297.00
mean                    6.32
std                     1.21
min                     1.92
25%                     5.42
50%                     6.47
75%                     7.25
max                    10.00
Name: rating, dtype: float64

Rating: Special
count               1,671.00
mean                    6.52
std                     0.89
min                     1.67
25%                     6.08
50%                     6.63
75%                     7.11
max                     8.66
Name: rating, dtype: float64

Rating: ONA
count                 652.00
mean                    5.64
std                     1.13
min                     2.58
25%                     4.87
50%                     5.75
75%                     6.41
max                     8.26
Name: rating, dtype: float64

Rating: Music
count                 488.00
mean                    5.59
std                     0.96
min                     3.28
25%                     5.01
50%                     5.62
75%                     6.15
max                     8.38
Name: rating, dtype: float64

In [5]:
_genre = data_anime['genre']
_genre_list = []
genre_count = []
unique_genre = []
for g in _genre:
    try:
        gs = g.split(',')
        if len(gs) == 1:
            unique_genre.extend(gs)
        _genre_list.extend(map(lambda s: s.strip(), gs))
        genre_count.append(gs)
    except:
        pass
    
#print genre_count / len(data_anime) 

print sorted(set(unique_genre))
print len(sorted(set(unique_genre)))

print len(data_anime) 
print len(sorted(set(_genre_list)))
print sorted(set(_genre_list))
print
print set(_genre_list) - set(unique_genre)

pd.DataFrame(map(lambda x: len(x), genre_count)).plot(kind='hist', bins=15)
plt.show()


['Action', 'Adventure', 'Cars', 'Comedy', 'Dementia', 'Demons', 'Drama', 'Ecchi', 'Fantasy', 'Game', 'Harem', 'Hentai', 'Historical', 'Horror', 'Kids', 'Magic', 'Martial Arts', 'Mecha', 'Military', 'Music', 'Mystery', 'Parody', 'Psychological', 'Romance', 'Samurai', 'School', 'Sci-Fi', 'Seinen', 'Shoujo', 'Shounen', 'Slice of Life', 'Space', 'Sports', 'Supernatural', 'Thriller', 'Vampire', 'Yaoi']
37
12294
43
['Action', 'Adventure', 'Cars', 'Comedy', 'Dementia', 'Demons', 'Drama', 'Ecchi', 'Fantasy', 'Game', 'Harem', 'Hentai', 'Historical', 'Horror', 'Josei', 'Kids', 'Magic', 'Martial Arts', 'Mecha', 'Military', 'Music', 'Mystery', 'Parody', 'Police', 'Psychological', 'Romance', 'Samurai', 'School', 'Sci-Fi', 'Seinen', 'Shoujo', 'Shoujo Ai', 'Shounen', 'Shounen Ai', 'Slice of Life', 'Space', 'Sports', 'Super Power', 'Supernatural', 'Thriller', 'Vampire', 'Yaoi', 'Yuri']

set(['Josei', 'Police', 'Yuri', 'Shounen Ai', 'Super Power', 'Shoujo Ai'])

In [6]:
# Bar chart of genre frequency, most common first.
genre_count = pd.DataFrame(_genre_list).groupby(0)
genre_sizes = genre_count.size().sort_values(ascending=False)
genre_sizes.plot(kind="bar", width=0.9, figsize=(15, 4))
plt.show()



In [7]:
# Percentile curve of the per-anime member counts (fan-base size).
# Removed an unused `members = data_anime['members'].cumsum()` — the result
# was never referenced anywhere in the notebook.
data_anime['members'].quantile(np.arange(0.0, 1.0, 0.01)).plot(kind="line")
plt.show()



In [8]:
# Box plot of member counts on a log scale (heavy right tail).
data_anime['members'].plot.box(logy=True)
plt.show()



In [9]:
movies = data_rating.groupby('user_id').size().to_frame().sort_values(by=0)
movies.loc[movies[0] < 314].describe()
movies.loc[movies[0] < 314].plot(kind="hist", bins=313)
plt.show()



In [4]:
def normalize(df_user_profiles):
    """Min-max scale a user-profile frame's feature columns to [0, 1].

    Takes every column except the first as the feature matrix, scales each
    feature column independently, then re-attaches user_id and a /10-scaled
    genre_count.

    NOTE(review): the scaled columns are relabelled with
    columns.difference(['user_id','rating','genre']), which returns an
    ALPHABETICALLY sorted index — if that order differs from the original
    iloc[:,1:] column order, values end up under the wrong genre labels.
    Verify the two orderings match before trusting the output.
    """
    x = df_user_profiles.iloc[:,1:].values #returns a numpy array
    min_max_scaler = preprocessing.MinMaxScaler()
    
    # Scales each column (feature) to [0, 1] across all users.
    x_scaled = min_max_scaler.fit_transform(x)
    
    df_scaled = pd.DataFrame(x_scaled, columns=df_user_profiles.columns.difference(['user_id','rating','genre']))
    
    df_scaled['user_id'] = df_user_profiles['user_id'].values
    # genre_count is rescaled by a fixed /10 rather than min-max
    # (presumably its known range — TODO confirm; the file-based variant uses /13).
    df_scaled['genre_count'] = map(lambda x: x /10.0, df_user_profiles['genre_count'].values)
    #df_scaled['rating'] = 1.0
    
    return df_scaled

def normalize_prof_from_file(df_user_profiles):
    """Normalize profiles loaded from user_profiles_final.csv.

    Unlike normalize(), this scales x TRANSPOSED: MinMaxScaler scales each
    column of its input independently, and the columns of x.T are individual
    user rows — so each user's feature vector is min-max scaled across its own
    genre features (per-user scaling), not per-feature across users.
    Re-attaches user_id, a /13-scaled genre_count, and a constant rating of 1.0.

    NOTE(review): same alphabetical-relabelling caveat as normalize() — the
    columns.difference(...) order must match the iloc[:,:-3] column order.
    """
    # Drop the last three columns (presumably user_id / genre_count / rating
    # trailers in the CSV — TODO confirm against the file header).
    x = df_user_profiles.iloc[:,:-3].values #returns a numpy array
    print len(x.T)  # debug: number of feature columns
    min_max_scaler = preprocessing.MinMaxScaler()
    
    # Transpose so each USER (not each feature) is scaled to [0, 1].
    x_scaled = min_max_scaler.fit_transform(x.T)
    print len(df_user_profiles.columns.difference(['user_id','rating','genre_count']))  # debug: label count
    df_scaled = pd.DataFrame(x_scaled.T, columns=df_user_profiles.columns.difference(['user_id','rating','genre_count']))
    
    df_scaled['user_id'] = df_user_profiles['user_id'].values
    df_scaled['genre_count'] = map(lambda x: x /13.0, df_user_profiles['genre_count'].values)
    df_scaled['rating'] = 1.0  # constant query-side rating feature
    
    return df_scaled

def get_user_profile(user_id, df_rating, data_anime):
    """Build a one-row genre profile for user_id from their rated anime.

    Joins the user's ratings to per-anime genre indicator vectors, counts how
    often each genre appears (squared), and records the average number of
    genres per rated anime in 'genre_count'.
    """
    df_anime_genres = pd.get_dummies(data_anime['genre'].str.get_dummies(sep=", ")) # creates genre vectors
    df_anime_vector = pd.concat([data_anime['anime_id'], df_anime_genres], axis=1)
    
    # All ratings by this user, joined with the genre indicators of each anime.
    df_user = df_rating.loc[df_rating['user_id'] == user_id]
    df_merged = pd.merge(df_user, df_anime_vector, how='left', left_on='anime_id', right_on='anime_id' 
                        ).drop(['anime_id', 'rating'], axis=1)

    
    # Per-anime genre tag count (row sums over the indicator columns).
    avg_genre = df_merged[df_merged.columns.difference(['user_id'])].sum(axis=1)
    
    # Count only 1's
    # NOTE(review): value_counts indexes the result by VALUE, and the mask is
    # built from df_merged's row index — selecting the "1" row this way only
    # works if the two indexes line up. Looks coincidental; verify.
    df_user_sum = df_merged.apply(pd.Series.value_counts).loc[df_merged.index == 1]
    df_user_sum.fillna(0, inplace = True)
    # Square the per-genre counts — presumably to emphasize dominant genres
    # in nearest-neighbour distance; TODO confirm intent.
    df_user_sum = df_user_sum.apply(func=lambda x: x**2,axis=0)

    # Mean number of genre tags per rated anime.
    df_user_sum['genre_count'] = avg_genre.sum() / float(len(avg_genre))
    df_user_sum['user_id'] = user_id
   # df_user_sum['rating'] = 10.0

    return df_user_sum

def build_user_profiles(user_ids):
    """Build one profile row per user in user_ids (uses global data_rating/data_anime).

    Collects the per-user frames in a list and concatenates once — the
    original appended to a growing DataFrame inside the loop, which is
    quadratic in the number of users. (Also avoids shadowing builtin `id`.)
    """
    profiles = [get_user_profile(uid, data_rating, data_anime) for uid in user_ids]
    if not profiles:
        return pd.DataFrame()  # preserve empty-input behavior of the original
    return pd.concat(profiles, ignore_index=True)

def build_knn(n, id, rating=False):
    """Fit a ball-tree NearestNeighbors model over anime genre vectors.

    Excludes anime the user (`id`) already rated in the training set, builds
    a genre-indicator vector per remaining anime plus a /13-scaled genre
    count, and optionally a /10-scaled average rating feature.
    """
    # Anime the user has already seen in training — excluded from candidates.
    filter_out = train_rating.loc[train_rating['user_id'] == id]['anime_id']
    filter_anime = data_anime.loc[~data_anime['anime_id'].isin(set(filter_out))]
    
    filter_anime_genres = pd.get_dummies(filter_anime['genre'].str.get_dummies(sep=", ")) # creates genre vectors
    df_anime_vector = pd.concat([filter_anime['anime_id'], filter_anime_genres], axis=1) # anime_id + genre vector
    # Same /13 scale as normalize_prof_from_file's genre_count, so the
    # query profile and the anime vectors are comparable on this feature.
    df_anime_vector['genre_count'] =  df_anime_vector[df_anime_vector.columns.difference(['anime_id'])].sum(axis=1).apply(lambda x: x / 13.0)
    if rating:
        # NOTE(review): this assignment mutates filter_anime_genres AFTER it
        # was concatenated (copied) into df_anime_vector — it appears to have
        # no effect on the fitted data. Likely leftover; verify before removing.
        filter_anime_genres['rating'] = 0
        df_anime_vector['rating'] =  filter_anime['rating'].apply(lambda x: x / 10.0)
        df_anime_vector.fillna(0, inplace = True)

    # Fit on every column except anime_id (first column).
    return NearestNeighbors(n_neighbors=n, algorithm='ball_tree').fit(df_anime_vector.iloc[:,1:])

def get_n_closest_users(norm_profile, n, rating):
    """Find the n anime nearest to a normalized user profile.

    Returns (distances, indices, feature_profile) where feature_profile is
    the query profile with its user_id stripped.
    """
    model = build_knn(n, norm_profile.user_id, rating=rating)

    # The model was fitted on feature columns only, so drop user_id
    # from the query before searching.
    query = norm_profile.drop('user_id')
    distances, indices = model.kneighbors(query)

    return distances, indices, query

In [11]:
# Sanity-check profile construction for two users, then normalize the
# precomputed profiles loaded from file.
profile1 = get_user_profile(1, train_rating, data_anime)
profile2 = get_user_profile(2, train_rating, data_anime)

# Instance-method call; the original used the unbound form
# pd.DataFrame.append(profile1, profile2), which is equivalent but obscure.
profiles = profile1.append(profile2)
# print normalize(profiles)

final_normalized = normalize_prof_from_file(final_profiles)
final_normalized.head(5)


43
43
Out[11]:
Action Adventure Cars Comedy Dementia Demons Drama Ecchi Fantasy Game Harem Hentai Historical Horror Josei Kids Magic Martial Arts Mecha Military Music Mystery Parody Police Psychological Romance Samurai School Sci-Fi Seinen Shoujo Shoujo Ai Shounen Shounen Ai Slice of Life Space Sports Super Power Supernatural Thriller Vampire Yaoi Yuri user_id genre_count rating
0 0.78 0.14 0.00 1.00 0.00 0.16 0.34 0.67 0.50 0.08 0.72 0.00 0.03 0.08 0.00 0.00 0.18 0.04 0.09 0.01 0.01 0.07 0.03 0.00 0.08 0.91 0.00 0.74 0.28 0.21 0.00 0.03 0.37 0.00 0.08 0.00 0.00 0.16 0.62 0.00 0.08 0.00 0.00 1.00 0.41 1.00
1 1.00 0.75 0.00 0.65 0.00 0.06 0.48 0.08 0.81 0.12 0.00 0.00 0.00 0.10 0.00 0.27 0.21 0.06 0.02 0.02 0.00 0.17 0.00 0.02 0.17 0.17 0.00 0.19 0.15 0.06 0.02 0.00 0.71 0.00 0.08 0.00 0.19 0.15 0.44 0.06 0.02 0.00 0.00 3.00 0.38 1.00
2 1.00 0.41 0.00 0.78 0.00 0.04 0.19 0.15 0.81 0.07 0.07 0.00 0.19 0.07 0.00 0.00 0.37 0.04 0.00 0.07 0.00 0.22 0.00 0.04 0.11 0.30 0.15 0.30 0.04 0.07 0.07 0.00 0.56 0.00 0.15 0.00 0.04 0.19 0.44 0.07 0.15 0.00 0.00 4.00 0.35 1.00
3 0.62 0.46 0.05 1.00 0.00 0.11 0.39 0.07 0.39 0.01 0.04 0.00 0.12 0.01 0.02 0.12 0.11 0.05 0.01 0.01 0.04 0.07 0.10 0.00 0.04 0.13 0.10 0.30 0.33 0.20 0.00 0.00 0.67 0.00 0.28 0.02 0.29 0.13 0.34 0.07 0.05 0.00 0.00 5.00 0.32 1.00
4 0.83 0.08 0.00 1.00 0.00 0.17 0.67 0.33 0.33 0.17 0.33 0.00 0.08 0.33 0.17 0.00 0.00 0.17 0.00 0.00 0.08 0.50 0.00 0.17 0.42 0.75 0.00 0.75 0.17 0.42 0.08 0.00 0.58 0.00 0.58 0.00 0.17 0.33 1.00 0.17 0.00 0.00 0.00 6.00 0.33 1.00

In [13]:
# Spot-check: every raw rating recorded for user 102.
data_rating.loc[data_rating['user_id'] == 102]


Out[13]:
user_id anime_id rating
8280 102 24 8
8281 102 30 7
8282 102 45 10
8283 102 48 8
8284 102 66 8
8285 102 71 7
8286 102 72 7
8287 102 73 7
8288 102 79 7
8289 102 98 6
8290 102 99 7
8291 102 121 6
8292 102 145 5
8293 102 166 7
8294 102 167 8
8295 102 169 5
8296 102 189 7
8297 102 190 7
8298 102 199 9
8299 102 202 7
8300 102 205 7
8301 102 226 6
8302 102 237 8
8303 102 355 8
8304 102 356 8
8305 102 357 7
8306 102 371 6
8307 102 431 9
8308 102 534 8
8309 102 846 8
... ... ... ...
8357 102 10020 9
8358 102 10080 8
8359 102 10464 7
8360 102 10491 6
8361 102 10521 8
8362 102 10578 6
8363 102 10620 8
8364 102 10711 7
8365 102 10719 8
8366 102 10721 8
8367 102 10793 8
8368 102 11111 7
8369 102 11433 9
8370 102 11757 8
8371 102 11759 7
8372 102 11785 6
8373 102 13659 9
8374 102 13859 5
8375 102 13939 7
8376 102 14741 8
8377 102 14829 8
8378 102 14967 8
8379 102 15699 6
8380 102 16498 9
8381 102 18857 9
8382 102 19815 9
8383 102 22297 8
8384 102 27821 7
8385 102 28701 8
8386 102 30276 7

107 rows × 3 columns


In [10]:
# profiles = build_user_profiles([10203,43202,1300])


usdf = pd.DataFrame()

with open('content_recommendations', 'ab') as file:
    writer = csv.writer(file)
    for idx in [5,6,7,8,14,17,21,23,25,26,27]:    
        distances, indices, us = get_n_closest_users(final_normalized.drop([], axis=1).iloc[idx], 10, True)

        usdf = usdf.append(final_normalized.iloc[idx], ignore_index=True)
        test_movies = test_rating.loc[test_rating['user_id'] == final_normalized.iloc[idx]['user_id']]
        for ind in indices:
            print "-----------------------"
            print final_normalized.iloc[idx]['user_id']
            # print data_anime.loc[ind][['anime_id','genre', 'rating']]
            print len(data_anime.loc[ind]['anime_id'])
            print set(data_anime.loc[ind]['anime_id']).intersection(set(test_movies['anime_id']))
            writer.writerow([final_normalized.iloc[idx]['user_id'], ])


c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
7.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
8.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
11.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
12.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
19.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
22.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
26.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
28.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
30.0
10
set([])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
-----------------------
31.0
10
set([])
-----------------------
32.0
10
set([249])
c:\python27\lib\site-packages\sklearn\utils\validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)