In [1]:
# standard libraries
import numpy as np
import pandas as pd

# for plotting
import matplotlib.pyplot as plt
import pickle

# for interpreting data
from pyechonest import artist

# for machine learning
from sklearn.ensemble import RandomForestRegressor as rf
from scipy.sparse import coo_matrix

import os

from pyechonest import config

# The Echo Nest API key is read from the ECHO_NEST_API_KEY environment
# variable so the secret is not stored in the notebook. The key that used to
# be hard-coded here should be treated as leaked and revoked. When the
# variable is unset, whatever value pyechonest's config module already holds
# is left in place.
config.ECHO_NEST_API_KEY = os.environ.get(
    'ECHO_NEST_API_KEY', getattr(config, 'ECHO_NEST_API_KEY', None))

# turn off to avoid displaying test output
display = False

In [2]:
# Names of the input CSV files (relative paths).
trainfile, testfile = "train.csv", "test.csv"
artistfile, userfile = "artists.csv", "profiles.csv"

In [3]:
# Read the training table from the CSV named by `trainfile`.
train = pd.read_csv(filepath_or_buffer=trainfile)

In [8]:
%matplotlib
plt.hist(train.plays)


Using matplotlib backend: TkAgg
Out[8]:
(array([  4.15470900e+06,   6.70000000e+01,   1.70000000e+01,
          6.00000000e+00,   2.00000000e+00,   2.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          1.00000000e+00]),
 array([  1.00000000e+00,   4.19166000e+04,   8.38322000e+04,
          1.25747800e+05,   1.67663400e+05,   2.09579000e+05,
          2.51494600e+05,   2.93410200e+05,   3.35325800e+05,
          3.77241400e+05,   4.19157000e+05]),
 <a list of 10 Patch objects>)

In [5]:
# Read the test table from the CSV named by `testfile`.
test = pd.read_csv(filepath_or_buffer=testfile)

In [6]:
# Number of rows in the test table.
test.shape[0]


Out[6]:
4154804

In [77]:
# Read the artist and user-profile tables.
users = pd.read_csv(filepath_or_buffer=userfile)
artists = pd.read_csv(filepath_or_buffer=artistfile)

In [78]:
# Lookup from each `artist` column value to the `name` in the same row.
artist_to_name = dict(zip(artists['artist'], artists['name']))

In [79]:
# useful functions
def load_data(filenames):
    '''
    Inputs: list of filenames (without the '.p' suffix) of pickled data
        files to be loaded.
    Returns: dictionary with {filename: object } pairings. A name whose file
        cannot be read is left out of the result, unless the name contains
        'matrix', in which case a play-count matrix is rebuilt from `train`.

    NOTE: unpickling can execute arbitrary code; only load files that were
    written by dump_data in a trusted environment.
    '''
    d = {}
    for f in filenames:
        try:
            # `with` closes the handle even when unpickling fails.
            with open(f + '.p', 'rb') as fh:
                d[f] = pickle.load(fh)
        except (IOError, OSError, EOFError, pickle.UnpicklingError):
            # The previous `except _:` used the name `_` as the exception
            # class, which is not an exception type, so a missing file was
            # never handled here.
            print("Could not load {} into data.".format(f))
            if 'matrix' in f:
                print("Assuming matrix. Attempting to recreate with available data.")
                # Imported locally: the notebook's import cell only brings
                # in coo_matrix.
                from scipy.sparse import lil_matrix
                # U, D, user_id and artist_id are read as notebook globals.
                # NOTE(review): none of them is defined in the cells shown;
                # presumably U/D are the user/artist counts and the dicts
                # map ids to row/column positions -- confirm.
                m = lil_matrix((U, D))
                triples = train[['user', 'artist', 'plays']].itertuples(index=False)
                for i, (user, artist_key, play_count) in enumerate(triples):
                    if i % 100000 == 0:
                        print(i)
                    m[user_id[user], artist_id[artist_key]] = play_count
                d[f] = m

    return d

def dump_data(data):
    '''
    Input: data - a dictionary of filename: object items to be dumped using pickle.
    Each object is written to "<filename>.p", replacing any existing file.
    '''
    for f, o in data.items():
        # `with` makes sure every file is flushed and closed before the next
        # one is opened, instead of relying on garbage collection.
        with open(f + '.p', 'wb') as fh:
            pickle.dump(o, fh)

In [80]:
# Base names of the pickled lookups; load_data appends the '.p' suffix.
datafiles = ['artist_hotness', 'could_not_find', 'similar_artists']
pickled = load_data(datafiles)
# Unpack in the same order as `datafiles`.
artist_hotness, no_matches, similar_artists = (pickled[key] for key in datafiles)

In [81]:
# The per-artist term data lives in its own pickle.
datafiles2 = ['clean_artist_terms']
pickled2 = load_data(datafiles2)
artist_terms = pickled2[datafiles2[-1]]

In [82]:
# Two-way lookup between an artist's "hash" (the `artist` column) and its
# name, to help interpret groups.
bands = dict(zip(artists['artist'], artists['name']))
artist_ids = dict(zip(artists['name'], artists['artist']))

In [83]:
#names_array = np.array(artists.name)
#similar_names = {}
#for artist in similar_artists:
#    similar = [match.name for match in similar_artists[artist]]
#    similar_names[artist] = similar
    
# calculate artist median plays
#artist_medians = {}
#for artist in artists.artist:
#    artist_medians[artist] = np.median(train[train.artist == artist].plays)

In [84]:
# First step of the user feature vector: collect the distinct values of the
# `country` column so that each can be given a numeric id.
uniq_countries = set(users.country)

# Divide each id by the number of countries so the labels stay below 1 and
# aren't heavily emphasized.
country_label = dict(
    (country, float(i) / len(uniq_countries))
    for i, country in enumerate(uniq_countries)
)
country_label


Out[84]:
{'Afghanistan': 0.2510460251046025,
 'Albania': 0.19246861924686193,
 'Algeria': 0.5899581589958159,
 'American Samoa': 0.0794979079497908,
 'Andorra': 0.5564853556485355,
 'Angola': 0.4560669456066946,
 'Anguilla': 0.8284518828451883,
 'Antarctica': 0.9707112970711297,
 'Antigua and Barbuda': 0.7531380753138075,
 'Argentina': 0.05439330543933055,
 'Armenia': 0.13389121338912133,
 'Aruba': 0.04184100418410042,
 'Australia': 0.5857740585774058,
 'Austria': 0.502092050209205,
 'Azerbaijan': 0.2175732217573222,
 'Bahamas': 0.17154811715481172,
 'Bahrain': 0.07112970711297072,
 'Bangladesh': 0.5815899581589958,
 'Barbados': 0.7824267782426778,
 'Belarus': 0.42677824267782427,
 'Belgium': 0.6192468619246861,
 'Belize': 0.6317991631799164,
 'Benin': 0.33472803347280333,
 'Bermuda': 0.2928870292887029,
 'Bhutan': 0.7949790794979079,
 'Bolivia': 0.058577405857740586,
 'Bosnia and Herzegovina': 0.10460251046025104,
 'Botswana': 0.3472803347280335,
 'Bouvet Island': 0.8451882845188284,
 'Brazil': 0.5230125523012552,
 'British Indian Ocean Territory': 0.899581589958159,
 'Brunei Darussalam': 0.8117154811715481,
 'Bulgaria': 0.4435146443514644,
 'Burkina Faso': 0.06694560669456066,
 'Burundi': 0.7698744769874477,
 'Cambodia': 0.4393305439330544,
 'Cameroon': 0.06276150627615062,
 'Canada': 0.0,
 'Cape Verde': 0.5523012552301255,
 'Cayman Islands': 0.39330543933054396,
 'Central African Republic': 0.40585774058577406,
 'Chad': 0.46443514644351463,
 'Chile': 0.6108786610878661,
 'China': 0.36401673640167365,
 'Christmas Island': 0.1506276150627615,
 'Cocos (Keeling) Islands': 0.09205020920502092,
 'Colombia': 0.7615062761506276,
 'Comoros': 0.9623430962343096,
 'Congo': 0.9748953974895398,
 'Congo, the Democratic Republic of the': 0.3389121338912134,
 'Cook Islands': 0.3305439330543933,
 'Costa Rica': 0.5439330543933054,
 "Cote D'Ivoire": 0.2217573221757322,
 'Croatia': 0.6778242677824268,
 'Cuba': 0.9539748953974896,
 'Cyprus': 0.23430962343096234,
 'Czech Republic': 0.25523012552301255,
 'Denmark': 0.6527196652719666,
 'Djibouti': 0.7447698744769874,
 'Dominica': 0.7573221757322176,
 'Dominican Republic': 0.37656903765690375,
 'Ecuador': 0.5774058577405857,
 'Egypt': 0.9497907949790795,
 'El Salvador': 0.5941422594142259,
 'Equatorial Guinea': 0.7238493723849372,
 'Eritrea': 0.2594142259414226,
 'Estonia': 0.7154811715481172,
 'Ethiopia': 0.03765690376569038,
 'Falkland Islands (Malvinas)': 0.35564853556485354,
 'Faroe Islands': 0.5271966527196653,
 'Fiji': 0.9916317991631799,
 'Finland': 0.16317991631799164,
 'France': 0.28451882845188287,
 'French Guiana': 0.6694560669456067,
 'French Polynesia': 0.2803347280334728,
 'French Southern Territories': 0.4602510460251046,
 'Gabon': 0.15481171548117154,
 'Gambia': 0.602510460251046,
 'Georgia': 0.6443514644351465,
 'Germany': 0.8702928870292888,
 'Ghana': 0.38493723849372385,
 'Gibraltar': 0.5606694560669456,
 'Greece': 0.9832635983263598,
 'Greenland': 0.9330543933054394,
 'Grenada': 0.698744769874477,
 'Guadeloupe': 0.9288702928870293,
 'Guam': 0.20920502092050208,
 'Guatemala': 0.100418410041841,
 'Guinea-Bissau': 0.6861924686192469,
 'Guyana': 0.4309623430962343,
 'Haiti': 0.6276150627615062,
 'Heard Island and Mcdonald Islands': 0.9790794979079498,
 'Holy See (Vatican City State)': 0.7489539748953975,
 'Honduras': 0.9372384937238494,
 'Hong Kong': 0.6359832635983264,
 'Hungary': 0.9246861924686193,
 'Iceland': 0.8493723849372385,
 'India': 0.21338912133891214,
 'Indonesia': 0.401673640167364,
 'Iran, Islamic Republic of': 0.3514644351464435,
 'Iraq': 0.702928870292887,
 'Ireland': 0.5648535564853556,
 'Israel': 0.8410041841004184,
 'Italy': 0.7907949790794979,
 'Jamaica': 0.18828451882845187,
 'Japan': 0.5146443514644351,
 'Jordan': 0.11297071129707113,
 'Kazakhstan': 0.8786610878661087,
 'Kenya': 0.4351464435146444,
 'Kiribati': 0.6903765690376569,
 "Korea, Democratic People's Republic of": 0.9037656903765691,
 'Korea, Republic of': 0.5397489539748954,
 'Kuwait': 0.5313807531380753,
 'Kyrgyzstan': 0.891213389121339,
 "Lao People's Democratic Republic": 0.6485355648535565,
 'Latvia': 0.9205020920502092,
 'Lebanon': 0.7280334728033473,
 'Lesotho': 0.22594142259414227,
 'Liberia': 0.12552301255230125,
 'Libyan Arab Jamahiriya': 0.0041841004184100415,
 'Liechtenstein': 0.48535564853556484,
 'Lithuania': 0.02510460251046025,
 'Luxembourg': 0.5481171548117155,
 'Macao': 0.2384937238493724,
 'Macedonia': 0.9121338912133892,
 'Madagascar': 0.7866108786610879,
 'Malawi': 0.3263598326359833,
 'Malaysia': 0.497907949790795,
 'Maldives': 0.8158995815899581,
 'Mali': 0.4100418410041841,
 'Malta': 0.8075313807531381,
 'Marshall Islands': 0.14644351464435146,
 'Martinique': 0.8744769874476988,
 'Mauritania': 0.8870292887029289,
 'Mauritius': 0.4476987447698745,
 'Mayotte': 0.895397489539749,
 'Mexico': 0.9456066945606695,
 'Micronesia, Federated States of': 0.3682008368200837,
 'Moldova': 0.6652719665271967,
 'Monaco': 0.16736401673640167,
 'Mongolia': 0.6820083682008368,
 'Montenegro': 0.02092050209205021,
 'Montserrat': 0.08368200836820083,
 'Morocco': 0.6736401673640168,
 'Mozambique': 0.5062761506276151,
 'Myanmar': 0.9414225941422594,
 'Namibia': 0.6610878661087866,
 'Nauru': 0.3138075313807531,
 'Nepal': 0.803347280334728,
 'Netherlands': 0.1297071129707113,
 'Netherlands Antilles': 0.8368200836820083,
 'New Caledonia': 0.9079497907949791,
 'New Zealand': 0.1799163179916318,
 'Nicaragua': 0.7782426778242678,
 'Niger': 0.5188284518828452,
 'Nigeria': 0.5732217573221757,
 'Niue': 0.1589958158995816,
 'Norfolk Island': 0.200836820083682,
 'Northern Mariana Islands': 0.8200836820083682,
 'Norway': 0.32217573221757323,
 'Oman': 0.14225941422594143,
 'Pakistan': 0.13807531380753138,
 'Palau': 0.5690376569037657,
 'Palestinian Territory, Occupied': 0.99581589958159,
 'Panama': 0.5355648535564853,
 'Papua New Guinea': 0.8619246861924686,
 'Paraguay': 0.9874476987447699,
 'Peru': 0.3054393305439331,
 'Philippines': 0.6569037656903766,
 'Pitcairn': 0.24267782426778242,
 'Poland': 0.8828451882845189,
 'Portugal': 0.7112970711297071,
 'Puerto Rico': 0.6150627615062761,
 'Qatar': 0.4895397489539749,
 'Reunion': 0.7656903765690377,
 'Romania': 0.45188284518828453,
 'Russian Federation': 0.1087866108786611,
 'Rwanda': 0.606694560669456,
 'Saint Helena': 0.016736401673640166,
 'Saint Kitts and Nevis': 0.03347280334728033,
 'Saint Lucia': 0.2719665271966527,
 'Saint Pierre and Miquelon': 0.029288702928870293,
 'Saint Vincent and the Grenadines': 0.2301255230125523,
 'Samoa': 0.19665271966527198,
 'San Marino': 0.27615062761506276,
 'Sao Tome and Principe': 0.008368200836820083,
 'Saudi Arabia': 0.07531380753138076,
 'Senegal': 0.8577405857740585,
 'Serbia': 0.9581589958158996,
 'Seychelles': 0.3179916317991632,
 'Sierra Leone': 0.6401673640167364,
 'Singapore': 0.34309623430962344,
 'Slovakia': 0.29707112970711297,
 'Slovenia': 0.09623430962343096,
 'Solomon Islands': 0.26359832635983266,
 'Somalia': 0.301255230125523,
 'South Africa': 0.4686192468619247,
 'South Georgia and the South Sandwich Islands': 0.4811715481171548,
 'Spain': 0.12133891213389121,
 'Sri Lanka': 0.04602510460251046,
 'Sudan': 0.799163179916318,
 'Suriname': 0.8242677824267782,
 'Svalbard and Jan Mayen': 0.7322175732217573,
 'Swaziland': 0.0502092050209205,
 'Sweden': 0.4225941422594142,
 'Switzerland': 0.694560669456067,
 'Syrian Arab Republic': 0.28870292887029286,
 'Taiwan': 0.7740585774058577,
 'Tajikistan': 0.41841004184100417,
 'Tanzania, United Republic of': 0.7071129707112971,
 'Thailand': 0.6234309623430963,
 'Timor-Leste': 0.3723849372384937,
 'Togo': 0.3598326359832636,
 'Tokelau': 0.47280334728033474,
 'Tonga': 0.3891213389121339,
 'Trinidad and Tobago': 0.9163179916317992,
 'Tunisia': 0.7405857740585774,
 'Turkey': 0.24686192468619247,
 'Turkmenistan': 0.012552301255230125,
 'Turks and Caicos Islands': 0.4769874476987448,
 'Tuvalu': 0.5983263598326359,
 'Uganda': 0.5104602510460251,
 'Ukraine': 0.3807531380753138,
 'United Arab Emirates': 0.20502092050209206,
 'United Kingdom': 0.9665271966527197,
 'United States': 0.41422594142259417,
 'United States Minor Outlying Islands': 0.08786610878661087,
 'Uruguay': 0.7196652719665272,
 'Uzbekistan': 0.7364016736401674,
 'Vanuatu': 0.30962343096234307,
 'Venezuela': 0.8326359832635983,
 'Viet Nam': 0.26778242677824265,
 'Virgin Islands, British': 0.11715481171548117,
 'Virgin Islands, U.s.': 0.49372384937238495,
 'Wallis and Futuna': 0.17573221757322174,
 'Western Sahara': 0.39748953974895396,
 'Yemen': 0.18410041841004185,
 'Zambia': 0.8535564853556485,
 'Zimbabwe': 0.8661087866108786}

In [85]:
# Summary statistics of the raw ages, taken before any cleaning; the actual
# outlier fix (negatives -> 1, > 100 -> 100) happens in the next cell, which
# also reuses `max_age` to scale the ages.
# The Series methods skip missing values, whereas np.median on a column that
# contains NaN returns NaN.
median_age = users.age.median()
max_age = users.age.max()
min_age = users.loc[users.age > 0, 'age'].min()

# Guard read by the cleaning cell so that the country -> label replacement
# is applied only once after this cell has run.
first = True

# Kept as the last expression so the notebook actually displays the values.
median_age, max_age, min_age

In [86]:
# now we change the female/male and country categories to numbers
# Turn sex data numerical: 'f' -> 1, 'm' -> 0, anything else -> 0.5.
# .loc writes into `users` itself; the chained form `users.sex[mask] = v`
# can end up assigning to a temporary copy (pandas SettingWithCopyWarning).
users.loc[users.sex == 'f', 'sex'] = 1
users.loc[users.sex == 'm', 'sex'] = 0
users.loc[(users.sex != 0) & (users.sex != 1), 'sex'] = 0.5

# Clamp implausible ages (negatives -> 1, above 100 -> 100), then impute
# missing ages with the median of the known ages. Series.median() skips NaN;
# np.median() on a column with missing values yields NaN, which would leave
# the gaps unfilled.
users.loc[users.age > 100, 'age'] = 100.0
users.loc[users.age < 0, 'age'] = 1.0
users.loc[users.age.isnull(), 'age'] = float(users.age.median())
# NOTE(review): max_age comes from the previous cell and was computed before
# the clamping above. Re-running this cell rescales the ages again.
users['age'] = users.age / float(max_age)

if first:
    # Replace each country name by its numeric label. Guarded because a
    # second pass would look the labels up in a dict keyed by names.
    users['country'] = users.country.map(country_label)
    first = False

In [87]:
# When True, term columns hold 1 for "artist has this term" instead of the
# term's frequency.
binary = False

# Vocabulary of every term attached to any artist.
unique_terms = set()
for term_list in artist_terms.values():
    for (term, freq) in term_list:
        unique_terms.add(term)

# calculate the number of unique terms + 1 for hotness vector
artist_nfeats = len(unique_terms) + 1

# map unique artist terms to a row in an artist vector
term_to_index = {term : i for (i, term) in enumerate(unique_terms)}

# One row per artist, laid out as [name, <one slot per term>, hotness].
# The loop variable is called `artist_name` because `artist` is the
# pyechonest module imported at the top of the notebook and must not be
# overwritten.
artist_rows = []
for (artist_name, terms) in artist_terms.items():
    artist_row = [0] * (artist_nfeats + 1)
    artist_row[0] = artist_name
    for (term, freq) in terms:
        # +1 skips the leading name slot
        artist_row[term_to_index[term] + 1] = freq if not binary else 1

    try:
        artist_row[artist_nfeats] = artist_hotness[artist_name]
    except KeyError:
        print("Could not find hotness for artist {}.".format(artist_name))
        artist_row[artist_nfeats] = 0
    artist_rows.append(artist_row)

artist_data = pd.DataFrame(artist_rows, columns = ['name'] + list(unique_terms) + ['hotness'])


Could not find hotness for artist Freezepop.
Could not find hotness for artist Phish.

In [88]:
# Per-column median over all artists; get_artist_feature uses it as the
# fallback vector for artists without term data. The first column of
# artist_data holds the artist name, so it is excluded explicitly instead of
# relying on pandas to silently drop non-numeric columns.
median_artist = artist_data.iloc[:, 1:].median(axis=0)
len(median_artist)


Out[88]:
809

In [89]:
# this is fast
def get_artist_feature(row):
    '''
    Build the feature vector for one artist.

    Input: row - mapping/Series with a 'name' entry (the artist name).
    Returns: a new numpy array with the first `max_feats` entries of the
        artist's vector. The full vector has one slot per known term
        (positions come from term_to_index) and a final hotness slot.
        Artists without any term data get the first `max_feats` entries of
        median_artist instead.

    Reads the notebook globals artist_nfeats, artist_hotness, artist_terms,
    term_to_index, binary and median_artist.
    '''
    max_feats = 128
    artist_name = row['name']
    features = np.zeros(artist_nfeats)
    # set feature hotness (last slot); a missing hotness counts as 0
    # NOTE(review): when artist_nfeats exceeds max_feats the hotness slot is
    # cut off by the truncation at the end -- confirm this is intended.
    try:
        features[artist_nfeats - 1] = artist_hotness[artist_name]
    except KeyError:
        print("Could not find hotness for {}.".format(artist_name))
        features[artist_nfeats - 1] = 0

    try:
        terms = artist_terms[artist_name]
    except KeyError:
        # we have no data on this artist, so his information is the median for the terms
        # np.array copies, so the shared median_artist is never aliased.
        print("Unable to find terms for {}.".format(artist_name))
        return np.array(median_artist[:max_feats])

    for (term, freq) in terms:
        try:
            index = term_to_index[term]
        except KeyError:
            # A dict lookup raises KeyError, not IndexError. Previously an
            # unknown term escaped to the "no terms" handler and replaced
            # the whole vector with the medians; now only that term is
            # skipped.
            print("Unable to obtain row index for feature {}.".format(term))
            continue
        try:
            features[index] = freq if not binary else 1
        except IndexError:
            print("Row index {} out of range for list of length {}".format(index,len(features)))

    return np.array(features[:max_feats])

def get_user_feature(row):
    '''Return the row's 'country', 'age' and 'sex' entries, in that order, as a numpy array.'''
    fields = ('country', 'age', 'sex')
    return np.array([row[field] for field in fields])

In [90]:
# Precompute one feature vector per artist (keyed by the `name` column) and
# one per user (keyed by the `user` column).
artist_features = dict(
    (row['name'], get_artist_feature(row)) for _, row in artists.iterrows()
)
user_features = dict(
    (row['user'], get_user_feature(row)) for _, row in users.iterrows()
)


Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Freezepop.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Various Artists.
Unable to find terms for Various Artists.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for !!!.
Unable to find terms for !!!.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Phish.
Could not find hotness for nan.
Unable to find terms for nan.

In [39]:
# Persist both feature lookups through dump_data (one pickle per key).
dump_data({
    'user_features_smaller': user_features,
    'artist_features_smaller': artist_features,
})

In [91]:
def features(row):
    '''
    Join the precomputed artist and user vectors for one row.

    `row` must provide 'artist' (translated to a name through
    artist_to_name) and 'user'. The artist part comes first in the result.
    '''
    name = artist_to_name[row['artist']]
    user = row['user']
    parts = (artist_features[name], user_features[user])
    return np.concatenate(parts)

In [92]:
# Keep only the first half of the training rows.
subset_num = len(train) // 2
train_small = train.iloc[:subset_num]
train_small.shape[0]


Out[92]:
2077402

In [93]:
def extract_data(data, test = False):
    '''
    Build the model inputs for every row of `data`.

    Inputs: data - DataFrame whose rows are accepted by features(); it must
                also have a `plays` column unless test is True.
            test - when True, no targets are returned.
    Returns: (X, Y) where X stacks one feature vector per row and Y is the
        array of `plays` for those same rows (None when test is True).
    '''
    l = []
    for i,row in data.iterrows():
        # i is the row's index label, so progress is reported per label.
        if i % 50000 == 0:
            print("Processed {} items".format(i))
        l.append(features(row))

    # Targets are taken from `data` itself. They used to be read from the
    # global train_small, which gave Y rows that did not match X whenever a
    # different frame was passed in.
    Y = np.array(data.plays) if not test else None
    return np.vstack(l),Y

In [94]:
def model(X,Y,trees = 16, random_state = None):
    '''
    Fit a random-forest regressor on (X, Y) and return the fitted model.

    Inputs: X - feature matrix, Y - targets.
            trees - number of trees in the forest.
            random_state - optional seed passed to the forest so a fit can
                be reproduced; None (the default) keeps the unseeded
                behaviour.
    '''
    m = rf(n_estimators = trees, random_state = random_state)
    print("Fitting model")
    m.fit(X,Y)
    print("Done fit.")

    return m

In [46]:
# Feature matrix and targets for the training subset.
X, Y = extract_data(train_small, test=False)


Processed 0 items
Processed 50000 items
Processed 100000 items
Processed 150000 items
Processed 200000 items
Processed 250000 items
Processed 300000 items
Processed 350000 items
Processed 400000 items
Processed 450000 items
Processed 500000 items
Processed 550000 items
Processed 600000 items
Processed 650000 items
Processed 700000 items
Processed 750000 items
Processed 800000 items
Processed 850000 items
Processed 900000 items
Processed 950000 items
Processed 1000000 items
Processed 1050000 items
Processed 1100000 items
Processed 1150000 items
Processed 1200000 items
Processed 1250000 items
Processed 1300000 items
Processed 1350000 items
Processed 1400000 items
Processed 1450000 items
Processed 1500000 items
Processed 1550000 items
Processed 1600000 items
Processed 1650000 items
Processed 1700000 items
Processed 1750000 items
Processed 1800000 items
Processed 1850000 items
Processed 1900000 items
Processed 1950000 items
Processed 2000000 items
Processed 2050000 items

In [47]:
# Fit the forest with the default number of trees.
rfmodel = model(X, Y, trees=16)


Fitting model
Done fit.

In [166]:
def error(p,e):
    '''
    Mean absolute error: sum of |p - e| divided by len(p).

    p, e - predictions and expected values; arrays or plain sequences.
    Both are converted to float arrays first, so integer inputs are not
    floor-divided under Python 2 and lists can be subtracted.
    '''
    p = np.asarray(p, dtype=float)
    e = np.asarray(e, dtype=float)
    return np.sum(np.absolute(p - e)) / float(len(p))
# NOTE(review): T and Ye are not defined in any cell shown here; presumably a
# held-out feature matrix and its true play counts -- confirm before re-running.
error(rfmodel.predict(T), Ye)


Out[166]:
291.25151081095402

In [52]:
# Table of rows to predict, read from the CSV named by `testfile`.
testdata = pd.read_csv(filepath_or_buffer=testfile)
def make_predictions(chunks=50000):
    '''
    Predict for every row of `testdata`, working through it `chunks` rows at
    a time. Returns a list of (Id, prediction) tuples in testdata row order.
    '''
    predictions = []
    total = len(testdata)
    for start in range(0, total, chunks):
        batch = testdata[start:start + chunks]
        # test=True: only the feature matrix is needed, targets are None
        batch_X, _ = extract_data(batch, test=True)
        batch_pred = rfmodel.predict(batch_X)

        predictions.extend(zip(batch.Id.tolist(), batch_pred.tolist()))
        # drop this chunk's features before building the next chunk
        del batch_X

    return predictions

# Run the chunked prediction with the default chunk size.
results = make_predictions(chunks=50000)


Processed 0 items
Processed 50000 items
Processed 100000 items
Processed 150000 items
Processed 200000 items
Processed 250000 items
Processed 300000 items
Processed 350000 items
Processed 400000 items
Processed 450000 items
Processed 500000 items
Processed 550000 items
Processed 600000 items
Processed 650000 items
Processed 700000 items
Processed 750000 items
Processed 800000 items
Processed 850000 items
Processed 900000 items
Processed 950000 items
Processed 1000000 items
Processed 1050000 items
Processed 1100000 items
Processed 1150000 items
Processed 1200000 items
Processed 1250000 items
Processed 1300000 items
Processed 1350000 items
Processed 1400000 items
Processed 1450000 items
Processed 1500000 items
Processed 1550000 items
Processed 1600000 items
Processed 1650000 items
Processed 1700000 items
Processed 1750000 items
Processed 1800000 items
Processed 1850000 items
Processed 1900000 items
Processed 1950000 items
Processed 2000000 items
Processed 2050000 items
Processed 2100000 items
Processed 2150000 items
Processed 2200000 items
Processed 2250000 items
Processed 2300000 items
Processed 2350000 items
Processed 2400000 items
Processed 2450000 items
Processed 2500000 items
Processed 2550000 items
Processed 2600000 items
Processed 2650000 items
Processed 2700000 items
Processed 2750000 items
Processed 2800000 items
Processed 2850000 items
Processed 2900000 items
Processed 2950000 items
Processed 3000000 items
Processed 3050000 items
Processed 3100000 items
Processed 3150000 items
Processed 3200000 items
Processed 3250000 items
Processed 3300000 items
Processed 3350000 items
Processed 3400000 items
Processed 3450000 items
Processed 3500000 items
Processed 3550000 items
Processed 3600000 items
Processed 3650000 items
Processed 3700000 items
Processed 3750000 items
Processed 3800000 items
Processed 3850000 items
Processed 3900000 items
Processed 3950000 items
Processed 4000000 items
Processed 4050000 items
Processed 4100000 items
Processed 4150000 items

In [56]:
# Keep only the prediction (second element) of each result pair.
plays = [pair[1] for pair in results]

In [95]:
# Mean of the `plays` column in the training table.
train.plays.mean()


Out[95]:
253.01679621950879

In [96]:
# Mean of the predicted values collected in `plays`.
np.asarray(plays).mean()


Out[96]:
262.24765742806989

In [55]:
# sort results by id
results.sort(key=lambda (i,_) : i)
results


Out[55]:
[(1, 450.65469374375624),
 (2, 218.51187139958424),
 (3, 214.3125),
 (4, 233.71428571428572),
 (5, 226.478125),
 (6, 379.5801624497794),
 (7, 1354.2541666666666),
 (8, 332.2497023809524),
 (9, 295.8107142857143),
 (10, 191.6429076953748),
 (11, 101.86875),
 (12, 29.28125),
 (13, 78.52895108363859),
 (14, 244.074897589138),
 (15, 230.734375),
 (16, 517.4375),
 (17, 328.94003322370656),
 (18, 376.6875),
 (19, 266.90860764699926),
 (20, 371.9447916666666),
 (21, 319.00625),
 (22, 1137.9375),
 (23, 256.98658588493606),
 (24, 204.05062210522854),
 (25, 239.22837211959316),
 (26, 437.78847853535353),
 (27, 67.10208333333333),
 (28, 105.94191631958003),
 (29, 142.4794034090909),
 (30, 482.25),
 (31, 250.5176282051282),
 (32, 199.60131520021542),
 (33, 757.8645833333334),
 (34, 362.78731737012987),
 (35, 111.45833333333333),
 (36, 254.83410213894703),
 (37, 474.46666666666664),
 (38, 350.710441042399),
 (39, 371.8651786859521),
 (40, 62.06047077922078),
 (41, 100.6041756854257),
 (42, 57.0),
 (43, 311.58333333333337),
 (44, 258.95369803807307),
 (45, 731.7916666666667),
 (46, 256.91249999999997),
 (47, 228.03624164696444),
 (48, 58.3125),
 (49, 97.8625),
 (50, 50.4375),
 (51, 159.60833333333335),
 (52, 258.1631695785138),
 (53, 142.83019029581527),
 (54, 726.5),
 (55, 293.45833333333337),
 (56, 42.60691964285714),
 (57, 329.26822916666663),
 (58, 263.3178584258093),
 (59, 108.47642250519088),
 (60, 583.8104166666667),
 (61, 138.625),
 (62, 283.8646241434037),
 (63, 138.75),
 (64, 217.26041666666666),
 (65, 458.1625),
 (66, 342.1485119047619),
 (67, 190.10520833333334),
 (68, 468.6875),
 (69, 117.1875),
 (70, 232.36057190841922),
 (71, 159.2188244047619),
 (72, 124.83328577691086),
 (73, 214.7734375),
 (74, 313.65023310023315),
 (75, 367.16826388677373),
 (76, 397.23741319444446),
 (77, 127.27994791666667),
 (78, 636.25),
 (79, 374.88541666666663),
 (80, 7066.9375),
 (81, 1276.34375),
 (82, 96.25104166666668),
 (83, 234.2875),
 (84, 52.97916666666667),
 (85, 138.83333333333334),
 (86, 69.99375),
 (87, 136.39375),
 (88, 262.7752976190476),
 (89, 400.9791666666667),
 (90, 162.7540922619048),
 (91, 115.09791666666666),
 (92, 167.0625),
 (93, 175.60267857142858),
 (94, 277.12958829365084),
 (95, 87.71659226190476),
 (96, 1100.0473823051948),
 (97, 143.45982142857142),
 (98, 130.19791666666669),
 (99, 144.2835001803752),
 (100, 333.54263392857143),
 (101, 174.27580855487102),
 (102, 250.76835317460316),
 (103, 197.0),
 (104, 218.84692314425766),
 (105, 127.4131718975469),
 (106, 297.98589250423487),
 (107, 938.0277534522688),
 (108, 118.65625),
 (109, 311.8923611111111),
 (110, 68.34138566213386),
 (111, 140.81150202734793),
 (112, 370.02961309523806),
 (113, 143.4924107142857),
 (114, 463.8909069548872),
 (115, 304.0747271825397),
 (116, 251.79681488773736),
 (117, 629.2165378024753),
 (118, 403.8074404761905),
 (119, 154.9375),
 (120, 262.4375),
 (121, 144.6969768669545),
 (122, 133.9280175553613),
 (123, 229.43263888888887),
 (124, 45.202529761904756),
 (125, 159.89263392857143),
 (126, 180.91145833333334),
 (127, 355.62618371212125),
 (128, 85.19158497064156),
 (129, 349.6375),
 (130, 108.31431878306879),
 (131, 100.9041423853924),
 (132, 592.9151062117955),
 (133, 478.4168244949495),
 (134, 220.2993727951407),
 (135, 81.3125),
 (136, 193.91666666666666),
 (137, 83.4375),
 (138, 339.1730233710785),
 (139, 1707.4089673913043),
 (140, 204.3125),
 (141, 102.35074404761903),
 (142, 159.97696761250214),
 (143, 137.97916666666669),
 (144, 464.4011906008389),
 (145, 274.77796378968253),
 (146, 126.36882856726606),
 (147, 201.9583774906724),
 (148, 226.3125),
 (149, 71.15473710317461),
 (150, 284.26875),
 (151, 127.06634224939889),
 (152, 117.875),
 (153, 140.73276619392766),
 (154, 214.93131233272493),
 (155, 113.24657088094871),
 (156, 230.375),
 (157, 93.61789772727272),
 (158, 856.590302579365),
 (159, 102.76736111111111),
 (160, 82.40625),
 (161, 99.16354166666666),
 (162, 206.38316807783715),
 (163, 127.39583333333333),
 (164, 95.1125),
 (165, 116.81377823565322),
 (166, 444.9113343253968),
 (167, 277.73333333333335),
 (168, 204.40987787290385),
 (169, 241.36183035714288),
 (170, 185.49743018768513),
 (171, 31.089186507936507),
 (172, 132.01041666666666),
 (173, 287.71934523809523),
 (174, 185.10416666666669),
 (175, 320.84375),
 (176, 251.94342117903867),
 (177, 701.5456755050507),
 (178, 311.88541666666663),
 (179, 153.0625),
 (180, 267.25),
 (181, 59.50290925883757),
 (182, 85.44111487470863),
 (183, 600.742860769745),
 (184, 178.29114583333333),
 (185, 40.890625),
 (186, 295.8589285714285),
 (187, 333.565625),
 (188, 337.85729166666664),
 (189, 195.11295009814748),
 (190, 89.66761363636364),
 (191, 138.9375),
 (192, 175.43035714285713),
 (193, 260.85625),
 (194, 276.4126497269996),
 (195, 131.28854166666667),
 (196, 349.12188305170895),
 (197, 361.328125),
 (198, 151.84375),
 (199, 172.58333333333331),
 (200, 358.503125),
 (201, 156.74062500000002),
 (202, 254.16584821428575),
 (203, 92.28125),
 (204, 397.00001896292696),
 (205, 140.16542207792207),
 (206, 151.50729166666667),
 (207, 394.059071684894),
 (208, 404.48541666666665),
 (209, 282.83908942364826),
 (210, 110.11714240187655),
 (211, 644.3415096221078),
 (212, 141.58333333333334),
 (213, 725.9762152777778),
 (214, 89.775),
 (215, 188.38125),
 (216, 279.7432291666667),
 (217, 235.02083333333331),
 (218, 20.322123015873014),
 (219, 318.2916666666667),
 (220, 308.6553952991453),
 (221, 431.4084580350205),
 (222, 130.34598214285714),
 (223, 185.38750000000002),
 (224, 246.74479166666666),
 (225, 693.6766729797979),
 (226, 505.4583333333333),
 (227, 949.7000909391534),
 (228, 253.0109944471843),
 (229, 270.5032979305913),
 (230, 72.78420106617669),
 (231, 206.87287681598997),
 (232, 110.04512315245745),
 (233, 300.26383928571425),
 (234, 154.875),
 (235, 115.62365499084248),
 (236, 252.8025162337662),
 (237, 207.946875),
 (238, 75.8125),
 (239, 78.75520833333334),
 (240, 254.81026785714286),
 (241, 191.81654462587846),
 (242, 453.52083333333337),
 (243, 231.125),
 (244, 243.76098873928794),
 (245, 162.19375),
 (246, 285.3125),
 (247, 88.75),
 (248, 120.0),
 (249, 103.6875),
 (250, 70.23958333333333),
 (251, 477.27285449027636),
 (252, 185.54511105724342),
 (253, 511.34574636534774),
 (254, 249.89583333333331),
 (255, 442.20734126984127),
 (256, 307.25),
 (257, 88.71335074239488),
 (258, 255.91357571057878),
 (259, 125.44895833333332),
 (260, 195.7538519458604),
 (261, 272.7145526960784),
 (262, 456.31193181818185),
 (263, 181.61979166666666),
 (264, 90.02891042780749),
 (265, 182.9375),
 (266, 303.4572916666667),
 (267, 430.8072939781028),
 (268, 230.41666666666666),
 (269, 60.6),
 (270, 71.68303571428571),
 (271, 218.05208333333334),
 (272, 112.403125),
 (273, 504.375),
 (274, 373.3333333333333),
 (275, 219.26041666666666),
 (276, 189.58993047188582),
 (277, 58.94955357142856),
 (278, 222.86979166666669),
 (279, 211.46666666666667),
 (280, 211.57291666666666),
 (281, 241.51041666666666),
 (282, 80.90833333333333),
 (283, 131.10416666666669),
 (284, 227.36344246031751),
 (285, 261.1625),
 (286, 86.90625),
 (287, 230.25086354617605),
 (288, 264.52208644625983),
 (289, 99.625),
 (290, 135.6875),
 (291, 528.6024508477633),
 (292, 352.21875),
 (293, 43.908730158730165),
 (294, 907.2784840611928),
 (295, 178.14258952852705),
 (296, 355.9835027965227),
 (297, 262.4943654303487),
 (298, 216.00709134615383),
 (299, 178.11484648616022),
 (300, 151.91079858379229),
 (301, 388.80453869047625),
 (302, 713.796875),
 (303, 324.04017857142856),
 (304, 114.64702982507258),
 (305, 47.04873511904762),
 (306, 116.96112351190476),
 (307, 342.10484782937505),
 (308, 108.83802083333332),
 (309, 257.1097379689132),
 (310, 60.26979166666667),
 (311, 120.93229166666667),
 (312, 345.48958333333337),
 (313, 174.03125),
 (314, 268.8260416666667),
 (315, 331.90625000000006),
 (316, 211.5625),
 (317, 182.97956349206348),
 (318, 158.875),
 (319, 354.99345238095236),
 (320, 571.908161976912),
 (321, 57.46875),
 (322, 99.09375),
 (323, 154.5),
 (324, 182.92241925367225),
 (325, 116.13541666666667),
 (326, 222.60927755034382),
 (327, 136.98942064879566),
 (328, 332.1328125),
 (329, 157.08020833333336),
 (330, 191.16666666666666),
 (331, 79.3125),
 (332, 275.60255809855994),
 (333, 200.58333333333334),
 (334, 110.678125),
 (335, 93.9983630952381),
 (336, 36.80416666666666),
 (337, 618.5),
 (338, 126.99895012719327),
 (339, 97.0),
 (340, 531.9375),
 (341, 486.5625),
 (342, 192.84791666666666),
 (343, 77.3125),
 (344, 605.0871527141393),
 (345, 449.60550595238095),
 (346, 109.23557692307692),
 (347, 216.4),
 (348, 845.38125),
 (349, 436.64176353130307),
 (350, 161.62663690476188),
 (351, 869.0328125),
 (352, 269.51801375546535),
 (353, 126.1875),
 (354, 338.625),
 (355, 526.0625),
 (356, 231.14145183376993),
 (357, 470.8715277777777),
 (358, 177.36830357142858),
 (359, 245.56080115533737),
 (360, 129.21868686868686),
 (361, 234.80357142857144),
 (362, 217.5),
 (363, 281.6883826087705),
 (364, 136.71389041793137),
 (365, 198.8269775080849),
 (366, 315.08832105394606),
 (367, 201.03125),
 (368, 197.1960411846925),
 (369, 189.59730113636363),
 (370, 76.21145833333333),
 (371, 297.33229166666666),
 (372, 149.4035082972583),
 (373, 123.66338383838384),
 (374, 71867.5),
 (375, 390.3552236937106),
 (376, 481.16250000000014),
 (377, 117.10291687479187),
 (378, 237.81629464285712),
 (379, 140.64329212454214),
 (380, 178.15625),
 (381, 192.625),
 (382, 235.40147020145207),
 (383, 343.5969391719392),
 (384, 155.63541666666666),
 (385, 285.0435763888889),
 (386, 191.52083333333331),
 (387, 419.21726179779733),
 (388, 103.0625),
 (389, 543.3528251262627),
 (390, 154.13541666666666),
 (391, 191.9635912698413),
 (392, 256.40756448412696),
 (393, 228.31879283910533),
 (394, 296.51208062770564),
 (395, 138.47799873737372),
 (396, 231.61224478646352),
 (397, 334.70833333333337),
 (398, 291.625),
 (399, 368.94046335608834),
 (400, 183.88958333333335),
 (401, 114.1907738095238),
 (402, 513.63125),
 (403, 92.61420454545454),
 (404, 272.65676181457434),
 (405, 224.809375),
 (406, 238.59094332327425),
 (407, 139.5),
 (408, 178.5),
 (409, 334.4548746392497),
 (410, 1242.988872540841),
 (411, 120.5875),
 (412, 248.9375),
 (413, 245.92187349742602),
 (414, 168.31799693535177),
 (415, 291.7648226773227),
 (416, 68.89583333333333),
 (417, 136.25),
 (418, 582.28125),
 (419, 123.29583333333335),
 (420, 326.74851190476187),
 (421, 228.62340796789093),
 (422, 301.67948615399996),
 (423, 44.591354218697965),
 (424, 197.73958333333334),
 (425, 274.0324103526067),
 (426, 284.375),
 (427, 132.50833333333333),
 (428, 215.640625),
 (429, 138.14583333333334),
 (430, 137.0625),
 (431, 414.66666666666663),
 (432, 260.2240327380953),
 (433, 233.17640472793676),
 (434, 177.93616071428573),
 (435, 210.30208333333331),
 (436, 103.3480921855922),
 (437, 297.77083333333337),
 (438, 204.01351686507937),
 (439, 446.60520833333334),
 (440, 133.38482142857143),
 (441, 305.96841856060604),
 (442, 1156.0625),
 (443, 269.39650733497564),
 (444, 164.84895833333334),
 (445, 139.263713023088),
 (446, 357.4198863636364),
 (447, 151.3659864898498),
 (448, 222.89112522708646),
 (449, 142.54743917109903),
 (450, 81.21875),
 (451, 54.3125),
 (452, 69.56666666666666),
 (453, 229.30882936507936),
 (454, 156.98125),
 (455, 1342.315625),
 (456, 376.9848260246698),
 (457, 28438.716145833332),
 (458, 194.5964285714286),
 (459, 33.55),
 (460, 242.0239678162932),
 (461, 107.03411172161174),
 (462, 287.2428412381776),
 (463, 131.171875),
 (464, 202.29131944444447),
 (465, 124.23340773809524),
 (466, 110.13541666666666),
 (467, 251.8677785365649),
 (468, 59.00625),
 (469, 94.0),
 (470, 126.46517857142857),
 (471, 427.9044146825397),
 (472, 202.54166666666669),
 (473, 234.40055099618513),
 (474, 268.6311543752214),
 (475, 147.75247408841156),
 (476, 344.8785342261905),
 (477, 116.34583333333333),
 (478, 241.57787923881676),
 (479, 369.17245901356654),
 (480, 255.12321428571425),
 (481, 78.1392361111111),
 (482, 176.12574404761904),
 (483, 179.7247023809524),
 (484, 401.779063985431),
 (485, 159.38255269036517),
 (486, 467.8018008033633),
 (487, 367.1171875),
 (488, 992.7291666666667),
 (489, 360.85116316984073),
 (490, 89.34496753246752),
 (491, 275.7698657884513),
 (492, 277.42545996609743),
 (493, 50.01634372571871),
 (494, 164.3439133190245),
 (495, 303.29166666666663),
 (496, 314.14427083333334),
 (497, 57.985119047619044),
 (498, 163.20833333333331),
 (499, 331.59375),
 (500, 296.71077553730584),
 (501, 460.54520782663934),
 (502, 403.575359107804),
 (503, 159.7854280001844),
 (504, 447.0885416666667),
 (505, 383.1406837406015),
 (506, 55.7125),
 (507, 444.75408155924316),
 (508, 62.622916666666676),
 (509, 175.00766369047616),
 (510, 250.41145833333334),
 (511, 135.8125),
 (512, 92.20333905421707),
 (513, 29.227083333333333),
 (514, 132.51767701763106),
 (515, 169.6586606612723),
 (516, 232.609375),
 (517, 44.28005952380952),
 (518, 110.99910714285714),
 (519, 267.28652458142346),
 (520, 171.84375),
 (521, 268.1),
 (522, 115.67901785714285),
 (523, 386.8997173267087),
 (524, 213.1),
 (525, 306.18159225257597),
 (526, 129.0),
 (527, 609.8916666666668),
 (528, 323.365625),
 (529, 295.82498511595736),
 (530, 111.59479166666667),
 (531, 220.25),
 (532, 379.5266672761414),
 (533, 182.92241925367225),
 (534, 80.109464000411),
 (535, 273.0496188719329),
 (536, 273.94485294117646),
 (537, 506.9479166666667),
 (538, 93.00066964285715),
 (539, 348.459375),
 (540, 171.4073373544432),
 (541, 219.296875),
 (542, 270.453125),
 (543, 393.4270833333333),
 (544, 158.91875),
 (545, 261.97768811050065),
 (546, 289.93520855383116),
 (547, 184.92101726398602),
 (548, 321.53988095238094),
 (549, 383.37916666666666),
 (550, 85.578125),
 (551, 98.75570436507937),
 (552, 281.42708333333337),
 (553, 135.125),
 (554, 231.55444555444555),
 (555, 162.8125),
 (556, 85.4296875),
 (557, 78.44415876977554),
 (558, 740.0822916666667),
 (559, 168.80409727794452),
 (560, 396.4049107142857),
 (561, 38.78125),
 (562, 299.2461251670371),
 (563, 427.2193903926869),
 (564, 102.95833333333334),
 (565, 129.22148578906697),
 (566, 158.675),
 (567, 127.6875),
 (568, 95.5625),
 (569, 227.40625),
 (570, 274.52261904761906),
 (571, 101.26458333333333),
 (572, 246.7125),
 (573, 413.16238313328927),
 (574, 453.44969336219333),
 (575, 133.56046626984127),
 (576, 420.73437500000006),
 (577, 201.6208333333333),
 (578, 492.8609375),
 (579, 226.453125),
 (580, 96.67403768106892),
 (581, 814.6067989565473),
 (582, 184.2902892246642),
 (583, 272.6996960048358),
 (584, 272.47324246933624),
 (585, 266.90625),
 (586, 339.30899556448753),
 (587, 205.91666666666669),
 (588, 92.98601190476191),
 (589, 125.06170574673864),
 (590, 340.646054753146),
 (591, 515.2321428571429),
 (592, 63.504315476190484),
 (593, 1166.0729166666665),
 (594, 95.6875),
 (595, 169.6586606612723),
 (596, 160.7531433894945),
 (597, 420.0859651319355),
 (598, 158.44631112688813),
 (599, 156.47946567831676),
 (600, 244.69791666666666),
 (601, 103.2875),
 (602, 429.7244724893162),
 (603, 225.7373559844392),
 (604, 623.078125),
 (605, 189.498920140532),
 (606, 267.61806746773425),
 (607, 327.86458333333337),
 (608, 109.78125),
 (609, 235.16349557724945),
 (610, 39.64583333333333),
 (611, 374.41659218506777),
 (612, 160.75),
 (613, 323.8504554473304),
 (614, 398.0760416666667),
 (615, 97.42408685064935),
 (616, 197.3125),
 (617, 180.3296130952381),
 (618, 184.92708333333334),
 (619, 190.83092502037954),
 (620, 257.3415935924652),
 (621, 282.60357142857146),
 (622, 253.73143106893104),
 (623, 174.11513157894737),
 (624, 139.22395833333334),
 (625, 182.90233607951546),
 (626, 107.71461789778321),
 (627, 123.09298149766902),
 (628, 338.0401364212413),
 (629, 55.04856061184186),
 (630, 126.99895012719327),
 (631, 336.1969656021083),
 (632, 249.82021223342574),
 (633, 730.7401785714285),
 (634, 240.234375),
 (635, 130.359375),
 (636, 237.11638926841135),
 (637, 278.1400429410201),
 (638, 59.24097222222221),
 (639, 131.18937756322626),
 (640, 210.50130095598846),
 (641, 113.8125),
 (642, 238.45782306660098),
 (643, 182.01912202380953),
 (644, 160.44752435064936),
 (645, 448.9375),
 (646, 372.0070616883117),
 (647, 157.63980654761906),
 (648, 152.09343614989197),
 (649, 270.7846111931632),
 (650, 162.90554570199828),
 (651, 104.11160714285714),
 (652, 282.00344216059375),
 (653, 93.6875),
 (654, 187.86086309523807),
 (655, 230.03804922569978),
 (656, 238.07387577797235),
 (657, 156.84093790667742),
 (658, 179.01666666666665),
 (659, 270.0967842920968),
 (660, 188.59375),
 (661, 195.22291666666666),
 (662, 268.3147742105775),
 (663, 304.51540707337784),
 (664, 207.0336692638502),
 (665, 89.56875),
 (666, 136.3125),
 (667, 160.6035714285714),
 (668, 348.4393644881198),
 (669, 270.20281636777037),
 (670, 316.0833333333333),
 (671, 267.390625),
 (672, 116.54039994339804),
 (673, 96.37090773809523),
 (674, 245.06770833333334),
 (675, 60.64895833333333),
 (676, 206.60416666666666),
 (677, 113.55833333333334),
 (678, 326.3703137445842),
 (679, 74.31770833333334),
 (680, 89.6875),
 (681, 203.29791666666665),
 (682, 94.5309831661394),
 (683, 126.62152148292404),
 (684, 151.65902777777777),
 (685, 630.784375),
 (686, 269.9328256443173),
 (687, 279.18944699532454),
 (688, 302.11055627356126),
 (689, 355.1708333333333),
 (690, 186.63541666666669),
 (691, 164.70833333333331),
 (692, 287.42149860606133),
 (693, 113.33507858201392),
 (694, 229.35196844617803),
 (695, 159.8421762265512),
 (696, 266.1391119172992),
 (697, 238.7806379119408),
 (698, 77.92819361305276),
 (699, 204.1125),
 (700, 229.7679638806889),
 (701, 226.37916666666663),
 (702, 121.54166666666666),
 (703, 715.859375),
 (704, 185.20089285714286),
 (705, 145.7455639846265),
 (706, 166.1476721552549),
 (707, 228.65821636402327),
 (708, 464.97474731933653),
 (709, 363.8772321428571),
 (710, 307.70907353286646),
 (711, 401.90625),
 (712, 213.0078125),
 (713, 455.453017038413),
 (714, 255.4383270916138),
 (715, 114.0733630952381),
 (716, 130.86609172077922),
 (717, 469.936507936508),
 (718, 146.5),
 (719, 109.48541666666667),
 (720, 147.91827065295814),
 (721, 246.63816964285715),
 (722, 157.30833333333334),
 (723, 160.60555444388626),
 (724, 279.89006737757154),
 (725, 134.9375),
 (726, 120.27708333333334),
 (727, 77.39185606060607),
 (728, 145.07291666666669),
 (729, 159.22215197996448),
 (730, 197.1916666666667),
 (731, 65.24305555555556),
 (732, 429.8002976190476),
 (733, 150.828125),
 (734, 99.15625),
 (735, 152.12738095238097),
 (736, 256.9125),
 (737, 290.6688988095238),
 (738, 189.1875),
 (739, 155.4),
 (740, 285.23269074675324),
 (741, 135.02886904761905),
 (742, 186.5530753968254),
 (743, 157.86249999999998),
 (744, 560.9557291666666),
 (745, 171.33333333333334),
 (746, 208.67634246881084),
 (747, 110.11691468253969),
 (748, 100.96875),
 (749, 811.9599702380951),
 (750, 131.72916666666669),
 (751, 148.96875),
 (752, 165.01041666666669),
 (753, 61.625),
 (754, 216.34358766233765),
 (755, 323.55416449869574),
 (756, 211.8397435897436),
 (757, 381.08060897435894),
 (758, 227.18125),
 (759, 167.89576465770585),
 (760, 161.99531249999998),
 (761, 386.1212121212121),
 (762, 176.0129023488315),
 (763, 100.0),
 (764, 583.716435172528),
 (765, 384.5009825400762),
 (766, 130.34065075286054),
 (767, 525.3313988095238),
 (768, 106.11398809523808),
 (769, 244.5822953241611),
 (770, 109.4638418700967),
 (771, 198.08908504689754),
 (772, 443.3625),
 (773, 501.4375),
 (774, 43.86919642857143),
 (775, 400.6104166666667),
 (776, 183.04017857142856),
 (777, 276.4982917278977),
 (778, 147.4375),
 (779, 127.54375),
 (780, 154.81220238095239),
 (781, 360.2291666666667),
 (782, 137.875),
 (783, 248.68749819970557),
 (784, 229.34065087872654),
 (785, 102.696875),
 (786, 244.8336086728888),
 (787, 218.20699418596692),
 (788, 224.0),
 (789, 398.709339107286),
 (790, 170.83333333333334),
 (791, 200.18936011904762),
 (792, 751.4575033990911),
 (793, 162.90666590354087),
 (794, 741.2840627526465),
 (795, 513.6051709121678),
 (796, 205.85571441701344),
 (797, 211.53125),
 (798, 239.08137386005808),
 (799, 115.31114938830626),
 (800, 160.17407116554756),
 (801, 285.16699340587746),
 (802, 581.9228469567679),
 (803, 196.23871527777774),
 (804, 41.364583333333336),
 (805, 158.20355899034277),
 (806, 210.8979864728254),
 (807, 66.898463606934),
 (808, 153.48333333333335),
 (809, 186.7067708333333),
 (810, 189.2),
 (811, 291.78587835775335),
 (812, 186.24638973279045),
 (813, 229.01941296362733),
 (814, 295.90104166666663),
 (815, 168.95875),
 (816, 144.79621027961565),
 (817, 131.640625),
 (818, 1313.7375),
 (819, 278.9524673821549),
 (820, 155.025),
 (821, 275.7348958333334),
 (822, 225.2434523809524),
 (823, 164.45025112364863),
 (824, 638.0322916666667),
 (825, 106.70625),
 (826, 118.83125),
 (827, 1739.125),
 (828, 95.74479166666667),
 (829, 215.19375000000002),
 (830, 904.2486449314574),
 (831, 279.09097630820975),
 (832, 359.8037259615384),
 (833, 105.97712053571428),
 (834, 65.6875),
 (835, 99.20833333333333),
 (836, 243.31748511904763),
 (837, 284.9445075757576),
 (838, 165.1363014285962),
 (839, 130.35466738405253),
 (840, 141.01875),
 (841, 110.75),
 (842, 271.26041666666663),
 (843, 117.13683035714286),
 (844, 183.171875),
 (845, 445.0545565164693),
 (846, 84.72291666666666),
 (847, 224.53993055555554),
 (848, 690.9751680859888),
 (849, 134.62627129480416),
 (850, 202.15885416666669),
 (851, 138.57876301008852),
 (852, 352.8762756502739),
 (853, 147.30152384514633),
 (854, 460.803083028083),
 (855, 191.2153300140764),
 (856, 128.10669642857144),
 (857, 150.06994047619048),
 (858, 155.875),
 (859, 343.237185341909),
 (860, 111.25),
 (861, 53.645833333333336),
 (862, 223.81085663696683),
 (863, 341.45520833333336),
 (864, 291.01666666666665),
 (865, 1106.6494791666667),
 (866, 244.5625),
 (867, 187.975),
 (868, 378.23794642857143),
 (869, 547.8617413759489),
 (870, 124.38778289059618),
 (871, 619.8858497693577),
 (872, 138.46248872655121),
 (873, 117.26041666666666),
 (874, 240.0745624889669),
 (875, 130.22291666666666),
 (876, 465.484375),
 (877, 178.25),
 (878, 99.453125),
 (879, 197.99717261904763),
 (880, 585.1875),
 (881, 1302.5684695512819),
 (882, 132.14583333333334),
 (883, 164.7158457230108),
 (884, 151.5450911935287),
 (885, 269.70428630624923),
 (886, 266.512774181674),
 (887, 290.790625),
 (888, 366.3027935606061),
 (889, 451.0433083768881),
 (890, 129.625),
 (891, 406.71875),
 (892, 226.76874999999998),
 (893, 1023.59375),
 (894, 276.6502036619635),
 (895, 430.720493433281),
 (896, 260.5357886904762),
 (897, 216.06994047619048),
 (898, 102.06145833333335),
 (899, 368.4614650974026),
 (900, 256.6798859126984),
 (901, 130.7886028235288),
 (902, 324.79653393434114),
 (903, 203.7457405512998),
 (904, 274.7228341002757),
 (905, 47.828125),
 (906, 61.55682494588744),
 (907, 154.73541666666665),
 (908, 117.4875288364788),
 (909, 67.89583333333333),
 (910, 164.140625),
 (911, 190.54218827137936),
 (912, 127.37068963001867),
 (913, 132.60297568176148),
 (914, 782.5869047619047),
 (915, 112.8125),
 (916, 166.6163648237263),
 (917, 15.114583333333334),
 (918, 89.81483134920634),
 (919, 96.875),
 (920, 126.3191287878788),
 (921, 163.8828125),
 (922, 114.74925595238095),
 (923, 174.23333333333335),
 (924, 121.87916666666666),
 (925, 137.34557956570424),
 (926, 138.48055555555555),
 (927, 209.4205887862138),
 (928, 124.43148630981476),
 (929, 746.21701300586),
 (930, 135.78125000000003),
 (931, 134.9345982142857),
 (932, 149.52083333333331),
 (933, 347.5),
 (934, 39.666666666666664),
 (935, 330.9375),
 (936, 236.04283571716223),
 (937, 41.3125),
 (938, 207.57533114594165),
 (939, 157.0182330799683),
 (940, 218.03466795958812),
 (941, 179.46875),
 (942, 184.5),
 (943, 77.72916666666667),
 (944, 121.72005208333333),
 (945, 197.6875),
 (946, 253.33333333333334),
 (947, 416.82374108942935),
 (948, 199.5173595934089),
 (949, 113.9375),
 (950, 157.5013392857143),
 (951, 160.2623511904762),
 (952, 200.73045634920632),
 (953, 84.1175273943463),
 (954, 410.84375),
 (955, 642.1875),
 (956, 200.37429331339717),
 (957, 447.15),
 (958, 36.0625),
 (959, 163.375),
 (960, 252.1244318181818),
 (961, 142.53225280642562),
 (962, 276.80579004329),
 (963, 278.79495665372934),
 (964, 242.4640984771098),
 (965, 236.09375),
 (966, 377.60470628442766),
 (967, 289.96395537209355),
 (968, 161.0967261904762),
 (969, 639.7734374999999),
 (970, 160.53958333333333),
 (971, 127.97163825757576),
 (972, 198.04310609614225),
 (973, 50.82222222222223),
 (974, 124.56406614219117),
 (975, 187.14852317821067),
 (976, 459.4761904761905),
 (977, 301.4979166666667),
 (978, 146.13321078431372),
 (979, 129.9375),
 (980, 226.40959821428572),
 (981, 264.9895833333333),
 (982, 121.97355248917748),
 (983, 237.32574266210574),
 (984, 96.49759615384616),
 (985, 45.45550595238095),
 (986, 164.59928713894467),
 (987, 453.28333333333336),
 (988, 989.8226133892248),
 (989, 194.6875),
 (990, 88.0625),
 (991, 1881.086755952381),
 (992, 365.95714285714286),
 (993, 224.51533473437092),
 (994, 211.31699543686673),
 (995, 200.06321025328836),
 (996, 80.30744047619048),
 (997, 244.93027900515807),
 (998, 403.7749040835723),
 (999, 203.64940025252525),
 (1000, 259.0859375),
 ...]

In [69]:
# write out predictions
import csv
import sys

soln_file = "results.csv"

# The csv module needs the file opened without newline translation; a plain
# text-mode 'w' makes every row end in "\r\r\n" on Windows.  Python 2 spells
# that binary mode ('wb'); Python 3 spells it text mode with newline=''.
if sys.version_info[0] < 3:
    soln_fh = open(soln_file, 'wb')
else:
    soln_fh = open(soln_file, 'w', newline='')

with soln_fh:
    soln_csv = csv.writer(soln_fh, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # header row, then one row per element of results (each unpacks to an Id and a plays value)
    soln_csv.writerow(['Id','plays'])
    for i, p in results:
        soln_csv.writerow([i, p])

In [98]:
# build the artist feature matrix: one get_artist_feature result per row of
# `artists`, stacked vertically (the similarity matrix is derived from it below)
artist_feature_rows = []
for _,row in artists.iterrows():
    artist_feature_rows.append(get_artist_feature(row))
artist_matrix = np.vstack(artist_feature_rows)


Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Freezepop.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Various Artists.
Unable to find terms for Various Artists.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for !!!.
Unable to find terms for !!!.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Phish.
Could not find hotness for nan.
Unable to find terms for nan.

In [99]:
def sim(X,Y):
    '''
    Given two matrices where each row represents an element, calculates the
    squared Euclidean distance between every pair (row i of X, row j of Y)
    and returns a matrix D where D_i,j is that distance divided by the
    largest distance found, so every entry lies in [0, 1].

    Despite the name, larger values mean *less* similar elements.

    Inputs: X with shape (n, d) and Y with shape (m, d), dense array-likes.
    Returns: (n, m) float array. An empty result, or one where every pair
    coincides (largest distance 0), is returned unscaled instead of dividing
    by zero.
    '''
    # work in floats so integer features are not floor-divided under Python 2
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)

    # squared norm of every row; avoids building the full Gram matrices
    # X.X^T and Y.Y^T just to read their diagonals
    x_sq = (X * X).sum(axis=1)
    y_sq = (Y * Y).sum(axis=1)

    # ||x_i - y_j||^2 = ||x_i||^2 - 2 x_i.y_j + ||y_j||^2
    # The X norms must vary down the rows (column vector) and the Y norms
    # across the columns (row vector); adding both as plain 1-D vectors would
    # broadcast the X norms along the wrong axis.
    dist = x_sq[:, np.newaxis] - 2 * X.dot(Y.T) + y_sq[np.newaxis, :]

    # round-off can leave tiny negative values where the true distance is 0
    dist = np.maximum(dist, 0.0)

    if dist.size == 0:
        return dist

    maxdist = np.max(dist)
    if maxdist == 0:
        return dist

    return dist / maxdist

In [100]:
# Artist-to-artist matrix: both arguments are the artist feature matrix, so
# entry (i, j) compares artist row i with artist row j. Note that sim() returns
# distances scaled by their maximum, so smaller values mean closer artists.
sim_matrix = sim(artist_matrix, artist_matrix)

In [88]:
# Hand the matrix to dump_data under the name 'artist_similarity'.
# NOTE(review): presumably this pickles it to 'artist_similarity.p', mirroring
# the '<name>.p' files load_data reads — confirm against dump_data.
# NOTE(review): this cell's execution count (88) is lower than the cell that
# computes sim_matrix (100); re-run it after recomputing so the saved copy is current.
dump_data({'artist_similarity':sim_matrix})

In [107]:
# let's make a nice correlation-matrix plot of the feature set for the artists
%matplotlib
import seaborn as sns

def make_corr_plot(d, title="plot"):
    '''
    Draws a seaborn correlation plot of d on a new 9x9 inch figure and saves
    the figure to disk.

    Inputs: d, the data handed straight to sns.corrplot; title, which is used
    as the savefig target (it is not drawn on the figure).
    Returns: nothing. The figure is left open after saving.
    '''
    # diverging colour map so opposite-signed correlations get opposite hues
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    fig, axes = plt.subplots(figsize=(9, 9))

    # colours only: no numeric annotations, significance stars or diagonal names
    sns.corrplot(d, ax=axes, cmap=palette,
                 annot=False, sig_stars=False, diag_names=False)

    fig.tight_layout()
    fig.savefig(title)


Using matplotlib backend: agg

In [108]:
# Correlation plot over the full artist feature matrix; no title is given, so
# the figure is saved under make_corr_plot's default name "plot".
make_corr_plot(artist_matrix)

In [111]:
# Same construction as artist_matrix, but keeping only the leading [:10] slice
# of each get_artist_feature result.
# NOTE(review): this calls get_artist_feature again for every artist (the
# lookup warnings are printed a second time); slicing the already-built
# artist_matrix may be equivalent — confirm before changing.
short_feature_rows = []
for _,row in artists.iterrows():
    short_feature_rows.append(get_artist_feature(row)[:10])
short_artist_matrix = np.vstack(short_feature_rows)


Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Freezepop.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Various Artists.
Unable to find terms for Various Artists.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for !!!.
Unable to find terms for !!!.
Could not find hotness for nan.
Unable to find terms for nan.
Could not find hotness for Phish.
Could not find hotness for nan.
Unable to find terms for nan.

In [112]:
# Correlation plot over the truncated artist features, saved as "Few Features".
make_corr_plot(short_artist_matrix, "Few Features")

In [114]:
# build the user feature matrix: one get_user_feature result per row of
# `users`, stacked vertically
user_feature_rows = []
for _,row in users.iterrows():
    user_feature_rows.append(get_user_feature(row))
user_matrix = np.vstack(user_feature_rows)

In [115]:
# Correlation plot over the user feature matrix, saved as "User features".
make_corr_plot(user_matrix, "User features")

In [ ]: