notebook.community

Edit and run



In [1]:

    
# standard libraries
import numpy as np
import pandas as pd

# for plotting
import matplotlib.pyplot as plt
import pickle

# for interpreting data
from pyechonest import artist

# for machine learning
from sklearn.ensemble import RandomForestRegressor as rf
from scipy.sparse import coo_matrix

from pyechonest import config
config.ECHO_NEST_API_KEY='EIVX1I4WCCD7FQRFV'

# turn off to avoid displaying test output
display = False



In [2]:

    
qfile = "adjusted_quantiles_thing.csv"



In [3]:

    
qdata = pd.read_csv(qfile)



In [10]:

    
%matplotlib
plt.hist(qdata[qdata.plays < 3500].plays, bins=40)
plt.title("Quantile Predictions truncated at 3500")
plt.xlabel("Number of Play Counts")
plt.ylabel("Play Counts")









    



Using matplotlib backend: TkAgg






    Out[10]:





<matplotlib.text.Text at 0x1f3df668>



In [12]:

    
bfile = "kevs_rocks.csv"
bdata = pd.read_csv(bfile)



In [13]:

    
plt.hist(bdata[bdata.plays < 3500].plays, bins=40)
plt.title("Normal Prediction truncated at 3500")
plt.xlabel("Number of Play Counts")
plt.ylabel("Play Counts")









    Out[13]:





<matplotlib.text.Text at 0x1f3df668>



In [14]:

    
plt.title("Quantile (blue) and Normal (green) Predictions")









    Out[14]:





<matplotlib.text.Text at 0x1f5ca7b8>



In [16]:

    
f = plt.figure()
plt.hist(np.log(bdata[bdata.plays < 3500].plays), bins=40)
plt.title("Log of Normal Prediction truncated at 3500")
plt.xlabel("Number of Log Play Counts")
plt.ylabel("Log Play Counts")









    Out[16]:





<matplotlib.text.Text at 0x1f9a3208>



In [ ]: