In [1]:
# standard libraries
import numpy as np
import pandas as pd

# for plotting
import matplotlib.pyplot as plt
import pickle

# for interpreting data
from pyechonest import artist

# for machine learning
from sklearn.ensemble import RandomForestRegressor as rf
from scipy.sparse import coo_matrix

import os

from pyechonest import config
# Take the Echo Nest API key from the ECHO_NEST_API_KEY environment variable
# instead of committing a secret to the notebook. If the variable is unset the
# key is None and no credential is configured by this cell.
config.ECHO_NEST_API_KEY = os.environ.get("ECHO_NEST_API_KEY")

# turn off to avoid displaying test output
# NOTE(review): in recent IPython versions this name may shadow the built-in
# display() helper — confirm before relying on display(...) further down.
display = False

In [2]:
# Path (relative to the working directory) of the CSV that is loaded into
# `qdata`; later plot titles refer to its contents as "Quantile Predictions".
qfile = "adjusted_quantiles_thing.csv"

In [3]:
# Load the CSV named by `qfile` into a DataFrame. Later cells read its
# `plays` column, so the file is expected to provide one.
qdata = pd.read_csv(qfile)

In [10]:
%matplotlib
plt.hist(qdata[qdata.plays < 3500].plays, bins=40)
plt.title("Quantile Predictions truncated at 3500")
plt.xlabel("Number of Play Counts")
plt.ylabel("Play Counts")


Using matplotlib backend: TkAgg
Out[10]:
<matplotlib.text.Text at 0x1f3df668>

In [12]:
# Path of the second predictions CSV; later plot titles refer to its contents
# as the "Normal" predictions.
bfile = "kevs_rocks.csv"
# Load it into a DataFrame. Later cells read its `plays` column.
bdata = pd.read_csv(bfile)

In [13]:
# Distribution of the normal predictions, restricted to rows with fewer than
# 3500 plays. Drawn through the pyplot state machine, i.e. onto whatever
# figure is current when the cell runs.
plt.hist(bdata[bdata.plays < 3500].plays, bins=40)
plt.title("Normal Prediction truncated at 3500")
# In a histogram the x-axis carries the measured value (the play count) and
# the y-axis carries how many rows fall into each bin.
plt.xlabel("Play Counts")
plt.ylabel("Frequency")


Out[13]:
<matplotlib.text.Text at 0x1f3df668>

In [14]:
# Overlay both prediction histograms on a fresh figure so this cell does not
# depend on which figure happens to be current or on what earlier cells drew.
# Colors are pinned explicitly so the title stays accurate whatever
# matplotlib's default color cycle is; alpha keeps overlapping bars visible.
plt.figure()
plt.hist(qdata[qdata.plays < 3500].plays, bins=40,
         color="blue", alpha=0.5, label="Quantile")
plt.hist(bdata[bdata.plays < 3500].plays, bins=40,
         color="green", alpha=0.5, label="Normal")
plt.xlabel("Play Counts")
plt.ylabel("Frequency")
plt.legend()
plt.title("Quantile (blue) and Normal (green) Predictions")


Out[14]:
<matplotlib.text.Text at 0x1f5ca7b8>

In [16]:
f = plt.figure()
# Log-scale view of the normal predictions below the 3500-play cap.
# Non-positive values are dropped first: np.log maps 0 to -inf and negative
# numbers to NaN, and non-finite values break or distort histogram binning.
positive_capped = (bdata.plays > 0) & (bdata.plays < 3500)
plt.hist(np.log(bdata[positive_capped].plays), bins=40)
plt.title("Log of Normal Prediction truncated at 3500")
# x is the natural log of the play count; y is how many rows fall in each bin.
plt.xlabel("Log Play Counts")
plt.ylabel("Frequency")


Out[16]:
<matplotlib.text.Text at 0x1f9a3208>

In [ ]: