In [1]:
# standard libraries
import numpy as np
import pandas as pd
# for plotting
import matplotlib.pyplot as plt
import pickle
# for interpreting data
from pyechonest import artist
# for machine learning
from sklearn.ensemble import RandomForestRegressor as rf
from scipy.sparse import coo_matrix
from pyechonest import config
import os
# SECURITY: the Echo Nest API key used to be hard-coded here only. Prefer
# supplying it via the ECHO_NEST_API_KEY environment variable; the literal is
# kept solely as a backward-compatible fallback so existing runs keep working.
# NOTE(review): this key is committed to source control -- rotate it and then
# drop the fallback.
config.ECHO_NEST_API_KEY = os.environ.get('ECHO_NEST_API_KEY', 'EIVX1I4WCCD7FQRFV')
# turn off to avoid displaying test output
# NOTE(review): this name may shadow IPython's built-in `display` helper in
# this kernel -- confirm nothing below relies on rich display before renaming.
display = False
In [2]:
# CSV file with the quantile-based predictions; it is loaded into `qdata`
# by the next cell and is read relative to the current working directory.
qfile = "adjusted_quantiles_thing.csv"
In [3]:
# Load the quantile predictions into a DataFrame. Later cells read its
# `plays` column, so the CSV must provide a column with that name.
qdata = pd.read_csv(qfile)
In [10]:
%matplotlib
plt.hist(qdata[qdata.plays < 3500].plays, bins=40)
plt.title("Quantile Predictions truncated at 3500")
plt.xlabel("Number of Play Counts")
plt.ylabel("Play Counts")
Out[10]:
In [12]:
# Source CSV for the "normal" predictions, resolved against the working directory.
bfile = "kevs_rocks.csv"
# Read it into a DataFrame; the plotting cells below use its `plays` column.
bdata = pd.read_csv(filepath_or_buffer=bfile)
In [13]:
# Histogram of the normal predictions, truncated at 3500 like the quantile
# plot. No new figure is created here, so the bars are drawn on whichever
# figure is currently active.
plt.hist(bdata[bdata.plays < 3500].plays, bins=40)
plt.title("Normal Prediction truncated at 3500")
# The x axis carries the predicted play-count values; the y axis is how many
# rows fall into each bin (the original labels had the y axis as play counts).
plt.xlabel("Play Counts")
plt.ylabel("Number of Predictions")
Out[13]:
In [14]:
# Originally this cell only set a title and relied on both histograms already
# sitting on the active figure, in the default colour order. Draw the overlay
# explicitly on a fresh figure so the cell is reproducible on its own and the
# colours always match the title.
plt.figure()
# alpha keeps the first histogram visible underneath the second one.
plt.hist(qdata[qdata.plays < 3500].plays, bins=40, color="blue", alpha=0.5)
plt.hist(bdata[bdata.plays < 3500].plays, bins=40, color="green", alpha=0.5)
plt.xlabel("Play Counts")
plt.ylabel("Number of Predictions")
plt.title("Quantile (blue) and Normal (green) Predictions")
Out[14]:
In [16]:
# Start a separate figure so the log-scale histogram is not drawn on top of
# the earlier plots.
f = plt.figure()
# Keep only strictly positive play counts below the cutoff: a zero or negative
# value would turn into -inf/NaN under np.log and break the histogram binning.
# For data that is already all-positive this selects exactly the same rows.
log_plays = np.log(bdata[(bdata.plays > 0) & (bdata.plays < 3500)].plays)
plt.hist(log_plays, bins=40)
plt.title("Log of Normal Prediction truncated at 3500")
# The x axis carries log(play count); the y axis is how many rows fall into
# each bin (the original labels had the y axis as log play counts).
plt.xlabel("Log Play Counts")
plt.ylabel("Number of Predictions")
Out[16]:
In [ ]: