In [3]:
# import requests
from __future__ import print_function # (at top of module)
import time
import sys
import os
import pandas as pd
import numpy as np
# Project root: the parent of the current working directory, kept in the
# un-normalised form "<cwd>/.." (the notebook is assumed to be launched from a
# sub-directory of the project -- TODO confirm).
PROJ_ROOT = os.path.join(os.getcwd(), os.pardir)
# Stamp the notebook with author, date, time, Python version, numpy/pandas
# versions and the current git hash.
%load_ext watermark
%watermark -a "Thomas Turner" -d -t -v -p numpy,pandas -g
In [4]:
# Load environment variables from the project's .env file using dotenv.
from dotenv import load_dotenv
dotenv_path = os.path.join(PROJ_ROOT, '.env')
load_dotenv(dotenv_path)
# Load the "autoreload" extension.
%load_ext autoreload
# Mode 1: before each execution, reload only the modules registered with "%aimport".
%autoreload 1
# Add the project's 'src' directory to sys.path so its modules can be imported.
# This must happen before the %aimport / import lines below.
src_dir = os.path.join(PROJ_ROOT, 'src')
sys.path.append(src_dir)
# Register data.spotipy_functions for autoreload, then import the two helpers
# used later in this notebook.
%aimport data.spotipy_functions
from data.spotipy_functions import get_tracks_from_playlist
from data.spotipy_functions import get_features_for_tracks
In [5]:
# This line tells the notebook to show plots inside of the notebook
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
In [6]:
# Location of the raw playlist listing, relative to the project root.
playlists_csv_path = os.path.join(PROJ_ROOT, 'data', 'raw', 'spotify_playlists.csv')
# Read the listing into a DataFrame (default integer row index).
playlists = pd.read_csv(playlists_csv_path)
In [7]:
# Display the loaded playlist table.
playlists
Out[7]:
In [10]:
# Number of playlists (leading rows of `playlists`) to fetch features for.
# Kept at 1 to match the original run; raise it to include more playlists.
N_PLAYLISTS = 1

data_array = []
for i in range(N_PLAYLISTS):
    print(i)  # progress indicator
    # Positional lookups via .iloc: the old .ix indexer was removed in pandas 1.0.
    # Equivalent here because read_csv was called without index_col, so row
    # label i and row position i coincide.
    playlist_id = playlists.iloc[i, 0]
    playlist_name = playlists.iloc[i, 1]
    # Column 2 is handed to get_tracks_from_playlist as its first argument
    # (presumably the playlist owner's user id -- TODO confirm against the CSV).
    tracks = get_tracks_from_playlist(playlists.iloc[i, 2], playlist_id)
    table = get_features_for_tracks(tracks)
    # Tag every track row with the playlist it came from.
    table['playlist_id'] = playlist_id
    table['playlist_name'] = playlist_name
    table['data_set'] = i
    data_array.append(table)
# Stack the per-playlist tables into one frame (original row indices are kept).
master_dataset = pd.concat(data_array)
In [11]:
# (rows, columns) of the combined per-track feature table.
master_dataset.shape
Out[11]:
In [12]:
# Columns to compare across playlists; 'playlist_name' is kept as the colour label.
feature_columns = [
    'playlist_name',
    'acousticness',
    'danceability',
    'energy',
    'speechiness',
    'tempo',
    'valence',
]
feature_subset = master_dataset[feature_columns]
# Pairwise scatter matrix of the selected features, rows with missing values
# dropped, coloured by playlist.
sb.pairplot(data=feature_subset.dropna(), hue='playlist_name')
Out[12]:
In [12]:
# Tempo distribution per data set. Plot from master_dataset: the 'data_set'
# column lives there but is not among the columns selected into feature_subset,
# so plotting from feature_subset fails on a fresh kernel.
sb.violinplot(x='data_set', y='tempo', data=master_dataset)
Out[12]:
In [14]:
# Narrow the table to the two features of interest plus the playlist label.
tempo_dance_columns = ['playlist_name', 'danceability', 'tempo']
tempo_vs_danceable = master_dataset[tempo_dance_columns]
# Scatter matrix of danceability against tempo, rows with missing values
# dropped, coloured by playlist.
sb.pairplot(data=tempo_vs_danceable.dropna(), hue='playlist_name')
Out[14]:
In [94]:
# IPython help lookup ("?" suffix) for seaborn's violinplot.
# NOTE(review): looks like a leftover scratch cell (execution count 94) -- consider removing.
sb.violinplot?
In [ ]: