This notebook contains all steps to import, clean, aggregate and merge the 3 datasets I queried from the MusicBrainz database.

Queries located here

The three datasets consist of: allalbums_newmeta2.csv - All recordings by the bands The Beatles & The Rolling Stones.

bs_covers.csv - All cover versions (re-recordings) of original material from both bands.

work_cover.csv - Essentially a lookup of the workids representing the original songs from each band.


In [3]:
# Import data and modules. References to Plotly may be ignored for now, though the library offers far nicer 
# visualizations over basic Pandas and Matplotlib.
# encoding: utf-8
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from textblob import TextBlob, Word
from nltk.stem.snowball import SnowballStemmer


import plotly.plotly as py
import plotly.tools as tls
tls.embed('https://plot.ly/~cufflinks/8')
import cufflinks as cf

py.sign_in('maxrose', '••••••••••')

cf.set_config_file(offline=True, world_readable=False, theme='ggplot')

plt.rcParams['figure.figsize'] = 8, 6

### Frame with all cover versions
covers = pd.read_table('data/bs_covers.csv', encoding= 'utf-8',header=0,delimiter='|')

### Frame with workid and is_cover (written by either Beatles or Stones)
is_cover = pd.read_csv("data/work_cover.csv")

### Frame with source data for all recordings by Beatle and stones
bs = pd.read_table('data/allalbums_newmeta2.csv', encoding= 'utf-8',header=0,delimiter='|')
### fix releasedate
bs.set_value(bs.releasename.str.contains("Meet The Beatles"), 'releasedate', '1964-01-20')
### format releasedate as datetime
bs['releasedate'] = pd.to_datetime(pd.Series(bs.releasedate))
#bs.head(4)



In [4]:
# Update various data mistakes - better web-scraping matches for lyrics
bs.set_value(bs.songname.str.contains('Keep'), 'songname', 'Keep Your Hands Off My Baby')
bs.set_value(bs.songname.str.contains('Baby You’re a Rich Man'), 'songname', 'Baby, You’re a Rich Man')
bs.set_value(bs.songname.str.contains("It's Only Rock 'n' Roll (but I Like It)"), 'songname', "It's Only Rock 'n Roll (But I Like It)")
bs.set_value(bs.songname.str.contains("When I’m Sixty‐Four"), 'songname', "When I'm Sixty-Four")
bs.set_value(bs.songname.str.contains("Ob‐La‐Di, Ob‐La‐Da"), 'songname', "Ob-La-Di, Ob-La-Da")
bs.set_value(bs.songname.str.contains("Sure To Fall"), 'songname', "Sure To Fall (In Love With You)")
bs.set_value(bs.songname.str.contains("Honey, Don’t!"), 'songname', "Honey, Don’t")
bs.set_value(bs.songname.str.contains("Love in Vain Blues"), 'songname', "Love in Vain")
# Replace bad unicode character to apostrophe.
bs.replace(u"\xe2", "'")


/Users/maxrose/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: UserWarning:

This pattern has match groups. To actually get the groups, use str.extract.

Out[4]:
artistid artist songname releasename workid recordingid release_group releasedate label_cnt country_cnt rating
0 303 The Beatles A Taste of Honey 1962 Live Recordings 295302 4732310 420949 1962-01-01 1 1 0
1 303 The Beatles Ask Me Why 1962 Live Recordings 295296 4732323 420949 1962-01-01 1 1 0
2 303 The Beatles Be-Bop-A-Lula 1962 Live Recordings 8387505 4732321 420949 1962-01-01 1 1 0
3 303 The Beatles Everybody’s Trying to Be My Baby 1962 Live Recordings 368508 4732314 420949 1962-01-01 1 1 0
4 303 The Beatles Falling in Love Again (Can’t Help It) 1962 Live Recordings 2374813 4732319 420949 1962-01-01 1 1 0
5 303 The Beatles Hallelujah, I Love Her So 1962 Live Recordings 2648186 4732322 420949 1962-01-01 1 1 0
6 303 The Beatles Hippy Hippy Shake 1962 Live Recordings 6569228 4732303 420949 1962-01-01 1 1 0
7 303 The Beatles I Remember You 1962 Live Recordings 506842 4732329 420949 1962-01-01 1 1 0
8 303 The Beatles I Saw Her Standing There 1962 Live Recordings 295291 4732300 420949 1962-01-01 1 1 0
9 303 The Beatles I Wish I Could Shimmy Like My Sister Kate 1962 Live Recordings 12516025 4732327 420949 1962-01-01 1 1 0
10 303 The Beatles I’m Gonna Sit Right Down and Cry (Over You) 1962 Live Recordings 737424 4732301 420949 1962-01-01 1 1 0
11 303 The Beatles I’m Talking About You 1962 Live Recordings 8034555 4732326 420949 1962-01-01 1 1 0
12 303 The Beatles Lend Me Your Comb 1962 Live Recordings 12523637 4732305 420949 1962-01-01 1 1 0
13 303 The Beatles Little Queenie 1962 Live Recordings 6007740 4732318 420949 1962-01-01 1 1 0
14 303 The Beatles Long Tall Sally 1962 Live Recordings 9026995 4732328 420949 1962-01-01 1 1 0
15 303 The Beatles Matchbox 1962 Live Recordings 12446929 4732325 420949 1962-01-01 1 1 0
16 303 The Beatles Red Sails in the Sunset 1962 Live Recordings 2954920 4732324 420949 1962-01-01 1 1 0
17 303 The Beatles Roll Over Beethoven 1962 Live Recordings 8322242 4732302 420949 1962-01-01 2 2 0
18 303 The Beatles Sweet Little Sixteen 1962 Live Recordings 6007733 4732304 420949 1962-01-01 1 1 0
19 303 The Beatles Till There Was You 1962 Live Recordings 286220 4732313 420949 1962-01-01 1 1 0
20 303 The Beatles Twist and Shout 1962 Live Recordings 7426353 4732308 420949 1962-01-01 2 2 0
21 303 The Beatles A Hard Day’s Night Best Of the Beatles 6253455 16359607 1392597 1962-01-01 1 1 0
22 303 The Beatles All My Loving Best Of the Beatles 2261384 14506508 1392597 1962-01-01 1 1 0
23 303 The Beatles Do You Want to Know a Secret Best Of the Beatles 295301 295301 1392597 1962-01-01 1 1 0
24 303 The Beatles Eight Days a Week Best Of the Beatles 368502 6385770 1392597 1962-01-01 1 1 0
25 303 The Beatles I’ll Follow the Sun Best Of the Beatles 368499 3527048 1392597 1962-01-01 1 1 0
26 303 The Beatles Misery Best Of the Beatles 295292 295292 1392597 1962-01-01 1 1 0
27 303 The Beatles No Reply Best Of the Beatles 368495 2710474 1392597 1962-01-01 1 1 0
28 303 The Beatles Roll Over Beethoven Best Of the Beatles 8322242 16359627 1392597 1962-01-01 1 1 0
29 303 The Beatles Twist and Shout Best Of the Beatles 7426353 16359609 1392597 1962-01-01 1 1 0
... ... ... ... ... ... ... ... ... ... ... ...
2063 825 The Rolling Stones Ruby Tuesday GRRR! 277644 14077603 1201539 2012-11-12 1 1 90
2064 825 The Rolling Stones Ruby Tuesday GRRR! 277644 14243089 1201539 2012-11-12 2 2 90
2065 825 The Rolling Stones Salt of the Earth GRRR! 12444489 14243093 1201539 2012-11-12 1 1 90
2066 825 The Rolling Stones She’s a Rainbow GRRR! 306564 14077613 1201539 2012-11-12 2 2 90
2067 825 The Rolling Stones Start Me Up GRRR! 185377 14243121 1201539 2012-11-12 3 3 90
2068 825 The Rolling Stones Street Fighting Man GRRR! 198365 14077611 1201539 2012-11-12 3 3 90
2069 825 The Rolling Stones Streets of Love GRRR! 12726465 4506741 1201539 2012-11-12 2 2 90
2070 825 The Rolling Stones Sympathy for the Devil GRRR! 198360 198360 1201539 2012-11-12 1 1 90
2071 825 The Rolling Stones Sympathy for the Devil GRRR! 198360 14243091 1201539 2012-11-12 2 2 90
2072 825 The Rolling Stones That's How Strong My Love Is GRRR! 8115933 14243064 1201539 2012-11-12 1 1 90
2073 825 The Rolling Stones The Last Time GRRR! 7841507 246835 1201539 2012-11-12 3 3 90
2074 825 The Rolling Stones The Red Rooster GRRR! 587423 14077592 1201539 2012-11-12 1 1 90
2075 825 The Rolling Stones Time Is on My Side GRRR! 4378180 14077595 1201539 2012-11-12 1 1 90
2076 825 The Rolling Stones Time Is on My Side GRRR! 4378180 14243074 1201539 2012-11-12 1 1 90
2077 825 The Rolling Stones Tumbling Dice GRRR! 6788059 14077616 1201539 2012-11-12 1 1 90
2078 825 The Rolling Stones Tumbling Dice GRRR! 6788059 14243103 1201539 2012-11-12 2 2 90
2079 825 The Rolling Stones Under My Thumb GRRR! 435908 14077601 1201539 2012-11-12 1 1 90
2080 825 The Rolling Stones Under My Thumb GRRR! 435908 14243079 1201539 2012-11-12 1 1 90
2081 825 The Rolling Stones Undercover of the Night GRRR! 12460142 685731 1201539 2012-11-12 3 3 90
2082 825 The Rolling Stones Waiting on a Friend GRRR! 12460097 14077627 1201539 2012-11-12 1 1 90
2083 825 The Rolling Stones Waiting on a Friend GRRR! 12460097 14243119 1201539 2012-11-12 2 2 90
2084 825 The Rolling Stones We Love You GRRR! 5652857 14077605 1201539 2012-11-12 2 2 90
2085 825 The Rolling Stones Wild Horses GRRR! 224903 224903 1201539 2012-11-12 1 1 90
2086 825 The Rolling Stones Wild Horses GRRR! 224903 14243100 1201539 2012-11-12 2 2 90
2087 825 The Rolling Stones You Can’t Always Get What You Want GRRR! 245459 14077609 1201539 2012-11-12 1 1 90
2088 825 The Rolling Stones You Can’t Always Get What You Want GRRR! 245459 14243098 1201539 2012-11-12 2 2 90
2089 825 The Rolling Stones (I Can’t Get No) Satisfaction Sweet Summer Sun: Hyde Park Live 357708 15493278 1318409 2013-11-11 1 1 0
2090 825 The Rolling Stones Gimme Shelter Sweet Summer Sun: Hyde Park Live 382868 15493273 1318409 2013-11-11 1 1 0
2091 825 The Rolling Stones (I Can’t Get No) Satisfaction The Rolling Stones Live: The Rolling Stones 50... 357708 17443789 1494552 2013-04-30 1 1 80
2092 825 The Rolling Stones Wild Horses Wild Horses 224903 224903 1524223 2015-05-12 1 1 0

2093 rows × 11 columns


In [5]:
### Add Times recorded for each song
vc = pd.DataFrame(bs.songname.value_counts().reset_index())
vc.columns = ['songname', 'timesrec']
bs = pd.merge(bs, vc, on='songname')

In [6]:
print bs.shape
print bs.describe()
print covers.shape
print covers.describe()


(2093, 12)
          artistid        workid   recordingid  release_group    label_cnt  \
count  2093.000000  2.093000e+03  2.093000e+03   2.093000e+03  2093.000000   
mean    537.688008  3.853344e+06  6.432656e+06   6.387133e+05     4.200191   
std     259.732383  4.367795e+06  6.097454e+06   5.705731e+05     4.811113   
min     303.000000  3.631000e+03  1.524300e+04   2.717000e+03     1.000000   
25%     303.000000  2.862180e+05  3.056350e+05   5.128900e+04     1.000000   
50%     303.000000  6.747610e+05  4.732308e+06   5.035320e+05     2.000000   
75%     825.000000  7.025700e+06  1.252034e+07   1.201539e+06     6.000000   
max     825.000000  1.295406e+07  1.858489e+07   1.618206e+06    25.000000   

       country_cnt       rating     timesrec  
count  2093.000000  2093.000000  2093.000000  
mean      4.200191    41.798853    10.496417  
std       4.811113    40.098924     6.871089  
min       1.000000     0.000000     1.000000  
25%       1.000000     0.000000     6.000000  
50%       2.000000    50.000000     9.000000  
75%       6.000000    80.000000    15.000000  
max      25.000000   100.000000    36.000000  
(6123, 7)
       recording_id  source_artist  rec_artist_id        workid      rating
count  6.123000e+03    6123.000000   6.123000e+03  6.123000e+03  809.000000
mean   7.477646e+06     393.793729   3.146894e+05  2.548784e+06   77.370828
std    5.281781e+06     197.881834   4.290297e+05  3.380675e+06   18.185538
min    2.246000e+03     303.000000   1.700000e+01  3.631000e+03   20.000000
25%    2.700596e+06     303.000000   1.637050e+04  1.825500e+05         NaN
50%    6.856845e+06     303.000000   9.821400e+04  3.828680e+05         NaN
75%    1.145830e+07     303.000000   4.421515e+05  5.682002e+06         NaN
max    1.932767e+07     825.000000   1.655875e+06  1.292543e+07  100.000000
/Users/maxrose/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:3834: RuntimeWarning:

Invalid value encountered in percentile


In [7]:
### aggregations for Beatles and Rolling Stones recordings
aggregations = {
    'releasename':{'num_releases':'count'}
    ,'releasedate':{'maxreleasedate':'min'}
    ,'rating': {'avg_rating':'min'}
    ,'country_cnt':{'countries':'nunique'}
    #,'songname': {'times_rec':'value_counts'}
}
#bsagg = bs.groupby(['workid', 'artist', 'songname', 'releasename','release_group','releasedate']).agg(aggregations).reset_index()
bsagg = bs.groupby(['workid', 'artist', 'songname']).agg(aggregations).reset_index()
bsagg.columns = bsagg.columns.droplevel(1)
bsagg.columns = ['workid','artist','songname','minreleasedate','num_releases','countries','avg_rating']

In [8]:
### Prepare for scraping Lyrics
# encoding: utf-8
import requests as req
from urllib import quote_plus
from bs4 import BeautifulSoup, SoupStrainer
only_lyrics = SoupStrainer('div',class_='lyricbox')

After trying out the Textblob library to incorporate sentiment polarity as a feature, I found "lyrics.wikia.com" to be simple to scrape the entire lyrics for most of the songs in my data, from which I will also extract sentiment analysis as an added feature.


In [9]:
### Function for scraping lyrics from Wikia using Beautiful Soup.
baseurl = 'http://lyrics.wikia.com/wiki/'
def getLyrics(workid,artist, song):
    #ssong = song.replace("’", "'")
    #print ssong
    tUrl = "%s%s:%s" %(baseurl,artist,song)
    result = req.get(tUrl)
    lyric = BeautifulSoup(result.text, 'html.parser', parse_only=only_lyrics).get_text(' ',strip=True)
    #lyrics.append(workid, song, lyric)
    return lyric

In [10]:
### Get lyrics for each song, add to aggregated data
lyrics = []
for idx, row in bsagg.iterrows():
    lyrics.append(getLyrics(row.workid, row.artist, row.songname))
bsagg['lyrics'] = lyrics

In [11]:
### Get sentiment of text
def detect_sentiment(text):
    return TextBlob(text).sentiment.polarity

In [12]:
### Add sentiment of songname, lyrics to aggregated data
bsagg['title_sent'] = bsagg.songname.apply(detect_sentiment)
bsagg['lyric_sent'] = bsagg.lyrics.apply(detect_sentiment)

In [13]:
### clean up covers dataframe

covers['src_id'] = covers.source_artist.map({303:0,825:1}) 
#covers.drop(['srcid'], axis = 1, inplace = True)

### fill missing ratings with mean for workid where ratings exist.
means = covers[covers.rating > 0].groupby('workid').rating.mean()
covers = covers.set_index(['workid'])
covers['rating'] = covers['rating'].fillna(means)
covers = covers.reset_index()
### Fill with 0 where no ratings exist for this cover song.
covers.rating.fillna(0, inplace=True)

In [14]:
covers.head()


Out[14]:
workid recording_id source_artist recording_artist record_name rec_artist_id rating src_id
0 245459 2889179 825 Züri West (Bitte Baby) Blib no chli bi mir / Lue zersch ... 148091 70.00 1
1 357708 10470131 825 Cat Power (I Can't Get No) Satisfaction 35 62.92 1
2 357708 5974631 825 Bill Cosby (I Can't Get No) Satisfaction 91 40.00 1
3 357708 6718106 825 The Ventures (I Can't Get No) Satisfaction 343 60.00 1
4 357708 3024003 825 Manfred Mann (I Can't Get No) Satisfaction 820 60.00 1

In [15]:
### Aggregations for covers

aggregations = {
    'recording_id':{'times_covered':'count'}
    ,'rec_artist_id':{'artist_cnt':'nunique'}
    ,'rating': {'cov_rating_avg':'mean'}
}

coversagg = covers.groupby(['workid']).agg(aggregations).reset_index()
coversagg.columns = coversagg.columns.droplevel(1)
coversagg.columns = ['workid','artist_cnt','cov_rating_avg','times_covered']
coversagg.shape


Out[15]:
(221, 4)

In [16]:
### merge dataframes for analysis
### adding is_cover binary
bsagg = bsagg.merge(is_cover[['workid', 'is_cover']], on=['workid'])
### adding cover aggregates
bsagg = bsagg.merge(coversagg[['workid','artist_cnt','cov_rating_avg','times_covered']], how='left', on=['workid'])

In [17]:
import plotly
plotly.offline.iplot([{
    'x': bsagg.num_releases,
    'y': bsagg.avg_rating
    
}])



In [18]:
sns.heatmap(bsagg.corr())


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d797090>

In [19]:
bsagg.shape


Out[19]:
(374, 14)

In [20]:
from pandas.tools.plotting import scatter_matrix
scatter_matrix(bsagg, alpha=0.5, figsize=(12, 12), diagonal='kde')


Out[20]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x11a50b910>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11de25810>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1214f6410>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12155ba10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1215dd250>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121623d50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1216a4e90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12171ce10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121797090>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12180bfd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x12187c150>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1218fb950>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121953e90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121b15450>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121b4d610>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121c051d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121c88150>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121ceb190>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121e6e110>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121ec8ed0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x121f5d110>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121fe1090>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122141b10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1221c4a90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x121514ed0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122293b10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122416a90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122483610>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122507590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12256b6d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1225ed650>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122625590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1226dd650>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12285f5d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1228c2990>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122943910>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1229ac710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122b33850>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122bb77d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122d24410>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x122da6390>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122e0b4d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122e8c550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122ec4610>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122f7c510>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123100490>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123161790>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1231e4710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12324b510>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1232d3710>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x123356690>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1233ae450>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123430c50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1234a11d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123621750>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123659910>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1237114d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123794450>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1237f5f90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1239837d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1239e8310>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123a69390>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123aeb310>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123b58610>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123bdb590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123d4a110>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123dc9910>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123e2f450>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123eb14d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124035450>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1240a1750>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1241236d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124193250>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124212a50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124279590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1242f9610>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12447b590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1244c7d50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124558590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1245bbad0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x124748090>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1245de810>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12482abd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1248aeb50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124a1d490>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124a9d410>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124b03550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124b845d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124bbb690>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124c73590>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x124cf8510>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124d5a810>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124ddd790>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124f43590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x124fcd790>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12594f710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1259bd1d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x125a40150>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x125aa5290>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x125c24210>]], dtype=object)

In [21]:
### Export cleaned data to csv for use in separate notebook.
bsagg.to_csv('data/Influence_clean.csv',sep='|',encoding='utf-8',index=False)