Work report

This template allows you to run an analysis on any work you want (that MusicBrainz knows about of course). Just fill the MBID of the work you want!


In [1]:
# for example, Schubert Klavierstück D. 946 no. 1
work_mbid = '83fbc6a7-6808-342e-aea7-ae68a1b87a64'

Setup


In [2]:
%run startup.ipy


Last notebook update: 2018-06-06
Git repo: git@bitbucket.org:loujine/musicbrainz-dataviz.git
Importing libs
Defining database parameters

Defining *sql* helper function
Last database update: 2018-06-02

Python packages versions:
numpy       1.14.3
pandas      0.23.0
sqlalchemy  1.2.8
CPython 3.7.0b5
IPython 6.4.0

Work name


In [3]:
work = sql("""
SELECT name
  FROM work
 WHERE gid = %(work_mbid)s;
    """, work_mbid=work_mbid)
work_name = work.name[0]

Recording list


In [4]:
def recording_list(work_mbid):
    recordings = sql("""
SELECT a.name AS artist,
       to_date(to_char(l.begin_date_year, '9999') ||
               to_char(COALESCE(l.end_date_month, 1), '99') ||
               to_char(COALESCE(l.end_date_day, 1), '99'), 'YYYY MM DD') AS start,
       to_date(to_char(l.end_date_year, '9999') ||
               to_char(COALESCE(l.end_date_month, 1), '99') ||
               to_char(COALESCE(l.end_date_day, 1), '99'), 'YYYY MM DD') AS end,
       r.gid AS mbid,
       r.length * interval '1ms' AS duration,
       p.name AS place,
       p.coordinates
  FROM work               AS w
  JOIN l_recording_work   AS lrw ON w.id = lrw.entity1
  JOIN recording          AS r   ON r.id = lrw.entity0
LEFT OUTER JOIN l_place_recording  AS lpr ON r.id = lpr.entity1
LEFT OUTER JOIN place              AS p   ON p.id = lpr.entity0
LEFT OUTER JOIN l_artist_recording AS lar ON r.id = lar.entity1
LEFT OUTER JOIN artist             AS a   ON a.id = lar.entity0
LEFT OUTER JOIN link               AS l   ON l.id = lar.link
 WHERE w.gid = %(work_mbid)s
ORDER BY start;
""", work_mbid=work_mbid)

    recordings.duration = recordings.duration.fillna(0).apply(lambda t: t.round('s'))
    recordings['seconds'] = recordings.duration.apply(lambda s: s.total_seconds())
    recordings.duration = recordings.duration.apply(lambda s: str(s).replace('0 days 00:', ''))
    recordings['url'] = recordings.mbid.apply(mb_recording_link)
    recordings.drop('mbid', axis=1, inplace=True)
    return recordings
    
recordings = recording_list(work_mbid)
print('Number of recordings: %d' % len(recordings))
print('First recordings (chronological order): ')

iplot(ff.create_table(recordings[['start', 'artist', 'duration', 'url']].head(10)))


Number of recordings: 61
First recordings (chronological order): 

Most frequent performers


In [5]:
recordings.groupby('artist').count().url.sort_values(ascending=False).head(10)


Out[5]:
artist
Claudio Arrau           4
Paul Lewis              4
Alfred Brendel          3
Philipp Knop            2
Brigitte Engerer        2
Wilhelm Kempff          2
Rudolf Firkušný         2
András Schiff           2
René Möller             2
Arts Music Recording    2
Name: url, dtype: int64

Performance timeline


In [6]:
# Transform into JSON
data = [{'start': line.start.isoformat(),         
         'content': line.artist,
        } for _, line in recordings.iterrows() if line.start]

# Send to Javascript
import json
from IPython.display import Javascript
Javascript("""window.recordingData={};""".format(json.dumps(data, indent=4)))


Out[6]:

In [7]:
%%html
<link rel="stylesheet" type="text/css"
      href="https://cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis-timeline-graph2d.min.css" />



In [8]:
%%javascript
element.append('<div id="vis-container" style="width: 100%;height: 100%;"></div>');

requirejs.config({
    paths: {
        vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.20.1/vis'
    }
});

require(['vis'], function(vis){
  var data = new vis.DataSet(window.recordingData);
  var options = {
    editable: false
  };
  // create the timeline
  var container = document.getElementById('vis-container');
  var timeline = new vis.Timeline(container, data, options);
})


Performance durations


In [9]:
recordings_by_duration = recordings.sort_values(by='seconds', ascending=True)

# attribute unique random color to each pianist (black=unknown)
from hashlib import md5
color = recordings_by_duration.artist.apply(lambda s: '#' + md5(bytes(s, 'utf-8')).hexdigest()[:6] if s else '#000000')

# legend displayed on mouse hover
text = (recordings_by_duration.artist 
        + '<br>' 
        + recordings_by_duration.start.apply(lambda s: s.strftime('%Y-%m-%d') if pandas.notnull(s) else 'date unknown')
        + '<br>'
        + recordings_by_duration.duration.astype(str))

iplot({'data': [{'type': 'bar', 'y': recordings_by_duration.seconds,
                 'text': text, 'hoverinfo': 'text',
                 'marker': {'color': color}}],
       'layout': {'title': '{} recordings by duration'.format(work_name),
                  'xaxis1': {'title': 'Recording (arbitrary) number'},
                  'yaxis1': {'title': 'Duration (seconds)'}}})



In [10]:
recordings_by_place = recordings[['place', 'artist', 'start', 
                                  'coordinates']][~recordings.coordinates.isnull()]

import datetime
recordings_by_place['text'] = recordings_by_place[['place', 'artist', 'start']].apply(
    lambda s: '<br>'.join([el.isoformat() if isinstance(el, datetime.date) 
                                          else (el or '') for el in s]), axis=1)

In [11]:
iplot({'data': [{'type': 'scattergeo',
                 'lat': recordings_by_place.coordinates.apply(lambda t: eval(t)[0]),
                 'lon': recordings_by_place.coordinates.apply(lambda t: eval(t)[1]),
                 'text': recordings_by_place.text}],
       'layout': {'geo': {'showland': True,
                          'showcountries': True,
                          'projection': {'type': 'stereographic'}},
                  'title': 'Recordings by place'}})