Pre-Classical Clean Up 4: Hyperion

From the live database


In [2]:
from datetime import datetime

import pandas
import sqlalchemy

SITE_URL = 'https://musicbrainz.org'

# import postgresql environ variables
# defined in postgres-dockerfile/postgres.env
PGHOST = '127.0.0.1'
PGDATABASE = 'musicbrainz'
PGUSER = 'musicbrainz'
PGPASSWORD = 'musicbrainz'
PGPORT = 5433

def sql(query, **kwargs):
    """helper function for SQL queries using the %(...) syntax
    Parameters for the query must be passed as keyword arguments
    e.g. sql('SELECT * FROM artist WHERE name=%(singer)s', singer='Bob Dylan')
    """
    engine = sqlalchemy.create_engine(
        'postgresql+psycopg2://'
        '{PGUSER}:{PGPASSWORD}@{PGHOST}:{PGPORT}/{PGDATABASE}'.format(**globals()),
        isolation_level='READ UNCOMMITTED')
    return pandas.read_sql(query, engine, params=kwargs)

# helper function to build canonical URLs
def _mb_link(entity_type, mbid):
    return '<a href="{url}/{entity_type}/{mbid}">{mbid}</a>'.format(
        url=SITE_URL, **locals())

mb_release_link = lambda mbid: _mb_link('release', mbid) # noqa

In [3]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)

Main label


In [4]:
sql("""SELECT id, gid, name, comment FROM label WHERE name='hyperion';""")


Out[4]:
id gid name comment
0 388 08e6c3c8-81ab-405f-9cff-10f6b8db064c hyperion UK classical

In [5]:
sql("""
SELECT 
    DISTINCT label.id,
    label.name AS label,
    label.gid AS mbid,
    COUNT(*) AS releases_count
  FROM release       AS r
  JOIN release_label AS rl ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN l_release_url AS lu ON r.id = lu.entity0
  JOIN url           AS u  ON u.id = lu.entity1
 WHERE u.url ILIKE '%%hyperion-records%%'
 GROUP BY label.id, label.name, label.gid
 ORDER BY releases_count DESC
;""")


Out[5]:
id label mbid releases_count
0 388 hyperion 08e6c3c8-81ab-405f-9cff-10f6b8db064c 1163
1 22708 helios 0a94e96a-9219-4dd7-a529-18d34e77f50f 217
2 16197 Signum Classics 79c26ea5-2313-4d53-84d9-b04219620c5f 19
3 2900 Gimell 3d971128-6d37-4870-b188-8e5b84bb1258 6
4 585 BMG Direct Marketing, Inc. 9a7d39a4-a887-40f3-a645-a9a136d1f13f 1
5 5964 Hallé 6347c43a-99d2-40d5-a001-1fc36e1073f8 1
6 53283 Mariinsky 6b060b9a-da6f-4a11-923e-8b88944c4503 1
7 59800 Appian Publications and Recordings f51e7981-6399-467c-af06-b4e9a29dbd8d 1
8 95583 The Choir of King's College Cambridge d5827354-3b3f-421e-a1fe-6e25e2f7b8ea 1

We keep only the first two for the moment, the other ones seem to only be distributed by hyperion.


In [6]:
mb_labels = sql("""
SELECT 
    label.name AS label,
    COUNT(*) AS releases_count
  FROM release       AS r
  JOIN release_label AS rl ON rl.release = r.id
  JOIN label               ON rl.label = label.id
 WHERE label.id IN (388, 22708, 16197, 2900, 53283, 59800, 95583)
 GROUP BY label.name
 ORDER BY releases_count DESC
;""")
mb_labels


Out[6]:
label releases_count
0 hyperion 1433
1 helios 265
2 Signum Classics 132
3 Gimell 96
4 Mariinsky 30
5 Appian Publications and Recordings 9
6 The Choir of King's College Cambridge 3

In [7]:
sql("""
SELECT 
    COUNT(*) AS releases_count
  FROM release       AS r
  LEFT OUTER JOIN release_label AS rl ON rl.release = r.id
  JOIN l_release_url AS lu ON r.id = lu.entity0
  JOIN url           AS u  ON u.id = lu.entity1
 WHERE u.url ILIKE '%%hyperion-records%%'
 AND rl.label IS NULL
;""")


Out[7]:
releases_count
0 0

Releases from hyperion labels in MB


In [8]:
sql("""
SELECT 
    COUNT(r.name)
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
 WHERE label.id IN (388, 22708);
""")


Out[8]:
count
0 1698

In [9]:
mb_releases = sql("""
SELECT 
    rl.catalog_number,
    r.barcode,
    r.name  AS title,
    ac.name AS ac,
    label.name AS label,
    CASE WHEN r.quality=-1 THEN 'unset'
         WHEN r.quality=0  THEN 'low'
         WHEN r.quality=1  THEN 'normal'
         WHEN r.quality=2  THEN 'high'
    END     AS quality,
    STRING_AGG(DISTINCT at.name, ', ') AS covers,
    STRING_AGG(DISTINCT 
        CASE WHEN u.url ILIKE '%%hyperion-records%%' THEN u.url
             ELSE NULL
        END, ', ') AS hyperion_url,
    r.gid   AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN artist_credit AS ac ON r.artist_credit = ac.id
  LEFT OUTER JOIN cover_art_archive.cover_art      AS cova  ON cova.release = r.id
  LEFT OUTER JOIN cover_art_archive.cover_art_type AS covat ON covat.id = cova.id
  LEFT OUTER JOIN cover_art_archive.art_type       AS at    ON covat.type_id = at.id
  LEFT OUTER JOIN l_release_url AS lu ON r.id = lu.entity0
  LEFT OUTER JOIN url           AS u  ON u.id = lu.entity1
 WHERE label.id IN (388, 22708)
GROUP BY rl.catalog_number, r.barcode, r.name, ac.name, label.name, r.quality, r.gid
ORDER BY rl.catalog_number, r.barcode;
""")
mb_releases['mb_url'] = mb_releases.mb_url.apply(mb_release_link)
mb_releases['hyperion_url'] = mb_releases.hyperion_url.apply(lambda url: '<a href="{0}">{0}</a>'.format(url) if url else None)
mb_releases.head()


Out[9]:
catalog_number barcode title ac label quality covers hyperion_url mb_url
0 A66003 English Ayres and Duets The Camerata of London, Glenda Simpson, Paul Hillier, Barry Mason, Rosemary Thorndycraft hyperion unset Front <a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_CDA66003">https://www.hyperion-records.co.uk/dc.asp?dc=D_CDA66003</a> <a href="https://musicbrainz.org/release/bca1a11c-2833-4f53-98ca-57a9b338a406">bca1a11c-2833-4f53-98ca-57a9b338a406</a>
1 A66011 Clarinet Quintets Sir Arthur Somervell, Gordon Jacob; Thea King, Aeolian Quartet hyperion unset None None <a href="https://musicbrainz.org/release/f18828ea-7dd7-4e50-91ea-b1edc9b36963">f18828ea-7dd7-4e50-91ea-b1edc9b36963</a>
2 A66022 The Clarinet in Concert Bruch, Mendelssohn, Crusell; Thea King, Georgina Dobrée, Nobuko Imai, London Symphony Orchestra, Alun Francis hyperion unset None None <a href="https://musicbrainz.org/release/da023711-d28a-4682-baf0-72a04a5dd675">da023711-d28a-4682-baf0-72a04a5dd675</a>
3 A66032 None Armonico tributo: Salzburg 1682 Georg Muffat; The Parley of Instruments, Roy Goodman, Peter Holman hyperion unset Front, Medium None <a href="https://musicbrainz.org/release/29cff5bc-0fd6-42ff-913c-6723e1b6b970">29cff5bc-0fd6-42ff-913c-6723e1b6b970</a>
4 A66050 None Sessions: Concerto for Orchestra / Panufnik: Sinfonia Votiva (Symphony no. 8) Sessions, Panufnik; Boston Symphony Orchestra, Seiji Ozawa hyperion unset None None <a href="https://musicbrainz.org/release/c310ee09-2b67-4bf8-9cf9-20526d4c383b">c310ee09-2b67-4bf8-9cf9-20526d4c383b</a>

In [10]:
mb_releases.shape


Out[10]:
(1698, 9)

1677 releases already in MB, some might be duplicates

Hyperion releases quality in MB


In [11]:
mb_releases_by_quality = sql("""
SELECT 
    CASE WHEN r.quality=-1 THEN 'unset'
         WHEN r.quality=0  THEN 'low'
         WHEN r.quality=1  THEN 'normal'
         WHEN r.quality=2  THEN 'high'
    END AS quality,    
    COUNT(r.gid)
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
 WHERE label.id IN (388, 22708)
GROUP BY r.quality
ORDER BY r.quality;
""")
mb_releases_by_quality


Out[11]:
quality count
0 unset 1347
1 low 10
2 normal 5
3 high 336

Hyperion releases with front cover in CAA


In [12]:
mb_releases_with_front = sql("""
SELECT 
    at.name,
    COUNT(r.gid)
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN cover_art_archive.cover_art      AS cova  ON cova.release = r.id
  JOIN cover_art_archive.cover_art_type AS covat ON covat.id = cova.id
  JOIN cover_art_archive.art_type       AS at    ON covat.type_id = at.id
 WHERE label.id IN (388, 22708)
   AND at.name = 'Front'
 GROUP BY at.name;
""")
mb_releases_with_front


Out[12]:
name count
0 Front 952

Hyperion releases with PDF booklet in MB


In [13]:
mb_releases_with_booklet = sql("""
SELECT 
    rl.catalog_number,
    r.name AS title,
    STRING_AGG(at.name, ', ') AS covers,
    r.gid AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN cover_art_archive.cover_art      AS cova  ON cova.release = r.id
  JOIN cover_art_archive.cover_art_type AS covat ON covat.id = cova.id
  JOIN cover_art_archive.art_type       AS at    ON covat.type_id = at.id
 WHERE label.id IN (388, 22708) 
   AND at.name = 'Booklet'
   AND cova.mime_type = 'application/pdf'
 GROUP BY rl.catalog_number, r.name, r.gid
ORDER BY rl.catalog_number;
""")
mb_releases_with_booklet['mb_url'] = mb_releases_with_booklet.mb_url.apply(mb_release_link)
mb_releases_with_booklet.head()


Out[13]:
catalog_number title covers mb_url
0 CDA66062 Bruckner Motets Booklet <a href="https://musicbrainz.org/release/0873033b-6b94-448f-bcc5-c267194df1c4">0873033b-6b94-448f-bcc5-c267194df1c4</a>
1 CDA66245 Requiem Booklet <a href="https://musicbrainz.org/release/c32fac32-2325-4f63-ac38-31d7daab4afc">c32fac32-2325-4f63-ac38-31d7daab4afc</a>
2 CDA66389 Motets Booklet <a href="https://musicbrainz.org/release/412e5b4e-6414-4e81-9f73-d27192307fab">412e5b4e-6414-4e81-9f73-d27192307fab</a>
3 CDA66426 Lamentations Booklet <a href="https://musicbrainz.org/release/b0357701-8d59-3fc8-aa72-3d5d655fdd7e">b0357701-8d59-3fc8-aa72-3d5d655fdd7e</a>
4 CDA67099 Missa Ecce ego Joannes Booklet <a href="https://musicbrainz.org/release/2cbbdf24-d9d1-44cb-a07e-83ccdf36be6c">2cbbdf24-d9d1-44cb-a07e-83ccdf36be6c</a>

In [14]:
mb_releases_with_booklet.shape


Out[14]:
(48, 4)

Hyperion releases with no catalogue number in MB


In [15]:
#mb_releases_wo_catno = sql("""
#SELECT release.barcode,
#       release.name AS title,
#       label.name AS label,
#       release.gid AS mb_url
#  FROM release_label AS rl
#  JOIN release       ON rl.release = release.id
#  JOIN label         ON rl.label = label.id
# WHERE label.id IN (388, 22708, 16197)
#   AND rl.catalog_number IS NULL
#ORDER BY rl.catalog_number;
#""")
#mb_releases_wo_catno['mb_url'] = mb_releases_wo_catno.mb_url.apply(mb_release_link)
#mb_releases_wo_catno.head()

In [16]:
#mb_releases_wo_catno.shape

In [17]:
mb_releases_wo_link = sql("""
SELECT 
    rl.catalog_number,
    r.barcode,
    r.name  AS title,
    label.name AS label,
    r.gid   AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  LEFT OUTER JOIN l_release_url AS lu ON r.id = lu.entity0
  LEFT OUTER JOIN url           AS u  ON u.id = lu.entity1
 WHERE label.id IN (388, 22708)
   AND u.url IS NULL
ORDER BY rl.catalog_number, r.barcode;
""")
mb_releases_wo_link['mb_url'] = mb_releases_wo_link.mb_url.apply(mb_release_link)
mb_releases_wo_link.head()


Out[17]:
catalog_number barcode title label mb_url
0 A66210 Two Romantic Violin Concertos hyperion <a href="https://musicbrainz.org/release/28dbbf85-4ec1-4242-8469-6dbc800a8f5c">28dbbf85-4ec1-4242-8469-6dbc800a8f5c</a>
1 CDA20199 034571101996 Clarinet Concerto in A Major, K622 / Clarinet Quintet in A Major, K581 hyperion <a href="https://musicbrainz.org/release/0549aa69-e9bb-48ac-900d-7ee4844a616d">0549aa69-e9bb-48ac-900d-7ee4844a616d</a>
2 CDA66008 034571160085 How the World Wags hyperion <a href="https://musicbrainz.org/release/2aabc622-546a-42e9-8e17-ad7f8af38416">2aabc622-546a-42e9-8e17-ad7f8af38416</a>
3 CDA66056 034571160566 Songs and Dialogues hyperion <a href="https://musicbrainz.org/release/25d85cc4-27bb-3e73-b7f9-a166e9d5dbcb">25d85cc4-27bb-3e73-b7f9-a166e9d5dbcb</a>
4 CDA66076 034571160764 Howells: Requiem / Vaughan Williams: Mass in G minor hyperion <a href="https://musicbrainz.org/release/c50cf8d9-303d-44ba-bad1-06d7a6376bd7">c50cf8d9-303d-44ba-bad1-06d7a6376bd7</a>

In [18]:
mb_releases_wo_link.shape


Out[18]:
(57, 5)

In [ ]:

Official Hyperion catalogue


In [19]:
hyperion_releases = pd.read_html('https://www.hyperion-records.co.uk/n.asp?n=1')[0][:-1]

hyperion_releases.rename(columns={0: 'catalog_number', 1: 'name'}, inplace=True)
hyperion_releases['barcode'] = hyperion_releases.name.str[-12:]
hyperion_releases['name'] = hyperion_releases.name.str[:-12]
hyperion_releases['url'] = hyperion_releases.catalog_number.apply(
    lambda no: '<a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_{0}">{0}</a>'.format(no))

In [20]:
hyperion_releases.head()


Out[20]:
catalog_number name barcode url
0 1EMBNB Jack: Black and Blue 034571100708 <a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMBNB">1EMBNB</a>
1 1EMHDUS Harmonies d'un Soir 034571100753 <a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMHDUS">1EMHDUS</a>
2 1EMIPM08 Pitts J: Airs & Fantasias 034571100678 <a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMIPM08">1EMIPM08</a>
3 1EMJ2O Pitts: Jerusalem-Yerushalayim Superseded by 1EMJOY 034571100647 <a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMJ2O">1EMJ2O</a>
4 1EMJOY Pitts: Jerusalem-Yerushalayim 034571101460 <a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMJOY">1EMJOY</a>

Barcodes in MusicBrainz not found in Hyperion


In [21]:
barcodes_not_in_hyperion = set(mb_releases.barcode) - set(hyperion_releases.barcode) - {''} - {None}
print(barcodes_not_in_hyperion)
catno_not_in_hyperion = set(mb_releases.catalog_number) - set(hyperion_releases.catalog_number) - {''} - {None}
print(sorted(list(catno_not_in_hyperion)))


{'034571263168', '034571100104', '034571102948', '034571101217', '034571104508', '034571107905', '034571101996', '034571119977', '034571102764', '034571104607', '034571163215', '034571120126', '034511160779', '034571100111', '034571119632', '034571100173'}
['A66003', 'A66011', 'A66022', 'A66032', 'A66050', 'A66062', 'A66066', 'A66088', 'A66100', 'A66118', 'A66145', 'A66206', 'A66210', 'CDA 66065', 'CDA 66098', 'CDA 68146', 'CDA20021', 'CDA20199', 'CDA20276', 'CDA20294', 'CDA20450', 'CDA20460', 'CDA30001', 'CDA66162', 'CDA66251-2', 'CDA66321/2', 'CDA66591', 'CDA66631', 'CDA66632', 'CDA66911', 'CDA66912', 'CDA66913', 'CDA66914', 'CDA67131', 'CDA67132', 'CDA67221', 'CDA67222', 'CDA67223', 'CDA67224', 'CDA67381', 'CDA67461', 'CDA67462', 'CDA67471', 'CDA67541', 'CDA67542', 'CDA67751', 'CDA67752', 'CDA67951', 'CDA67952', 'CDA67953', 'CDA67991', 'CDA68041', 'CDA68042', 'CDA76472', 'CDD22012', 'CDS44071', 'CDS4414/51', 'CDS44171', 'CDS44172', 'CDS44173', 'CDS44174', 'CDS44175', 'CDS44176', 'CDS44177', 'CDS44178', 'CDS44179', 'CDS44180', 'CDS44181', 'GAW21963', 'GAW21997', 'HOMADEC03', 'HYP 10', 'HYP15', 'HYP21/3', 'HYPOZ', 'KA 66088', 'KA66316', 'KA66370', 'KING 2', 'KING 3', 'KING1']

In [22]:
barcodes_not_in_hyperion = set(mb_releases.barcode) - set(hyperion_releases.barcode) - {''} - {None}
releases_not_in_hyperion = mb_releases[mb_releases.barcode.apply(lambda b: b in barcodes_not_in_hyperion)]
releases_not_in_hyperion.head()


Out[22]:
catalog_number barcode title ac label quality covers hyperion_url mb_url
13 CDA20021 034571101217 Sacred Vocal Music Monteverdi; The Parley of Instruments, Emma Kirkby, Ian Partridge, David Thomas, Roy Goodman, Peter Holman hyperion unset None None <a href="https://musicbrainz.org/release/19d5fb58-d0cb-44ce-b73e-6a02a0768404">19d5fb58-d0cb-44ce-b73e-6a02a0768404</a>
14 CDA20199 034571101996 Clarinet Concerto in A Major, K622 / Clarinet Quintet in A Major, K581 Wolfgang Amadeus Mozart; Thea King, English Chamber Orchestra, Jeffrey Tate, Gabrieli String Quartet hyperion unset None None <a href="https://musicbrainz.org/release/0549aa69-e9bb-48ac-900d-7ee4844a616d">0549aa69-e9bb-48ac-900d-7ee4844a616d</a>
15 CDA20276 034571102764 String Sextets Johannes Brahms; The Raphael Ensemble hyperion unset None None <a href="https://musicbrainz.org/release/48b482a5-c7cc-4b6a-814d-b290f01d4579">48b482a5-c7cc-4b6a-814d-b290f01d4579</a>
16 CDA20294 034571102948 Stabat Mater / Salve Regina / In Caelestibus Regnis Giovanni Battista Pergolesi; The King's Consort, Robert King, Gillian Fisher, Michael Chance hyperion high Front <a href="http://www.hyperion-records.co.uk/dc.asp?dc=D_CDA66294">http://www.hyperion-records.co.uk/dc.asp?dc=D_CDA66294</a> <a href="https://musicbrainz.org/release/fa3e18b5-adf1-48a9-ac3f-0a834b3136a3">fa3e18b5-adf1-48a9-ac3f-0a834b3136a3</a>
17 CDA20450 034571104508 Celtic Symphony / Hebridean Symphony / The Witch of Atlas / The Sea Reivers Bantock; Royal Philharmonic Orchestra, Vernon Handley hyperion unset None None <a href="https://musicbrainz.org/release/002a59f0-b93d-3343-9985-5108f6f45f99">002a59f0-b93d-3343-9985-5108f6f45f99</a>

In [23]:
releases_not_in_hyperion.select(lambda x: not releases_not_in_hyperion.ix[x].catalog_number.startswith('CDA'))


/usr/lib/python3/dist-packages/ipykernel_launcher.py:1: FutureWarning: 'select' is deprecated and will be removed in a future release. You can use .loc[labels.map(crit)] as a replacement
  """Entry point for launching an IPython kernel.
/usr/lib/python3/dist-packages/ipykernel_launcher.py:1: DeprecationWarning: 
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
Out[23]:
catalog_number barcode title ac label quality covers hyperion_url mb_url
1270 CDD22012 034571120126 Music for Holy Week and Easter Sunday Orlande de Lassus; Pro Cantione Antiqua, Bruno Turner hyperion unset None None <a href="https://musicbrainz.org/release/87fd5453-1145-38ee-a0ff-ba9085f0182d">87fd5453-1145-38ee-a0ff-ba9085f0182d</a>
1655 GAW21963 034571119632 Piano Music by Federico Mompou Federico Mompou; Stephen Hough hyperion unset None None <a href="https://musicbrainz.org/release/2c150df3-e169-4a2b-a902-7017ba57766f">2c150df3-e169-4a2b-a902-7017ba57766f</a>
1656 GAW21997 034571119977 Music for St. James the Greater Dufay; The Binchois Consort, Andrew Kirkman hyperion unset None None <a href="https://musicbrainz.org/release/7b43461e-10ac-36f7-8a17-d19bdf1a89b2">7b43461e-10ac-36f7-8a17-d19bdf1a89b2</a>
1658 HYP 10 034571100104 A 10th Anniversary Sampler From Hyperion, 1980-1990 Various Artists hyperion unset Front None <a href="https://musicbrainz.org/release/6a5f7faa-a0a7-4d1e-bebe-1855e2b5693e">6a5f7faa-a0a7-4d1e-bebe-1855e2b5693e</a>
1660 HYP15 034571100173 British Music on Hyperion Various Artists hyperion unset Front None <a href="https://musicbrainz.org/release/fdbff1f2-fc64-4b1f-85af-46fc0689c4f0">fdbff1f2-fc64-4b1f-85af-46fc0689c4f0</a>
1666 KA66316 034571263168 Missa O Rex Gloriae / Missa Viri Galilaei Giovanni Pierluigi da Palestrina; Westminster Cathedral Choir, James O’Donnell hyperion unset None None <a href="https://musicbrainz.org/release/8cb3c20f-9adb-44ee-86f5-0a28e0e1ef88">8cb3c20f-9adb-44ee-86f5-0a28e0e1ef88</a>
1668 KING1 034571100111 The Music of The King's Consort The King's Consort, Robert King hyperion unset None None <a href="https://musicbrainz.org/release/69944692-6caf-413b-881d-c40f279f45b9">69944692-6caf-413b-881d-c40f279f45b9</a>

In [24]:
set(releases_not_in_hyperion.catalog_number).intersection(set(hyperion_releases.catalog_number))


Out[24]:
{'CDA66077', 'CDA66790'}

Releases not in MusicBrainz


In [25]:
hyphel_releases = hyperion_releases[hyperion_releases.catalog_number.str.startswith('CD')]

In [26]:
barcodes_not_in_musicbrainz = set(hyphel_releases.barcode) - set(mb_releases.barcode)
releases_not_in_mb = hyphel_releases[hyphel_releases.barcode.apply(lambda b: b in barcodes_not_in_musicbrainz)]

Template


In [27]:
from jinja2 import Template
template = Template("""
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Community Cleanup #4: Hyperion</title>
    <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  </head>

  <body style="margin: 20px;">
    <h1>Pre-Classical Clean Up #4</h1>
    <a href="https://community.metabrainz.org/t/community-cleanup-4-hyperion/356213">
      <img src="https://i.imgur.com/qtAyjmj.png" width="668" height="187">
    </a>
    
    <p>Latest update: {{ datetime.utcnow().isoformat()[:19] }}</p>
    
    <ol>
      <li><a href="#labels">Labels corresponding to Hyperion in MusicBrainz</a></li>
      <li><a href="#booklets">Hyperion releases with PDF booklet in MB</a></li>
      <li><a href="#wo_catno">Hyperion releases with no catalog number in MB</a></li>
      <li><a href="#not_in_hyp">Releases in MusicBrainz not found in Hyperion catalogue</a></li>
      <li><a href="#quality">Hyperion grouped by quality in MB</a></li>
      <li><a href="#all_mb">All Hyperion releases in MusicBrainz</a></li>
      <li><a href="#not_in_mb">Releases in Hyperion catalogue missing in MusicBrainz</a></li>
      <li><a href="#official">Official Hyperion catalogue</a></li>
    </ol>
    
    <h2 id="labels">Labels corresponding to Hyperion in MusicBrainz</h2>
    {{ mb_labels.to_html(index=False) }}
    <p>We keep only the first two for the moment, the other ones seem to only be distributed by hyperion.</p>
    
    <h2 id="booklets">Hyperion releases with PDF booklet in MB</h2>
    <p>{{ mb_releases_with_booklet.shape[0] }} releases (Feb. 2nd: 51)</p>
    <p>These should be checked and the booklets removed if coming from hyperion</p>
    {{ mb_releases_with_booklet.to_html(index=False) }}
    
    <h2 id="wo_catno">Hyperion releases with no catalog number in MB</h2>
    <p>Cleaned! (Only one left which is a pseudo-release)</p>
    
    <h2 id="not_in_hyp">Releases in MusicBrainz not found in Hyperion catalogue</h2>
    <p>{{ releases_not_in_hyperion.shape[0] }} releases found by naive barcode comparison (Feb. 1st: 45)</p>
    <p>These should be checked and barcode fixed if that is the explanation</p>
    {{ releases_not_in_hyperion.to_html(index=False) }}

    <h2 id="quality">Hyperion releases grouped by quality in MB</h2>
    {{ mb_releases_by_quality.to_html(index=False) }}
    
    <h2 id="all_mb">All Hyperion releases in MusicBrainz</h2>
    <p>{{ mb_releases.shape[0] }} releases (Feb. 1st: ~1551)</p>
    {{ mb_releases.to_html(index=True) }}
    
    <h2 id="not_in_mb">Releases in Hyperion catalogue missing in MusicBrainz</h2>
    <p>{{ releases_not_in_mb.shape[0] }} releases found by naive barcode comparison</p>
    {{ releases_not_in_mb.to_html(index=True) }}
    
    <h2 id="official">Official Hyperion catalogue</h2>
    <p>{{ hyperion_releases.shape[0] }} releases</p>
    {{ hyperion_releases.to_html(index=True) }}
  </body>
</html>
""")

with open('docs/hyperion_releases.html', 'w') as f:
    f.write(template.render(**globals())
            .replace('&lt;', '<').replace('&gt;', '>')
            .replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
            .replace('thead', 'thead class="thead-light"'))