End-to-end learning for music audio

# Download the three parts of the split MP3 archive, plus the tag annotations,
# into data/ (the directory every later step reads from)
mkdir -p data
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.001
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.002
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.003
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/annotations_final.csv

# Concatenate the parts into a single zip archive
cat data/mp3.zip.* > data/music.zip

# Extract into data/music, which is where the notebook looks for the MP3 files
unzip data/music.zip -d data/music

In [45]:
%matplotlib inline
import os
import matplotlib.pyplot as plt

MP3ファイルのロード


In [4]:
import numpy as np
from pydub import AudioSegment

def mp3_to_array(file):
    """Decode an MP3 file into a 1-D NumPy array of raw samples.

    Args:
        file: Whatever ``AudioSegment.from_mp3`` accepts (the notebook passes
            a path string).

    Returns:
        A writable 1-D ``np.int16`` array. The decoded bytes are reinterpreted
        as 16-bit integers with no further processing (assumes the audio is
        16-bit PCM -- TODO confirm before using on other datasets).
    """
    # MP3 => RAW
    song = AudioSegment.from_mp3(file)
    # np.fromstring is deprecated for binary input; np.frombuffer is its
    # replacement. frombuffer returns a read-only view over the bytes, so copy
    # to hand back an independent, writable array as the old call did.
    song_arr = np.frombuffer(song.raw_data, dtype=np.int16).copy()
    return song_arr

In [5]:
# Sanity check: list one sample clip to confirm the archive was extracted
# under data/music
%ls data/music/1/ambient_teknology-phoenix-01-ambient_teknology-0-29.mp3


data/music/1/ambient_teknology-phoenix-01-ambient_teknology-0-29.mp3

In [8]:
# Decode the sample clip listed above into an int16 sample array
# (`song` is plotted in the next cell)
file = 'data/music/1/ambient_teknology-phoenix-01-ambient_teknology-0-29.mp3'
song = mp3_to_array(file)

In [11]:
# Plot the raw waveform of the sample clip (x: sample index, y: sample value)
plt.plot(song)


Out[11]:
[<matplotlib.lines.Line2D at 0x106695ef0>]

楽曲タグデータをロード

  • ランダムに3000曲を抽出
  • よく使われるタグ50個を抽出
  • 各曲には複数のタグがついている

In [18]:
import pandas as pd

# `delim_whitespace=True` is deprecated in pandas; sep=r'\s+' is the documented
# equivalent and parses the file the same way.
tags_df = pd.read_csv('data/annotations_final.csv', sep=r'\s+')
# Shuffle all rows. The seed is fixed so the same clips are selected on every
# run (without it the 3000-clip subset changes each time the cell executes).
tags_df = tags_df.sample(frac=1, random_state=42)
# Keep the first 3000 clips of the shuffled frame
tags_df = tags_df[:3000]

tags_df


Out[18]:
clip_id no voice singer duet plucking hard rock world bongos harpsichord female singing ... rap metal hip hop quick water baroque women fiddle english mp3_path
4929 10810 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 7/dac_crowell-the_sea_and_the_sky-02-umi_no_ka...
10062 22053 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1/mijo-fata_morgana-05-deep_bass_9-0-29.mp3
19365 42594 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 6/ed_martin-luis_milan__el_maestro-10-fantasia...
17077 37483 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 2/ensemble_mirable-conversations_galantes-08-s...
6236 13612 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 e/magnatune_com-magnatune_at_the_cc_salon-03-l...
13796 30258 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 4/tilopa-kyotaku_live-07-ajikan-204-233.mp3
24429 54483 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 b/jacob_heringman-jane_pickeringes_lute_book-1...
8722 19083 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 f/satori-healing_sounds_of_tibet-04-pray-59-88...
10914 23976 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3/american_baroque-mozart_4_quartets_for_strin...
21173 46514 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 d/daniel_ben_pienaar-book_2_cd1_welltempered_c...
19066 41784 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 7/monoide-zeitpunkt-09-zeitpunkt-291-320.mp3
3129 6857 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 5/new_york_consort_of_viols-dances_and_canzona...
10821 23792 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3/dj_cary-eastern_grooves-05-oriental_distorti...
4735 10413 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 4/falling_you-touch-02-the_art_of_possession_n...
7339 16086 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 d/sarasa_ensemble_labelle_sylvan-bach_cantatas...
5758 12585 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 f/kyiv_chamber_choir-masterpieces_of_the_ukrai...
20943 46059 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 a/bjorn_fogelberg-karooshi_porn-11-life_in_a_t...
17807 39045 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3/dj_cary-eastern_grooves-09-death_and_rainken...
19680 43253 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 4/jami_sieber-hidden_sky-10-mandlovu_mind-0-29...
12079 26524 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 d/ambient_teknology-the_all_seeing_eye_project...
22016 48393 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 b/elizabeth_wolff-moments_musicaux-12-moritz_m...
7342 16091 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 9/the_seldon_plan-making_circles-03-westcheste...
20871 45916 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 6/drop_trio-cezanne-11-invisible_pants-349-378...
6273 13671 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 b/seismic_anamoly-dead_mans_hand-03-long_gone-...
7663 16800 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 8/mercy_machine-mercy_machine-04-bones-30-59.mp3
20348 44748 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3/dj_cary-eastern_grooves-10-waddi_jahrapoon-2...
23708 52565 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 b/lisa_debenedictis-mixter_one-15-cuckoo_remix...
25254 56985 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 a/edward_martin_and_william_bastian-virtues_an...
22681 49990 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 5/john_fleagle-worlds_bliss__medieval_songs_of...
8326 18266 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 2/paul_avgerinos-gnosis-04-know_thyself-233-26...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8176 17961 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3/emmas_mini-beat_generation_mad_trick-04-high...
19721 43353 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 8/mediva-viva_mediva-10-mui_grandes_noit_e_dia...
14211 31216 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 a/electric_frankenstein-the_time_is_now-07-e_f...
23339 51633 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 5/john_fleagle-worlds_bliss__medieval_songs_of...
1592 3482 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 2/paul_avgerinos-gnosis-01-pure_in_heart-88-11...
6633 14397 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 a/plunkett-14_days-03-picture-0-29.mp3
25411 57430 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 4/ensemble_mirable-triemer_six_cello_sonatas-2...
18961 41588 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 5/rapoon-fallen_gods-09-valley-407-436.mp3
9537 20917 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 9/american_bach_soloists-heinrich_schutz__musi...
4378 9495 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0/ensemble_vermillian-stolen_jewels-02-rosenmu...
5545 12066 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 2/magnatune_compilation-electronica-03-cargo_c...
21667 47618 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 a/jacob_heringman-black_cow-12-bakfark_qui_hab...
19971 43881 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 4/falling_you-touch-10-reading_the_leaves_by_m...
5925 12989 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 7/monoide-zeitpunkt-03-first_love-233-262.mp3
21674 47626 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 d/kyiv_chamber_choir-praise_the_lord-12-balaki...
11926 26200 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3/dj_cary-downtempo_chill-06-breathing_shiva_i...
2899 6283 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 5/mr_epic-sideways-02-blue_days-175-204.mp3
18783 41207 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 8/stargarden-music_for_modern_listening-09-sup...
25376 57356 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 d/daniel_ben_pienaar-book_2_cd1_welltempered_c...
15794 34636 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 4/magnatune-romantic_dinner_classical_compilat...
12807 28106 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 5/paul_avgerinos-muse_of_the_round_sky-06-muse...
1106 2470 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0/beth_quist-silver-01-liquid_silver-262-291.mp3
2851 6187 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 f/strojovna_07-dirnix-02-basetra_noje-117-146.mp3
14433 31665 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 b/rob_costlow-sophomore_jinx-07-goodbyes-175-2...
1638 3580 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1/the_rajdhani_quartet-the_gandhi_memorial_con...
24807 55594 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 e/philharmonia_baroque_orchestra-handel__atala...
4596 10057 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 5/paul_avgerinos-muse_of_the_round_sky-02-song...
7223 15760 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 2/ensemble_sreteniye___three_holies_church_cho...
23990 53280 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 2/vito_paternoster-cd2bach_cello_suites-15-sui...
12485 27392 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0/rocket_city_riot-middle_age_suicide-06-im_go...

3000 rows × 190 columns


In [34]:
# Per-tag usage counts over the sampled clips. Columns 1..188 are the tag
# columns (column 0 is clip_id and the last column is mp3_path).
tag_counts = tags_df.iloc[:, 1:189].sum()
# Names of the 50 most frequently used tags, most frequent first
most_used = tag_counts.sort_values(ascending=False)
top50_tags = most_used.index[:50].tolist()
# Multi-label target matrix: one row per clip, one 0/1 column per selected tag
y = tags_df[top50_tags].values
y


Out[34]:
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

楽曲データをロード

  • tags_dfのmp3_pathからファイルパスを取得
  • mp3_to_array()でnumpy arrayをロード
  • (samples, features, channels) になるようにreshape
  • 音声波形は1次元なのでchannelsは1
  • 訓練データはすべて同じサイズなのでfeaturesは同じになるはず(パディング不要)

In [48]:
# Resolve each clip's relative mp3_path against the extraction directory
music_root = os.path.join('data', 'music')
files = [os.path.join(music_root, rel_path) for rel_path in tags_df.mp3_path.values]

In [49]:
# Decode every clip and collect the results into one array; this is expected
# to be 2-D (clips x samples) because the clips share a common length.
waveforms = np.array([mp3_to_array(path) for path in files])
# Add a trailing axis of size 1 so the layout is (samples, features, channels)
n_clips, n_samples = waveforms.shape[0], waveforms.shape[1]
X = waveforms.reshape(n_clips, n_samples, 1)

In [50]:
# Confirm the (samples, features, channels) layout produced by the reshape
X.shape


Out[50]:
(3000, 465984, 1)

訓練データとテストデータに分割


In [51]:
from sklearn.model_selection import train_test_split
# Fixed seed so the train/test partition is the same on every run
random_state = 42

# Hold out 20% of the clips (and their tag rows) as the test set
train_x, test_x, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)

In [59]:
# Show the shape of each split: inputs first, then targets
for split in (train_x, test_x, train_y, test_y):
    print(split.shape)


(2400, 465984, 1)
(600, 465984, 1)
(2400, 50)
(600, 50)

In [61]:
# Plot the first training clip to check the waveform survived the reshape
# and the split
plt.plot(train_x[0])


Out[61]:
[<matplotlib.lines.Line2D at 0x1b4ed6390>]

In [56]:
# Persist every split to its own .npy file so the training and prediction
# cells can reload them with np.load
for filename, array in (
    ('train_x.npy', train_x),
    ('test_x.npy', test_x),
    ('train_y.npy', train_y),
    ('test_y.npy', test_y),
):
    np.save(filename, array)

訓練


In [ ]:
import numpy as np
from keras.models import Model
from keras.layers import Dense, Flatten, Input, Conv1D, MaxPooling1D
from keras.callbacks import CSVLogger, ModelCheckpoint

# Reload the splits written by the preprocessing cells
train_x, train_y = np.load('train_x.npy'), np.load('train_y.npy')
test_x, test_y = np.load('test_x.npy'), np.load('test_y.npy')

# Report each array's shape as a quick sanity check
for split in (train_x, train_y, test_x, test_y):
    print(split.shape)

# Number of audio samples per clip; fixes the input length of the network
features = train_x.shape[1]

# Raw waveform input: (features, 1)
x_inputs = Input(shape=(features, 1), name='x_inputs')
# Strided convolution over the raw waveform: 128 filters, kernel size 256,
# stride 256, so consecutive windows do not overlap
x = Conv1D(128, 256, strides=256, padding='valid', activation='relu')(x_inputs)  # strided conv
# Four Conv1D(32 filters, kernel 8) + MaxPooling1D(4) stages shrink the
# time axis before the dense layers
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Flatten()(x)
x = Dense(100, activation='relu')(x)
# One independent sigmoid per tag: a clip can carry several tags at once
x_outputs = Dense(50, activation='sigmoid', name='x_outputs')(x)

model = Model(inputs=x_inputs, outputs=x_outputs)
# Multi-label targets with sigmoid outputs need a per-tag binary
# cross-entropy. categorical_crossentropy assumes the targets form a single
# distribution over mutually exclusive classes, which these tags do not.
model.compile(optimizer='adam',
              loss='binary_crossentropy')

# Append the per-epoch losses to a CSV log
logger = CSVLogger('history.log')
# Write a checkpoint only on epochs where val_loss improves; the epoch number
# and val_loss are embedded in the file name
checkpoint = ModelCheckpoint(
    'model.{epoch:02d}-{val_loss:.3f}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto')

# validation_data is passed as a tuple (x_val, y_val), the form documented by
# Keras; a list is not handled consistently across Keras versions.
model.fit(train_x, train_y, batch_size=600, epochs=50,
          validation_data=(test_x, test_y),
          callbacks=[logger, checkpoint])

予測

  • taggerは複数のタグを出力するのでevaluate()ではダメ?

In [58]:
import numpy as np
from keras.models import load_model
from sklearn.metrics import roc_auc_score

import glob
import os

test_x = np.load('test_x.npy')
test_y = np.load('test_y.npy')

# The training cell saves checkpoints as 'model.{epoch:02d}-{val_loss:.3f}.h5'
# with save_best_only=True, so a file is written only when val_loss improves
# and the most recently written checkpoint is the best one. Pick it up
# dynamically instead of hard-coding a file name from one particular run.
checkpoints = glob.glob('model.*.h5')
if not checkpoints:
    raise FileNotFoundError(
        "No checkpoint matching 'model.*.h5' found; run the training cell first.")
best_checkpoint = max(checkpoints, key=os.path.getmtime)
print(best_checkpoint)

model = load_model(best_checkpoint)

# Per-tag probabilities for the held-out clips
pred_y = model.predict(test_x, batch_size=50)
# ROC AUC over the multi-label predictions
print(roc_auc_score(test_y, pred_y))
# Loss on the held-out clips, using the loss the model was compiled with
print(model.evaluate(test_x, test_y))


Using TensorFlow backend.

In [ ]: