# Download the MagnaTagATune dataset (split zip archive, three parts).
# FIX: download into data/ — the cat/unzip steps below read data/mp3.zip.*,
# but the original wget saved to the current directory.
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.001
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.002
wget -P data http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.003
# Concatenate the split parts into a single zip archive.
cat data/mp3.zip.* > data/music.zip
# FIX: extract into data/music — all later Python code builds paths as
# data/music/<subdir>/<clip>.mp3, but the original extracted to ./music.
unzip data/music.zip -d data/music
In [45]:
%matplotlib inline
import os
import matplotlib.pyplot as plt
In [4]:
import numpy as np
from pydub import AudioSegment
def mp3_to_array(file):
    """Decode an MP3 file into a 1-D numpy array of 16-bit PCM samples.

    Parameters
    ----------
    file : str
        Path to an MP3 file readable by pydub/ffmpeg.

    Returns
    -------
    numpy.ndarray
        int16 sample array (read-only view; channels interleaved if stereo).
    """
    # MP3 -> raw PCM via pydub (ffmpeg under the hood).
    song = AudioSegment.from_mp3(file)
    # FIX: np.fromstring is deprecated (and removed for binary input in
    # recent NumPy); np.frombuffer is the supported equivalent.
    song_arr = np.frombuffer(song._data, dtype=np.int16)
    return song_arr
In [5]:
%ls data/music/1/ambient_teknology-phoenix-01-ambient_teknology-0-29.mp3
In [8]:
# Sanity-check the decoder on a single 29-second clip from the dataset.
file = 'data/music/1/ambient_teknology-phoenix-01-ambient_teknology-0-29.mp3'
song = mp3_to_array(file)
In [11]:
# Visualize the decoded waveform as a quick correctness check.
plt.plot(song)
Out[11]:
In [18]:
import pandas as pd
# Load the MagnaTagATune tag annotations (tab-separated file).
# FIX: delim_whitespace=True is deprecated in recent pandas; the file is
# tab-delimited and no field contains spaces, so sep='\t' is the explicit
# and equivalent choice.
tags_df = pd.read_csv('data/annotations_final.csv', sep='\t')
# Shuffle all rows so the 3000-clip subset below is a random sample.
tags_df = tags_df.sample(frac=1)
# Keep only the first 3000 clips to bound memory and training time.
tags_df = tags_df[:3000]
tags_df
Out[18]:
In [34]:
# Rank the 188 tag columns (everything between clip_id and mp3_path) by
# total occurrence count and keep the 50 most frequent tags as labels.
tag_counts = tags_df.iloc[:, 1:189].sum()
ranked_tags = tag_counts.sort_values(ascending=False)
top50_tags = ranked_tags.index[:50].tolist()
# Binary label matrix: one row per clip, one column per top-50 tag.
y = tags_df[top50_tags].values
y
Out[34]:
In [48]:
# Resolve each clip's relative mp3_path against the data/music extraction root.
files = [os.path.join('data', 'music', rel_path)
         for rel_path in tags_df.mp3_path.values]
In [49]:
# Decode every clip, then append a trailing channel axis so the result is
# (clips, samples, 1) as Conv1D expects.
X = np.array([mp3_to_array(path) for path in files])
X = X[:, :, np.newaxis]
In [50]:
# Expect (3000, samples_per_clip, 1).
# NOTE(review): assumes every clip decodes to the same sample count — if
# lengths differ, np.array above yields a ragged object array; confirm.
X.shape
Out[50]:
In [51]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the clips for evaluation; fixed seed for reproducibility.
SEED = 42
train_x, test_x, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=SEED)
In [59]:
# Confirm the split: 2400/600 clips, each with 50 binary labels.
for split in (train_x, test_x, train_y, test_y):
    print(split.shape)
In [61]:
# Visualize one training waveform as a quick sanity check on the split.
plt.plot(train_x[0])
Out[61]:
In [56]:
# Persist the split so the training script below can run standalone.
splits = {'train_x': train_x, 'test_x': test_x,
          'train_y': train_y, 'test_y': test_y}
for name, arr in splits.items():
    np.save(name + '.npy', arr)
In [ ]:
import numpy as np
from keras.models import Model
from keras.layers import Dense, Flatten, Input, Conv1D, MaxPooling1D
from keras.callbacks import CSVLogger, ModelCheckpoint

# Reload the persisted split so this cell is self-contained.
train_x = np.load('train_x.npy')
train_y = np.load('train_y.npy')
test_x = np.load('test_x.npy')
test_y = np.load('test_y.npy')
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)

# Raw-waveform CNN: a wide strided conv acts as a learned filterbank
# ("framing" the signal into 256-sample hops), followed by four conv/pool
# stages and a small dense head.
features = train_x.shape[1]
x_inputs = Input(shape=(features, 1), name='x_inputs')
x = Conv1D(128, 256, strides=256, padding='valid',
           activation='relu')(x_inputs)  # strided conv
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Conv1D(32, 8, activation='relu')(x)
x = MaxPooling1D(4)(x)
x = Flatten()(x)
x = Dense(100, activation='relu')(x)
# 50 independent sigmoid units: one probability per tag (multi-label).
x_outputs = Dense(50, activation='sigmoid', name='x_outputs')(x)

model = Model(inputs=x_inputs, outputs=x_outputs)
# FIX: tagging is multi-label and the output layer is sigmoid, so the
# correct loss is binary_crossentropy. categorical_crossentropy assumes a
# softmax distribution over mutually exclusive classes and mis-trains here.
model.compile(optimizer='adam',
              loss='binary_crossentropy')

logger = CSVLogger('history.log')
checkpoint = ModelCheckpoint(
    'model.{epoch:02d}-{val_loss:.3f}.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto')
model.fit(train_x, train_y, batch_size=600, epochs=50,
          validation_data=(test_x, test_y),  # FIX: Keras expects a tuple
          callbacks=[logger, checkpoint])
In [58]:
# Evaluate the best saved checkpoint on the held-out test split.
import numpy as np
from keras.models import load_model
from sklearn.metrics import roc_auc_score
test_x = np.load('test_x.npy')
test_y = np.load('test_y.npy')
# NOTE(review): this filename does not match the ModelCheckpoint pattern
# used during training ('model.{epoch:02d}-{val_loss:.3f}.h5') — presumably
# it came from a different run; confirm the checkpoint name before rerunning.
model = load_model('model.22-9.187-0.202.h5')
# Per-tag probabilities, shape (n_clips, 50).
pred_y = model.predict(test_x, batch_size=50)
# Macro-averaged ROC AUC across the 50 tags.
print(roc_auc_score(test_y, pred_y))
# Test-set loss (whatever loss the model was compiled with).
print(model.evaluate(test_x, test_y))
In [ ]: