In [2]:
import os
import shutil
import spotipy
import pickle
import pandas as pd
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from collections import Counter
if not os.path.exists('clean_midi'):
# Download the 'Clean MIDI' dataset from http://colinraffel.com/projects/lmd/
from six.moves import urllib
import StringIO
import gzip
import tarfile
FILE_URL = 'http://hog.ee.columbia.edu/craffel/lmd/clean_midi.tar.gz'
response = urllib.request.urlopen(FILE_URL)
print("INFO: Downloaded {}".format(FILE_URL))
compressedFile = StringIO.StringIO()
compressedFile.write(response.read())
compressedFile.seek(0)
decompressedFile = gzip.GzipFile(fileobj=compressedFile, mode='rb')
OUTFILE_PATH = 'clean_midi.tar'
with open(OUTFILE_PATH, 'wb') as outfile:
outfile.write(decompressedFile.read())
tar = tarfile.open(OUTFILE_PATH)
tar.extractall()
tar.close()
print("INFO: Extracted data")
else:
print("INFO: Found `clean_midi` directory")
In [ ]:
if not os.path.exists("genres.p"):
# Use Spotify's API to genre lookup. Login first and get your OAuth token:
# https://developer.spotify.com/web-api/search-item/
# NOTE: Replace `AUTH` value with your token.
AUTH = "ENTER-MY-AUTH-KEY"
# Get artists from folder names
artists = [item for item in os.listdir('clean_midi') if not item.startswith('.')]
sp = spotipy.Spotify(auth=AUTH)
genres = {}
for i,artist in enumerate(artists):
try:
results = sp.search(q=artist, type='artist',limit=1)
items = results['artists']['items']
genre_list = items[0]['genres'] if len(items) else items['genres']
genres[artist] = genre_list
if i < 5:
print("INFO: Preview {}/5".format(i + 1),
artist, genre_list[:5])
except Exception as e:
print("INFO: ", artist, "not included: ", e)
# Save to pickle file
pickle.dump(genres,open("genres.p","wb"))
else:
# Load genres meta-data
genres = pickle.load(open("genres.p","rb"))
In [13]:
# Get the most common genres
flattened_list = [item for sublist in list(genres.values()) for item in sublist]
c = Counter(flattened_list)
c.most_common()[:20]
Out[13]:
In [337]:
# Convert labels to vectors
categories = set(sorted(list(flattened_list)))
df = pd.DataFrame(columns=categories)
for author,genre_list in genres.items():
row = pd.Series(np.zeros(len(categories)),name=author)
for ind,genre in enumerate(categories):
if genre in genre_list:
row[ind] = 1
d = pd.DataFrame(row).T
d.columns = categories
df = pd.concat([df,d])
df = df.reindex_axis(sorted(df.columns), axis=1)
# Assign label for each author corresponding with meta-genre (eg, rock, classical)
def getStyle(genre_substring):
"""Get data where features contain `genre_substring`."""
style_index = np.asarray([genre_substring in x for x in df.columns])
style_array = df.iloc[:,style_index].any(axis=1)
return style_array
# Create array of color/labels
color_array = np.zeros(df.shape[0])
genre_labels = ['other','rock','metal','pop','mellow','country', 'rap','classical']
for i,g in enumerate(genre_labels):
if g == 'other':
pass
else:
color_array[np.where(getStyle(g))] = i
In [175]:
# 2-dimensions
model = TSNE(random_state=0)
np.set_printoptions(suppress=True)
X_tsne = model.fit_transform(df.values)
X_pca = PCA().fit_transform(df.values)
# 3-dimensions
model3d = TSNE(n_components=3, random_state=0)
np.set_printoptions(suppress=True)
X_tsne3d = model3d.fit_transform(df.values)
X_pca3d = PCA(n_components=3).fit_transform(df.values)
In [345]:
%pylab inline
CMAP_NAME = 'Set1'
cmap = matplotlib.cm.get_cmap(CMAP_NAME,lut=max(color_array)+1)
figure(figsize=(10, 5))
suptitle('Artist-genre visualization (2-D)')
axes_tsne = []
axes_pca = []
# TSNE
subplot(121)
title('TSNE')
for l in set(color_array):
ax = plt.scatter(X_tsne[color_array==l][:, 0], X_tsne[color_array==l][:, 1],c=cmap(l/max(color_array)), s=5)
axes_tsne.append(ax)
legend(handles=axes_tsne, labels=genre_labels,frameon=True,markerscale=2)
# PCA
subplot(122)
title('PCA')
for l in set(color_array):
ax = plt.scatter(X_pca[color_array==l][:, 0], X_pca[color_array==l][:, 1],c=cmap(l/max(color_array)), s=5)
axes_pca.append(ax)
legend(handles=axes_pca, labels=genre_labels,frameon=True,markerscale=2)
plt.show()
In [348]:
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(10, 5))
suptitle('Artist-genre visualization (3-D)')
ax = fig.add_subplot(121, projection='3d')
title('TSNE')
axes_tsne = []
axes_pca = []
scatter_proxies = []
for l in set(color_array):
ax.scatter(X_tsne3d[color_array==l][:, 0], X_tsne3d[color_array==l][:, 1],X_tsne3d[color_array==l][:, 2],c=cmap(l/max(color_array)), s=5,zdir='x')
axes_tsne.append(fig.gca())
proxy = matplotlib.lines.Line2D([0],[0], linestyle="none", c=cmap(l/max(color_array)), marker = 'o')
scatter_proxies.append(proxy)
legend(handles=scatter_proxies, labels=genre_labels,numpoints=1,frameon=True,markerscale=0.8)
ax2 = fig.add_subplot(122, projection='3d')
title('PCA')
for l in set(color_array):
ax2.scatter(X_pca3d[color_array==l][:, 0], X_pca3d[color_array==l][:, 1], X_pca3d[color_array==l][:, 2],c=cmap(l/max(color_array)),s=5)
axes_pca.append(fig.gca())
legend(handles=scatter_proxies, labels=genre_labels,numpoints=1,frameon=True,markerscale=0.8)
plt.show()
In [5]:
MIDI_DIR = os.path.join(os.getcwd(),'clean_midi')
def get_artists(genre):
"""Get artists with label `genre`."""
artists = [artist for artist, gs in genres.items() if genre in gs]
return artists
# Get artist with genres 'soft rock' and 'disco'
genre_data = {}
metal = get_artists('metal')
classical = get_artists('classical')
genre_data['metal'] = metal
genre_data['classical'] = classical
# Copy artists to a genre-specific folder
for genre, artists in genre_data.items():
try:
for artist in artists:
_genre = genre.replace(' ','_').replace('&','n')
shutil.copytree(os.path.join(MIDI_DIR,artist),os.path.join(os.getcwd(),'subsets',_genre,artist))
except Exception as e:
print(e)
Run ./train_model.sh
in project directory, or enter the following commands in the subset
folder and create corresponding .tfrecord
files:
for genre in */
do
if [[ $genre == *examples* ]]
then continue
fi
convert_dir_to_note_sequences \
--input_dir=$genre \
--output_file=/tmp/${genre%/}_notesequences.tfrecord \
--recursive && echo "INFO: ${genre%/} converted to NoteSequences"
done
The subsets
folder content should now contain .tfrecord
files:
classical/ metal/
classical_notesequences.tfrecord metal_notesequences.tfrecord
Create the melody datasets for each genre:
for genre in */
do
if [[ $genre == *examples* ]]
then continue
fi
melody_rnn_create_dataset \
--config=attention_rnn \
--input=/tmp/${genre%/}_notesequences.tfrecord \
--output_dir=sequence_examples/${genre} \
--eval_ratio=0.10 && echo "INFO: ${genre%/} database created."
done
Now train the model:
for genre in */
do
if [[ $genre == *examples* ]]
then continue
fi
melody_rnn_train \
--config=attention_rnn \
--run_dir=/tmp/melody_rnn/logdir/run1/${genre} \
--sequence_example_file=$(pwd)/sequence_examples/${genre%/}/training_melodies.tfrecord \
--hparams="batch_size=64,rnn_layer_sizes=[64,64]" \
--num_training_steps=2000 && echo "INFO: ${genre%/} model trained."
done
Generate melodies:
for genre in */ ;
do
if [[ $genre == *examples* ]];
then continue
fi
melody_rnn_generate \
--config=attention_rnn \
--run_dir=/tmp/melody_rnn/logdir/run1/${genre} \
--output_dir=/tmp/melody_rnn/generated/${genre} \
--num_outputs=10 \
--num_steps=128 \
--hparams="batch_size=64,rnn_layer_sizes=[64,64]" \
--primer_melody="[60]" && echo "INFO: ${genre%/} melodies generated."
done
Your melodies can be found in /tmp/melody_rnn/generated/[genre]
. Convert to mp3's to play on modern browser or OS.