In [1]:
import numpy as np
import pickle
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
%matplotlib inline
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../')
import scripts.map_and_average as mapper
import scripts.util_feature_learning as util_feature_learning
In [4]:
# Load the pre-computed train/validation/test splits and pool the label
# arrays so later cells can address the full dataset at once.
trainset, valset, testset = mapper.load_train_val_test_sets()
traindata, trainlabels, trainaudiolabels = trainset
valdata, vallabels, valaudiolabels = valset
testdata, testlabels, testaudiolabels = testset
labels = np.concatenate((trainlabels, vallabels, testlabels)).ravel()
audiolabels = np.concatenate((trainaudiolabels, valaudiolabels, testaudiolabels)).ravel()
# print() function form (Python 2 print statements are obsolete).
print(traindata.shape, valdata.shape, testdata.shape)
In [12]:
min_variance = 0.99  # retain enough PCA components to explain 99% of variance
feat_labels, feat_inds = mapper.get_feat_inds(n_dim=traindata.shape[1])
# For each feature group: pick a PCA dimensionality from the variance
# criterion, fit LDA, and visualize both projection matrices.
# (Indentation reconstructed -- the exported cell had lost it.)
for i in range(len(feat_inds)):
    print("mapping " + feat_labels[i])
    inds = feat_inds[i]
    ssm_feat = util_feature_learning.Transformer()
    if min_variance is not None:
        # Fit a full-rank PCA first, then keep the smallest number of
        # components whose cumulative explained variance exceeds the target.
        ssm_feat.fit_data(traindata[:, inds], trainlabels, n_components=len(inds), pca_only=True)
        n_components = np.where(ssm_feat.pca_transformer.explained_variance_ratio_.cumsum() > min_variance)[0][0] + 1
        print(n_components, len(inds))
    # NOTE(review): if min_variance were None, n_components would be
    # undefined here -- the cell relies on min_variance being set above.
    ssm_feat.fit_lda_data(traindata[:, inds], trainlabels.ravel(), n_components=n_components)
    print("done fitting")
    # LDA scalings: rows are original features, columns discriminant axes.
    WW = ssm_feat.lda_transformer.scalings_
    plt.figure()
    plt.imshow(WW[:, :n_components], aspect='auto')
    plt.colorbar()
    plt.title('lda')
    plt.xlabel('components')
    plt.ylabel('features')
    # PCA components transposed to the same features-by-components layout.
    WW = ssm_feat.pca_transformer.components_.T
    plt.figure()
    plt.imshow(WW[:, :n_components], aspect='auto')
    plt.colorbar()
    plt.title('pca')
    plt.xlabel('components')
    plt.ylabel('features')
In [15]:
i = 2  # index of the single feature group to inspect (see feat_labels)
min_variance = 0.99  # retain enough PCA components to explain 99% of variance
feat_labels, feat_inds = mapper.get_feat_inds(n_dim=traindata.shape[1])
print("mapping " + feat_labels[i])
inds = feat_inds[i]
ssm_feat = util_feature_learning.Transformer()
# (Indentation reconstructed -- the exported cell had lost it.)
if min_variance is not None:
    # Choose the PCA dimensionality from the cumulative variance criterion.
    ssm_feat.fit_data(traindata[:, inds], trainlabels, n_components=len(inds), pca_only=True)
    n_components = np.where(ssm_feat.pca_transformer.explained_variance_ratio_.cumsum() > min_variance)[0][0] + 1
    print(n_components, len(inds))
ssm_feat.fit_lda_data(traindata[:, inds], trainlabels.ravel(), n_components=n_components)
print("done fitting")
# Keep both projection matrices for the plotting cell below.
WW_lda = ssm_feat.lda_transformer.scalings_
WW_pca = ssm_feat.pca_transformer.components_.T
In [16]:
# Explained variance ratio of the fitted LDA and PCA transformers.
print(ssm_feat.lda_transformer.explained_variance_ratio_)
print(ssm_feat.pca_transformer.explained_variance_ratio_)
In [17]:
plt.rcParams["font.family"] = "Arial"
plt.rcParams["font.size"] = "12"

def plot_weight_matrix(WW, n_components, method_name, feat_label):
    """Plot a features-by-components weight matrix and save it as PDF + EPS.

    WW: 2-D array of projection weights (features x components).
    n_components: number of leading components to display.
    method_name: 'lda' or 'pca' -- prefix for the output filename.
    feat_label: feature-group name -- suffix for the output filename.
    """
    fig, ax = plt.subplots(1, 1)
    cax = ax.imshow(WW[:, :n_components], aspect='auto')
    fig.colorbar(cax, format="%1.1f")
    ax.set_xlabel('Components')
    ax.set_ylabel('Features')
    # Major ticks label the four 20-dim feature sub-blocks at their centers.
    y_loc = np.arange(10, 80, 20)
    y_labs = ['mean(MFCC)', 'mean(DELTA)', 'std(MFCC)', 'std(DELTA)']
    ax.set_yticks(y_loc)
    ax.set_yticklabels(y_labs, rotation=65)
    # Minor ticks sit on the sub-block boundaries; the grid separates them.
    ax.set_yticks(np.arange(-0.5, 80, 20), minor=True)
    ax.tick_params(which='major', direction='out')
    ax.grid(which='minor')
    plt.savefig('../data/' + method_name + '_' + feat_label + '.pdf', bbox_inches='tight')
    plt.savefig('../data/' + method_name + '_' + feat_label + '.eps', bbox_inches='tight')

# The original cell duplicated this stanza verbatim for LDA and PCA;
# the helper produces identical figures and output files.
plot_weight_matrix(WW_lda, n_components, 'lda', feat_labels[i])
plot_weight_matrix(WW_pca, n_components, 'pca', feat_labels[i])
In [18]:
# Load the pre-computed per-feature LDA-mapped matrices plus labels.
# Alternate (full-length) dataset kept for reference:
#   X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle', 'rb'))
# NOTE(review): pickle.load executes arbitrary code -- only load trusted files.
# `with` ensures the file handle is closed (the original leaked it).
with open('../data/lda_data_melodia_8_30sec.pickle', 'rb') as f:
    X_list, Y, Yaudio = pickle.load(f)
X = np.concatenate(X_list, axis=1)
print(X.shape)
In [28]:
from sklearn.model_selection import train_test_split

RANDOM_STATE = 12345  # fixed seed so the splits are reproducible across runs
# 60% train, then split the remaining 40% evenly into validation and test,
# stratifying on the labels so class proportions match in every split.
X_train, X_val_test, Y_train, Y_val_test = train_test_split(
    X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test)
print(X_train.shape, X_test.shape)
In [29]:
# Train and evaluate classifiers on the concatenated LDA-mapped features.
# NOTE(review): Transformer.classify is project code; judging from the next
# cells it fits (at least) an LDA model exposed as `ssm_feat.modelLDA` --
# confirm against scripts/util_feature_learning.py.
ssm_feat = util_feature_learning.Transformer()
ssm_feat.classify(X_train, Y_train, X_test, Y_test)
Out[29]:
In [30]:
# Visualize the LDA scalings learned by the classifier, with rows grouped
# into the four concatenated feature families.
classifier_WW = ssm_feat.modelLDA.scalings_
fig, ax = plt.subplots(1, 1)
image = ax.imshow(classifier_WW, aspect='auto')
fig.colorbar(image)
ax.set_xlabel('Components')
ax.set_ylabel('Features')
# Width of each feature family block in the concatenated matrix.
feat_lens = np.array([block.shape[1] for block in X_list])
boundaries = np.concatenate([[0], np.cumsum(feat_lens)])
# Center each family label vertically within its row span.
y_loc = np.diff(boundaries) / 2.0 + boundaries[:-1]
y_labs = ['rhythm', 'melody', 'timbre', 'harmony']
ax.set_yticks(y_loc)
ax.set_yticklabels(y_labs, rotation=65)
# Minor ticks at the family boundaries drive the separating grid lines.
ax.set_yticks(np.cumsum(feat_lens) - 0.5, minor=True)
ax.tick_params(which='major', direction='out')
ax.grid(which='minor')
plt.savefig('../data/lda_classifier.pdf')
In [31]:
# Explained variance ratio of the classifier's internal LDA projection.
print(ssm_feat.modelLDA.explained_variance_ratio_)
In [ ]: