In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from librosa import cqt
# Step up one directory so that the `music_transcription` package imported below and the
# relative data/model paths resolve (presumably this is the project root - confirm).
# The guard makes the cell safe to re-run: once the package directory is visible from the
# current working directory, no further chdir happens.
if not os.path.isdir('music_transcription'):
    os.chdir('..')
from music_transcription.onset_detection.read_data import read_X
from music_transcription.onset_detection.cnn_onset_detection import CnnOnsetDetector
In [2]:
# Recording analysed throughout this notebook, relative to the current working directory.
# Built with os.path.join so the path also resolves on non-Windows systems, where a
# backslash is an ordinary filename character rather than a separator.
wavpath = os.path.join('data', 'recordings', 'audio', 'mim-riff1-short-slow.wav')
In [3]:
# Restore the CNN onset detector from its zipped model, then reuse the detector's own
# feature extractor to compute the spectrogram data for the recording.
model_zip = 'models/onset_detection/20170601-3-channels_ds1-4_80-perc_adjusted-labels_with_config.zip'
onset_detector = CnnOnsetDetector.from_zip(model_zip)
feature_extractor = onset_detector.feature_extractor
sg = feature_extractor._read_and_extract([wavpath])
np.asarray(sg).shape  # last expression: shown as the cell output
Out[3]:
In [5]:
# Draw one figure per entry of `sg`; each entry is transposed before plotting.
for spectrogram in sg:
    plt.figure(figsize=(12, 6))
    # 'nipy_spectral' is the current name of the colormap formerly exposed as
    # plt.cm.spectral, which no longer exists in recent Matplotlib releases.
    plt.imshow(np.array(spectrogram).T, aspect=8, origin='lower', cmap='nipy_spectral')
In [4]:
# Read the recording again via read_X, using the same frame rate, sample rate and
# subsampling step as the onset detector's feature extractor.
extractor = onset_detector.feature_extractor
fr_hz, sr, subs = extractor.frame_rate_hz, extractor.sample_rate, extractor.subsampling_step
samples, _ = read_X(wavpath, fr_hz, sr, subs)
print(samples.shape)
# Flatten to a single dimension; the shape is printed before and after for comparison.
samples = samples.ravel()
print(samples.shape)
In [62]:
# Constant-Q transform with librosa's default parameters (only the sample rate is passed).
# np.abs turns the complex coefficients into real magnitudes.
cqt_spect = np.abs(cqt(samples, sr=sr))
plt.figure(figsize=(12, 6))
# 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
plt.imshow(cqt_spect, aspect=8, origin='lower', cmap='nipy_spectral')
yt = np.arange(0, cqt_spect.shape[0], 5)
_ = plt.yticks(yt, yt + 24)  # range starts at midi pitch 24
MIDI pitches 33-93, using 1, 2 and 3 bins per note.
No scaling (seems to amplify harmonic notes, i.e. octaves).
Default values used:
`sparsity`: sparsify the CQT basis by discarding up to this fraction of the energy in each basis; 0 = off.
In [5]:
# CQT settings shared with the following cells.
hopl = 512  # must be 2^n, cannot be 441 (= subs)
n_bins = 60  # number of notes covered
lowfreq = 55.0  # A1 (midi 33) # gtr E 82.4068892282
# highfreq = lowfreq * 2**(n_bins/12)

# One entry per frequency resolution:
#   (bins per note, [(figsize, imshow aspect, y-tick step in bins, title), ...])
# The CQT is computed once per resolution and drawn once per listed view.
variants = [
    (1, [((24, 6), 8, 5, '1 bin per note, aspect=8')]),
    (2, [((24, 6), 4, 10, '2 bins per note, aspect=4 (scaled down y axis to match the size of the 1 bin plot)'),
         ((24, 12), 8, 4, '2 bins per note, aspect=8')]),
    (3, [((24, 9), 4, 9, '3 bins per note, aspect=4 (scaled down y axis)'),
         ((24, 18), 8, 3, '3 bins per note, aspect=8')]),
]

for bins_per_note, views in variants:
    # 12 notes per octave, so bins_per_octave grows with the bins per note.
    # np.abs converts the complex CQT coefficients to real magnitudes.
    cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=hopl, fmin=lowfreq,
                           n_bins=n_bins * bins_per_note,
                           bins_per_octave=12 * bins_per_note, scale=False))
    for figsize, aspect, tick_step, title in views:
        plt.figure(figsize=figsize)
        # 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
        plt.imshow(cqt_spect, aspect=aspect, origin='lower', cmap='nipy_spectral')
        plt.title(title)
        yt = np.arange(0, cqt_spect.shape[0], tick_step)
        # Label ticks with MIDI pitches; the range starts at midi pitch 33.
        plt.yticks(yt, (yt / bins_per_note + 33).astype('int'))
In [25]:
# 3 bins per note (36 bins per octave); np.abs converts the complex CQT to magnitudes.
cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=hopl, fmin=lowfreq,
                       n_bins=n_bins * 3, bins_per_octave=36, scale=False))
print(cqt_spect.shape)

# Number of CQT frames per second. Used both for the one-second slice and for the onset
# markers, so the two stay consistent if sr or hopl change (the slice was previously a
# hard-coded 86 frames).
frames_per_second = sr / hopl

plt.figure(figsize=(12, 12))
plt.imshow(cqt_spect[:, :int(frames_per_second)], aspect=1/4, origin='lower', cmap=plt.cm.Greys)
plt.title('First second with three onsets')
yt = np.arange(0, cqt_spect.shape[0], 3)
plt.yticks(yt, (yt / 3 + 33).astype('int'))  # range starts at midi pitch 33

# Onset times in seconds, converted to frame positions on the x axis.
for onset_s in (0.293, 0.617, 0.96):
    plt.axvline(frames_per_second * onset_s, color='red')
In [79]:
# Same comparison of 1, 2 and 3 bins per note, with a shorter hop length.
# NOTE: this rebinds the notebook-wide `hopl` for every cell executed afterwards.
hopl = 256

# One entry per frequency resolution:
#   (bins per note, [(figsize, imshow aspect, y-tick step in bins, title), ...])
variants = [
    (1, [((24, 6), 8, 5, '1 bin per note, aspect=8')]),
    (2, [((24, 6), 4, 10, '2 bins per note, aspect=4 (scaled down y axis to match the size of the 1 bin plot)'),
         ((24, 12), 8, 4, '2 bins per note, aspect=8')]),
    (3, [((24, 9), 4, 9, '3 bins per note, aspect=4 (scaled down y axis)'),
         ((24, 18), 8, 3, '3 bins per note, aspect=8')]),
]

for bins_per_note, views in variants:
    # 12 notes per octave; np.abs converts the complex CQT coefficients to magnitudes.
    cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=hopl, fmin=lowfreq,
                           n_bins=n_bins * bins_per_note,
                           bins_per_octave=12 * bins_per_note, scale=False))
    for figsize, aspect, tick_step, title in views:
        plt.figure(figsize=figsize)
        # 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
        plt.imshow(cqt_spect, aspect=aspect, origin='lower', cmap='nipy_spectral')
        plt.title(title)
        yt = np.arange(0, cqt_spect.shape[0], tick_step)
        # Label ticks with MIDI pitches; the range starts at midi pitch 33.
        plt.yticks(yt, (yt / bins_per_note + 33).astype('int'))
In [80]:
# Same comparison of 1, 2 and 3 bins per note, with a longer hop length.
# NOTE: this rebinds the notebook-wide `hopl` for every cell executed afterwards.
hopl = 1024

# One entry per frequency resolution:
#   (bins per note, [(figsize, imshow aspect, y-tick step in bins, title), ...])
variants = [
    (1, [((24, 6), 8, 5, '1 bin per note, aspect=8')]),
    (2, [((24, 6), 4, 10, '2 bins per note, aspect=4 (scaled down y axis to match the size of the 1 bin plot)'),
         ((24, 12), 8, 4, '2 bins per note, aspect=8')]),
    (3, [((24, 9), 4, 9, '3 bins per note, aspect=4 (scaled down y axis)'),
         ((24, 18), 8, 3, '3 bins per note, aspect=8')]),
]

for bins_per_note, views in variants:
    # 12 notes per octave; np.abs converts the complex CQT coefficients to magnitudes.
    cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=hopl, fmin=lowfreq,
                           n_bins=n_bins * bins_per_note,
                           bins_per_octave=12 * bins_per_note, scale=False))
    for figsize, aspect, tick_step, title in views:
        plt.figure(figsize=figsize)
        # 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
        plt.imshow(cqt_spect, aspect=aspect, origin='lower', cmap='nipy_spectral')
        plt.title(title)
        yt = np.arange(0, cqt_spect.shape[0], tick_step)
        # Label ticks with MIDI pitches; the range starts at midi pitch 33.
        plt.yticks(yt, (yt / bins_per_note + 33).astype('int'))
In [122]:
# 4 bins per note (48 bins per octave) at hop length 512; np.abs gives real magnitudes.
cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=512, fmin=lowfreq,
                       n_bins=n_bins * 4, scale=False, bins_per_octave=48))

# Compact view. 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
plt.figure(figsize=(24, 6))
plt.imshow(cqt_spect, aspect=2, origin='lower', cmap='nipy_spectral')
plt.title('4 bins per note, aspect=2')
yt = np.arange(0, cqt_spect.shape[0], 8)
_ = plt.yticks(yt, (yt / 4 + 33).astype('int'))  # range starts at midi pitch 33

# Tall view with one tick per note.
plt.figure(figsize=(24, 24))
plt.imshow(cqt_spect, aspect=8, origin='lower', cmap='nipy_spectral')
plt.title('4 bins per note, aspect=8')
yt = np.arange(0, cqt_spect.shape[0], 4)
_ = plt.yticks(yt, (yt / 4 + 33).astype('int'))  # range starts at midi pitch 33
In [126]:
# 5 bins per note (60 bins per octave) at hop length 512; np.abs gives real magnitudes.
cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=512, fmin=lowfreq,
                       n_bins=n_bins * 5, scale=False, bins_per_octave=60))

# Compact view. 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
plt.figure(figsize=(24, 6))
plt.imshow(cqt_spect, aspect=2, origin='lower', cmap='nipy_spectral')
plt.title('5 bins per note, aspect=2')
yt = np.arange(0, cqt_spect.shape[0], 10)
_ = plt.yticks(yt, (yt / 5 + 33).astype('int'))  # range starts at midi pitch 33

# Tall view with one tick per note.
plt.figure(figsize=(24, 24))
plt.imshow(cqt_spect, aspect=4, origin='lower', cmap='nipy_spectral')
plt.title('5 bins per note, aspect=4')
yt = np.arange(0, cqt_spect.shape[0], 5)
_ = plt.yticks(yt, (yt / 5 + 33).astype('int'))  # range starts at midi pitch 33
In [128]:
# 5 bins per note (60 bins per octave) with hop length 1024; np.abs gives real magnitudes.
cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=1024, fmin=lowfreq,
                       n_bins=n_bins * 5, scale=False, bins_per_octave=60))
plt.figure(figsize=(24, 24))
# 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
plt.imshow(cqt_spect, aspect=2, origin='lower', cmap='nipy_spectral')
plt.title('5 bins per note, aspect=2, hopsize=1024')
yt = np.arange(0, cqt_spect.shape[0], 10)
_ = plt.yticks(yt, (yt / 5 + 33).astype('int'))  # range starts at midi pitch 33
In [123]:
# 9 bins per note (108 bins per octave) at hop length 512; np.abs gives real magnitudes.
cqt_spect = np.abs(cqt(samples, sr=sr, hop_length=512, fmin=lowfreq,
                       n_bins=n_bins * 9, scale=False, bins_per_octave=108))

# Compact view. 'nipy_spectral' replaces the removed plt.cm.spectral colormap.
plt.figure(figsize=(24, 6))
plt.imshow(cqt_spect, aspect=2, origin='lower', cmap='nipy_spectral')
plt.title('9 bins per note, aspect=2')
yt = np.arange(0, cqt_spect.shape[0], 18)
_ = plt.yticks(yt, (yt / 9 + 33).astype('int'))  # range starts at midi pitch 33

# Tall view with one tick per note.
plt.figure(figsize=(24, 30))
plt.imshow(cqt_spect, aspect=8, origin='lower', cmap='nipy_spectral')
plt.title('9 bins per note, aspect=8')
yt = np.arange(0, cqt_spect.shape[0], 9)
_ = plt.yticks(yt, (yt / 9 + 33).astype('int'))  # range starts at midi pitch 33
Mit zunehmender Anzahl Bins pro Note nimmt die Auflösung in der Zeitdomäne eher ab. Um ebenfalls 3 verschiedene Spektrogramme mit möglichst vielen Informationen zu bieten, wählen wir folgende 3 Varianten:
In [ ]: