In [1]:

    
# Load IPython's autoreload extension so edited modules are re-imported
# without restarting the kernel.
%load_ext autoreload
# Mode 2 of autoreload (per the IPython docs: reload all modules before each
# cell execution).
%autoreload 2



In [2]:

    
import numpy as np
import vmo.analysis as van
import vmo.generate as vge
import matplotlib.pyplot as plt
import sklearn.preprocessing as pre
import librosa, vmo
import IPython.display
%matplotlib inline

Load audio file



In [3]:

    
# Path (relative to the notebook) of the recording that is analysed and
# later re-synthesised.
target_file = '../files/tenor_sax.wav'
# Decode the file for playback only; no `sr` is passed, so librosa's default
# sampling rate applies (presumably 22050 Hz - confirm against librosa docs).
playback_y, sr = librosa.load(target_file)



In [4]:

    
# Embed an audio player for the original recording; this expression is the
# cell's output, so it must stay the last statement.
IPython.display.Audio(playback_y, rate=sr)









    Out[4]:

Parameters



In [5]:

    
# Analysis parameters shared by the feature-extraction and synthesis cells.
sample_rate = 22050   # sampling rate handed to librosa.load / audio_synthesis
fft_size = 2048 * 4   # analysis window length in samples (8192)
# Floor division keeps the hop an integer: under Python 3 `fft_size/2` is the
# float 4096.0, and a hop length is a sample count that must be an int when
# passed on as `hop_length=` / `hop=`.
hop_size = fft_size // 2

Analysis



In [6]:

    
# Re-load the recording at the analysis sampling rate.
y, sr = librosa.load(target_file, sr=sample_rate)

# Chromagram computed from an STFT using the window/hop chosen above.
C = librosa.feature.chroma_stft(
    y=y,
    sr=sr,
    n_fft=fft_size,
    hop_length=hop_size,
    octwidth=None,
)

# Log-compress the chroma (machine epsilon avoids log(0)), then normalise.
# NOTE(review): normalize() is applied to the un-transposed matrix, so with
# its default axis it scales the rows of `C`, not the per-frame vectors that
# the later transpose produces - confirm this is intended.
feature = pre.normalize(np.log(C + np.finfo(float).eps))



In [11]:

    
# One row per analysis frame (transpose of the feature matrix).
chroma_frames = feature.T

# Threshold sweep specification handed to find_threshold
# (presumably (start, stop, step) - verify against the vmo API).
r = (0.0, 1.01, 0.01)
ideal_t = vmo.find_threshold(chroma_frames, r=r, flag='a', dim=12)

# Build the oracle using the threshold stored at ideal_t[0][1]; the plotting
# cell treats index 1 of each pair as the threshold and index 0 as the IR.
oracle_t = vmo.build_oracle(
    chroma_frames,
    flag='a',
    threshold=ideal_t[0][1],
    feature='chroma',
    dim=12,
)



In [14]:

    
# Plot information rate (IR) against threshold for every pair in ideal_t[1].
# Each pair is indexed as (IR, threshold): index 1 goes on the x axis and
# index 0 on the y axis.
# Descriptive names are used instead of `x` / `y` so this cell no longer
# overwrites the audio signal `y` loaded in the analysis cell.
thresholds = np.array([pair[1] for pair in ideal_t[1]])
ir_values = [pair[0] for pair in ideal_t[1]]

fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(thresholds, ir_values, linewidth=2)
ax.set_title('IR vs. Threshold Value(vmo)', fontsize=18)
# `grid(b='on')` relied on the `b` keyword, which Matplotlib renamed to
# `visible` and later removed; a positional boolean works on every version.
ax.grid(True)
ax.set_xlabel('Threshold', fontsize=14)
ax.set_ylabel('IR', fontsize=14)
# Restrict the view to the low-threshold part of the sweep.
ax.set_xlim(0, 0.2)
fig.tight_layout()

Synthesis



In [15]:

    
# Generate a new state sequence from the oracle, one step shorter than the
# oracle's number of states.
# NOTE(review): no random seed is set in this notebook; if improvise() is
# stochastic the output will differ between runs - confirm.
seq = vge.improvise(
    oracle_t,
    seq_len=oracle_t.n_states - 1,
    LRS=2,
    weight='lrs',
)

# Render the sequence to audio using the same sampling rate, buffer and hop
# as the analysis; the second argument is presumably the output file path
# (verify against vmo.generate.audio_synthesis).
x, _w, new_sr = vge.audio_synthesis(
    target_file,
    'vmo_synthesis_test.wav',
    seq,
    analysis_sr=sample_rate,
    buffer_size=fft_size,
    hop=hop_size,
)



In [16]:

    
# Embed an audio player for the synthesised signal at the rate returned by
# audio_synthesis; kept as the final expression so it renders as the output.
IPython.display.Audio(x, rate=new_sr)









    Out[16]:



In [ ]: