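Goal: Given an input signal, detect its note onsets, estimate the pitch of each segment between onsets, and render the resulting transcription as a sequence of sine waves.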
Imports:
In [1]:
import warnings

import essentia
import essentia.standard as ess
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas
warnings.filterwarnings("ignore")
Set figure size:
In [2]:
# Default figure size (width, height) for every plot in this notebook.
# Accessed through `plt` explicitly so the cell does not depend on an
# implicit pylab namespace being preloaded into the kernel.
plt.rcParams['figure.figsize'] = (15, 6)
Read an input signal:
In [3]:
# Sample rate shared by the loader and by every later cell that converts
# between seconds and sample indices.
fs = 44100

# Build the mono loader for the test recording, requesting `fs` as its
# sample rate, then run it to obtain the input samples.
loader = ess.MonoLoader(
    filename='../test/bizet_in.wav',
    sampleRate=fs,
)
signal_in = loader()
Play the signal:
In [4]:
from IPython.display import Audio
# Playback is disabled by default; uncomment the next line to embed an
# audio player for the input signal in the notebook output.
#Audio(data=signal_in, rate=44100) # may take a while to load
Given a signal, return onset times:
In [5]:
def get_onsets(signal_in):
    """Detect onsets in `signal_in` with Essentia's OnsetRate algorithm.

    OnsetRate returns a pair; only the first element (the onset times,
    which the rest of this notebook treats as seconds) is returned. The
    second element, the onset rate, is discarded.
    """
    detector = ess.OnsetRate()
    times, _rate = detector(signal_in)
    return times
Given an audio segment, return pitch:
In [6]:
# Essentia algorithm instances, created once and reused for every segment.
yin = ess.PitchYinFFT()
spectrum = ess.Spectrum()


def get_pitch(segment):
    """Estimate the pitch of an audio segment from its leading samples.

    At most the first 4096 samples are analysed. Shorter segments are
    trimmed to an even number of samples before the spectrum is taken.

    Returns the pitch value reported by PitchYinFFT (used downstream as a
    frequency in Hz); the accompanying confidence value is discarded.
    """
    # hack to get around Essentia error: cap the frame at 4096 samples and
    # drop one sample when the length would otherwise be odd
    # (presumably Spectrum rejects odd-sized input -- TODO confirm).
    N = min(len(segment), 4096)
    N -= N % 2
    frame = segment[:N]
    pitch, pitch_conf = yin(spectrum(frame))
    return pitch
Given a pitch, return a sine wave:
In [7]:
def generate_sine(pitch, n_duration, sample_rate=None):
    """Synthesize a sine wave of amplitude 0.2.

    Parameters
    ----------
    pitch : float
        Frequency of the sine wave, in cycles per second.
    n_duration : int
        Number of samples to generate.
    sample_rate : float, optional
        Sample rate in Hz. Defaults to the notebook-level `fs`, so existing
        two-argument calls behave exactly as before.

    Returns
    -------
    numpy.ndarray
        Array of `n_duration` samples, starting at phase zero.
    """
    if sample_rate is None:
        # Fall back to the notebook-wide sample rate.
        sample_rate = fs
    n = np.arange(n_duration)
    return 0.2 * np.sin(2 * np.pi * pitch * n / float(sample_rate))
Given an input signal, transcribe it, and render the transcription using sine waves:
In [8]:
def transcribe_pitch(signal_in):
    """Transcribe `signal_in` and render the transcription with sine waves.

    The signal is split at the detected onsets. For each segment (from one
    onset to the next, and from the last onset to the end of the signal)
    the pitch is estimated and a sine wave at that pitch is written into
    the output. Samples before the first onset are left at zero.

    Returns an array with the same number of samples as `signal_in`.
    """
    n_samples = len(signal_in)

    # initialize output signal
    signal_out = essentia.array(np.zeros(n_samples))

    # onset detection
    onsets = get_onsets(signal_in)

    # Segment boundaries as sample indices. The notebook-level `fs` is used
    # instead of a hard-coded rate, and the final boundary is the signal
    # length itself rather than a seconds round-trip (which can lose
    # samples to integer division or float rounding). Indices are clamped
    # so a boundary can never point past the end of the signal.
    boundaries = [min(int(float(t) * fs), n_samples) for t in onsets]
    boundaries.append(n_samples)

    # for each segment between consecutive boundaries
    for n0, n1 in zip(boundaries[:-1], boundaries[1:]):
        if n1 <= n0:
            # Empty segment: nothing to analyse or to write.
            continue
        # pitch detect each segment
        pitch = get_pitch(signal_in[n0:n1])
        # generate sine wave; add to output signal
        signal_out[n0:n1] = generate_sine(pitch, n1 - n0)
    return signal_out
Transcribe:
In [9]:
# Run onset detection, per-segment pitch estimation and sine synthesis on
# the loaded input signal.
signal_out = transcribe_pitch(signal_in)
Listen to the output, or write it to a file (uncomment the line you need):
In [10]:
#Audio(data=signal_out, rate=fs)
#Audio(data=sum([signal_out, signal_in], axis=0), rate=fs)
#ess.MonoWriter(filename='../test/out.wav')(signal_out)
Plot the onsets:
In [11]:
# Time axis in seconds, one entry per input sample.
t = np.arange(len(signal_in))/float(fs)
x = pandas.Series(signal_in, index=t)

# Draw on the Axes returned by pandas instead of relying on bare pylab
# functions, so the cell also works on a kernel started without pylab.
ax = x.plot()
ax.set_xlabel('Time (seconds)')

# Mark every detected onset with a red vertical line.
for onset in get_onsets(signal_in):
    ax.axvline(onset, color='r')
Plot a constant-Q spectrogram of the input signal (skipping the first second):
In [12]:
# Frequency limits given as MIDI note numbers and converted to Hz.
fmin = librosa.midi_to_hz(60)
fmax = librosa.midi_to_hz(120)  # NOTE(review): computed but not used below

# Constant-Q transform of the input, skipping the first `fs` samples
# (i.e. the first second of audio).
CQT = librosa.cqt(
    signal_in[fs:],
    sr=fs,
    hop_length=2048,
    fmin=fmin,
)

# Show the CQT after librosa.logamplitude scaling, with note names on the
# y axis.
librosa.display.specshow(
    librosa.logamplitude(CQT),
    sr=fs,
    y_axis='cqt_note',
    fmin=fmin,
)
Out[12]:
Plot a constant-Q spectrogram of the output signal (skipping the first second):
In [13]:
# Frequency limits given as MIDI note numbers and converted to Hz.
fmin = librosa.midi_to_hz(60)
fmax = librosa.midi_to_hz(120)  # NOTE(review): computed but not used below

# Constant-Q transform of the synthesized output, skipping the first `fs`
# samples (i.e. the first second of audio).
CQT = librosa.cqt(
    signal_out[fs:],
    sr=fs,
    hop_length=2048,
    fmin=fmin,
)

# Show the CQT after librosa.logamplitude scaling, with note names on the
# y axis.
librosa.display.specshow(
    librosa.logamplitude(CQT),
    sr=fs,
    y_axis='cqt_note',
    fmin=fmin,
)
Out[13]: