In [7]:

    
%pylab inline
import librosa
from IPython.display import display,Audio
from numpy.fft import fft









    



Populating the interactive namespace from numpy and matplotlib

Analysis and synthesis

Phase vocoder



In [8]:

    
#load audio
s, fs = librosa.load("../wavs/Reich_Music18.wav")

# audio fragment
t1 = 10*fs
t2 = 18*fs
sigin = s[t1:t2]

#output input size
L = sigin.size

#size of analysis window and soverlap
N = 2048
H = int(N/4)

#output audio size
Lout = sigin.size

# signal blocks for processing and output
phi  = zeros(N)
out = zeros(N, dtype=complex)
sigout = zeros(L)

# max input amp, window
win = hanning(N)
p = 0

while p < L-(N+H):

    # take the spectra of two consecutive windows     
    spec1 =  fft(win*sigin[p:p+N])
    spec2 =  fft(win*sigin[p+H:p+N+H])
    # take their phase difference and integrate
    phi += (angle(spec2) - angle(spec1))
    
    # bring the phase back to between pi and -pi    
    phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0       
    
    out.real, out.imag = cos(phi), sin(phi)
    
    # inverse FFT and overlap-add        
    sigout[p:p+N] += win*ifft(out*abs(spec2)).real
    
    p += H



In [9]:

    
print('Original audio')
display(Audio(data=sigin,rate=fs))
print('Synth audio')
Audio(data=sigout,rate=fs)









    



Original audio






    





                
              






    



Synth audio






    Out[9]:

Time stretching



In [10]:

    
#load audio
s, fs = librosa.load("../wavs/Reich_Music18.wav")

# audio fragment
t1 = 10*fs
t2 = 18*fs
sigin = s[t1:t2]
L = sigin.size

#size of analysis window and soverlap
N = 2048
H = int(N/4)
tscale = 0.1

#output audio size
Lout = int(L/tscale+N)

# signal blocks for processing and output
phi  = zeros(N)
out = zeros(N, dtype=complex)
sigout = zeros(Lout)

# max input amp, window
win = hanning(N)
pout = 0
pstretch = 0

while pstretch < L-(N+H):

    p = int(pstretch)
    
    # take the spectra of two consecutive windows     
    spec1 =  fft(win*sigin[p:p+N])
    spec2 =  fft(win*sigin[p+H:p+N+H])
    # take their phase difference and integrate
    phi += (angle(spec2) - angle(spec1))
    
    # bring the phase back to between pi and -pi    
    phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0       
    
    out.real, out.imag = cos(phi), sin(phi)
    
    # inverse FFT and overlap-add        
    sigout[pout:pout+N] += win*ifft(out*abs(spec2)).real
    
    pout += H
    pstretch += H*tscale



In [11]:

    
Audio(data=sigout,rate=fs)









    Out[11]:

Infinite time stretch = Freezing



In [34]:

    
N = 2048
H = int(N/4)

# read input and get the timescale factor

s, fs = librosa.load("../wavs/Reich_Music18.wav")

t1 = 15*fs
t2 = 18*fs
sigin = s[t1:t2]

phaserandom = 0
randomblock = 0

L = 5*fs

# signal blocks for processing and output
phi  = zeros(N)
out = zeros(N, dtype=complex)
sigout = zeros(L)

# max input amp, window
win = hanning(N)
p = 0

while p < L-(N+H):

    # random block
    p1 = randint(0,H)*randomblock
    
    # take the spectra of two consecutive windows  
    spec1 =  fft(win*sigin[p1:p1+N])
    spec2 =  fft(win*sigin[p1+H:p1+N+H])
    
    # take their phase difference and integrate
    phi += (angle(spec2) - angle(spec1)) + randn(N)*phaserandom
    
    # bring the phase back to between pi and -pi
    phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0
    
    out.real, out.imag = cos(phi), sin(phi)
    # inverse FFT and overlap-add
    
    sigout[p:p+N] += win*ifft(abs(spec2)*out).real
    p += H



In [35]:

    
Audio(data=sigout,rate=fs)









    Out[35]:



In [14]:

    
def freeze(sigin, L, N=2048, H=None, phaserandom = 0, randomblock = 0, winfunc=hanning):

    if H is None:
        H = int(N/4)
        
    # signal blocks for processing and output
    phi  = zeros(N)
    out = zeros(N, dtype=complex)
    sigout = zeros(L)

    # max input amp, window
    win = winfunc(N)
    p = 0

    while p < L-(N+H):

        # random block
        p1 = randint(0,H)*randomblock

        # take the spectra of two consecutive windows  
        spec1 =  fft(win*sigin[p1:p1+N])
        spec2 =  fft(win*sigin[p1+H:p1+N+H])
        # take their phase difference and integrate
        phi += (angle(spec2) - angle(spec1)) + randn(N)*phaserandom

        # bring the phase back to between pi and -pi
        phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0

        out.real, out.imag = cos(phi), sin(phi)
        
        # inverse FFT and overlap-add
        sigout[p:p+N] += win*ifft(abs(spec2)*out).real
        p += H

    return sigout



In [15]:

    
s, fs = librosa.load("../wavs/Reich_Music18.wav")

t1 = 15*fs
t2 = 18*fs
sigin = s[t1:t2]

N = 2048
H = int(N/4)

L = 5*fs

sigout = freeze(sigin, L, N=4096)
display(Audio(data=sigout,rate=fs))

sigout = freeze(sigin, L, N=512)
display(Audio(data=sigout,rate=fs))

sigout = freeze(sigin, L, N=2048, phaserandom = 0, randomblock = 0 )
display(Audio(data=sigout,rate=fs))

sigout = freeze(sigin, L, N=2048, phaserandom = 0.5, randomblock = 1 )
display(Audio(data=sigout,rate=fs))

Pure Data Phase vocoder with Lock option



In [53]:

    
N = 2048
H = int(N/4)

# read input and get the timescale factor

s, fs = librosa.load("../wavs/Reich_Music18.wav")

t1 = 15*fs
t2 = 18*fs
sigin = s[t1:t2]

lock = 1

L = 5*fs

# signal blocks for processing and output
phi  = zeros(N)
prev = ones(N, dtype=complex)
sigout = zeros(L)

# max input amp, window
win = hanning(N)
p = 0
eps = 1e-20

while p < L-(N+H):

    # random block
    p1 = randint(0,H)*randomblock
    
    # take the spectra of two consecutive windows  
    spec1 =  fft(win*sigin[p1:p1+N])
    spec2 =  fft(win*sigin[p1+H:p1+N+H])
    
    X = prev/abs(prev+eps)*conjugate(spec1)
    
    #lock
    X = X + (roll(X,1) + roll(X,-1))*lock
        
    prev = X/abs(X+eps)*spec2
    
    sigout[p:p+N] += win*ifft(prev).real
    p += H



In [54]:

    
Audio(data=sigout,rate=fs)









    Out[54]:



In [ ]:



In [ ]: