In [7]:
%pylab inline
import librosa
from IPython.display import display,Audio
from numpy.fft import fft


Populating the interactive namespace from numpy and matplotlib

Analysis and synthesis

Phase vocoder


In [8]:
#load audio
s, fs = librosa.load("../wavs/Reich_Music18.wav")

# audio fragment
t1 = 10*fs
t2 = 18*fs
sigin = s[t1:t2]

#output input size
L = sigin.size

#size of analysis window and soverlap
N = 2048
H = int(N/4)

#output audio size
Lout = sigin.size

# signal blocks for processing and output
phi  = zeros(N)
out = zeros(N, dtype=complex)
sigout = zeros(L)

# max input amp, window
win = hanning(N)
p = 0

while p < L-(N+H):

    # take the spectra of two consecutive windows     
    spec1 =  fft(win*sigin[p:p+N])
    spec2 =  fft(win*sigin[p+H:p+N+H])
    # take their phase difference and integrate
    phi += (angle(spec2) - angle(spec1))
    
    # bring the phase back to between pi and -pi    
    phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0       
    
    out.real, out.imag = cos(phi), sin(phi)
    
    # inverse FFT and overlap-add        
    sigout[p:p+N] += win*ifft(out*abs(spec2)).real
    
    p += H

In [9]:
print('Original audio')
display(Audio(data=sigin,rate=fs))
print('Synth audio')
Audio(data=sigout,rate=fs)


Original audio
Synth audio
Out[9]:

Time stretching


In [10]:
#load audio
s, fs = librosa.load("../wavs/Reich_Music18.wav")

# audio fragment
t1 = 10*fs
t2 = 18*fs
sigin = s[t1:t2]
L = sigin.size

#size of analysis window and soverlap
N = 2048
H = int(N/4)
tscale = 0.1

#output audio size
Lout = int(L/tscale+N)

# signal blocks for processing and output
phi  = zeros(N)
out = zeros(N, dtype=complex)
sigout = zeros(Lout)

# max input amp, window
win = hanning(N)
pout = 0
pstretch = 0

while pstretch < L-(N+H):

    p = int(pstretch)
    
    # take the spectra of two consecutive windows     
    spec1 =  fft(win*sigin[p:p+N])
    spec2 =  fft(win*sigin[p+H:p+N+H])
    # take their phase difference and integrate
    phi += (angle(spec2) - angle(spec1))
    
    # bring the phase back to between pi and -pi    
    phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0       
    
    out.real, out.imag = cos(phi), sin(phi)
    
    # inverse FFT and overlap-add        
    sigout[pout:pout+N] += win*ifft(out*abs(spec2)).real
    
    pout += H
    pstretch += H*tscale

In [11]:
Audio(data=sigout,rate=fs)


Out[11]:

Infinite time stretch = Freezing


In [34]:
N = 2048
H = int(N/4)

# read input and get the timescale factor

s, fs = librosa.load("../wavs/Reich_Music18.wav")

t1 = 15*fs
t2 = 18*fs
sigin = s[t1:t2]

phaserandom = 0
randomblock = 0

L = 5*fs

# signal blocks for processing and output
phi  = zeros(N)
out = zeros(N, dtype=complex)
sigout = zeros(L)

# max input amp, window
win = hanning(N)
p = 0

while p < L-(N+H):

    # random block
    p1 = randint(0,H)*randomblock
    
    # take the spectra of two consecutive windows  
    spec1 =  fft(win*sigin[p1:p1+N])
    spec2 =  fft(win*sigin[p1+H:p1+N+H])
    
    # take their phase difference and integrate
    phi += (angle(spec2) - angle(spec1)) + randn(N)*phaserandom
    
    # bring the phase back to between pi and -pi
    phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0
    
    out.real, out.imag = cos(phi), sin(phi)
    # inverse FFT and overlap-add
    
    sigout[p:p+N] += win*ifft(abs(spec2)*out).real
    p += H

In [35]:
Audio(data=sigout,rate=fs)


Out[35]:

In [14]:
def freeze(sigin, L, N=2048, H=None, phaserandom = 0, randomblock = 0, winfunc=hanning):

    if H is None:
        H = int(N/4)
        
    # signal blocks for processing and output
    phi  = zeros(N)
    out = zeros(N, dtype=complex)
    sigout = zeros(L)

    # max input amp, window
    win = winfunc(N)
    p = 0

    while p < L-(N+H):

        # random block
        p1 = randint(0,H)*randomblock

        # take the spectra of two consecutive windows  
        spec1 =  fft(win*sigin[p1:p1+N])
        spec2 =  fft(win*sigin[p1+H:p1+N+H])
        # take their phase difference and integrate
        phi += (angle(spec2) - angle(spec1)) + randn(N)*phaserandom

        # bring the phase back to between pi and -pi
        phi =  (( -phi + pi) % (2.0 * pi ) - pi) * -1.0

        out.real, out.imag = cos(phi), sin(phi)
        
        # inverse FFT and overlap-add
        sigout[p:p+N] += win*ifft(abs(spec2)*out).real
        p += H

    return sigout

In [15]:
s, fs = librosa.load("../wavs/Reich_Music18.wav")

t1 = 15*fs
t2 = 18*fs
sigin = s[t1:t2]

N = 2048
H = int(N/4)

L = 5*fs

sigout = freeze(sigin, L, N=4096)
display(Audio(data=sigout,rate=fs))

sigout = freeze(sigin, L, N=512)
display(Audio(data=sigout,rate=fs))

sigout = freeze(sigin, L, N=2048, phaserandom = 0, randomblock = 0 )
display(Audio(data=sigout,rate=fs))

sigout = freeze(sigin, L, N=2048, phaserandom = 0.5, randomblock = 1 )
display(Audio(data=sigout,rate=fs))


Pure Data Phase vocoder with Lock option


In [53]:
N = 2048
H = int(N/4)

# read input and get the timescale factor

s, fs = librosa.load("../wavs/Reich_Music18.wav")

t1 = 15*fs
t2 = 18*fs
sigin = s[t1:t2]

lock = 1

L = 5*fs

# signal blocks for processing and output
phi  = zeros(N)
prev = ones(N, dtype=complex)
sigout = zeros(L)

# max input amp, window
win = hanning(N)
p = 0
eps = 1e-20

while p < L-(N+H):

    # random block
    p1 = randint(0,H)*randomblock
    
    # take the spectra of two consecutive windows  
    spec1 =  fft(win*sigin[p1:p1+N])
    spec2 =  fft(win*sigin[p1+H:p1+N+H])
    
    X = prev/abs(prev+eps)*conjugate(spec1)
    
    #lock
    X = X + (roll(X,1) + roll(X,-1))*lock
        
    prev = X/abs(X+eps)*spec2
    
    sigout[p:p+N] += win*ifft(prev).real
    p += H

In [54]:
Audio(data=sigout,rate=fs)


Out[54]:

In [ ]:


In [ ]: