This notebook gives a brief introduction to SPTK.jl, a Julia wrapper of the Speech Signal Processing Toolkit (SPTK).
In [1]:
### Plotting settings ###
# Configure plotting settings (using `PyPlot` in this notebook)
using PyCall
using PyPlot
# Set the `figure.figsize` entry of matplotlib's rcParams to (12, 5) so that it applies
# to the figures created afterwards.
PyDict(matplotlib["rcParams"])["figure.figsize"] = (12, 5);
In [2]:
using WAV
import SPTK
In [3]:
# Path of the example file `examples/test16k.wav` inside the SPTK package directory
filepath = joinpath(Pkg.dir("SPTK"), "examples", "test16k.wav")
# The first two values returned by `wavread` are bound to `x` and `fs`
# (presumably the samples and the sampling rate -- confirm against WAV.jl).
x, fs = wavread(filepath, format="native")
x = convert(Vector{Float64}, vec(x)) # monaural: flatten to a plain Float64 vector
fs = convert(Int, fs)
# Visualize the speech signal in time-domain (x axis is the 1-based sample index)
plot(1:endof(x), x, label="a speech signal")
xlim(1, endof(x))
xlabel("sample")
legend()
Out[3]:
In [4]:
# Pick a short segment: `fftlen` samples starting right after sample `pos`
pos = 3000
fftlen = 1024
# Note that mel-generalized cepstrum analysis basically assumes window is power-normalized.
xw = x[pos+1:pos+fftlen] .* SPTK.blackman(fftlen)
# The line width is passed as a number (not as the string "2"), matching the other
# plot calls in this notebook that use `linewidth=3`.
plot(1:endof(xw), xw, linewidth=2, label="a windowed speech signal")
xlim(1, endof(xw))
xlabel("sample")
legend()
Out[4]:
In [5]:
# Plotting utility for visualizing spectral envelope estimate
"""
    pplot(sp, envelope; title="envelope")

Plot the log spectrum `sp` (blue line) together with a spectral envelope estimate
(red line) on the current figure.

# Arguments
- `sp`: log spectrum to draw; the legend labels it as `20log|X(ω)|`.
- `envelope`: envelope estimate. It is multiplied by `20/log(10)` before plotting, which
  maps a natural-log amplitude onto the `20log10` scale (this assumes `envelope` is a
  natural-log amplitude spectrum -- confirm against the caller).

# Keywords
- `title`: legend label used for the envelope curve.

Returns whatever `legend()` returns.
"""
function pplot(sp, envelope; title="envelope")
    # Line widths are passed as numbers, like the other plot calls in this notebook.
    plot(sp, "b-", linewidth=2, label="Original log spectrum 20log|X(ω)|")
    plot(20/log(10)*(envelope), "r-", linewidth=3, label=title)
    # `plot` called without x values places the samples at x = 0, 1, ..., length(sp)-1,
    # so the limits must be 0-based; `xlim(1, length(sp))` would hide the first bin and
    # leave an empty slot at the right edge.
    xlim(0, length(sp) - 1)
    xlabel("frequency bin")
    ylabel("log amplitude")
    return legend()
end
Out[5]:
In [6]:
# Compute the log-amplitude spectrum 20log|X(ω)| for a windowed signal:
# 20 times the base-10 log of the magnitude of `rfft(xw)`.
sp = 20log10(abs(rfft(xw)));
In [7]:
# Linear Cepstrum
# `mgcep` is called with 20 and with both trailing parameters set to 0.0
# (presumably order = 20, α = 0: no frequency warping, γ = 0 -- TODO confirm with SPTK docs).
c = SPTK.mgcep(xw, 20, 0.0, 0.0)
# Convert the coefficients back to a spectrum with the same two parameters and `fftlen`,
# then overlay the real part on the log spectrum `sp`.
pplot(sp, real(SPTK.mgc2sp(c, 0.0, 0.0, fftlen)), title="Linear frequency cepstrum based envelope")
Out[7]:
In [8]:
# Mel-Cepstrum
# `mcep` takes 20 and a single parameter 0.41; the same 0.41 is handed to `mgc2sp`
# together with 0.0 (presumably α = 0.41 frequency warping, γ = 0 -- TODO confirm).
mc = SPTK.mcep(xw, 20, 0.41)
# Overlay the real part of the reconstructed spectrum on the log spectrum `sp`
pplot(sp, real(SPTK.mgc2sp(mc, 0.41, 0.0, fftlen)), title="Mel-cepstrum based envelope")
Out[8]:
In [9]:
# LPC Cepstrum
# Same `mgcep` call as the linear cepstrum, except that the last parameter is -1.0
# (presumably γ = -1 with α = 0 -- TODO confirm with SPTK docs).
mgc = SPTK.mgcep(xw, 20, 0.0, -1.0)
# `mgc2sp` receives the same (0.0, -1.0) pair that was used for the analysis
pplot(sp, real(SPTK.mgc2sp(mgc, 0.0, -1.0, fftlen)), title="LPC cepstrum based envelope")
Out[9]:
In [10]:
# Warped LPC
# Combines the 0.41 parameter of the mel-cepstrum example with the -1.0 of the LPC example
# (presumably α = 0.41, γ = -1 -- TODO confirm with SPTK docs). Rebinds `mgc`.
mgc = SPTK.mgcep(xw, 20, 0.41, -1.0)
# `mgc2sp` receives the same (0.41, -1.0) pair that was used for the analysis
pplot(sp, real(SPTK.mgc2sp(mgc, 0.41, -1.0, fftlen)), title="Warped LPC based envelope")
Out[10]:
In [11]:
# Generalized Cepstrum
# `gcep` takes 20 and a single parameter -0.35; `mgc2sp` is then called with 0.0 followed by
# the same -0.35 (presumably α = 0, γ = -0.35 -- TODO confirm with SPTK docs). Rebinds `mgc`.
mgc = SPTK.gcep(xw, 20, -0.35)
# Overlay the real part of the reconstructed spectrum on the log spectrum `sp`
pplot(sp, real(SPTK.mgc2sp(mgc, 0.0, -0.35, fftlen)), title="Generalized cepstrum based envelope")
Out[11]:
In [12]:
# Mel-Generalized Cepstrum
# Uses both non-zero parameters at once: 0.41 and -0.35
# (presumably α = 0.41, γ = -0.35 -- TODO confirm with SPTK docs). Rebinds `mgc`.
mgc = SPTK.mgcep(xw, 20, 0.41, -0.35)
# `mgc2sp` receives the same (0.41, -0.35) pair that was used for the analysis
pplot(sp, real(SPTK.mgc2sp(mgc, 0.41, -0.35, fftlen)), title="Mel-generalized cepstrum based envelope")
Out[12]:
In [13]:
# Utilities for splitting a time sequence into overlapping frames

"""
    countframes(x::AbstractVector, framelen, hopsize)

Return the number of complete frames of `framelen` samples, taken every `hopsize`
samples, that fit into `x`. Return 0 when `x` is shorter than a single frame.

Throws an `ArgumentError` if `framelen` or `hopsize` is not positive.
"""
function countframes(x::AbstractVector, framelen, hopsize)
    framelen > 0 || throw(ArgumentError("framelen must be positive, got $framelen"))
    hopsize > 0 || throw(ArgumentError("hopsize must be positive, got $hopsize"))
    # `div` truncates toward zero, so without this guard a signal that is only slightly
    # shorter than `framelen` would be counted as holding one (incomplete) frame, and a
    # much shorter one would yield a negative count.
    length(x) < framelen && return 0
    return div(length(x) - framelen, hopsize) + 1
end

"""
    splitframes(x::AbstractVector, framelen=1024, hopsize=framelen>>1)

Split `x` into overlapping frames and return them as a `framelen`-by-`N` matrix with one
frame per column, where `N = countframes(x, framelen, hopsize)`. Frame `i` holds
`x[(i-1)*hopsize+1 : (i-1)*hopsize+framelen]`; trailing samples that do not fill a whole
frame are dropped. The result has zero columns when `x` is shorter than `framelen`.

Throws an `ArgumentError` if `framelen` or `hopsize` is not positive.
"""
function splitframes(x::AbstractVector, framelen=1024, hopsize=framelen>>1)
    N = countframes(x, framelen, hopsize)
    # `similar` with an explicit element type and size allocates the uninitialized
    # output without relying on a version-specific `Array` constructor.
    frames = similar(x, eltype(x), (framelen, N))
    for i = 1:N
        start = (i - 1) * hopsize + 1
        frames[:, i] = x[start:start+framelen-1]
    end
    return frames
end
Out[13]:
In [14]:
winlen = 1024
hopsize = winlen>>1 # hop of half a window between the starts of consecutive frames
# Split the whole signal into frames (one per column) and window each of them: `.*`
# broadcasts the length-`winlen` window over the columns.
# NOTE: this rebinds `xw`, which previously held a single windowed segment.
xw = splitframes(x, winlen, hopsize) .* SPTK.blackman(winlen);
@show size(xw)
Out[14]:
In [15]:
# Let's see how the spectral envelope estimates change with different orders of mel-cepstrum
# Estimate spectral envelope by mel-cepstrum analysis where order = 20
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 20, 0.41), 0.41, 0.0, winlen))
# Show the result as an image with the origin at the bottom, scaled by 20/log(10)
# exactly like the envelope curve drawn by `pplot`.
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()
Out[15]:
In [16]:
# order = 30
# Same analysis and display as for order = 20; only the order passed to `mcep` differs.
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 30, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()
Out[16]:
In [17]:
# order = 40
# Same analysis and display as for order = 20; only the order passed to `mcep` differs.
logsp = real(SPTK.mgc2sp(SPTK.mcep(xw, 40, 0.41), 0.41, 0.0, winlen))
imshow(20/log(10)*logsp, origin="lower", aspect="auto")
colorbar()
Out[17]:
SPTK supports the two F0 (fundamental frequency) estimation algorithms shown below:
In [18]:
# Estimate an F0 trajectory with each algorithm and overlay both for comparison.
# Both calls use `otype=1`; the y axis is labelled "Hz", so this presumably selects
# F0 in Hz as the output -- TODO confirm with SPTK docs.
# SWIPE'
f0_swipe = SPTK.swipe(x, fs, hopsize, otype=1)
plot(f0_swipe, label="F0 trajectory estimated by SWIPE'", linewidth=3)
# RAPT (called with the signal converted to Float32)
f0_rapt = SPTK.rapt(map(Float32, x), fs, hopsize, otype=1)
# The algorithm is called RAPT; only SWIPE' is written with a prime, so the legend
# label must not carry one here.
plot(f0_rapt, label="F0 trajectory estimated by RAPT", linewidth=3)
xlim(0, length(f0_rapt))
ylim(0, 230)
xlabel("frame")
ylabel("Hz")
legend()
Out[18]:
In [19]:
# Run SWIPE' again, this time with `otype=0` instead of the `otype=1` used for the F0 plot
# (presumably this returns pitch periods rather than F0 in Hz -- TODO confirm with SPTK docs).
pitch = SPTK.swipe(x, fs, hopsize, otype=0)
# Generate a source excitation signal from the pitch sequence, using the same hop size
ex = SPTK.excite(pitch, hopsize, gaussian=false)
plot(ex, label="Source excitation")
xlim(0, length(ex))
xlabel("sample")
legend()
Out[19]: