In [1]:
%load_ext autoreload
%matplotlib inline
%autoreload 2
import matplotlib.pyplot as plt
from IPython.display import Audio, display
import numpy as np
from pysas import World, waveread
In [2]:
# Sample utterance taken from http://festvox.org/cmu_arctic/dbs_bdl.html
wav_path = "test/cmu_arctic/arctic_a0001.wav"
# waveread returns three values; only the samples and the sampling rate are kept.
signal, samplingrate, _ = waveread(wav_path)
In [3]:
# Object configured with the recording's sampling rate; later cells call its
# analyze(), synthesis(), fftsize() and envelopesize() methods.
world = World(samplingrate)
In [4]:
# Decompose the waveform into three results; the same three values are handed
# back to world.synthesis() in a later cell to rebuild a waveform.
# NOTE(review): judging by the names these are the F0 contour, a per-frame
# spectrum matrix and a per-frame aperiodicity matrix -- confirm against pysas.
f0, spec_mat, aperiod_mat = world.analyze(signal)
In [5]:
# Draw the f0 sequence returned by the analysis on its own axes.
fig, ax = plt.subplots()
ax.plot(f0)
Out[5]:
In [6]:
# Pick one row of the spectrum matrix to inspect in the following cells.
frame_index = 300
spec = spec_mat[frame_index]
In [7]:
# Log-scale view of the selected spectrum row.
log_spec = np.log(spec)
fig, ax = plt.subplots()
ax.plot(log_spec)
Out[7]:
In [8]:
# Resynthesize a waveform from the unmodified analysis results; the following
# cells plot it against the input signal and play it back.
out = world.synthesis(f0, spec_mat, aperiod_mat)
In [9]:
# Overlay the original waveform and the resynthesized one on shared axes.
fig, ax = plt.subplots()
ax.plot(signal)
ax.plot(out)
Out[9]:
In [10]:
# Play the resynthesized waveform.
# - The playback rate is the sampling rate read from the file rather than a
#   hard-coded 16000, so playback speed stays right for other recordings.
# - Samples are clipped to [-1, 1] before scaling so out-of-range values
#   saturate instead of wrapping around in the int16 conversion.
out_pcm = np.int16(np.clip(out, -1.0, 1.0) * 32767.0)
display(Audio(data=out_pcm, rate=samplingrate))
In [11]:
from pysas.mcep import spec2mcep, mcep2spec, mcep2coef, coef2mcep, estimate_alpha
In [12]:
# Estimate alpha for this sampling rate, keep three decimal places, and show it.
alpha_estimate = estimate_alpha(samplingrate)
alpha = round(alpha_estimate, 3)
alpha
Out[12]:
In [13]:
# Convert the selected spectrum row with spec2mcep.
# The second argument sits in the same position as cpestrum_dim in the later
# spec2mcep_from_matrix call.
mcep_dim = 24
mcep = spec2mcep(spec, mcep_dim, alpha)
In [14]:
# Map the mel-cepstrum back to a spectrum, passing the FFT size reported by world.
fft_size = world.fftsize()
spec2 = mcep2spec(mcep, alpha, fft_size)
In [15]:
# Compare the analyzed spectrum with the one rebuilt from the mel-cepstrum.
# Only the first world.envelopesize() entries of spec2 are drawn.
fig, ax = plt.subplots()
ax.plot(np.log(spec))
envelope_bins = world.envelopesize()
ax.plot(np.log(spec2[:envelope_bins]))
Out[15]:
In [16]:
# Cut one Hann-windowed segment out of the waveform and plot its log power spectrum.
frame_index = 300        # same index as the spec_mat row selected earlier
samples_per_frame = 80   # presumably the frame shift in samples -- TODO confirm
i = samples_per_frame * frame_index
windowsize = 1024
window = np.hanning(windowsize)
sig = signal[i:i + windowsize] * window
# Keep bins 0 .. windowsize/2 (inclusive) of the squared FFT magnitude.
half_bins = windowsize // 2 + 1
power_spectrum = (np.abs(np.fft.fft(sig)) ** 2)[:half_bins]
fig, ax = plt.subplots()
ax.plot(np.log(power_spectrum))
Out[16]:
In [17]:
# Convert the FFT power spectrum with spec2mcep, then rebuild a spectrum from it.
# windowsize is passed in the position where the earlier cell passed world.fftsize().
fft_mcep_dim = 20
fft_mcep = spec2mcep(power_spectrum, fft_mcep_dim, alpha)
reconst_pspec = mcep2spec(fft_mcep, alpha, windowsize)
In [18]:
# Overlay all four spectra on a log scale, with a legend to tell them apart.
# spec2 is cut to world.envelopesize() entries, as in the earlier comparison
# plot, and reconst_pspec is cut to the length of power_spectrum, so each
# reconstructed curve covers the same bins as the curve it is compared with.
# (Slicing is a no-op if a reconstructed array is already that short.)
fig, ax = plt.subplots()
ax.plot(np.log(spec), label="spec")
ax.plot(np.log(spec2[:world.envelopesize()]), label="spec2")
ax.plot(np.log(power_spectrum), label="power_spectrum")
ax.plot(np.log(reconst_pspec[:len(power_spectrum)]), label="reconst_pspec")
ax.legend();
Out[18]:
In [19]:
# Round trip: convert the mel-cepstrum with mcep2coef, then convert the result
# back with coef2mcep; the next cell plots all three vectors for comparison.
coef = mcep2coef(fft_mcep, alpha)
reconst_mcep = coef2mcep(coef, alpha)
In [20]:
# fft_mcep, its round-tripped copy and the intermediate coefficients on one plot.
fig, ax = plt.subplots()
ax.plot(fft_mcep)
ax.plot(reconst_mcep)
ax.plot(coef)
Out[20]:
In [21]:
from pysas.excite import ExcitePulse
In [22]:
# Excitation generator; later cells call ep.gen(...) on the f0 sequence.
# The first argument follows the sampling rate read from the file instead of a
# hard-coded 16000, consistent with World(samplingrate).
# NOTE(review): assumes the first ExcitePulse argument is the sampling rate -- confirm.
# NOTE(review): 80 is also the first Synthesis argument in a later cell;
# presumably the frame shift in samples -- confirm.
ep = ExcitePulse(int(samplingrate), 80, False)
In [23]:
# Visualize the excitation generated from the f0 sequence.
excitation = ep.gen(f0)
fig, ax = plt.subplots()
ax.plot(excitation)
Out[23]:
In [24]:
# Listen to the raw excitation, scaled by 700 before the int16 conversion.
# Playback uses the sampling rate read from the file instead of a hard-coded 16000.
excitation_pcm = np.int16(ep.gen(f0) * 700)
display(Audio(data=excitation_pcm, rate=samplingrate))
In [25]:
from pysas.synthesis.mlsa import MLSAFilter
from pysas.synthesis import Synthesis
from pysas.mcep import spec2mcep_from_matrix
In [26]:
# Size passed to spec2mcep_from_matrix here and to MLSAFilter in later cells.
# NOTE(review): the name is spelled "cpestrum" (presumably "cepstrum"); later
# cells reference this exact spelling, so it is left unchanged.
cpestrum_dim = 128
# Convert the whole spectrum matrix in one call.
mcep_mat = spec2mcep_from_matrix(spec_mat, cpestrum_dim, alpha)
In [27]:
# Convert every row of mcep_mat with mcep2coef and stack the results into one array.
# Built in a single expression so the cell is valid Python regardless of
# indentation, and coef_mat is never left as a partially filled list when the
# cell is interrupted or re-run.
coef_mat = np.array([mcep2coef(mcep_row, alpha) for mcep_row in mcep_mat])
In [28]:
# Excitation for the whole utterance.
pulse = ep.gen(f0)
# MLSA filter sized by cpestrum_dim, handed to the Synthesis object that runs it.
mlsa = MLSAFilter(cpestrum_dim, alpha, 5)
syn = Synthesis(80, mlsa)
In [29]:
# Run the synthesis object on the pulse excitation and the coefficient matrix;
# the result is plotted against the input and played back in the cells below.
synth = syn.synthesis(pulse, coef_mat)
In [30]:
# Synthesized waveform first, original recording drawn on top of it.
fig, ax = plt.subplots()
ax.plot(synth)
ax.plot(signal)
Out[30]:
In [31]:
# Play the original recording for reference.
# The playback rate is the sampling rate returned by waveread rather than a
# hard-coded 16000; samples are clipped to [-1, 1] so the int16 conversion
# saturates instead of wrapping around.
signal_pcm = np.int16(np.clip(signal, -1.0, 1.0) * 32767.0)
display(Audio(data=signal_pcm, rate=samplingrate))
In [32]:
# Play the MLSA-synthesized waveform.
# The playback rate is the sampling rate read from the file rather than a
# hard-coded 16000; samples are clipped to [-1, 1] so filter output that
# exceeds full scale saturates instead of wrapping around in the int16 cast.
synth_pcm = np.int16(np.clip(synth, -1.0, 1.0) * 32767.0)
display(Audio(data=synth_pcm, rate=samplingrate))
In [33]:
# Variation on the synthesis above: alpha is scaled by 1.3 for the filter and
# f0 is scaled by 0.7 for the excitation, reusing the same coef_mat.
# Separate names are used so the mlsa / syn / pulse objects from the earlier
# cell are not overwritten, which keeps that cell's synthesis reproducible
# when cells are re-run.
alpha_scale = 1.3
f0_scale = 0.7
mlsa_scaled = MLSAFilter(cpestrum_dim, alpha * alpha_scale, 5)
syn_scaled = Synthesis(80, mlsa_scaled)
pulse_scaled = ep.gen(f0 * f0_scale)
a = syn_scaled.synthesis(pulse_scaled, coef_mat)
# Clip to [-1, 1] so the int16 conversion saturates instead of wrapping, and
# play at the sampling rate read from the file rather than a hard-coded 16000.
display(Audio(data=np.int16(np.clip(a, -1.0, 1.0) * 32767.0), rate=samplingrate))
In [ ]: