In [1]:
from pyvad import vad, trim, split
from librosa import load
import matplotlib.pyplot as plt
import numpy as np
import IPython.display
In [2]:
name = "test/voice/arctic_a0007.wav"
data, fs = load(name)
time = np.linspace(0, len(data)/fs, len(data)) # time axis
plt.plot(time, data)
plt.show()
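Since IPython.display is already imported, the loaded clip can also be played back inside the notebook; a minimal sketch (librosa.load resamples to 22050 Hz by default, so fs is passed explicitly):
# play the raw recording in the notebook (assumes a front-end with audio support)
IPython.display.display(IPython.display.Audio(data, rate=fs))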
In [3]:
%time vact = vad(data, fs, fs_vad=16000, hop_length=30, vad_mode=3)  # per-sample voice activity flags (0/1), same length as data
In [4]:
fig, ax1 = plt.subplots()
ax1.plot(time, data, label='speech waveform')
ax1.set_xlabel("TIME [s]")
ax2=ax1.twinx()
ax2.plot(time, vact, color="r", label = 'vad')
plt.yticks([0, 1], ('unvoiced', 'voiced'))
ax2.set_ylim([-0.01, 1.01])
lines = ax1.get_lines() + ax2.get_lines()
ax1.legend(lines, [l.get_label() for l in lines])  # single legend covering both axes
plt.show()
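Because vact carries one 0/1 flag per sample, it can also serve as a boolean mask; a rough sketch for listening to only the samples flagged as speech, assuming vact is a NumPy array of the same length as data:
voiced_only = data[vact.astype(bool)]  # drop every sample flagged as non-speech
IPython.display.display(IPython.display.Audio(voiced_only, rate=fs))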
In [5]:
%time edges = trim(data, fs, fs_vad=16000, hop_length=30, vad_mode=3)  # (start, end) sample indices of the speech region
In [6]:
trimmed = data[edges[0]:edges[1]]
time = np.linspace(0, len(trimmed)/fs, len(trimmed)) # time axis
fig, ax1 = plt.subplots()
ax1.plot(time, trimmed, label='speech waveform')
ax1.set_xlabel("TIME [s]")
plt.show()
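As a quick sanity check, the trimmed clip can be compared against the original and played back; a minimal sketch:
print('original: {:.2f} s, trimmed: {:.2f} s'.format(len(data)/fs, len(trimmed)/fs))
IPython.display.display(IPython.display.Audio(trimmed, rate=fs))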
In [7]:
%time edges = split(data, fs, fs_vad=8000, hop_length=10, vad_mode=3)  # list of (start, end) index pairs, one per detected speech segment
In [8]:
for i, edge in enumerate(edges):
    seg = data[edge[0]:edge[1]]
    time = np.linspace(0, len(seg)/fs, len(seg)) # time axis
    fig, ax1 = plt.subplots()
    ax1.plot(time, seg, label='speech waveform')
    ax1.set_xlabel("TIME [s]")
    ax1.set_title('segment {}'.format(i))
    plt.show()
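If the segments should be written to disk instead of plotted, a WAV writer such as soundfile (an assumption; it is not imported in this notebook, any writer would do) can be applied to the same edges:
import soundfile as sf  # assumed to be installed; not part of the example above

for i, (start, end) in enumerate(edges):
    sf.write('segment_{:02d}.wav'.format(i), data[start:end], fs)  # one file per detected speech segment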