Import modules


In [1]:
from pyvad import vad, trim, split
from librosa import load
import matplotlib.pyplot as plt
import numpy as np
import IPython.display

Speech data load


In [2]:
name = "test/voice/arctic_a0007.wav"
data, fs = load(name)
time = np.linspace(0, len(data)/fs, len(data)) # time axis
plt.plot(time, data)
plt.show()


Do VAD (int)


In [3]:
%time vact = vad(data, fs, fs_vad = 16000, hop_length = 30, vad_mode=3)


CPU times: user 99.1 ms, sys: 5.39 ms, total: 105 ms
Wall time: 93.7 ms

Plot result


In [4]:
fig, ax1 = plt.subplots()

ax1.plot(time, data, label='speech waveform')
ax1.set_xlabel("TIME [s]")

ax2=ax1.twinx()
ax2.plot(time, vact, color="r", label = 'vad')
plt.yticks([0, 1] ,('unvoice', 'voice'))
ax2.set_ylim([-0.01, 1.01])

plt.legend()
plt.show()


trim


In [5]:
%time edges = trim(data, fs, fs_vad = 16000, hop_length = 30, vad_mode=3)


CPU times: user 85.8 ms, sys: 3.52 ms, total: 89.3 ms
Wall time: 92.1 ms

Plot result


In [6]:
trimed = data[edges[0]:edges[1]]
time = np.linspace(0, len(trimed)/fs, len(trimed)) # time axis
fig, ax1 = plt.subplots()

ax1.plot(time, trimed, label='speech waveform')
ax1.set_xlabel("TIME [s]")

plt.show()


split


In [7]:
%time edges = split(data, fs, fs_vad = 8000, hop_length = 10, vad_mode=3)


CPU times: user 82.9 ms, sys: 4.07 ms, total: 87 ms
Wall time: 87.1 ms

Plot result


In [8]:
for i, edge in enumerate(edges):
    seg = data[edge[0]:edge[1]]
    time = np.linspace(0, len(seg)/fs, len(seg)) # time axis
    
    fig, ax1 = plt.subplots()

    ax1.plot(time, seg, label='speech waveform')
    ax1.set_xlabel("TIME [s]")

    plt.show()



In [ ]: