In [41]:
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
In [42]:
# Root directory of the dataset metadata; edit this one line to relocate the data.
data_dir = '/data/hktxt/AISHELL-2/iOS/data'
wavlist = data_dir + '/wav.scp'        # wav files list
trans = data_dir + '/trans.txt'        # presumably transcripts (original comment said "transition") - TODO confirm
spk_info = data_dir + '/spk_info.txt'  # speaker info
In [43]:
# Read the two whitespace-separated columns of the wav list.
# The separator is a raw string so that "\s" is passed to the regex engine verbatim
# instead of being parsed as an (invalid) string escape, which newer Pythons warn about.
wav = pd.read_csv(wavlist, header=None, sep=r"\s+", names=["folder", "file"])
In [44]:
wav
Out[44]:
In [45]:
len(wav) # total wav files 1009223
Out[45]:
In [46]:
wav.folder[1]
Out[46]:
In [47]:
wav.folder[1:9]
Out[47]:
In [48]:
wav.folder[1][1:6]
Out[48]:
In [49]:
wav.folder[1:3]
Out[49]:
In [50]:
# Collect characters 1..5 of every entry in the first column (keys such as 'C0001').
# Iterating over the column values directly avoids one label-based lookup per row,
# which is very slow on a frame with ~1M rows; tqdm still reports progress.
l = [utt_id[1:6] for utt_id in tqdm(wav.folder)]
In [51]:
len(l)
Out[51]:
In [52]:
l1 = np.unique(l) # order is stable: np.unique returns the unique values sorted
In [53]:
len(l1)
Out[53]:
In [54]:
l2 = list(set(l))# order changed: a set does not preserve the order of l
In [55]:
len(l2)
Out[55]:
In [56]:
l1
Out[56]:
In [57]:
l2[:10]
Out[57]:
In [58]:
l2.sort()
In [59]:
l2[:10]
Out[59]:
In [60]:
# Give every entry of l2 an integer id equal to its position in the list.
label = {name: idx for idx, name in enumerate(tqdm(l2))}
In [100]:
label['C0001'] #the first one
Out[100]:
In [101]:
label['D2166'] #the last one
Out[101]:
In [102]:
label['C0754']
Out[102]:
In [63]:
# Persist the name -> integer id mapping as a two-column CSV (no index column).
dataframe = pd.DataFrame(list(label.items()), columns=['name', 'label'])
dataframe.to_csv("label.csv", index=False, sep=',')
In [64]:
###check
t = pd.read_csv('label.csv', sep=',')
In [65]:
t[:10]
Out[65]:
In [66]:
# Rebuild the name -> id dictionary from the CSV that was just read back.
lt = dict(zip(t['name'], t['label']))
In [67]:
# show the first 5 (name, id) pairs of the mapping
first_pairs = list(lt.items())[:5]
print(first_pairs)
In [68]:
# Split the dataset into train (80%) and test (20%).
from sklearn.model_selection import train_test_split
# A fixed random_state makes the split identical on every run of the notebook.
train, test = train_test_split(wav, test_size=0.2, random_state=0)
In [69]:
len(train)
Out[69]:
In [70]:
len(test)
Out[70]:
In [71]:
wav[1:10]
Out[71]:
In [72]:
# Integer id for every row: slice characters 1..5 of the first column, then look it up in lt.
row_keys = (utt_id[1:6] for utt_id in wav.folder)
ll = [lt[key] for key in row_keys]
In [73]:
len(ll)
Out[73]:
In [74]:
max(ll)
Out[74]:
In [75]:
min(ll)
Out[75]:
In [76]:
ll[499]
Out[76]:
In [77]:
ll[500]
Out[77]:
In [78]:
wav.folder[499]
Out[78]:
In [79]:
wav.folder[500]
Out[79]:
In [80]:
# DataFrame.insert raises ValueError when the column already exists, so a stale
# 'label' column is dropped first; this makes the cell safe to re-run.
if 'label' in wav.columns:
    wav = wav.drop(columns='label')
# Put the integer id (looked up from characters 1..5 of the first column) in front.
wav.insert(0, 'label', [lt[x[1:6]] for x in wav.folder])
In [81]:
wav[495:505]
Out[81]:
In [82]:
wav[995:1005]
Out[82]:
In [83]:
from sklearn.model_selection import train_test_split
# Fixed random_state values make the three-way split reproducible across runs.
train, test = train_test_split(wav.folder, test_size=0.2, random_state=0)  # 0.8 train / 0.2 test
# 0.25 of the remaining 0.8 is 0.2 of the whole: final ratio is 0.6 train / 0.2 valid / 0.2 test
train, valid = train_test_split(train, test_size=0.25, random_state=0)
In [84]:
len(train)
Out[84]:
In [85]:
len(test)
Out[85]:
In [86]:
len(valid)
Out[86]:
In [87]:
len(train)/len(wav)
Out[87]:
In [88]:
len(test)/len(wav)
Out[88]:
In [89]:
len(valid)/len(wav)
Out[89]:
In [90]:
# Draw one phase code per row: 1 with probability 0.9, 2 and 3 with probability 0.05 each.
# A dedicated, seeded generator keeps the assignment identical on every run.
phase_rng = np.random.RandomState(0)
phase = phase_rng.choice([1, 2, 3], size=len(wav), p=[.9, .05, .05])
In [91]:
len(phase)
Out[91]:
In [92]:
list(phase).count(1) #0.9
Out[92]:
In [93]:
list(phase).count(2) #0.05
Out[93]:
In [94]:
list(phase).count(3) #0.05
Out[94]:
In [96]:
# Shuffle all rows (frac=1 keeps every row); random_state makes the permutation reproducible.
shuffered_wav = wav.sample(frac=1, random_state=0)
In [97]:
len(shuffered_wav)
Out[97]:
In [103]:
shuffered_wav[:10]
Out[103]:
In [104]:
# Insert phase as the first column of shuffered_wav.
# DataFrame.insert raises ValueError if 'phase' is already present, so remove a
# stale column first; this lets the cell be re-run after phase is regenerated.
if 'phase' in shuffered_wav.columns:
    shuffered_wav = shuffered_wav.drop(columns='phase')
shuffered_wav.insert(0, 'phase', phase)
In [105]:
len(shuffered_wav)
Out[105]:
In [106]:
shuffered_wav[:10]
Out[106]:
In [107]:
## sort shuffered_wav
# sort_index() orders the rows by index label. shuffered_wav was produced by
# wav.sample(), so this should restore wav's row order while keeping the
# 'phase' column that was attached in shuffled order.
new_wav = shuffered_wav.sort_index()
In [108]:
len(new_wav)
Out[108]:
In [109]:
new_wav[:10]
Out[109]:
In [110]:
new_wav[1000:1001]
Out[110]:
In [111]:
wav[1000:1001]
Out[111]:
In [112]:
## save to csv
# Select the four output columns in a fixed order and write them without the index.
dataframe = new_wav[['phase', 'label', 'folder', 'file']].copy()
dataframe.to_csv("wav91.csv", index=False, sep=',')
In [113]:
wwav = pd.read_csv('wav91.csv', sep=',')
In [114]:
wwav[:10]
Out[114]:
In [288]:
# Label(s) of the row(s) whose 'file' column equals this path, selected with a
# single .loc call (row mask + column name). The result is a Series.
# NOTE(review): this rebinds `lt`, which earlier in the notebook holds the name -> id dict.
is_target_file = wwav['file'] == 'wav/C0001/IC0001W0004.wav'
lt = wwav.loc[is_target_file, 'label']
In [289]:
lt
Out[289]:
In [290]:
type(lt)
Out[290]:
In [286]:
llt = np.array(lt)
In [287]:
type(llt)
Out[287]:
In [291]:
# Calling int() directly on a Series is deprecated in recent pandas; take the
# first element explicitly and convert that scalar instead.
int(lt.iloc[0])
Out[291]:
In [24]:
import numpy as np
# NOTE: `high` is exclusive, so with low=0 and high=1 every one of the 3 samples is 0.
s = np.random.randint(low=0, high=1, size=3)
In [20]:
s
Out[20]:
In [21]:
e = s + 3
In [22]:
e
Out[22]:
In [17]:
e[0]
Out[17]:
In [26]:
np.random.randint(1, 2)
Out[26]:
In [28]:
# np.random.choice requires a population argument; calling it with no arguments
# raises TypeError. Draw a single value from [1, 2, 3] with weights 0.9/0.05/0.05.
np.random.choice([1, 2, 3], p=[.9, .05, .05])
Out[28]:
In [30]:
import os

# Build the path of the snapshot file for the next epoch (epoch_num + 1) inside directory p.
p = 'a'
epoch_num = 2
snapshot_name = 'model_snapshot_{}.pkl'.format(epoch_num + 1)
pa = os.path.join(p, snapshot_name)
In [31]:
pa
Out[31]:
In [ ]: