Notebook initialization


In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from IPython.display import Image, display, Audio
import librosa

Import dataset


In [ ]:
# Read the prediction results for the unlabeled clips from CSV, then show the
# first five rows as a quick check of the available columns.
predicted = pd.read_csv(
    filepath_or_buffer='../data/output/predicted/unlabeled_predicted.csv'
)
predicted.head(n=5)

Split into groups


In [ ]:
# Partition the predictions by predicted label: rows with pred_label == 1 go
# into `pred_discard`, rows with pred_label == 0 go into `pred_review`.
p = predicted
is_discard = p['pred_label'].eq(1)
is_review = p['pred_label'].eq(0)
pred_discard = p[is_discard]
pred_review = p[is_review]

In [ ]:
## Sort by length

Check the longest discarded audios


In [ ]:
# Rank the discarded rows by their `length` column, largest first, and keep
# the top ten for manual inspection.
# `DataFrame.sort` no longer exists in current pandas (it raises
# AttributeError); `sort_values` is the supported replacement.
p = pred_discard.sort_values(by='length', ascending=False)
top = p.head(10)
top

In [ ]:
# Render the image referenced by each selected row, in ranked order.
for image_path in top['image_file']:
    display(Image(image_path))

In [ ]:
# Load and play every clip in `top`, in ranked order. One loop replaces the
# ten copy-pasted cells and also works when `top` holds fewer than ten rows
# (the fixed `top.iloc[i]` lookups raised IndexError in that case).
for audio_path in top['audio_file']:
    # sr=None keeps the file's native sampling rate instead of resampling.
    data, sr = librosa.load(audio_path, sr=None)
    # Inside a loop the player has to be displayed explicitly; only the last
    # expression of a cell is rendered automatically.
    display(Audio(data=data, rate=sr))

Random check of discarded audios


In [ ]:
# Discarded sample
# Draw up to five discarded rows at random for a spot check.
# - The size is clamped so the draw cannot raise ValueError when fewer than
#   five rows were discarded.
# - random_state pins the draw so a Restart & Run All shows the same clips;
#   change the seed to inspect a different set.
p = pred_discard
sample = p.sample(n=min(5, len(p)), random_state=0)

In [ ]:
# Render the image referenced by each randomly sampled row.
for image_path in sample['image_file']:
    display(Image(image_path))

In [ ]:
# Load and play every clip in `sample`. One loop replaces the five
# copy-pasted cells and stays valid whatever the sample size is (the fixed
# `sample.iloc[i]` lookups raised IndexError for a smaller sample).
for audio_path in sample['audio_file']:
    # sr=None keeps the file's native sampling rate instead of resampling.
    data, sr = librosa.load(audio_path, sr=None)
    # Inside a loop the player has to be displayed explicitly; only the last
    # expression of a cell is rendered automatically.
    display(Audio(data=data, rate=sr))

Balanced vs Unbalanced check


In [ ]:
# Select the rows where the two prediction columns disagree and preview them.
# NOTE(review): presumably pred_label and pred_label90 come from the balanced
# and unbalanced models being compared here; confirm which is which.
p = predicted
labels_differ = p['pred_label'].ne(p['pred_label90'])
diff = p[labels_differ]
diff.head(n=5)

In [ ]:
# Rank the disagreeing rows by their `length` column, largest first, and keep
# the top ten for manual inspection.
# `DataFrame.sort` no longer exists in current pandas (it raises
# AttributeError); `sort_values` is the supported replacement.
d = diff.sort_values(by='length', ascending=False)
top = d.head(10)
top

In [ ]:
# Render the image referenced by each of the selected disagreeing rows.
for image_path in top['image_file']:
    display(Image(image_path))

In [ ]:
# Load and play the first two clips in `top`. A loop replaces the two
# copy-pasted cells and does not fail when `top` holds fewer than two rows.
for audio_path in top['audio_file'].head(2):
    # sr=None keeps the file's native sampling rate instead of resampling.
    data, sr = librosa.load(audio_path, sr=None)
    # Inside a loop the player has to be displayed explicitly; only the last
    # expression of a cell is rendered automatically.
    display(Audio(data=data, rate=sr))

In [ ]: