notebook.community

Edit and run



In [ ]:

    
%matplotlib inline
# http://stackoverflow.com/questions/29452582/matplotlib-valueerror-width-and-height-must-each-be-below-32768-in-ipython-no
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}

import numpy as np
import matplotlib.pyplot as plt
import librosa
from IPython.display import Image, display, Audio



In [ ]:

    
# Make the project's preprocessing scripts importable: resolve
# ../preprocess/scripts against the current working directory and append the
# resulting absolute path to the module search path.
# NOTE(review): this depends on the kernel's working directory, so the import
# below fails if the notebook is started from a different folder.
import os
import sys
cwd = os.getcwd()
preprocess_path = os.path.abspath(os.path.join(cwd, '../preprocess/scripts'))
sys.path.append(preprocess_path)
# Helpers used by the cells below; they are defined in featurize, not here.
from featurize import preprocess, extract_chunks, get_local_maxima_idx



In [ ]:

    
def classify_rings(smooth_data, lmax_idx, sample_rate=8000, ring_interval=5,
                   ring_sep_threshold=2000, ring_amp_diff_threshold=0.02):
    '''Classify chunks as rings or non-rings.

    Two consecutive local maxima are flagged as rings when both hold:

    * their distance (in samples) lies within ``ring_sep_threshold`` of
      ``sample_rate * ring_interval``;
    * the amplitude of the second maximum lies within
      ``ring_amp_diff_threshold`` of the reference amplitude. The reference
      is the amplitude of the first maximum of the first spacing candidate
      and is set only once.

    Args:
        smooth_data: 1-D array-like signal, indexed by sample position.
        lmax_idx: sequence of integer sample positions of the local maxima,
            one entry per chunk to classify.
        sample_rate: samples per unit of ``ring_interval`` (default 8000).
        ring_interval: expected spacing between rings as a multiple of
            ``sample_rate`` (default 5; presumably seconds -- confirm).
        ring_sep_threshold: tolerated deviation from the expected spacing,
            in samples.
        ring_amp_diff_threshold: tolerated absolute deviation from the
            reference amplitude.

    Returns:
        Integer numpy array with the same length as ``lmax_idx``; an entry
        is 1 when the corresponding maximum was classified as a ring and 0
        otherwise.

    Side effects:
        Prints the number of spacing candidates and one line for every
        candidate that fails the amplitude check.
    '''
    lmax_idx = np.asarray(lmax_idx)
    if lmax_idx.size == 0:
        # An empty array defaults to float dtype, which numpy rejects as an
        # index; normalise it so "no maxima" yields an empty result.
        lmax_idx = lmax_idx.astype(np.intp)

    rings = np.zeros_like(lmax_idx)
    local_maxima = np.asarray(smooth_data)[lmax_idx]
    # Distance, in samples, between each pair of consecutive local maxima.
    diff = np.diff(lmax_idx, n=1)

    expected_sep = sample_rate * ring_interval
    ring_candidates = np.where((diff >= expected_sep - ring_sep_threshold) &
                               (diff <= expected_sep + ring_sep_threshold))[0]
    # Reported as "pairs + 1": k consecutive pairs span k + 1 chunks.
    print('ring candidates: {}'.format(len(ring_candidates)+1))

    # None (rather than 0) marks "not set yet", so a genuine reference
    # amplitude of 0.0 is kept instead of being overwritten later on.
    ring_ref_amp = None
    for idx in ring_candidates:
        if ring_ref_amp is None:
            ring_ref_amp = local_maxima[idx]
        # The following maximum must be close to the reference amplitude.
        amp_lower = ring_ref_amp - ring_amp_diff_threshold
        amp_upper = ring_ref_amp + ring_amp_diff_threshold
        if amp_lower <= local_maxima[idx+1] <= amp_upper:
            # Both ends of the pair are rings.
            rings[idx] = 1
            rings[idx+1] = 1
        else:
            print('chunk in pos {} with amp: {} did not meet the amplitude criteria'.format(lmax_idx[idx],
                                                                                            local_maxima[idx]))
    return rings



In [ ]:

    
# known to have good rings:
# The data root defaults to the original author's local checkout; set the
# ML_AUDIO_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = os.environ.get(
    'ML_AUDIO_DATA_DIR',
    '/Users/jjelosua/Developer/lanacion/ML_audio_classification/data/output')
SAMPLE_NAME = 'B-1009-2012-12-26-121627-4'
# The wav file and its image share the same base name.
wav_path = os.path.join(DATA_DIR, 'wavs', 'train', SAMPLE_NAME + '.wav')
img_path = os.path.join(DATA_DIR, 'images', 'train', SAMPLE_NAME + '.png')



In [ ]:

    
# Render the PNG stored at img_path inline, as a visual reference for the
# audio sample analysed below.
im = Image(filename=img_path)
display(im)



In [ ]:

    
# Load the audio file with librosa.
# sr=None keeps the file's native sampling rate instead of resampling;
# the rate actually used is returned as `sr`.
data, sr = librosa.load(wav_path, sr=None)



In [ ]:

    
# Play it back with IPython.display.Audio!
# The player is shown because the Audio object is the cell's last expression;
# `data` and `sr` are the samples and sampling rate returned by librosa.load.
Audio(data=data, rate=sr)



In [ ]:

    
# Run the project's preprocessing on the wav file, split the result into
# silent and non-silent chunks, and plot the preprocessed signal.
# NOTE(review): preprocess and extract_chunks come from featurize; their exact
# return types are not visible in this notebook -- confirm there.
smooth_data = preprocess(wav_path)
silences, nonsilences = extract_chunks(smooth_data)
plt.plot(smooth_data)



In [ ]:

    
# Classify rings
# lmax_idx is assumed to hold one local-maximum position per non-silent chunk
# (TODO confirm in featurize.get_local_maxima_idx); the plotting cell below
# indexes `rings` by chunk number.
lmax_idx = get_local_maxima_idx(nonsilences)
rings = classify_rings(smooth_data, lmax_idx)
# Bare expression: display the per-chunk ring flags as the cell output.
rings



In [ ]:

    
# Plot audio with detected rings
smooth_data = preprocess(wav_path)
silences, nonsilences = extract_chunks(smooth_data)
plt.ylim([0, 1.2])
# Annotate every non-silent chunk near the top of the axes: chunks flagged in
# `rings` get a 'RING' tag placed 8000 samples before the chunk position, the
# others get their chunk number placed 1000 samples after it.
for i, (pos, ns) in enumerate(nonsilences):
    x = pos
    offset, label = (-8000, 'RING') if rings[i] else (1000, i)
    plt.text(x=x + offset, y=1.1, s=label)
plt.plot(smooth_data)



In [ ]: