In [2]:
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
import numpy as np
import IPython
%matplotlib inline

In [18]:
DIR = 'data/'
FILENAME = 'Intro'
EXT = '.mp3'

In [5]:
# This file is part of audioread.
# Copyright 2011, Adrian Sampson.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

"""Command-line tool to decode audio files to WAV files."""
from __future__ import print_function
import audioread
import sys
import os
import wave
import contextlib

def decode(filename):
    filename = os.path.abspath(os.path.expanduser(filename))
    if not os.path.exists(filename):
        print("File not found.", file=sys.stderr)
        sys.exit(1)

    try:
        with audioread.audio_open(filename) as f:
            print('Input file: %i channels at %i Hz; %.1f seconds.' % \
                  (f.channels, f.samplerate, f.duration),
                  file=sys.stderr)
            print('Backend:', str(type(f).__module__).split('.')[1],
                  file=sys.stderr)

            with contextlib.closing(wave.open(filename + '.wav', 'w')) as of:
                of.setnchannels(f.channels)
                of.setframerate(f.samplerate)
                of.setsampwidth(2)

                for buf in f:
                    of.writeframes(buf)

    except audioread.DecodeError:
        print("File could not be decoded.", file=sys.stderr)

In [19]:
decode(DIR + FILENAME + EXT)


Input file: 2 channels at 44100 Hz; 44.0 seconds.
Backend: macca

In [20]:
WAV_FILE = DIR + FILENAME + '.wav'

In [8]:
rate, data = wav.read(WAV_FILE)

In [9]:
plt.plot(data)
plt.show()



In [11]:
print('audio clip is %f seconds' % (data.shape[0] // rate))


audio clip is 43.000000 seconds

In [12]:
IPython.display.Audio(WAV_FILE)


Out[12]:

In [13]:
# Generate a sound
framerate = 44100
t = np.linspace(0,5,framerate*5)
data2 = np.sin(2*np.pi*220*t) + np.sin(2*np.pi*224*t)
IPython.display.Audio(data2,rate=framerate)


Out[13]:

In [14]:
text = "Let's say you're a college freshman and you're choosing a major. You're"
words = text.split()
print("%d WORDS" % len(words))
IPython.display.Audio(data[0:framerate*4, 0], rate=framerate)


12 WORDS
Out[14]:

In [22]:
wav.write(DIR + FILENAME + '0-4-2' + '.wav', rate, data[0:framerate*4, :])

In [54]:
plt.plot(data[0:framerate*4, 0])
plt.show()



In [15]:
ac = data[0:framerate*4, 0]
print('Median: %f' % np.median(ac))
threshold = .3
print('Threshold: %f' % (threshold) )
markers = []
ON = False
for i in range(100, ac.shape[0] - 100, framerate//30):
    vals = np.mean(np.abs((ac[i-50:i+50] - np.mean(ac))/ np.std(ac)))
    if vals > threshold:
        if not ON:
            markers.append([i, ac[i] ])
            ON = True
    else:
        ON = False
print(markers)


Median: 0.000000
Threshold: 0.300000
[[10390, 2551], [19210, -992], [48610, 1365], [51550, 889], [64780, -949], [69190, 1110], [80950, -1146], [85360, -302], [88300, -949], [104470, 1152], [120640, -5200], [125050, -242], [169150, 81], [175030, -106]]

In [16]:
x, y = zip(*markers)
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
ax.plot(ac)
ax.plot(x,y, 'ro')
fig.show()


/Library/Python/2.7/site-packages/matplotlib/figure.py:397: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "

In [120]:
IPython.display.Audio(ac[11120-1000:18834-1000], rate=framerate)


Out[120]:

In [121]:
IPython.display.Audio(ac[19210-1000:48610], rate=framerate)


Out[121]:

In [148]:
IPython.display.Audio(ac[125050:169150], rate=framerate)


Out[148]:

In [59]:
rate, zdata = wav.read('data/Zach2.wav')

In [60]:
IPython.display.Audio(zdata[framerate*0:framerate*4, 0], rate=framerate)


Out[60]:

In [61]:
plt.plot(zdata[0:framerate*4, 0])
plt.show()



In [132]:
text = "Let's say you're a"
audioChunk = zdata[0:framerate*4.5, 0]
print('Median: %f' % np.median(audioChunk))
threshold = .3 # 61% for cumulative z values
print('Threshold: %f' % (threshold) )
markers = []
ON = False
for i in range(100, audioChunk.shape[0] - 100, framerate//50):
    vals = np.mean(np.abs((audioChunk[i-100:i+100] - np.mean(audioChunk))/ np.std(audioChunk)))
    if vals > threshold:
        if not ON:
            markers.append([i, audioChunk[i] ])
            ON = True
    else:
        ON = False
print(markers)


Median: 0.000011
Threshold: 0.300000
[[21268, -0.018286552], [68896, 0.043553524], [112114, -0.011792274], [162388, 0.011323638]]
/Library/Python/2.7/site-packages/ipykernel/__main__.py:2: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  from ipykernel import kernelapp as app

In [133]:
x, y = zip(*markers)
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
ax.plot(audioChunk)
ax.plot(x,y, 'ro')
fig.show()



In [134]:
text = "Let's say you're a"
audioChunk = zdata[0:framerate*4.5, 0]
print('Median: %f' % np.median(audioChunk))
threshold = np.mean(audioChunk) + .3 * np.std(audioChunk)
print('Threshold: %f' % (threshold) )
markers = []
ON = False
for i in range(100, audioChunk.shape[0] - 100, framerate//40):
    vals = np.mean(np.abs(audioChunk[i-100:i+100]))
    if vals > threshold:
        if not ON:
            markers.append([i, audioChunk[i] ])
            ON = True
    else:
        ON = False
print(markers)


Median: 0.000011
Threshold: 0.006349
[[22140, 0.0023543711], [69526, 0.090782695], [111402, -0.0097981365], [163196, -0.010089604]]
/Library/Python/2.7/site-packages/ipykernel/__main__.py:2: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  from ipykernel import kernelapp as app

In [135]:
x, y = zip(*markers)
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
ax.plot(audioChunk)
ax.plot(x,y, 'ro')
fig.show()



In [115]:
IPython.display.Audio(audioChunk[22140-10000:69526-10000], rate=framerate)


Out[115]:

In [116]:
IPython.display.Audio(audioChunk[69526-10000:112504-10000], rate=framerate)


Out[116]:

In [117]:
IPython.display.Audio(audioChunk[112504-10000: 163196-10000], rate=framerate)


Out[117]:

In [118]:
IPython.display.Audio(audioChunk[163196-10000: ], rate=framerate)


Out[118]:

In [ ]: