In [1]:
## Suppresses text output from this cell
#%%capture

## Install the latest version of Audio Tagging Toolkit
#!pip install -U git+https://github.com/hipstas/audio-tagging-toolkit.git

## Or ...
#!pip install -U attk

In [ ]:
import attk
import os
import random

In [3]:
## Download an MP3 audio file

!wget https://media.sas.upenn.edu/pennsound/authors/Armantrout/WPS1/Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3

media_path='./Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3'


--2017-06-29 00:30:42--  https://media.sas.upenn.edu/pennsound/authors/Armantrout/WPS1/Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3
Resolving media.sas.upenn.edu (media.sas.upenn.edu)... 128.91.234.99
Connecting to media.sas.upenn.edu (media.sas.upenn.edu)|128.91.234.99|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 442220 (432K) [audio/mpeg]
Saving to: 'Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3'

100%[======================================>] 442,220      533KB/s   in 0.8s   

2017-06-29 00:30:44 (533 KB/s) - 'Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3' saved [442220/442220]


In [4]:
## Create a temporary WAV copy of the MP3 and assign its pathname to a variable.
## The temp file should be deleted once it is no longer needed (see the last
## cell of this notebook).

wav_pathname = attk.temp_wav_path(media_path)

In [5]:
## View the pathname of our temporary WAV file
## (a bare expression on the last line of a cell is displayed as its output)

wav_pathname


Out[5]:
'/var/tmp/Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3_temp_0889673246084.wav'

In [6]:
## Return file duration in seconds via ffprobe
## (accepts just about any audio or video format)

dur_secs = attk.duration(wav_pathname)

## Display the duration (a float number of seconds)
dur_secs


Out[6]:
27.506939

In [7]:
## Extract 12 Mel Frequency Cepstral Coefficients (0th coefficient removed)
## per bin and display the first 2 bins' worth of values

mfccs = attk.get_mfccs(wav_pathname)

mfccs[:2]


Out[7]:
[array([ 4.88912813,  4.86148326,  4.81562052,  4.75185603,  4.67062843,
         4.57249505,  4.45812703,  4.32830342,  4.18390437,  4.02590352,
         3.85535948,  3.67340672]),
 array([ 18.16073429,   8.14900296,   6.44380459,  12.19986734,
         15.50696027,  13.35756596,  11.44315126,  12.43646308,
         12.51991313,   9.88495127,   8.08871926,   9.10315133])]

In [8]:
## Extract MFCC + delta + delta-delta features and display the first 2 bins'
## worth of values. Each bin comes back as one flat list (38 values per bin in
## the output shown here) that begins with the same 12 MFCCs returned by
## get_mfccs().

mfcc_d = attk.get_mfccs_and_deltas(wav_pathname)

mfcc_d[:2]


Out[8]:
[[4.8891281260629844,
  4.8614832582619174,
  4.8156205244252135,
  4.7518560322128582,
  4.6706284303648511,
  4.5724950536127373,
  4.4581270325582452,
  4.3283034173542285,
  4.1839043737809876,
  4.0259035194587414,
  3.8553594763372412,
  3.6734067232016621,
  4.8133559775583272,
  1.191450850108303,
  -0.37226854968814715,
  0.70426170253225506,
  1.3104868391712103,
  3.1379218456983469,
  1.0520799309531368,
  1.2556289226073438,
  0.42873379881270718,
  1.6618632549441084,
  1.6488683559831299,
  0.37049697506437612,
  1.3496791503333647,
  9.3681404046226966,
  0.17473049804926169,
  -0.10540146256397029,
  -0.059892229159298711,
  0.096238164683471419,
  0.29356124001780193,
  0.11424387052939833,
  0.070216121476321974,
  0.021726291675793388,
  0.13357155846388596,
  0.12829685295503751,
  0.024527303256040835,
  0.094207834558294873],
 [18.160734290678111,
  8.1490029616752189,
  6.4438045925032785,
  12.19986734052689,
  15.506960266105509,
  13.357565960054863,
  11.443151264102273,
  12.43646307962644,
  12.519913128930284,
  9.8849512728161084,
  8.0887192596856661,
  9.1031513339433161,
  6.203789104693751,
  1.8987209627566199,
  -0.8861754537057045,
  1.6747019226932673,
  1.9702285274410731,
  2.7249624930001612,
  1.2145782036504609,
  0.83047540111262785,
  0.43505467547740806,
  2.0507632078055882,
  1.7536039422009744,
  0.43334766698622562,
  0.60663420154143632,
  6.213678633758243,
  0.2361300933597737,
  -0.1312648248802416,
  0.07807172078227452,
  0.19678022911582593,
  0.38335304269406217,
  0.15425327932382632,
  0.10377621611321114,
  0.039497946124638653,
  0.22757743342251724,
  0.20653843075831263,
  0.044364220090983536,
  0.10642860907378415]]

Working with ML classifier output values


In [9]:
## Apply a rolling average to a list of values
## default window size: 10
## The result has the same length as the input.
## NOTE(review): the peak of the output (about 6.48) is higher than any plain
## 10-point mean of this input could be (at most 4.7), so the window is
## presumably weighted toward its centre -- confirm in attk.smooth.

smoothed_vals = attk.smooth([1,2,3,4,3,2,3,4,19,3,2,3,4,3,2,1,1,1,8,1,1,1,2,3,3,4,4,4,4,2])

smoothed_vals


Out[9]:
array([ 1.        ,  1.73248847,  2.33019795,  2.721414  ,  2.9310428 ,
        3.43288806,  4.51203204,  5.7155214 ,  6.4802301 ,  6.44834244,
        5.634779  ,  4.42021517,  3.32096873,  2.721414  ,  2.35619301,
        2.0582609 ,  2.07100367,  2.31047366,  2.53464488,  2.53464488,
        2.31047366,  2.04500861,  1.99243909,  2.33333333,  2.88218807,
        3.35619301,  3.63753622,  3.64656421,  3.35305762,  2.76437614])

In [10]:
## Apply a rolling average to a list of binary classifier output values
## (scores between 0 and 1, one per bin)
## default window size: 10

smoothed_classes = attk.smooth([0.1, 0.01, 1.0, 0.7, 0.99, 0.2, 0.7, 0.9, 0.1, 0.1, 0.2, 0.1])

smoothed_classes


Out[10]:
array([ 0.1       ,  0.29369676,  0.46122841,  0.58139416,  0.65541621,
        0.66711531,  0.61101733,  0.51831019,  0.4139161 ,  0.30768963,
        0.20800317,  0.1331665 ])

In [11]:
## Turn each smoothed score into a hard class label by mapping the built-in
## round() over the sequence (nearest whole number)

rounded_classes = list(map(round, smoothed_classes))

rounded_classes


Out[11]:
[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0]

In [12]:
## Convert a list of labels to a list of (start, end) index pairs, one pair
## per consecutive run of the specified label.
## As the output shows, `end` is exclusive: positions 0-2 and 8-11 of
## rounded_classes hold label 0, which yields (0, 3) and (8, 12).

range_pairs = attk.labels_to_ranges(rounded_classes, label=0)

range_pairs


Out[12]:
[(0, 3), (8, 12)]

In [13]:
## Determine the approximate location of vowel sounds in speech recordings
## bin size: 512
## The result is a flat list of booleans (presumably one flag per bin, True
## where a vowel was detected -- confirm against attk).
## NOTE(review): the full list is long; displaying a slice such as
## vowel_ranges[:20] would keep the notebook output readable.

vowel_ranges = attk.get_vowel_segments(wav_pathname)

vowel_ranges


Out[13]:
[False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 False]

In [14]:
## Quickly excerpt WAV segments from media files using moviepy

## NOTE(review): this rebinds media_path without the leading './' it had when
## first assigned, so later cells see the bare file name.
media_path = 'Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3'

## out_dir selects where the excerpt is written (here: the working directory)
wav_path = attk.subclip(media_path,15,21,out_dir='./') # starting at 15 seconds, ending at 21 seconds


[MoviePy] Writing audio in ./Armantrout-Rae_06_Way_WPS1_NY_5-10-06__15.0_21.0.wav
100%|██████████| 133/133 [00:00<00:00, 211.15it/s]
[MoviePy] Done.


In [15]:
## Create a WAV excerpt (0.5 s to 15.75 s) without specifying a destination
## directory (writes excerpt to directory containing the specified media file).
## The pathname of the new excerpt is returned and shown as the cell output.

attk.subclip(media_path, 0.5, 15.75)


[MoviePy] Writing audio in Armantrout-Rae_06_Way_WPS1_NY_5-10-06__0.5_15.75.wav
100%|██████████| 337/337 [00:01<00:00, 236.31it/s]
[MoviePy] Done.

Out[15]:
'Armantrout-Rae_06_Way_WPS1_NY_5-10-06__0.5_15.75.wav'

In [ ]:
## View files in the current working directory
## (should now include the MP3 and the two WAV excerpts written so far)

!ls

In [16]:
## Move media files into a new directory

!mkdir test_dir
!mv Armantrout-Rae_06_Way_WPS1_NY_5-10-06* test_dir/

In [ ]:
## List the files that now live in test_dir
!ls test_dir/

In [17]:
## Recursively scan a given directory and return a list of pathnames for
## every media file present (each pathname starts with dir_path, as the
## output shows)

dir_path="./test_dir"

media_paths = attk.find_media_paths(dir_path)

media_paths


Out[17]:
['./test_dir/Armantrout-Rae_06_Way_WPS1_NY_5-10-06.mp3',
 './test_dir/Armantrout-Rae_06_Way_WPS1_NY_5-10-06__0.5_15.75.wav',
 './test_dir/Armantrout-Rae_06_Way_WPS1_NY_5-10-06__15.0_21.0.wav']

In [18]:
## Excerpt a short clip (1.3 s to 3.25 s) from every MP3/WAV/MP4 file in a
## directory, writing each clip back into that same directory.
## (`random` is already imported in the import cell at the top of the notebook.)

clip_extensions = ('.mp3', '.wav', '.mp4')

## str.endswith() accepts a tuple, so one call checks all three extensions
media_paths = [item for item in attk.find_media_paths(dir_path)
               if item.lower().endswith(clip_extensions)]

## Process the files in arbitrary order
random.shuffle(media_paths)

## A dedicated loop variable keeps the notebook-level `media_path` intact
for clip_source in media_paths:
    attk.subclip(clip_source, 1.3, 3.25, out_dir=dir_path)


[MoviePy] Writing audio in ./test_dir/Armantrout-Rae_06_Way_WPS1_NY_5-10-06__15.0_21.0__1.3_3.25.wav
100%|██████████| 43/43 [00:00<00:00, 136.91it/s]
[MoviePy] Done.

[MoviePy] Writing audio in ./test_dir/Armantrout-Rae_06_Way_WPS1_NY_5-10-06__0.5_15.75__1.3_3.25.wav
100%|██████████| 43/43 [00:00<00:00, 111.20it/s]
[MoviePy] Done.

[MoviePy] Writing audio in ./test_dir/Armantrout-Rae_06_Way_WPS1_NY_5-10-06__1.3_3.25.wav
100%|██████████| 43/43 [00:00<00:00, 84.40it/s]
[MoviePy] Done.

In [ ]:
## View files in the current working directory again
## (the media files were moved into test_dir/ earlier)
!ls

In [ ]:
## Be sure to delete your temp file when you're finished using it.
## `wav_pathname` holds the path of the temporary WAV created earlier with
## attk.temp_wav_path(); the existence check lets this cell be run more than
## once without raising FileNotFoundError.

if os.path.exists(wav_pathname):
    os.remove(wav_pathname)