Chapter 10: Sound Sharing and Retrieval

a) Create Audio Database


In [ ]:
import os
import pandas as pd
import numpy as np
import freesound
from whoosh.fields import Schema, ID, TEXT, KEYWORD, NUMERIC
from whoosh.index import create_in
try:
    from freesound_apikey import FREESOUND_API_KEY
except ImportError:
    print('Can\'t load your Freesound API key!\nPlease request an API key at http://freesound.org/apiv2/apply/ and paste it in a file named \'freesound_apikey.py\'.')

In [ ]:
def create_text_index(index_dir="text_index"):
    """Create the whoosh text index used to search the sound metadata.

    The schema indexes the textual metadata of a sound (name, description,
    username, tags, duration and license). ``pandas_index`` is the only
    stored field, so it is the value a search hit carries back to look up
    the matching row elsewhere.

    Args:
        index_dir: Directory that holds the index files. It is created
            when it does not exist yet.

    Returns:
        The index object returned by whoosh's ``create_in``.
    """
    schema = Schema(
        name = TEXT,
        description = TEXT,
        username = TEXT,
        tags = KEYWORD,
        # NOTE(review): NUMERIC presumably defaults to an integer type, so
        # fractional durations may be truncated when indexed -- confirm
        # whether NUMERIC(float) is wanted here.
        duration = NUMERIC,
        license = TEXT,
        pandas_index = ID(stored=True),
    )
    # create_in() is given a directory path; make sure the directory is
    # there instead of depending on another cell having created it.
    if not os.path.exists(index_dir):
        os.makedirs(index_dir)
    text_index = create_in(index_dir, schema)
    return text_index

In [ ]:
def replace_license_label(record):
    """Replace the license URL in ``record`` with a short label.

    The label is chosen by looking for a marker substring in
    ``record['license']``. Markers are tried in a fixed order and the first
    one found wins. The Creative Commons BY and BY-NC markers match any
    license version (for example both ``by/3.0`` and ``by/4.0``).

    Args:
        record: Mapping with a ``'license'`` entry holding the license URL.
            The mapping is modified in place.

    Returns:
        The same ``record`` object, with ``record['license']`` set to one of
        ``'CC_0'``, ``'CC_BY'``, ``'CC_BY_NC'``, ``'S_Plus'``, or ``''`` when
        no marker matches.

    Raises:
        KeyError: If ``record`` has no ``'license'`` entry.
    """
    # A missing value (None) is treated like an unrecognised license.
    license_url = record['license'] or ''
    # Short names are required for using the license as whoosh facets.
    markers = (
        ('publicdomain', 'CC_0'),
        ('by/', 'CC_BY'),
        ('by-nc/', 'CC_BY_NC'),
        ('sampling+', 'S_Plus'),
    )
    short_name = ''
    for marker, label in markers:
        if marker in license_url:
            short_name = label
            break
    record['license'] = short_name
    return record

In [ ]:
def query_freesound(q):
    """Run a Freesound text search for ``q`` and return the usable hits.

    A new client is created and authenticated with ``FREESOUND_API_KEY`` on
    every call. The search asks for the metadata fields, the preview URLs
    and the MFCC / spectral-centroid descriptors, with ``group_by_pack``
    set to 1 and a page size of 50.

    Args:
        q: Query string handed to the Freesound text search.

    Returns:
        A list of the results obtained by iterating the search result whose
        ``analysis`` attribute is truthy.
    """
    client = freesound.FreesoundClient()
    client.set_token(FREESOUND_API_KEY)

    requested_fields = ("id,name,tags,username,analysis,duration,"
                        "description,license,previews")
    requested_descriptors = ("lowlevel.mfcc.mean,lowlevel.mfcc.var,"
                             "lowlevel.spectral_centroid.mean")

    # NOTE(review): iterating the result presumably walks only the first
    # page (up to page_size sounds) -- confirm against the client library.
    results = client.text_search(
        query=q,
        fields=requested_fields,
        descriptors=requested_descriptors,
        group_by_pack=1,
        page_size=50,
    )

    analysed = []
    for hit in results:
        if hit.analysis:
            analysed.append(hit)
    return analysed

In [ ]:
def make_pandas_record(fs_object):
    """Build the flat dictionary describing one sound for the data frame.

    The record holds the metadata listed in the module-level
    ``metadata_fields``, the local path of the low-quality MP3 preview, one
    entry per MFCC mean / variance coefficient and the mean spectral
    centroid. The license value is replaced by its short label.

    Args:
        fs_object: Sound object from a Freesound search. It must provide
            ``as_dict()``, ``previews.preview_lq_mp3`` and
            ``analysis.lowlevel`` with ``mfcc`` and ``spectral_centroid``.

    Returns:
        The record dictionary, as returned by ``replace_license_label``.
    """
    # Fetch the dictionary once instead of once per metadata field.
    sound_dict = fs_object.as_dict()
    record = {key: sound_dict[key] for key in metadata_fields}
    # Local path: "files/" plus the last segment of the preview URL.
    record["path"] = "files/" + \
        fs_object.previews.preview_lq_mp3.split("/")[-1]
    lowlevel = fs_object.analysis.lowlevel
    for index, val in enumerate(lowlevel.mfcc.mean):
        record["mfcc_mean_%i"%index] = val
    for index, val in enumerate(lowlevel.mfcc.var):
        record["mfcc_var_%i"%index] = val
    record["spectral_centroid"] = lowlevel.spectral_centroid.mean
    return replace_license_label(record)

In [ ]:
def make_whoosh_record(fs_object, pandas_index):
    """Build the dictionary describing one sound for the text index.

    The record holds the metadata listed in the module-level
    ``metadata_fields`` plus ``pandas_index`` converted to a string. The
    license value is replaced by its short label.

    Args:
        fs_object: Sound object from a Freesound search; must provide
            ``as_dict()``.
        pandas_index: Position of the sound in the data frame; stored as
            ``str(pandas_index)``.

    Returns:
        The record dictionary, as returned by ``replace_license_label``.
    """
    # Fetch the dictionary once instead of once per metadata field.
    sound_dict = fs_object.as_dict()
    record = {key: sound_dict[key] for key in metadata_fields}
    record["pandas_index"] = str(pandas_index)
    return replace_license_label(record)

In [ ]:
def make_db():
    """Download the sounds and build both the CSV table and the text index.

    Steps:
      1. Query Freesound once per entry of the module-level
         ``animal_sounds`` and collect all results in query order.
      2. Download the preview of every sound into ``files/``.
      3. Build a data frame with one row per sound.
      4. Add one document per sound to a newly created text index; the
         document's ``pandas_index`` is the sound's position in the list,
         i.e. its row number in the data frame.
      5. Write the data frame to ``database.csv`` and print a summary.
    """
    # Flatten the per-query result lists in order; avoids the repeated
    # list copying of sum(list_of_lists, []).
    sounds = [sound
              for animal in animal_sounds
              for sound in query_freesound(animal)]
    for sound in sounds:
        sound.retrieve_preview("files/")
    data_frame = pd.DataFrame(
        [make_pandas_record(s) for s in sounds])
    text_index = create_text_index()
    # As a context manager the writer commits on success and is cancelled
    # when adding a document fails, instead of being left open.
    with text_index.writer() as writer:
        for index, sound in enumerate(sounds):
            text_document = make_whoosh_record(sound, index)
            writer.add_document(**text_document)
    data_frame.to_csv('database.csv')
    print('Created dataset with %i sounds!' % len(sounds))

In [ ]:
# Build the database from a handful of animal-sound queries.

# One Freesound text search is run per entry.
animal_sounds = [
    "dog bark",
    "cat meow",
    "lion roar",
    "nightingale",
]

# Metadata keys copied from every sound into both the pandas and the
# whoosh records.
metadata_fields = [
    "name",
    "tags",
    "username",
    "description",
    "duration",
    "license",
]

# Output directories: the text index and the downloaded previews.
if not os.path.exists("text_index"):
    os.mkdir("text_index")
if not os.path.exists("files"):
    os.mkdir("files")

make_db()