Fundamentals of audio and music analysis

Open source libraries

Python

  • librosa (ISC / MIT licensed)
  • pyaudio (MIT licensed)
    • portaudio

Prepare sound for analysis

NOTE: Either record your own voice or import a sample from a file.


In [ ]:
import pyaudio
import wave

[Optional] Recording sound


In [2]:
# In this step, find out the device id to be used in recording
#    Choose the device with input type and microphone in its name
p = pyaudio.PyAudio()
try:
    info = p.get_host_api_info_by_index(0)
    nb_devices = info.get('deviceCount')
    # List all device ids and names. A device that has both input and
    # output channels is listed twice, once per direction.
    for i in range(0, nb_devices):
        # Query the device once and reuse the result for both checks
        device = p.get_device_info_by_host_api_device_index(0, i)
        if device.get('maxInputChannels') > 0:
            # print(...) with a single argument behaves the same on Python 2 and 3
            print("Id:[%d]\tType:[Input]\tName:[%s] " % (i, device.get('name')))
        if device.get('maxOutputChannels') > 0:
            print("Id:[%d]\tType:[Output]\tName:[%s] " % (i, device.get('name')))
finally:
    # Release the PortAudio session; the later cells create their own instance
    p.terminate()


Id:[0]	Type:[Input]	Name:[Built-in Microph] 
Id:[1]	Type:[Output]	Name:[Built-in Output] 

In [3]:
# Recorder settings used by the cells below

# -- Capture device --
INPUT_DEVICE_ID = 0           # device id to record from

# -- Audio format --
FORMAT = pyaudio.paInt16      # sample format handed to the input stream
CHANNELS = 1                  # number of input channels
RATE = 44100                  # sample rate

# -- Recording --
CHUNK = 1024                  # amount of audio requested per stream.read() call
RECORD_SECONDS = 2            # length of the recording, in seconds
WAVE_OUTPUT_FILENAME = "recorded_audio.wav"  # wave file the recording is saved to

In [4]:
# Check whether the recorder params are supported by your hardware
p = pyaudio.PyAudio()
try:
    devinfo = p.get_device_info_by_index(INPUT_DEVICE_ID)

    # is_format_supported() returns True for a valid configuration and
    # raises ValueError otherwise, so the exception has to be caught here
    # for the message below to ever be shown.
    try:
        supported = p.is_format_supported(float(RATE),
                                          input_device=INPUT_DEVICE_ID,
                                          input_channels=CHANNELS,
                                          input_format=FORMAT)
    except ValueError:
        supported = False

    if not supported:
        print("Parameters not supported, please try different values")
finally:
    # Always release the PortAudio session, even when a lookup above fails
    p.terminate()

In [6]:
# Record the audio and save it as a wave file
p = pyaudio.PyAudio()
try:
    # Look up the sample width while the PyAudio session is still open
    sample_width = p.get_sample_size(FORMAT)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    input_device_index=INPUT_DEVICE_ID,
                    frames_per_buffer=CHUNK)
    try:
        print("* recording")

        frames = []

        # Multiply before dividing so that integer division on Python 2 does
        # not truncate RATE / CHUNK first and shorten long recordings.
        nb_chunks = int(RATE * RECORD_SECONDS / CHUNK)
        for chunk_no in range(0, nb_chunks):
            data = stream.read(CHUNK)
            frames.append(data)

        print("* done recording")
    finally:
        # Stop and close the stream even if reading from the device failed
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
try:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
finally:
    # try/finally rather than `with`: wave objects are not context managers
    # on Python 2
    wf.close()


* recording
* done recording

Importing sound file


In [7]:
# Path of the sound file to import. The value is the same file name as
# WAVE_OUTPUT_FILENAME in the optional recording step; point it at another
# file to use a different sample.
SOUND_PATH = "recorded_audio.wav"

In [ ]: