In [ ]:
from __future__ import print_function
import msaf
import librosa
import seaborn as sns
# and IPython.display for audio output
import IPython.display
# Setup nice plots: apply seaborn's "dark" style to every figure drawn below
sns.set(style="dark")
# IPython magic (not plain Python): render matplotlib figures inline in the notebook
%matplotlib inline
This mode analyzes one audio file at a time.
Note: Make sure to download the datasets from https://github.com/urinieto/msaf-data/
In [ ]:
# Choose an audio file and listen to it.
# `audio_file` is reused by the single-file cells further down.
# NOTE(review): the relative path presumably points into a local copy of the
# msaf-data datasets mentioned above -- adjust it to your checkout.
audio_file = "../datasets/Sargon/audio/01-Sargon-Mindless.mp3"
# Kept as the last expression of the cell so the notebook renders the player
IPython.display.Audio(filename=audio_file)
In [ ]:
# Segment the file using the default MSAF parameters.
# msaf.process returns a (boundaries, labels) pair for the given file.
boundaries, labels = msaf.process(audio_file)
# Show the estimated boundaries
print(boundaries)
In [ ]:
# Sonify boundaries: destination file and sample rate of the rendered audio
sonified_file, sr = "my_boundaries.wav", 44100
# Segment again, this time asking MSAF to write the sonified boundaries to disk
boundaries, labels = msaf.process(
    audio_file,
    sonify_bounds=True,
    out_bounds=sonified_file,
    out_sr=sr,
)
# Listen to results: load the rendered file back (keeping only the samples)
audio = librosa.load(sonified_file, sr=sr)[0]
IPython.display.Audio(audio, rate=sr)
MSAF includes multiple algorithms both for boundary retrieval and structural grouping (or labeling). In this section we demonstrate how to try them out.
Note: more algorithms are available in msaf-gpl.
In [ ]:
# First, let's list all the available boundary algorithms.
# Any of the printed identifiers can be passed to msaf.process as `boundaries_id`.
print(msaf.get_all_boundary_algorithms())
In [ ]:
# Try one of these boundary algorithms ("foote") and plot the results (plot=True)
boundaries, labels = msaf.process(audio_file, boundaries_id="foote", plot=True)
In [ ]:
# Let's check all the structural grouping (label) algorithms available.
# Any of the printed identifiers can be passed to msaf.process as `labels_id`.
print(msaf.get_all_label_algorithms())
In [ ]:
# Try one of these label algorithms ("fmc2d") on top of the "foote" boundaries
boundaries, labels = msaf.process(audio_file, boundaries_id="foote", labels_id="fmc2d")
# Print the estimated boundaries followed by the labels assigned by the algorithm
print(boundaries)
print(labels)
In [ ]:
# If available, you can use previously annotated boundaries and a specific labels algorithm.
# boundaries_id="gt" asks MSAF to read the annotated (ground-truth) boundaries
# instead of estimating them; only the labels ("fmc2d") are then computed.
# NOTE(review): this presumably requires the track's reference JAMS file to
# contain segment annotations -- confirm for your dataset.
# Set plot = True to plot the results
boundaries, labels = msaf.process(audio_file, boundaries_id="gt",
                                  labels_id="fmc2d", plot=True)
In [ ]:
# Let's check what available features are there in MSAF.
# NOTE(review): the registry presumably maps the feature identifiers accepted
# by the `feature` argument of msaf.process (e.g. "mfcc", "pcp") -- confirm.
print(msaf.features_registry)
In [ ]:
# Segment the file using the Foote method for boundaries, C-NMF method for labels, and MFCC features
boundaries, labels = msaf.process(audio_file, feature="mfcc", boundaries_id="foote",
                                  labels_id="cnmf", plot=True)
MSAF can calculate the beats or use annotated ones. The annotations should be stored in a JAMS file; for this notebook we use a simple JAMS example.
In [ ]:
# Track whose beats will be estimated, plus the analysis parameters
beats_audio_file = "../datasets/Sargon/audio/02-Sargon-Shattered World.mp3"
sr, hop_length = 44100, 1024

# Load the samples at the requested rate (librosa.load returns (samples, rate))
audio = librosa.load(beats_audio_file, sr=sr)[0]
# Split into harmonic and percussive parts; the beat tracker below runs on
# the percussive component only
audio_harmonic, audio_percussive = librosa.effects.hpss(audio)

# Compute beats (as frame indices) together with the tempo estimate
tempo, frames = librosa.beat.beat_track(
    y=audio_percussive, sr=sr, hop_length=hop_length
)

# To times: convert beat frame indices into timestamps
beat_times = librosa.frames_to_time(
    frames, sr=sr, hop_length=hop_length
)
In [ ]:
# We will now save our beats to a JAMS file.
import jams

jam = jams.JAMS()
# Duration in seconds = number of loaded samples / sample rate.
# `audio` is the sample array loaded for the beat tracking above (not the
# path string `audio_file`); float() keeps this a true division on Python 2.
jam.file_metadata.duration = float(len(audio)) / sr

beat_a = jams.Annotation(namespace='beat')
beat_a.annotation_metadata = jams.AnnotationMetadata(data_source='librosa beat tracker')

# Add beat timings to the annotation record.
# The beat namespace does not require value or confidence fields,
# so we can leave those blank.
for t in beat_times:
    beat_a.append(time=t, duration=0.0)

# Store the new annotation in the jam file. This needs to be located in the
# references folder and be named like the audio file, except for the .jams
# extension.
# NOTE(review): the beats above were computed from `beats_audio_file`
# (02-Sargon-Shattered World) but are saved under the reference name of
# 01-Sargon-Mindless, replacing whatever is stored at that path -- confirm
# that this is intended.
jam.annotations.append(beat_a)
jam.save('../datasets/Sargon/references/01-Sargon-Mindless.jams')
In [ ]:
# Using the annotated beats is then straightforward: pass annot_beats=True.
# Just be sure you don't have a temporary features file in the directory.
# NOTE(review): presumably a leftover features file would be reused instead of
# recomputing the features with the annotated beats -- confirm.
boundaries, labels = msaf.process(audio_file, boundaries_id="foote",
annot_beats=True, labels_id="fmc2d", plot=True)
In [ ]:
# Evaluate the results. It returns a pandas data frame.
# The same boundary/label identifiers as in the estimation step are passed.
evaluations = msaf.eval.process(audio_file, boundaries_id="foote", labels_id="fmc2d")
# Render the data frame in the notebook
IPython.display.display(evaluations)
In [ ]:
# First, check which are Foote's algorithm parameters
# (one of them, M_gaussian, is swept interactively below):
print(msaf.algorithms.foote.config)
In [ ]:
# Play around with IPython widgets: `interact` is used below to drive an
# interactive parameter sweep.
from ipywidgets import interact
# Obtain the default configuration for the chosen feature/algorithm combination
bid = "foote" # Boundaries ID
lid = None # Labels ID (None: no label algorithm is selected)
feature = "pcp"  # Pitch Class Profiles
# `config` is modified in place by the interactive callback defined below
config = msaf.io.get_configuration(feature, annot_beats=False, framesync=False,
boundaries_id=bid, labels_id=lid)
# Sweep the M_gaussian parameter: the (50, 500, 25) tuple gives `interact`
# the minimum, maximum and step of the slider it creates.
@interact(M_gaussian=(50, 500, 25))
def _run_msaf(M_gaussian):
    """Segment and evaluate ``audio_file`` for one ``M_gaussian`` value.

    Called by ``interact`` each time the slider changes. Relies on the
    notebook globals ``audio_file``, ``feature``, ``bid`` and ``config``.

    Parameters
    ----------
    M_gaussian : int
        Value stored under ``config["M_gaussian"]`` before running MSAF.
    """
    # Set the configuration (updates the shared `config` dict in place)
    config["M_gaussian"] = M_gaussian

    # Segment the file using the Foote method, and Pitch Class Profiles for
    # the features. The call is made for its plot; the returned value is
    # not needed here.
    msaf.process(audio_file, feature=feature, boundaries_id=bid,
                 config=config, plot=True)

    # Evaluate the results. It returns a pandas data frame.
    evaluations = msaf.eval.process(audio_file, feature=feature,
                                    boundaries_id=bid, config=config)
    IPython.display.display(evaluations)
In [ ]:
# Collection mode: pass the dataset folder instead of a single audio file
dataset = "../datasets/Sargon/"
# NOTE(review): n_jobs is presumably the number of parallel workers -- confirm
results = msaf.process(dataset, n_jobs=1, boundaries_id="foote")
In [ ]:
# Evaluate in collection mode (same dataset folder and boundary algorithm
# as the estimation step)
evaluations = msaf.eval.process(dataset, n_jobs=4, boundaries_id="foote")
# Show the full evaluation table first ...
IPython.display.display(evaluations)
# ... and then the mean of each column
IPython.display.display(evaluations.mean())