In [1]:
import csv
import glob
import jams
import librosa
import os
import msaf
from msaf.input_output import FileStruct
In [2]:
# Dataset root and the folder holding the original CSV references
ds_dir = ".."
original_ref_dir = "../original_references/"

# Collect every mp3 found in the dataset's audio folder
audio_files = glob.glob(
    os.path.join(ds_dir, msaf.Dataset.audio_dir, "*.mp3")
)

# Make sure that the references folder exists
msaf.utils.ensure_dir(
    os.path.join(ds_dir, msaf.Dataset.references_dir)
)
In [13]:
def get_duration(audio_file):
    """Returns the duration of the given audio file, in seconds.

    The file is decoded with ``librosa.load`` (called with no extra
    arguments), and the duration is the number of returned samples divided
    by the sampling rate that the same call reports.
    """
    samples, sampling_rate = librosa.load(audio_file)
    n_samples = len(samples)
    # Force a float division so the result keeps its fractional part
    return n_samples / float(sampling_rate)
def fill_global_metadata(jam, csv_file, dur):
    """Fills the global file metadata of the JAMS object.

    Parameters
    ----------
    jam : object exposing a ``file_metadata`` attribute
        Object whose file metadata fields are set in place.
    csv_file : str
        Path to the reference file. The title is taken from its base name:
        the third dash-separated field, up to the first dot.
    dur : float
        Duration of the track, in seconds.
    """
    metadata = jam.file_metadata
    metadata.artist = "Sargon"
    metadata.duration = dur  # In seconds

    # e.g. "01-Sargon-Title.csv" -> "Title.csv" -> "Title"
    base_name = os.path.basename(csv_file)
    third_field = base_name.split("-")[2]
    metadata.title = third_field.split(".")[0]

    metadata.release = "Mirage"
def fill_annotation_metadata(annot):
    """Fills the metadata of the given annotation, in place.

    Sets the corpus, version, tools, rules and data source fields of
    ``annot.annotation_metadata``, followed by the name and email of both
    its annotator and its curator.
    """
    metadata = annot.annotation_metadata

    # General description of the corpus and of how it was annotated
    general_fields = (
        ("corpus", "Sargon"),
        ("version", "1.0"),
        ("annotation_tools", "Sonic Visualizer"),
        ("annotation_rules", "SALAMI guidelines"),
        ("data_source", "Jarlem All Studios"),
    )
    for field, value in general_fields:
        setattr(metadata, field, value)

    # The annotator and the curator are the same person
    for role in ("annotator", "curator"):
        person = getattr(metadata, role)
        person.name = "Oriol Nieto"
        person.email = "oriol.nieto@gmail.com"
def fill_section_annotation(csv_file, jam):
    """Fills a new JAMS section annotation from a csv file.

    Each csv row is read as ``time,label``. Every row except the last one
    becomes a segment that starts at its own time and lasts until the time
    of the following row; the last row only marks the end of the previous
    segment.

    Parameters
    ----------
    csv_file : str
        Path to the comma-separated reference file to read.
    jam : jams.JAMS
        JAMS object that receives the new ``segment_open`` annotation.
    """
    # Create Annotation
    annot = jams.Annotation(namespace='segment_open')

    # Annotation Metadata
    fill_annotation_metadata(annot)

    # Read the file passed as argument (not a global path), using a handle
    # name that does not shadow the ``csv_file`` parameter. Blank rows are
    # skipped since they carry no boundary.
    with open(csv_file) as csv_handle:
        bounds = [row for row in csv.reader(csv_handle, delimiter=',') if row]

    # Add actual data: pair each boundary with the one that follows it
    for bound, next_bound in zip(bounds, bounds[1:]):
        start = float(bound[0])
        annot.append(time=start,
                     duration=float(next_bound[0]) - start,
                     value=str(bound[1]))

    # Add Annotation to JAMS
    jam.annotations.append(annot)
In [14]:
# Build one JAMS file per audio file out of its original CSV reference
for audio_file in audio_files:
    print("Parsing %s..." % audio_file)
    file_struct = FileStruct(audio_file)

    # Same base name as the audio file, with the last three characters
    # (the extension) replaced by "csv"
    csv_name = os.path.basename(audio_file)[:-3] + "csv"
    orig_ref = os.path.join(original_ref_dir, csv_name)

    # Fresh JAMS object for this track
    jam = jams.JAMS()

    # Global file metadata
    duration = get_duration(audio_file)
    fill_global_metadata(jam, orig_ref, duration)

    # Section annotation
    fill_section_annotation(orig_ref, jam)

    # Write the result where the file structure expects the reference
    jam.save(file_struct.ref_file)