In [1]:
import csv
import glob
import jams
import librosa
import os

import msaf
from msaf.input_output import FileStruct

In [2]:
# Set dataset paths
ds_dir = ".."
original_ref_dir = "../original_references/"

# Get audio files. glob returns matches in arbitrary (filesystem-dependent)
# order, so sort them to keep the processing order and the log reproducible.
audio_files = sorted(glob.glob(os.path.join(ds_dir, msaf.Dataset.audio_dir, "*.mp3")))

# Make sure that the references folder exists
msaf.utils.ensure_dir(os.path.join(ds_dir, msaf.Dataset.references_dir))

In [13]:
def get_duration(audio_file):
    """Returns the duration of the given audio file, in seconds.

    The file is decoded with ``librosa.load`` (default arguments) and the
    duration is the number of returned samples over the returned sample rate.
    """
    samples, sample_rate = librosa.load(audio_file)
    n_samples = len(samples)
    return n_samples / float(sample_rate)

def fill_global_metadata(jam, csv_file, dur):
    """Fills the global metadata into the JAMS jam.

    The track title is parsed from the name of ``csv_file``, which is expected
    to look like ``<number>-<artist>-<title>.csv``.

    Parameters
    ----------
    jam : object
        Object exposing a ``file_metadata`` attribute (a ``jams.JAMS`` in this
        notebook); its artist, duration, title and release are overwritten.
    csv_file : str
        Path to the original csv reference; only its base name is used.
    dur : float
        Duration of the track, in seconds.

    Raises
    ------
    IndexError
        If the file name has fewer than three dash-separated fields.
    """
    # Strip only the final extension and split on the first two dashes only,
    # so that titles which themselves contain "." or "-" are kept whole.
    stem = os.path.splitext(os.path.basename(csv_file))[0]
    title = stem.split("-", 2)[2]

    jam.file_metadata.artist = "Sargon"
    jam.file_metadata.duration = dur  # In seconds
    jam.file_metadata.title = title
    jam.file_metadata.release = "Mirage"

def fill_annotation_metadata(annot):
    """Populates ``annot.annotation_metadata`` with this dataset's fixed values.

    Sets the corpus, version, annotation tools, annotation rules and data
    source, plus the name and email of both the annotator and the curator.
    """
    meta = annot.annotation_metadata

    # Provenance of the annotation
    meta.corpus = "Sargon"
    meta.version = "1.0"
    meta.annotation_tools = "Sonic Visualizer"
    meta.annotation_rules = "SALAMI guidelines"
    meta.data_source = "Jarlem All Studios"

    # Annotator and curator carry the same contact details
    for person in (meta.annotator, meta.curator):
        person.name = "Oriol Nieto"
        person.email = "oriol.nieto@gmail.com"

def fill_section_annotation(csv_file, jam):
    """Fills a JAMS section annotation from a csv file and appends it to jam.

    Each csv row is read as ``time,label``. Every row but the last opens a
    segment that lasts until the next row's time; the last row only supplies
    the end time of the final segment.

    Parameters
    ----------
    csv_file : str
        Path to the csv file holding the boundaries and labels.
    jam : object
        Object exposing an ``annotations`` collection (a ``jams.JAMS`` in this
        notebook) to which the new annotation is appended.
    """
    # Create Annotation
    annot = jams.Annotation(namespace='segment_open')

    # Annotation Metadata
    fill_annotation_metadata(annot)

    # Add actual data. Read from the `csv_file` argument rather than from a
    # notebook-level variable, and keep the path and the open handle under
    # different names so the argument is not shadowed.
    with open(csv_file) as csv_handle:
        # csv.reader yields an empty list for blank lines; drop those rows
        bounds = [row for row in csv.reader(csv_handle, delimiter=',') if row]

    # Pair every boundary with the following one to derive the durations
    for bound, next_bound in zip(bounds, bounds[1:]):
        start = float(bound[0])
        annot.append(time=start, duration=float(next_bound[0]) - start,
                     value=str(bound[1]))

    # Add Annotation to JAMS
    jam.annotations.append(annot)

In [14]:
# Parse CSV
for audio_file in audio_files:
    print("Parsing %s..." % audio_file)
    file_struct = FileStruct(audio_file)

    # The original csv reference shares the audio file's base name
    track_name = os.path.splitext(os.path.basename(audio_file))[0]
    orig_ref = os.path.join(original_ref_dir, track_name + ".csv")

    # Fresh JAMS container for this track
    jam = jams.JAMS()

    # File-level metadata; the duration is measured from the audio itself
    track_duration = get_duration(audio_file)
    fill_global_metadata(jam, orig_ref, track_duration)

    # Section annotations read from the csv reference
    fill_section_annotation(orig_ref, jam)

    # Write the JAMS to the reference path exposed by the FileStruct
    jam.save(file_struct.ref_file)


Parsing ../audio/01-Sargon-Mindless.mp3...
Parsing ../audio/02-Sargon-Shattered World.mp3...
Parsing ../audio/03-Sargon-Waiting For Silence.mp3...
Parsing ../audio/04-Sargon-The Curse Of Akkad.mp3...