In [4]:
import glob
import os
import json
import shutil

import msaf

# Annotators whose section annotations are kept when filtering the references.
annotator_names = ["Colin", "Eleni", "Evan", "John", "Shuli"]

# Candidate locations of the SPAM dataset, in priority order.  The Linux path
# comes first because it is the value this cell previously ended up with (it
# overwrote the macOS path assigned on the line before it).
_spam_path_candidates = [
    "/home/uri/datasets/SPAM/",
    "/Users/uri/datasets/SPAM/",
]

# Use the first candidate that exists on this machine.  When none exists, fall
# back to the highest-priority one so the value matches the previous behavior.
spam_path = next((p for p in _spam_path_candidates if os.path.isdir(p)),
                 _spam_path_candidates[0])

# Directory that receives the filtered reference files.
out_path = os.path.join(spam_path, "new_references")

In [9]:
# Read files
# glob returns paths in arbitrary, filesystem-dependent order, so every listing
# is sorted to keep positional access such as references[0] stable across runs.
references = sorted(glob.glob(os.path.join(spam_path,
                                           msaf.Dataset.references_dir,
                                           "*" + msaf.Dataset.references_ext)))
# Only files with the .mp3 extension are collected as audio.
audio_files = sorted(glob.glob(os.path.join(spam_path,
                                            msaf.Dataset.audio_dir,
                                            "*.mp3")))
feat_files = sorted(glob.glob(os.path.join(spam_path,
                                           msaf.Dataset.features_dir,
                                           "*" + msaf.Dataset.features_ext)))

In [16]:
# List the annotator name attached to each section annotation of the first
# reference file.
ref = msaf.jams2.load(references[0])
for annotations in ref["sections"]:
    # The parenthesised single-argument form prints identically on Python 2
    # and Python 3, whereas the bare print statement only parses on Python 2.
    print(annotations["annotation_metadata"]["annotator"]["name"])


GT
GT
Colin
Eleni
Evan
John
Shuli
Weighted
Threshold

In [39]:
# Get only references by the names above
# open() does not create missing directories, so make sure the destination
# exists before any file is written.
if not os.path.isdir(out_path):
    os.makedirs(out_path)

for ref_file in references:
    ref = msaf.jams2.load(ref_file)
    new_jams = msaf.jams2.Jams()
    # Keep only the section annotations made by one of the selected annotators.
    for annotation in ref["sections"]:
        annotator = annotation["annotation_metadata"]["annotator"]["name"]
        if annotator in annotator_names:
            new_jams["sections"].append(annotation)
    # Write the filtered annotations under the same file name inside out_path.
    out_file = os.path.join(out_path, os.path.basename(ref_file))
    with open(out_file, "w") as f:
        json.dump(new_jams, f, indent=2)

In [10]:
# Add SPAM prefix
def add_prefix(files, prefix):
    """Rename each file so that its base name starts with ``prefix``.

    Every path in ``files`` is moved, within its own directory, to a name made
    of ``prefix`` followed by the original base name.  Files whose base name
    already starts with ``prefix`` are left alone, so running this again on
    already-renamed files does not stack the prefix a second time.

    Parameters
    ----------
    files : iterable of str
        Paths of the files to rename.
    prefix : str
        Text to prepend to each base name.

    Returns
    -------
    None
    """
    for f in files:
        base = os.path.basename(f)
        if base.startswith(prefix):
            # Already prefixed: renaming again would yield e.g. SPAM_SPAM_x.
            continue
        dest = os.path.join(os.path.dirname(f), prefix + base)
        shutil.move(f, dest)

# Prepend the dataset prefix to the audio and feature files.
# NOTE(review): the reference files are not renamed here -- the corresponding
# add_prefix(references, prefix) call was disabled in the notebook; confirm
# whether that is still intended.
prefix = "SPAM_"
for file_group in (audio_files, feat_files):
    add_prefix(file_group, prefix)