In [4]:
import glob
import os
import json
import shutil
import msaf
# Annotators whose section annotations are kept when the references are filtered.
annotator_names = ["Colin", "Eleni", "Evan", "John", "Shuli"]

# Known locations of the SPAM dataset on different machines. They are probed
# from last to first and the last entry is also the fallback, so the result is
# identical to the previous hard-coded value whenever that folder exists or
# none of the candidates exists.
_spam_path_candidates = [
    "/Users/uri/datasets/SPAM/",
    "/home/uri/datasets/SPAM/",
]
spam_path = next(
    (path for path in reversed(_spam_path_candidates) if os.path.isdir(path)),
    _spam_path_candidates[-1],
)

# Folder that receives the filtered reference files.
out_path = os.path.join(spam_path, "new_references")
In [9]:
# Read files
def _dataset_files(subdir, pattern):
    """Return the sorted paths inside ``spam_path/subdir`` that match ``pattern``.

    ``glob.glob`` yields paths in arbitrary order; sorting keeps the lists (and
    anything indexing into them, such as ``references[0]``) stable between runs.
    """
    return sorted(glob.glob(os.path.join(spam_path, subdir, pattern)))


references = _dataset_files(msaf.Dataset.references_dir,
                            "*" + msaf.Dataset.references_ext)
audio_files = _dataset_files(msaf.Dataset.audio_dir, "*.mp3")
feat_files = _dataset_files(msaf.Dataset.features_dir,
                            "*" + msaf.Dataset.features_ext)
In [16]:
# Print the annotator name of every section annotation in the first reference file.
ref = msaf.jams2.load(references[0])
for annotation in ref["sections"]:
    # The parenthesised single-argument form runs on both Python 2 and Python 3.
    print(annotation["annotation_metadata"]["annotator"]["name"])
In [39]:
# Keep only the section annotations whose annotator is listed in
# `annotator_names`, and write one filtered file per reference into `out_path`.

# open(..., "w") does not create missing folders, so make sure the target exists.
if not os.path.isdir(out_path):
    os.makedirs(out_path)

for ref_file in references:
    ref = msaf.jams2.load(ref_file)
    new_jams = msaf.jams2.Jams()
    for annotation in ref["sections"]:
        annotator = annotation["annotation_metadata"]["annotator"]["name"]
        if annotator in annotator_names:
            new_jams["sections"].append(annotation)

    # The output keeps the file name of the reference it was derived from.
    out_file = os.path.join(out_path, os.path.basename(ref_file))
    with open(out_file, "w") as f:
        json.dump(new_jams, f, indent=2)
In [10]:
# Add SPAM prefix
def add_prefix(files, prefix):
    """Rename each file in ``files`` so that its base name starts with ``prefix``.

    Every file stays in its own directory; only the base name changes. Files
    whose base name already starts with ``prefix`` are left untouched, so
    running this again on already-renamed files does not stack the prefix.

    Args:
        files: Iterable of paths of the files to rename on disk.
        prefix: String to prepend to each base name.
    """
    for path in files:
        folder, name = os.path.split(path)
        if name.startswith(prefix):
            # Already renamed (e.g. the cell was executed twice).
            continue
        shutil.move(path, os.path.join(folder, prefix + name))
prefix = "SPAM_"

# Only the audio and feature files are renamed; the equivalent call for the
# reference files is intentionally not made here.
for file_list in (audio_files, feat_files):
    add_prefix(file_list, prefix)