notebook.community

Edit and run



In [1]:

    
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn

import minst.model
import minst.taxonomy

# Render matplotlib figures inline, directly beneath the cell that creates them.
%matplotlib inline
# Switch on seaborn's default plot styling for every figure that follows.
seaborn.set()



In [2]:

    
# Directory holding the minst master index. Set the MINST_DATA_DIR environment
# variable to point at your own copy; when it is unset the original author's
# local path is used, so existing behaviour is unchanged.
MINST_DATA_DIR = os.environ.get("MINST_DATA_DIR", "/Users/ejhumphrey/data/minst")

# The first CSV column is used as the row index.
dframe = pd.read_csv(os.path.join(MINST_DATA_DIR, "master_index.csv"), index_col=0)
# NOTE(review): presumably maps dataset-specific instrument labels onto one
# shared vocabulary -- the helper lives in minst.taxonomy and is not visible here.
dframe = minst.taxonomy.normalize_instrument_names(dframe)
# Display five randomly chosen rows as a quick sanity check of the loaded index.
dframe.sample(5)









    Out[2]:






  
    
      
      audio_file
      dataset
      duration
      dynamic
      instrument
      note_number
      partition
      source_index
      start_time
    
  
  
    
      092CGAFMd06f21
      092CGAFM_13.flac
      rwc
      4.816327
      M
      guitar
      NaN
      NaN
      rwcf6a6e74f
      71.137642
    
    
      181CBPCFd2017b
      181CBPCF_16.flac
      rwc
      5.469388
      F
      double-bass
      NaN
      NaN
      rwc83f26c00
      80.443311
    
    
      173VCNVM93f0dc
      173VCNVM_45.flac
      rwc
      3.950113
      M
      cello
      NaN
      NaN
      rwc0a193b22
      190.457143
    
    
      221TBC1P57114c
      221TBC1P_19.flac
      rwc
      3.519274
      P
      trombone
      NaN
      NaN
      rwc81799527
      67.568254
    
    
      211TRW3P0a1368
      211TRW3P_0.flac
      rwc
      3.600907
      P
      trumpet
      NaN
      NaN
      rwcae809d4c
      0.048980



In [9]:

    
# Split the master index into one sub-frame per source dataset, selecting rows
# by the value of the `dataset` column.
uiowa, rwc, philz = (
    dframe.loc[dframe["dataset"] == source]
    for source in ('uiowa', 'rwc', 'philharmonia')
)



In [12]:

    
# Grouped bar chart: number of note observations per instrument class, with one
# bar per source dataset inside each instrument group.
fig, ax = plt.subplots(figsize=(14, 4))

# Shared, sorted category lists. Missing labels are dropped so that sorting
# never has to compare NaN against strings.
instruments = sorted(dframe.instrument.dropna().unique())
datasets = sorted(dframe.dataset.dropna().unique())

x_axis = np.arange(len(instruments))
# Each instrument group is one unit wide; dividing by len(datasets) + 1 leaves
# one bar-width of empty space between neighbouring groups.
width = 1. / (len(datasets) + 1)
# Ask for exactly one colour per dataset (the palette cycles if it is shorter),
# so zip() below can never silently drop a dataset.
colors = seaborn.color_palette(n_colors=len(datasets))
for n, (dset, c) in enumerate(zip(datasets, colors)):
    dset_df = dframe[dframe.dataset == dset]
    print(dset, len(dset_df))
    # Align the counts to the shared instrument list: an instrument that is
    # absent from this dataset gets a zero-height bar instead of shortening the
    # array and shifting every later bar under the wrong tick label.
    counts = dset_df.instrument.value_counts().reindex(instruments, fill_value=0)
    # align='edge' pins the bar's left edge to the x position regardless of the
    # installed matplotlib's default alignment.
    ax.bar(x_axis + n * width, counts.values,
           width=width, align='edge', label=dset, fc=c)

# Put each tick at the centre of its group of bars.
ax.set_xticks(x_axis + 0.5 * len(datasets) * width)
ax.set_xticklabels(instruments, rotation=20)
ax.set_xlabel("Instrument Class")
ax.set_ylabel("Counts")
ax.set_title("Note Observation Counts")
# The trailing semicolon keeps the Legend repr out of the cell output.
ax.legend(loc='best');









    



('philharmonia', 7923)
('rwc', 27557)
('uiowa', 3417)






    Out[12]:





<matplotlib.legend.Legend at 0x119328c10>



In [21]:



In [ ]:



In [ ]:

	audio_file	dataset	duration	dynamic	instrument	note_number	partition	source_index	start_time
092CGAFMd06f21	092CGAFM_13.flac	rwc	4.816327	M	guitar	NaN	NaN	rwcf6a6e74f	71.137642
181CBPCFd2017b	181CBPCF_16.flac	rwc	5.469388	F	double-bass	NaN	NaN	rwc83f26c00	80.443311
173VCNVM93f0dc	173VCNVM_45.flac	rwc	3.950113	M	cello	NaN	NaN	rwc0a193b22	190.457143
221TBC1P57114c	221TBC1P_19.flac	rwc	3.519274	P	trombone	NaN	NaN	rwc81799527	67.568254
211TRW3P0a1368	211TRW3P_0.flac	rwc	3.600907	P	trumpet	NaN	NaN	rwcae809d4c	0.048980