In [1]:
import json
import numpy as np 
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.svm import SVC
import csv

#uncomment the line below to print whole numpy arrays (printing will be much slower)
#np.set_printoptions(threshold=np.nan)  #on newer numpy versions, use sys.maxsize instead of np.nan

#open and load the JSON files, assigning each to its corresponding variable

dict_bright = json.load(open("json_files/bright_analysis.json", 'rb'))
dict_metal = json.load(open("json_files/metal_analysis.json",'rb'))
dict_hard = json.load(open("json_files/hard_analysis.json",'rb'))
dict_reverb = json.load(open("json_files/reverb_analysis.json",'rb'))
dict_rough = json.load(open("json_files/rough_analysis.json",'rb'))
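
The five loads follow the same pattern, so they can also be written as a loop. A minimal sketch (assuming the same json_files/ layout), using a context manager so each file handle is closed:

analyses = {}
for name in ['bright', 'metal', 'hard', 'reverb', 'rough']:
    with open('json_files/%s_analysis.json' % name) as f:
        analyses[name] = json.load(f)  #analyses['bright'] holds the same mapping as dict_bright above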

In [2]:
#dict_bright  #uncomment to inspect the contents

In [3]:
#importing the CSV files; each file holds one sound id per row,
#so we keep only the first column of every row

def load_id_list(path):
    with open(path, 'rb') as f:
        return [row[0] for row in csv.reader(f)]

bright_list = load_id_list('csv_files/bright.csv')
warm_list = load_id_list('csv_files/warm.csv')
rough_list = load_id_list('csv_files/rough.csv')
reverb_list = load_id_list('csv_files/reverb.csv')
clear_list = load_id_list('csv_files/clear.csv')
hollow_list = load_id_list('csv_files/hollow.csv')
deep_list = load_id_list('csv_files/deep.csv')
punchy_list = load_id_list('csv_files/punchy.csv')
metallic_list = load_id_list('csv_files/metallic.csv')
sharp_list = load_id_list('csv_files/sharp.csv')
hard_list = load_id_list('csv_files/hard.csv')

In [4]:
#hard_list  #uncomment to inspect the contents; change the name accordingly for the other lists

In [5]:
len(hard_list)


Out[5]:
502

In [6]:
#the number of unique sounds in the dataset:
nb_sounds = len(set(bright_list + hard_list + warm_list + rough_list + reverb_list + clear_list + hollow_list + deep_list + punchy_list + metallic_list + sharp_list))

In [7]:
sounds_list = set(bright_list + hard_list + warm_list + rough_list + reverb_list + clear_list + hollow_list + deep_list + punchy_list + metallic_list + sharp_list)

In [8]:
with open("sounds_list.txt", "w") as output:
    output.write(str(sounds_list))
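
Writing str(sounds_list) stores the Python repr of the set, which is awkward to parse back later. A sketch that writes one id per line instead (sounds_list_lines.txt is a hypothetical filename):

with open("sounds_list_lines.txt", "w") as output:
    for sound_id in sorted(sounds_list):
        output.write("%s\n" % sound_id)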

In [9]:
print "bright length:",len(dict_bright)
print "metal length:",len(dict_metal)
print "hard length:",len(dict_hard)
print "reverb length:",len(dict_reverb)
print "rough length:",len(dict_rough)


bright length: 2745
metal length: 2645
hard length: 2735
reverb length: 2422
rough length: 2736

In [10]:
#cleaning: dropping entries whose descriptor value is NaN
clean_dict_bright = filter(lambda k: not np.isnan(dict_bright[k]), dict_bright)
clean_dict_metal = filter(lambda k: not np.isnan(dict_metal[k]), dict_metal)
clean_dict_hard = filter(lambda k: not np.isnan(dict_hard[k]), dict_hard)
clean_dict_reverb = filter(lambda k: not np.isnan(dict_reverb[k]), dict_reverb)
clean_dict_rough = filter(lambda k: not np.isnan(dict_rough[k]), dict_rough)
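
Note that in Python 2, filter over a dict returns a list of its keys, so the clean_dict_* variables above are key lists, not dicts. If the key-to-value mapping is needed later, a dict comprehension keeps it; a sketch with the same cleaning rule (clean_bright_values is a new name):

clean_bright_values = {k: v for k, v in dict_bright.items() if not np.isnan(v)}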

In [11]:
print "bright length:",len(clean_dict_bright)
print "metal length:",len(clean_dict_metal)
print "hard length:",len(clean_dict_hard)
print "reverb length:",len(clean_dict_reverb)
print "rough length:",len(clean_dict_rough)


bright length: 2745
metal length: 2562
hard length: 2721
reverb length: 2422
rough length: 2736

In [12]:
#clean_dict_bright  #uncomment to inspect one of them

In [13]:
#intersecting the cleaned key lists: keep only the ids that have all four descriptors
all_ids_intersection = list(set(clean_dict_bright) & set(clean_dict_metal) & set(clean_dict_hard) & set(clean_dict_rough))
len(all_ids_intersection)


Out[13]:
2550
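
With more descriptors the chain of & operators grows unwieldy; set.intersection accepts any number of iterables and gives the same result (a sketch):

all_ids_intersection = list(set(clean_dict_bright).intersection(
    clean_dict_metal, clean_dict_hard, clean_dict_rough))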

In [14]:
#creating the feature matrix X: one row per sound, one column per descriptor
X = []

for fs_id in all_ids_intersection:
    feature_vector = [dict_bright[fs_id], dict_metal[fs_id], dict_hard[fs_id], dict_rough[fs_id]]
    X.append(feature_vector)

X = np.array(X)
#X  #uncomment to print the matrix
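
Since the NaN filtering happened per descriptor, a cheap sanity check that nothing slipped through into X (a sketch):

assert not np.isnan(X).any(), "X still contains NaN values"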

In [15]:
len(X)


Out[15]:
2550

In [16]:
X.shape


Out[16]:
(2550, 4)

In [17]:
#confirming that the sizes match, as expected
print len(all_ids_intersection)


2550

In [18]:
NB_SOUNDS = len(X)      #same result as len(all_ids_intersection)
NB_LABELS = X.shape[1]  #four descriptor labels

y = np.zeros((NB_SOUNDS, NB_LABELS), dtype=int)

for idx, sound_id in enumerate(all_ids_intersection):  #iterate over all sounds (rows)
    if sound_id in bright_list:    #if the sound is tagged bright,
        y[idx][0] = 1              #set a 1 at row idx (sound), column 0 (label)
    if sound_id in metallic_list:
        y[idx][1] = 1
    if sound_id in hard_list:
        y[idx][2] = 1
    if sound_id in rough_list:
        y[idx][3] = 1
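
Each `in` test above scans a Python list linearly; converting the label lists to sets once makes the 2550 x 4 membership checks constant-time. A sketch building the same y:

label_sets = [set(bright_list), set(metallic_list), set(hard_list), set(rough_list)]
for idx, sound_id in enumerate(all_ids_intersection):
    for col, label_set in enumerate(label_sets):
        if sound_id in label_set:
            y[idx][col] = 1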

In [19]:
y.shape


Out[19]:
(2550, 4)

In [20]:
#y  #printing out y matrix

In [21]:
#SPLIT
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [22]:
#only the last expression in a cell is echoed, so Out shows the test-set shapes
X_train.shape, y_train.shape
X_test.shape, y_test.shape


Out[22]:
((510, 4), (510, 4))
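
train_test_split was not asked to stratify here (and plain stratification does not apply cleanly to multilabel targets), so it is worth confirming that the per-label positive rates look similar in both splits (a sketch):

print y_train.mean(axis=0)  #fraction of positives per label in the training split
print y_test.mean(axis=0)   #should be roughly comparable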

In [151]:
#X_train  #checking X_train matrix values

In [23]:
#TRAIN
#note: fitting on all of X means X_test has already been seen during training,
#so the test metrics below are optimistic; see the leakage-free sketch after the score cell
clf = OneVsRestClassifier(SVC(kernel='rbf'))
clf.fit(X, y)


Out[23]:
OneVsRestClassifier(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          n_jobs=1)

In [25]:
clf.score(X, y)  #mean subset accuracy, measured on the same data the model was fitted on


Out[25]:
0.76313725490196083
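
Because the classifier was fitted on all of X, X_test has already been seen and the held-out metrics below are optimistic. A sketch of the leakage-free evaluation, fitting on the training split only (clf_holdout is a new name):

clf_holdout = OneVsRestClassifier(SVC(kernel='rbf'))
clf_holdout.fit(X_train, y_train)
print clf_holdout.score(X_test, y_test)  #subset accuracy on genuinely unseen sounds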

In [26]:
#clf.predict(X_test)

In [27]:
#Recall is the ratio tp / (tp + fn),
#where tp is the number of true positives and fn the number of false negatives.
#Intuitively, recall is the ability of the classifier to find all the positive samples.
y_true = y_test
y_pred = clf.predict(X_test)

micro_rec = recall_score(y_true, y_pred, average='micro')
macro_rec = recall_score(y_true, y_pred, average='macro')
weigh_rec = recall_score(y_true, y_pred, average='weighted')
micro_rec, macro_rec, weigh_rec


Out[27]:
(0.68449197860962563, 0.68469416227912205, 0.68449197860962563)
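
A tiny worked example of how the averaging modes differ (toy arrays, not the dataset above):

yt = np.array([[1, 0], [1, 0], [0, 1]])
yp = np.array([[1, 0], [0, 0], [0, 1]])
print recall_score(yt, yp, average='macro')  #(0.5 + 1.0) / 2 = 0.75
print recall_score(yt, yp, average='micro')  #2 tp out of 3 true positives = 0.666...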

In [28]:
#Precision is the ratio tp / (tp + fp),
#where tp is the number of true positives and fp the number of false positives.
#Intuitively, precision is the ability of the classifier not to label as positive a sample that is negative.
#Precision here is far higher than the recall above: the classifier returns few
#false positives but misses many true positives.

micro_prec = precision_score(y_true, y_pred, average='micro')
macro_prec = precision_score(y_true, y_pred, average='macro')
weigh_prec = precision_score(y_true, y_pred, average='weighted')

micro_prec, macro_prec, weigh_prec


Out[28]:
(0.98841698841698844, 0.98584905660377364, 0.98713550600343059)

In [29]:
#ACCURACY

#accuracy_score compares the true and predicted label arrays;
#for multilabel y this is the exact-match (subset) ratio
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)
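
For multilabel targets, accuracy_score only counts a row when all four labels agree. hamming_loss gives the complementary per-cell view (a sketch):

from sklearn.metrics import hamming_loss
print hamming_loss(y_test, y_pred)  #fraction of individual label cells that are wrong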

In [30]:
#Classification report

y_pred = clf.predict(X_test)  #already computed above; repeated so the cell is self-contained
categories = ['bright', 'metal', 'hard', 'rough']
print(classification_report(y_test, y_pred, target_names=categories))


             precision    recall  f1-score   support

     bright       1.00      0.84      0.91        87
      metal       1.00      0.70      0.82       107
       hard       0.94      0.59      0.72        85
      rough       1.00      0.61      0.76        95

avg / total       0.99      0.68      0.81       374
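
If the per-label numbers are needed programmatically rather than as printed text, precision_recall_fscore_support returns them as arrays (a sketch):

from sklearn.metrics import precision_recall_fscore_support
prec, rec, f1, support = precision_recall_fscore_support(y_test, y_pred)
print prec  #one value per label, in the order bright, metal, hard, rough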