In [1]:
import json
import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.svm import SVC
import csv
#un-comment the line below to print whole numpy arrays (note: rendering very large arrays is slow;
#on newer numpy versions use threshold=sys.maxsize, as np.nan is no longer accepted)
#np.set_printoptions(threshold=np.nan)
# Load each per-term analysis JSON into its own dict.
# Context managers replace the original bare open() calls so the file
# handles are closed promptly instead of leaked.
with open("json_files/bright_analysis.json", 'rb') as f:
    dict_bright = json.load(f)
with open("json_files/metal_analysis.json", 'rb') as f:
    dict_metal = json.load(f)
with open("json_files/hard_analysis.json", 'rb') as f:
    dict_hard = json.load(f)
with open("json_files/reverb_analysis.json", 'rb') as f:
    dict_reverb = json.load(f)
with open("json_files/rough_analysis.json", 'rb') as f:
    dict_rough = json.load(f)
In [2]:
#dict_bright #printing to see comment
In [3]:
# Read the first column (Freesound id) of each tag CSV into a list.
# The original cell repeated the same read/extract pattern 11 times, with an
# inconsistent map(tuple, ...) for rough/reverb that had no effect on the
# result; one helper removes the duplication and treats every file the same.
# (The unused *_list_csv intermediates are dropped; nothing downstream read them.)
def load_id_column(path):
    """Return the first field of every row of the CSV at `path`."""
    # 'rb' matches this file's Python 2 csv idiom; on Python 3 use 'r', newline=''.
    with open(path, 'rb') as f:
        return [row[0] for row in csv.reader(f)]

bright_list = load_id_column('csv_files/bright.csv')
warm_list = load_id_column('csv_files/warm.csv')
rough_list = load_id_column('csv_files/rough.csv')
reverb_list = load_id_column('csv_files/reverb.csv')
clear_list = load_id_column('csv_files/clear.csv')
hollow_list = load_id_column('csv_files/hollow.csv')
deep_list = load_id_column('csv_files/deep.csv')
punchy_list = load_id_column('csv_files/punchy.csv')
metallic_list = load_id_column('csv_files/metallic.csv')
sharp_list = load_id_column('csv_files/sharp.csv')
hard_list = load_id_column('csv_files/hard.csv')
In [4]:
#hard_list #print if want to see content.change accordingly for other lists
In [5]:
len(hard_list)
Out[5]:
In [6]:
# Number of distinct sounds in the dataset: size of the union of all tag lists.
nb_sounds = len(set().union(bright_list, hard_list, warm_list, rough_list,
                            reverb_list, clear_list, hollow_list, deep_list,
                            punchy_list, metallic_list, sharp_list))
In [7]:
sounds_list = (set(bright_list + hard_list +warm_list + rough_list + reverb_list + clear_list + hollow_list + deep_list + punchy_list + metallic_list + sharp_list))
In [8]:
# Dump the unique sound ids to a text file.  Sorting first makes the dump
# deterministic — set iteration order is arbitrary, so the original
# str(sounds_list) produced a different file on every run.
with open("sounds_list.txt", "w") as output:
    output.write(str(sorted(sounds_list)))
In [9]:
# Sanity check: size of each raw analysis dict before NaN filtering.
print "bright length:",len(dict_bright)
print "metal length:",len(dict_metal)
print "hard length:",len(dict_hard)
print "reverb length:",len(dict_reverb)
print "rough length:",len(dict_rough)
In [10]:
# Drop sound ids whose analysis value is NaN.  List comprehensions replace
# filter() so the results are concrete lists on both Python 2 and 3 — on
# Python 3, filter() returns a lazy iterator, which would break the len()
# and set() calls in the cells below.
# NOTE: despite the names, these are lists of ids (dict keys), not dicts.
clean_dict_bright = [k for k in dict_bright if not np.isnan(dict_bright[k])]
clean_dict_metal = [k for k in dict_metal if not np.isnan(dict_metal[k])]
clean_dict_hard = [k for k in dict_hard if not np.isnan(dict_hard[k])]
clean_dict_reverb = [k for k in dict_reverb if not np.isnan(dict_reverb[k])]
clean_dict_rough = [k for k in dict_rough if not np.isnan(dict_rough[k])]
In [11]:
# Sizes after NaN filtering — compare against the pre-filter counts above.
print "bright length:",len(clean_dict_bright)
print "metal length:",len(clean_dict_metal)
print "hard length:",len(clean_dict_hard)
print "reverb length:",len(clean_dict_reverb)
print "rough length:",len(clean_dict_rough)
In [12]:
#clean_dict_bright #checking out one of them to see content
In [13]:
# Keep only the sound ids that have a non-NaN value for every feature used
# by the model: bright, metal, hard, rough.
# NOTE(review): clean_dict_reverb is computed above but excluded here, and the
# feature matrix below also ignores it — confirm reverb is meant to be unused.
all_ids_intersection = list(set(clean_dict_bright) & set(clean_dict_metal)
                            & set(clean_dict_hard) & set(clean_dict_rough))
# The original had a bare `all_ids_intersection` expression mid-cell, which
# produced no output (only a cell's last expression displays) — removed.
len(all_ids_intersection)
Out[13]:
In [14]:
# Build the feature matrix X: one row per intersected sound id, columns are
# the [bright, metal, hard, rough] analysis values.
X = []
for fs_id in all_ids_intersection:
    feature_vector = [dict_bright[fs_id], dict_metal[fs_id],
                      dict_hard[fs_id], dict_rough[fs_id]]
    X.append(feature_vector)
    # (a no-op `len(feature_vector)` expression sat here; removed)
# NOTE: `feature_vector` deliberately leaks out of the loop — a later cell
# reads len(feature_vector) to get the label count.
X = np.array(X)
#X #printing out matrix
In [15]:
len(X)
Out[15]:
In [16]:
X.shape
Out[16]:
In [17]:
# Confirm the feature matrix has one row per intersected sound id.
print len(all_ids_intersection)
In [18]:
# Build the multi-label target matrix y: y[i, j] = 1 when sound i carries
# label j.  Column order matches the feature columns:
# 0 = bright, 1 = metallic, 2 = hard, 3 = rough.
# (The original `y = []` was dead — immediately overwritten by np.zeros.)
NB_SOUNDS = len(X)               # same as len(all_ids_intersection)
NB_LABELS = len(feature_vector)  # 4, leaked from the X-building loop above
y = np.zeros((NB_SOUNDS, NB_LABELS), dtype=int)
# Precompute sets so each membership test is O(1) instead of scanning a list
# for every sound.
bright_set = set(bright_list)
metallic_set = set(metallic_list)
hard_set = set(hard_list)
rough_set = set(rough_list)
for idx, sound_id in enumerate(all_ids_intersection):
    if sound_id in bright_set:
        y[idx, 0] = 1
    if sound_id in metallic_set:
        y[idx, 1] = 1
    if sound_id in hard_set:
        y[idx, 2] = 1
    if sound_id in rough_set:
        y[idx, 3] = 1
In [19]:
y.shape
Out[19]:
In [20]:
#y #printing out y matrix
In [21]:
# SPLIT: hold out 20% of the sounds for testing; fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
In [22]:
# NOTE: only a cell's last expression is displayed, so the train shapes on the
# first line produce no output — print both lines if you want to see them.
X_train.shape, y_train.shape
X_test.shape, y_test.shape
Out[22]:
In [151]:
#X_train #checking X_train matrix values
In [23]:
# TRAIN — one RBF-kernel SVM per label (one-vs-rest).
# Bug fix: the original fit on the full (X, y), so the held-out test split
# leaked into training and every test-set metric below was optimistic.
clf = OneVsRestClassifier(SVC(kernel='rbf'))
clf.fit(X_train, y_train)
Out[23]:
In [25]:
clf.score(X, y, sample_weight=None)
Out[25]:
In [26]:
#clf.predict(X_test)
In [27]:
# Recall = tp / (tp + fn), where tp is the number of true positives and fn
# the number of false negatives: the classifier's ability to find all the
# positive samples.  Reported under three averaging schemes.
y_true = y_test
y_pred = clf.predict(X_test)
x1, x2, x3 = (recall_score(y_true, y_pred, average=avg)
              for avg in ('micro', 'macro', 'weighted'))
x1,x2,x3
Out[27]:
In [28]:
# Precision = tp / (tp + fp), where tp is the number of true positives and fp
# the number of false positives: the classifier's ability not to label a
# negative sample as positive.  Reuses y_true / y_pred from the recall cell.
micro_prec, macro_prec, weigh_prec = (
    precision_score(y_true, y_pred, average=avg)
    for avg in ('micro', 'macro', 'weighted'))
micro_prec, macro_prec, weigh_prec
Out[28]:
In [29]:
# ACCURACY (subset accuracy: every label of a sample must match exactly).
# Bug fix: the original passed X_train.shape / X_test.shape — two tuples of
# array dimensions — as y_true / y_pred, so the reported score was
# meaningless.  Compare test labels against the classifier's predictions.
y_true = y_test
y_pred = clf.predict(X_test)
accuracy_score(y_true, y_pred)
Out[29]:
In [30]:
# Per-label precision / recall / f1 summary on the held-out test split.
y_pred = clf.predict(X_test)
# Names follow the column order of y: bright, metallic, hard, rough.
categories = ['bright', 'metal', 'hard', 'rough']
report = classification_report(y_test, y_pred, target_names=categories)
print(report)