In [3]:
#-------------------------------------------------------------------------------
# Name:        CSC492 - Coding Assignment #1
# Purpose:
#
# Author:      Marion
#
# Created:     13/10/2017
# Copyright:   (c) Marion 2017
# Licence:     <your licence>
#-------------------------------------------------------------------------------

import csv
import numpy as np
from numpy.random import randn

#dimensions of the data set and of the network
NB_FEATURES = 26            #feature columns read for every example
NB_CLASSES = 13             #rows of the one-hot output matrix
NB_HIDDEN_NEURONS = 16      #units in the hidden layer
NB_TRAININGEX = 13000       #training examples
NB_TEST = 10400             #test examples

#step size of the weight update
LEARNING_RATE = 0.01

#seed the random numbers to help debugging
np.random.seed(1)

#location of the kaggle-music-genre csv files, relative to the notebook
DATA_DIR = "../../arasdar-DL-git/data/kaggle-music-genre/"

#the header row and column 0 of the feature files are skipped, the next NB_FEATURES columns are read
FEATURE_COLS = range(1, NB_FEATURES + 1)

#getting the test data from the csv file
#every file is opened in a with-block so that its handle is closed once parsing is done
#NOTE(review): float16 keeps about 3 significant digits and overflows above 65504 - confirm this precision is intended
with open(DATA_DIR + "test.x.csv","rb") as testFile:
    testDatatemp = np.loadtxt(testFile, dtype=np.float16, delimiter=',', skiprows=1, usecols=FEATURE_COLS)

#transpose the test data to get a NB_FEATURES * NB_TEST (26 * 10400) matrix
testDatatemp = testDatatemp.T

#getting the input data from the csv file
#NOTE(review): the name input hides the builtin input(); it is kept because other cells may read it
with open(DATA_DIR + "train.x.csv","rb") as trainFile:
    input = np.loadtxt(trainFile, dtype=np.float16, delimiter=',', skiprows=1, usecols=FEATURE_COLS)

#transpose the input to get a NB_FEATURES * NB_TRAININGEX (26 * 13000) matrix
#no line of ones is stacked on it: syn1 has exactly NB_FEATURES rows
inputFinal = input.T

#line of ones sized for the training examples; it is not stacked onto inputFinal,
#it is defined so that the shape-inspection cell reading inputBias.shape runs on a fresh kernel
inputBias = np.ones((1,NB_TRAININGEX))

#adding a line of ones for the bias term in the input testing array, we get a 27 * 10400 matrix
testBias = np.ones((1,NB_TEST))
testData = np.vstack((testBias,testDatatemp))

#fail early if the csv files do not have the expected number of rows / columns
assert inputFinal.shape == (NB_FEATURES, NB_TRAININGEX), inputFinal.shape
assert testDatatemp.shape == (NB_FEATURES, NB_TEST), testDatatemp.shape

#getting the output data (the genre name in column 1) from the csv file
with open(DATA_DIR + "train.y.csv","rb") as labelFile:
    outputtemp = np.genfromtxt(labelFile, dtype='str', delimiter=',', skip_header=1, usecols=(1))

#one column per training example, all zeros for now; the one-hot class encoding is written into it later
output = np.zeros((NB_CLASSES,NB_TRAININGEX))

In [13]:
#shape of every array built while loading the data
tuple(array.shape for array in (output, inputFinal, inputBias, testBias, testDatatemp, testData))


Out[13]:
((13, 13000), (26, 13000), (1, 13000), (1, 10400), (26, 10400), (27, 10400))

In [14]:
#(features, training examples)
tuple(inputFinal.shape)


Out[14]:
(26, 13000)

In [23]:
#initializing all the weights randomly, every entry is drawn uniformly from [0, 1)
#syn1 links the input features to the hidden neurons, syn2 links the hidden neurons to the classes
#no separate bias vectors are created
layerShapes = ((NB_FEATURES, NB_HIDDEN_NEURONS), (NB_HIDDEN_NEURONS, NB_CLASSES))
syn1, syn2 = [np.random.random(shape) for shape in layerShapes]

In [24]:
#shapes of the two weight matrices
(syn1.shape, syn2.shape)


Out[24]:
((26, 16), (16, 13))

In [25]:
#initializing the output matrix for the training data, we map the classes, we get a 13 * 13000 matrix, 1 for the good class, 0 for the others
#the position of a genre in GENRES is its class index, i.e. its row in output
GENRES = (
    'International',
    'Vocal',
    'Latin',
    'Blues',
    'Country',
    'Electronic',
    'Folk',
    'Jazz',
    'New_Age',
    'Pop_Rock',
    'Rap',
    'Reggae',
    'RnB',
)
GENRE_ROW = {genre: row for row, genre in enumerate(GENRES)}

#a lookup table is used instead of a variable called str, which would hide the builtin str for every later cell
#a label that is not in GENRES leaves its column all zeros
for j in range(NB_TRAININGEX):
    row = GENRE_ROW.get(outputtemp[j])
    if row is not None:
        output[row,j] = 1

#values for reference
#international = 0
#vocal = 1
#latin = 2
#blues = 3
#country = 4
#electronic = 5
#folk = 6
#jazz= 7
#new-age=8
#pop_rock = 9
#rap = 10
#reggae = 11
#rnb = 12

In [26]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise when x is an array."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoidDeriv(x):
    """Return x * (1 - x), the slope of the sigmoid when x is already a sigmoid output."""
    complement = 1 - x
    return x * complement

In [ ]:
#1 hidden layer = 2 synapses = 2 weight matrices
def forwardPass (inputLayer, weights1, weigths2):
    """Run the network forward and return normalized class scores.

    inputLayer: array with one row per input feature and one column per example.
    weights1:   input -> hidden weights, shape (features, hidden neurons).
    weigths2:   hidden -> output weights, shape (hidden neurons, classes), or
                (hidden neurons + 1, classes) when its last row holds the weights
                of a constant hidden bias unit.

    Returns an array of shape (classes, examples) in which the scores of each
    example (each column) sum to 1.
    """
    #sigmoid on every hidden activation; the bias row, when there is one, is appended afterwards
    #so no row has to be skipped
    hiddenLayer = sigmoid(weights1.T.dot(inputLayer))

    #add line of ones for the bias term, only when weigths2 has a row for it
    if weigths2.shape[0] == hiddenLayer.shape[0] + 1:
        bias = np.ones((1, hiddenLayer.shape[1]))
        hiddenLayer = np.vstack((hiddenLayer,bias))

    #sigmoid on every output activation (the output layer has no bias row)
    result = sigmoid(weigths2.T.dot(hiddenLayer))

    #normalize the data: each example is divided by its own total, not by the total of the whole matrix
    return result / np.sum(result, axis=0)

#forward pass over the training set
result2 = forwardPass(inputFinal,syn1,syn2)

#TODO: the error between output (desired) and result2 (computed) is not calculated yet

#testing
#testDatatemp is used rather than testData: testData carries an extra line of ones,
#and its 27 rows do not match the NB_FEATURES rows of syn1
testing = forwardPass(testDatatemp,syn1,syn2)
testing = testing.T

#add index column: the ids 1..NB_TEST first, then the NB_CLASSES scores of each example
finalOutput = np.zeros((NB_TEST,NB_CLASSES+1))
finalOutput[:,0] = range(1, NB_TEST + 1)
finalOutput[:,1:] = testing

#one integer id followed by one 4-decimal score per class
rowFormat = ','.join(['%i'] + ['%1.4f'] * NB_CLASSES)

#test data
#the file is opened with 'w' so that running the cell again rewrites it instead of appending duplicate rows
with open('submission.csv','w') as f_handle:
    np.savetxt(f_handle, finalOutput, fmt=rowFormat)