In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
%matplotlib inline
data = pd.read_csv('fer2013/fer2013.csv')
X = data['pixels']
y = data['emotion']
X = pd.Series([np.array(x.split()).astype(int) for x in X])
# expand the single column of pixel arrays into a DataFrame with one column per pixel
X = pd.DataFrame(np.vstack(X.tolist()))
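Before training, it is worth a quick sanity check that the pixel parsing worked. Each FER2013 row is a 48x48 grayscale image flattened into a space-separated pixel string, so reshaping one row and displaying it should show a face. A minimal sketch (the 48x48 shape is the documented FER2013 format; the cell below is an illustration, not part of the original pipeline):

In [ ]:
# reshape the first parsed row back into a 48x48 image and display it
plt.imshow(X.iloc[0].values.reshape(48, 48), cmap='gray')
plt.title('emotion label: {}'.format(y.iloc[0]))
plt.show()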
In [4]:
# Since the data is one big array, we split it into training and testing sets:
# 70% of the data goes to training and 30% to testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# For this exercise we are using MLPClassifier with the lbfgs solver (from the family
# of quasi-Newton methods). In my simple experiments it produces good quality results.
clf = MLPClassifier(solver='lbfgs', random_state=1)
clf.fit(X_train, y_train)
# Now that the classifier is trained, let's see what it predicts on the test data.
prediction = clf.predict(X_test)
quality = np.where(prediction == y_test.values, 1, 0)
print("Fraction of correct predictions is {:.4f}".format(np.count_nonzero(quality) / len(quality)))
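A single accuracy number hides which emotions get confused with which, and FER2013's classes are quite imbalanced. A hedged sketch of a per-class breakdown using scikit-learn's standard metrics API (confusion_matrix and classification_report; again an illustrative extra cell, not part of the original notebook):

In [ ]:
from sklearn.metrics import confusion_matrix, classification_report
# rows are true emotion labels, columns are predicted labels
print(confusion_matrix(y_test, prediction))
# per-class precision and recall, more informative than raw accuracy on imbalanced data
print(classification_report(y_test, prediction))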
In [9]:
# Count how many test samples were assigned to each predicted emotion class:
# column 'a' holds the predicted label, '-' is a dummy column to count over.
df = pd.DataFrame(prediction.astype(int), columns=['a'])
df.loc[:, '-'] = pd.Series(1, index=df.index)
df.groupby('a').count()
Out[9]:
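To judge whether these predicted per-class counts are plausible, it helps to compare them against the true label distribution of the test set; a large mismatch suggests the model collapses onto the majority classes. A minimal sketch using standard pandas:

In [ ]:
# true distribution of emotion labels in the test set, indexed by class id
y_test.value_counts().sort_index()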