In [3]:
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split


%matplotlib inline

data = pd.read_csv('fer2013/fer2013.csv')
X = data['pixels']
y = data['emotion']

# each entry in 'pixels' is a space-separated string of 48*48 grayscale values;
# parse each string into an integer array, then stack the arrays so that every
# pixel becomes its own column (np.matrix is deprecated, so np.stack is used instead)
X = pd.DataFrame(np.stack([np.array(px.split(), dtype=int) for px in X]))
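matplotlib is imported above but not used yet; as a quick sanity check on the parsing, one row can be reshaped back into a 48x48 image (the standard FER2013 face size) and displayed. A minimal sketch:

# FER2013 faces are 48x48 grayscale images, so each parsed row should
# reshape cleanly back into a square picture
face = X.iloc[0].to_numpy().reshape(48, 48)
plt.imshow(face, cmap='gray')
plt.title('emotion label: {}'.format(y.iloc[0]))
plt.show()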

In [4]:
# split the data into training and testing sets: 70% of the samples go to
# training and 30% to testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# for this exercise we are using MLPRegressor; note that it is a regressor
# fitted to discrete emotion labels, so its predictions are continuous values
# rather than class labels (a classifier alternative is sketched after the
# prediction table below)
clf = MLPRegressor(random_state=1)
clf.fit(X_train, y_train.to_numpy())

# now that the model is trained, let's see what it predicts on the test data
prediction = clf.predict(X_test)

# count how often the continuous prediction exactly equals the integer label
quality = np.where(prediction == y_test.to_numpy(), 1, 0)
print("Fraction of exact matches is {:.4f}".format(np.count_nonzero(quality) / len(quality)))


Fraction of exact matches is 0.0000
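The 0.0000 is expected: a regressor produces floating-point outputs, and a float almost never equals an integer label exactly. A fairer quick check, as a minimal sketch, rounds each prediction to the nearest integer before comparing:

# round each continuous prediction to the nearest integer and measure
# agreement with the true integer labels
rounded = np.rint(prediction).astype(int)
print("Fraction of matches after rounding is {:.4f}".format(
    np.mean(rounded == y_test.to_numpy())))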

In [9]:
df = pd.DataFrame(prediction.astype(int),columns=['a'])
df.loc[:,'-'] = pd.Series(1, index=df.index)
df.groupby('a').count()


Out[9]:
     count
a
-10      1
 -9      1
 -8      2
 -7      2
 -6      3
 -5      1
 -4      4
 -3     21
 -2     12
 -1     29
  0    189
  1    157
  2    305
  3    396
  4    566
  5    743
  6    923
  7   1006
  8   1094
  9   1018
 10   1009
 11    848
 12    695
 13    544
 14    413
 15    264
 16    184
 17    131
 18     79
 19     57
 20     23
 21     16
 22     10
 23      5
 24      2
 25      7
 26      2
 27      2
 28      1
 36      1
 39      1
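The distribution makes the problem obvious: the truncated predictions run from -10 to 39, while FER2013 has only seven emotion classes (labels 0 to 6), so most predictions are not even valid labels. Since this is really a classification task, here is a minimal sketch of the MLPClassifier-with-lbfgs setup the earlier comment alluded to; the hidden layer size is an illustrative guess, not a tuned value:

from sklearn.neural_network import MLPClassifier

# treat the emotions as discrete classes instead of a regression target;
# lbfgs is a quasi-Newton solver, often a reasonable choice for smaller nets
clf2 = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(100,), random_state=1)
clf2.fit(X_train, y_train)
# score() reports plain classification accuracy on the held-out set
print("Test accuracy: {:.4f}".format(clf2.score(X_test, y_test)))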