In [1]:
import pandas as pd
import time

In [33]:
X = pd.read_csv('E:/Github/DAT210x-Lab/Module6/Datasets/PUC.csv', sep=';', index_col=None, decimal=',') #, , na_values='?'
X.gender = X.gender.map({'Man':0, 'Woman': 1})
X.z4 = pd.to_numeric(X.z4, errors='coerce')
X.head(5)


D:\anaconda\lib\site-packages\IPython\core\interactiveshell.py:2723: DtypeWarning: Columns (17) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[33]:
user gender age how_tall_in_meters weight body_mass_index x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 class
0 debora 1 46 1.62 75 28.6 -3 92 -63 -23 18 -19 5 104 -92 -150 -103 -147.0 sitting
1 debora 1 46 1.62 75 28.6 -3 94 -64 -21 18 -18 -14 104 -90 -149 -104 -145.0 sitting
2 debora 1 46 1.62 75 28.6 -1 97 -61 -12 20 -15 -13 104 -90 -151 -104 -144.0 sitting
3 debora 1 46 1.62 75 28.6 -2 96 -57 -15 21 -16 -13 104 -89 -153 -103 -142.0 sitting
4 debora 1 46 1.62 75 28.6 -1 96 -61 -13 20 -15 -13 104 -89 -153 -104 -143.0 sitting

In [34]:
X.columns


Out[34]:
Index(['user', 'gender', 'age', 'how_tall_in_meters', 'weight',
       'body_mass_index', 'x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x3', 'y3', 'z3',
       'x4', 'y4', 'z4', 'class'],
      dtype='object')

In [35]:
y = X['class']
y = pd.get_dummies(y)

In [36]:
X.drop(['class','user'], axis=1, inplace=True)
X.head(1)


Out[36]:
gender age how_tall_in_meters weight body_mass_index x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4
0 1 46 1.62 75 28.6 -3 92 -63 -23 18 -19 5 104 -92 -150 -103 -147.0

In [37]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=7)

In [ ]:


In [ ]:


In [ ]: