In [1]:
import pandas as pd
import time
In [33]:
# Load the PUC dataset: a ';'-separated CSV that uses ',' as the decimal mark.
# NOTE(review): this is an absolute, machine-specific path -- point it at the
# local copy of the dataset before running.
X = pd.read_csv(
    'E:/Github/DAT210x-Lab/Module6/Datasets/PUC.csv',
    sep=';',
    index_col=None,
    decimal=',',
)

# Encode gender as an integer code; any label not in the mapping becomes NaN.
# Bracket indexing is used for column assignment because attribute-style
# assignment only works for columns that already exist and whose names do not
# collide with DataFrame attributes.
X['gender'] = X['gender'].map({'Man': 0, 'Woman': 1})

# Force z4 to a numeric dtype; entries that cannot be parsed become NaN.
X['z4'] = pd.to_numeric(X['z4'], errors='coerce')

X.head(5)
Out[33]:
In [34]:
# Display the column labels of X.
X.columns
Out[34]:
In [35]:
# Build the target frame y by one-hot encoding the 'class' column of X
# (one indicator column per distinct class label).
y = pd.get_dummies(X['class'])
In [36]:
# Remove the 'class' column (already captured in y) and the 'user' column
# from X. Rebinding X instead of using inplace=True, together with
# errors='ignore', keeps this cell re-runnable: a second execution is a no-op
# rather than a KeyError for columns that are already gone.
X = X.drop(columns=['class', 'user'], errors='ignore')
X.head(1)
Out[36]:
In [37]:
from sklearn.model_selection import train_test_split

# Split features and targets into train/test partitions: 30% of the rows are
# held out for testing, and the fixed random_state makes the shuffle (and
# therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=7,
)
In [ ]:
In [ ]:
In [ ]: