In [301]:
import numpy as np
import pandas as pd
In [302]:
# Print NumPy arrays with 3 decimal places and without scientific notation
# for small values, so the displayed matrices below stay readable.
np.set_printoptions(
    suppress=True,
    precision=3,
)
In [303]:
# Load the raw table from the CSV file in the current working directory.
dataset = pd.read_csv(filepath_or_buffer='Data.csv')
In [304]:
# Split the table into features and target by position:
# every column except the last is a feature, the last column is the target.
# The target is addressed as -1 (not a hard-coded index) so it always matches
# the column that the feature slice `:-1` leaves out.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
In [305]:
from sklearn.preprocessing import Imputer
In [306]:
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer = imputer.fit(X[:, 1:3])
In [307]:
# Overwrite columns 1 and 2 of X with the values produced by the fitted imputer.
imputed_block = imputer.transform(X[:, 1:3])
X[:, 1:3] = imputed_block
In [308]:
# Display the feature matrix after columns 1 and 2 have been imputed.
X
Out[308]:
In [309]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
In [310]:
# Replace the values in column 0 of X with integer codes.
first_col_encoder = LabelEncoder()
X[:, 0] = first_col_encoder.fit_transform(X[:, 0])
In [311]:
# One-hot encode column 0 and keep the remaining columns unchanged.
# `OneHotEncoder(categorical_features=...)` was removed in scikit-learn 0.22;
# ColumnTransformer is the supported way to encode a subset of columns.
from sklearn.compose import ColumnTransformer

column_encoder = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(), [0])],
    remainder='passthrough',  # non-encoded columns are appended after the dummies
    sparse_threshold=0,       # always return a dense array (was `.toarray()`)
)
# Cast to float so the result has the same numeric dtype the old encoder produced,
# rather than an object array inherited from the mixed-type input.
X = np.asarray(column_encoder.fit_transform(X), dtype=float)
In [312]:
# Display the feature matrix after one-hot encoding column 0.
X
Out[312]:
In [313]:
# Convert the target values to integer class codes.
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)
In [314]:
# Display the label-encoded target vector.
y
Out[314]:
In [315]:
from sklearn.model_selection import train_test_split
In [316]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
In [317]:
# Display the four arrays produced by the train/test split as one tuple.
(X_train, X_test, y_train, y_test)
Out[317]:
In [318]:
from sklearn.preprocessing import StandardScaler
In [319]:
# Standardise the features. The scaler's statistics are learned from the
# training rows only and then reused for the test rows, so no information
# from the test set influences the scaling.
sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)
In [320]:
# Display the scaled training and test feature matrices as one tuple.
(X_train, X_test)
Out[320]: