Run PCA on the Bank Marketing Data
In [2]:
import numpy as np
import pandas as pd
In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (10, 6)
In [4]:
bank = pd.read_csv("data/bank.csv")
In [5]:
bank.head()
Out[5]:
In [6]:
from sklearn import preprocessing
In [7]:
le = preprocessing.LabelEncoder()
In [8]:
df = bank.apply(le.fit_transform)
In [9]:
df.head()
Out[9]:
In [10]:
X = df.iloc[:,:16]
In [11]:
y = df.iloc[:,16]
In [12]:
from sklearn.preprocessing import StandardScaler
# Standardise every feature column; the fitted scaler is kept in its own
# variable so it can be inspected or reused later.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)
In [13]:
from sklearn.decomposition import PCA
In [14]:
pca = PCA(n_components=2)
In [15]:
pca.fit(X)
Out[15]:
In [16]:
X_trans = pca.transform(X)
In [17]:
X_trans
Out[17]:
In [18]:
X_trans[:,0]
Out[18]:
In [19]:
# Scatter of the samples in the plane of the two principal components,
# coloured by the encoded `y` value. The figure carries its own labels, title
# and colour key so that it can be read without the surrounding cells.
fig, ax = plt.subplots()
points = ax.scatter(X_trans[:, 0], X_trans[:, 1], c=y, s=40, alpha=0.5, cmap=plt.cm.viridis)
ax.set(
    xlabel="Principal component 1",
    ylabel="Principal component 2",
    title="Bank marketing data: first two principal components",
)
fig.colorbar(points, ax=ax, label=f"{y.name} (encoded)")
# Explicit show() keeps the cell from echoing the last object's repr.
plt.show()
Out[19]:
In [ ]: