Q2


In [7]:
import numpy as np
from sklearn.decomposition import PCA
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [3]:
# Load the table from DM2016_org.csv. Despite the extension, the file is parsed
# as whitespace-delimited text: the first line is the header row and every
# remaining line is one record.
with open('DM2016_org.csv') as f:
    # split() with no argument also drops the trailing newline that
    # split(' ') would leave attached to the last header name.
    headers = f.readline().split()
    # Build a real list (not a lazy map object) so the rows can be indexed and
    # traversed more than once on Python 3. Blank lines are skipped rather than
    # turned into empty records.
    values = [line.split() for line in f if line.strip()]

if not values:
    raise ValueError('DM2016_org.csv contains no data rows')

# One DataFrame column per field, labelled 0..k-1 by position. The cells are
# still strings at this point.
data = pd.DataFrame(values)

# Drop the first column (presumably an id/label column -- TODO confirm) and
# convert the remaining fields to a float matrix. `.values` replaces
# DataFrame.as_matrix(), which was removed in pandas 1.0.
npdata = data[data.columns[1:]].astype(float).values

In [4]:
# Fit a 3-component PCA on the feature matrix; fit() returns the estimator
# itself, so construction and fitting can be chained.
pca = PCA(n_components=3).fit(npdata)

# Project the same samples onto the three fitted components.
output = pca.transform(npdata)

In [8]:
# 3-D scatter plot of the samples in the space of the three PCA components.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Draw every sample with a single scatter call (one column of `output` per
# axis) instead of creating a separate artist for each point.
ax.scatter(output[:, 0], output[:, 1], output[:, 2])

ax.set_title('PCA projection (first 3 components)')
ax.set_xlabel('PCA 1')
ax.set_ylabel('PCA 2')
ax.set_zlabel('PCA 3')

plt.show()