In [1]:
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import pandas as pd
import numpy as np
import math
import random

In [2]:
# Build a synthetic 2-D dataset of 200 points (a1 = x-coordinates, a2 = y-coordinates).
# Each loop iteration contributes two points:
#   - one with a1 in [5, 60]   and a2 in [0, 40]
#   - one with a1 in [-56, -1] and a2 in [-37, -3]
# so the cloud is stretched along a diagonal, which gives PCA a clear dominant direction.
SEED = 42
random.seed(SEED)  # fixed seed: re-running this cell regenerates exactly the same points

a1 = []
a2 = []
for _ in range(100):
    # Keep this call order: it determines which random draw lands in which coordinate.
    a1.append(random.randint(5, 60))
    a1.append(random.randint(-56, -1))
    a2.append(random.randint(0, 40))
    a2.append(random.randint(-37, -3))

plt.scatter(a1, a2, color='c')
plt.xlabel('a1')
plt.ylabel('a2')
plt.title('Synthetic 2-D dataset')
plt.show()



In [3]:
# PCA, step 1: centre the data and build its covariance matrix.
# Outputs used by later cells:
#   k -- (n_samples, 2) array of mean-centred points, one row per point
#   s -- 2x2 covariance matrix of k (this is NOT a set of eigenvectors)
a1 = np.array(a1)
a2 = np.array(a2)

# Per-coordinate means, subtracted so that each column of k has zero mean.
m1 = np.mean(a1)
m2 = np.mean(a2)
a1 = a1 - m1
a2 = a2 - m2

# Pair the coordinates row-wise instead of appending element by element.
k = np.column_stack((a1, a2))

# Population covariance: divide by the actual number of samples rather than a
# hard-coded 200, so the scaling stays correct if the dataset size changes.
s = np.matmul(k.T, k) / k.shape[0]

In [4]:
# Display the two rows of the 2x2 covariance matrix s.
# Note: these rows are covariance entries, not eigenvectors.
print(*s[:2])


[ 1192.1     566.473] [ 566.473     439.781975]

In [5]:
# PCA, step 2: reduce the centred 2-D data in k to 1-D.
# The principal directions are the eigenvectors of the covariance matrix s, and
# the eigenvector with the largest eigenvalue is the direction of maximum variance.
# (A row of s such as s[0] is NOT an eigenvector, so projecting onto it does not
# give the principal-component coordinates.)
eig_vals, eig_vecs = np.linalg.eigh(s)  # s is symmetric; eigh returns eigenvalues in ascending order
top_vec = eig_vecs[:, -1]               # unit-length column paired with the largest eigenvalue

# Coordinate of every point along the top principal direction.
t1 = list(np.dot(k, top_vec))
# Constant second coordinate so the projected points are drawn on a single line.
t2 = [0] * len(t1)

plt.scatter(t1, t2, color='r')
plt.xlabel('projection onto first principal component')
plt.title('Data reduced from 2-D to 1-D')
plt.show()



In [ ]: