In [12]:
# Import python plotting interface
using PyPlot
# Generate simple 2-dimensional data that is rotated slightly
# We use complex numbers so that rotation is easy, via complex exponentials
x_complex = .1*randn(1000) + 1im*randn(1000)
x_complex .*= exp(.7im)
# Plot the real and imaginary axes
x_2d = [real(x_complex)', imag(x_complex)'];
scatter(x_2d[1,:], x_2d[2,:])
axis([-3, 3, -3, 3])
title("Original 2D data");
In [2]:
# Perform SVD, normalize the singular values and get matrix T, which contains our principle components
(U, S, V) = svd(x_2d)
S ./= norm(S)
T = U*diagm(S)
# Grab the principle components
comp1 = T[:,1]
comp2 = T[:,2]
# Plot the original data
scatter(x_2d[1,:], x_2d[2,:])
axis([-3, 3, -3, 3])
# Plot the principle components
gain = 2
plot([gain*comp1[1], 0, gain*comp2[1] ], [gain*comp1[2], 0, gain*comp2[2]], "r")
legend(["principle components", "data"])
title("2D Data with detected principle components");
In [3]:
# Generate a single "column" of data
function generate_data(N)
return sin(.005*pi*[1:N]) + .1*randn(N)
end
# Make the data 1000 points long
N = 1000
plot(generate_data(N))
title("Single instance of N-dimensional data (N = $(N))");
In [4]:
# Now, let's generate a 1000x1000 matrix where each column
# is a single instance of data, generated as above
N = 1000
x_sin = zeros(N,N)
for idx in 1:N
x_sin[:,idx] = generate_data(N)
end
imshow(x_sin)
colorbar()
title("X matrix, each column is sinusoidal");
In [5]:
# Let's do PCA on that data, get the principle components!
(U, S, V) = svd(x_sin)
S ./= norm(S)
T = U*diagm(S)
# Then we'll plot the first principle component:
plot( T[:,1] )
title( "First principle component" )
figure()
plot( T[:,2] )
title( "Second principle component" );
In [6]:
# Maybe this is just doing sinusoidal decomposition. Let's try putting two sinusoids in, and see if they come out separately!
x_2sin = zeros(N,N)
for idx = 1:N
x_2sin[:,idx] = sin(.005*pi*[1:N]) + sin(0.011*pi*[1:N]) + .1*randn(N)
end
# First, plot just a single column
plot( x_2sin[:,1] )
title("Single column of data")
figure()
imshow(x_2sin)
title("X matrix")
colorbar()
Out[6]:
In [7]:
# Do actual SVD to get the first two principle components
(U, S, V) = svd(x_2sin)
S ./= norm(S)
T = U*diagm(S)
plot( T[:,1] )
title( "First principle component" )
figure()
plot( T[:,2] )
title( "Second principle component" );
In [8]:
# SVD didn't separate out the two sinusoids. This is because they were not linearly independent.
# SVD saw them as the same component. In order to separate them out, we need to have them vary with respect to eachother
# We're going to do this by randomizing the amplitude of each sinusoid across instances:
x_3sin = zeros(N,N)
for idx = 1:N
a1 = 1.0*randn()
a2 = 0.3*randn()
a3 = 0.1*randn()
sin1 = sin(0.005*pi*[1:N])
sin2 = sin(0.011*pi*[1:N])
sin3 = sin(0.007*pi*[1:N])
noise = 0.1*randn(N)
x_3sin[:,idx] = a1*sin1 + a2*sin2 + a3*sin3 + noise
end
imshow(x_3sin)
colorbar()
title("X matrix with random amplitude sinusoidal components");
In [9]:
# Do actual SVD to get the first two principle components
(U, S, V) = svd(x_3sin)
S ./= norm(S)
T = U*diagm(S)
plot( T[:,1] )
title( "First principle component" )
figure()
plot( T[:,2] )
title( "Second principle component" );
In [10]:
plot( T[:,3] )
title( "Third principle component" );
figure()
plot( T[:,4] )
title( "Fourth principle component" );
In [11]:
# We can try to figure out what principle components are interesting by looking at the value of that "S" vector:
# We're only going to plot the first 40 since we only really care about the first 3
stem(1:40, S[1:40])
xlim([0, 40])
title("Singular values of X");