In [1]:
# Import python plotting interface
using PyPlot

# Generate simple 2-dimensional data that is rotated slightly
# We use complex numbers so that rotation is easy, via complex exponentials
x_complex = .1*randn(1000) + 1im*randn(1000)
x_complex .*= exp(.7im)

# Plot the real and imaginary axes
x_2d = [real(x_complex)', imag(x_complex)'];
scatter(x_2d[1,:], x_2d[2,:])
axis([-3, 3, -3, 3])
title("Original 2D data");


INFO: Loading help data...

In [2]:
# Perform SVD, normalize the singular values and get matrix T, which contains our principle components
(U, S, V) = svd(x_2d)
S ./= norm(S)
T = U*diagm(S)

# Grab the principle components
comp1 = T[:,1]
comp2 = T[:,2]

# Plot the original data
scatter(x_2d[1,:], x_2d[2,:])
axis([-3, 3, -3, 3])

# Plot the principle components
gain = 2
plot([gain*comp1[1], 0, gain*comp2[1] ], [gain*comp1[2], 0, gain*comp2[2]], "r")
legend(["principle components", "data"])
title("2D Data with detected principle components");



In [5]:



Out[5]:
2x2 Array{Float64,2}:
 -0.642139  0.766588
  0.766588  0.642139

In [6]:
# Generate a single "column" of data
function generate_data(N)
    return sin(.005*pi*[1:N]) + .1*randn(N)
end

# Make the data 1000 points long
N = 1000
plot(generate_data(N))
title("Single instance of N-dimensional data (N = $(N))");



In [7]:
# Now, let's generate a 1000x1000 matrix where each column
# is a single instance of data, generated as above
N = 1000
x_sin = zeros(N,N)
for idx in 1:N
    x_sin[:,idx] = generate_data(N)
end

imshow(x_sin)
colorbar()
title("X matrix, each column is sinusoidal");



In [8]:
# Let's do PCA on that data, get the principle components!
(U, S, V) = svd(x_sin)
S ./= norm(S)
T = U*diagm(S)

# Then we'll plot the first principle component:
plot( T[:,1] )
title( "First principle component" )

figure()
plot( T[:,2] )
title( "Second principle component" );



In [9]:
# Maybe this is just doing sinusoidal decomposition.  Let's try putting two sinusoids in, and see if they come out separately!
x_2sin = zeros(N,N)
for idx = 1:N
    x_2sin[:,idx] = sin(.005*pi*[1:N]) + sin(0.011*pi*[1:N])  + .1*randn(N)
end

# First, plot just a single column
plot( x_2sin[:,1] )
title("Single column of data")
figure()

imshow(x_2sin)
title("X matrix")
colorbar()


Out[9]:
PyObject <matplotlib.colorbar.Colorbar instance at 0x113159ea8>

In [10]:
# Do actual SVD to get the first two principle components
(U, S, V) = svd(x_2sin)
S ./= norm(S)
T = U*diagm(S)

plot( T[:,1] )
title( "First principle component" )

figure()
plot( T[:,2] )
title( "Second principle component" );



In [17]:
plot(S)
xlim(-100, 1000)

S


Out[17]:
1000-element Array{Float64,1}:
 0.995045  
 0.00629586
 0.00624013
 0.00620241
 0.00618755
 0.00614221
 0.00611888
 0.00608994
 0.00607492
 0.00604822
 0.00602216
 0.00601993
 0.00598455
 ⋮         
 5.05985e-5
 4.64949e-5
 4.57034e-5
 3.75169e-5
 3.35376e-5
 3.23794e-5
 2.41543e-5
 1.97525e-5
 1.37247e-5
 1.11447e-5
 6.87003e-6
 7.297e-7  

In [18]:
# SVD didn't separate out the two sinusoids.  This is because they were not linearly independent.
# SVD saw them as the same component.  In order to separate them out, we need to have them vary with respect to eachother
# We're going to do this by randomizing the amplitude of each sinusoid across instances:

x_3sin = zeros(N,N)
for idx = 1:N
    a1 = 1.0*randn()
    a2 = 0.3*randn()
    a3 = 0.1*randn()
    
    sin1 = sin(0.005*pi*[1:N])
    sin2 = sin(0.011*pi*[1:N])
    sin3 = sin(0.007*pi*[1:N])
    noise = 0.1*randn(N)
    
    x_3sin[:,idx] = a1*sin1 + a2*sin2 + a3*sin3 + noise
end

imshow(x_3sin)
colorbar()
title("X matrix with random amplitude sinusoidal components");



In [25]:


In [26]:
# Do actual SVD to get the first two principle components
(U, S, V) = svd(x_3sin)
S ./= norm(S)
T = U*diagm(S)

plot( T[:,1] )
title( "First principle component" )

figure()
plot( T[:,2] )
title( "Second principle component" );



In [27]:
plot( T[:,3] )
title( "Third principle component" );

figure()
plot( T[:,4] )
title( "Fourth principle component" );



In [28]:
# We can try to figure out what principle components are interesting by looking at the value of that "S" vector:
# We're only going to plot the first 40 since we only really care about the first 3
stem(1:40, S[1:40])
xlim([0, 40])
title("Singular values of X");