notebook.community

Edit and run



In [12]:

    
# Import python plotting interface
using PyPlot

# Generate simple 2-dimensional data that is rotated slightly
# We use complex numbers so that rotation is easy, via complex exponentials
x_complex = .1*randn(1000) + 1im*randn(1000)
x_complex .*= exp(.7im)

# Plot the real and imaginary axes
x_2d = [real(x_complex)', imag(x_complex)'];
scatter(x_2d[1,:], x_2d[2,:])
axis([-3, 3, -3, 3])
title("Original 2D data");



In [2]:

    
# Perform SVD, normalize the singular values and get matrix T, which contains our principle components
(U, S, V) = svd(x_2d)
S ./= norm(S)
T = U*diagm(S)

# Grab the principle components
comp1 = T[:,1]
comp2 = T[:,2]

# Plot the original data
scatter(x_2d[1,:], x_2d[2,:])
axis([-3, 3, -3, 3])

# Plot the principle components
gain = 2
plot([gain*comp1[1], 0, gain*comp2[1] ], [gain*comp1[2], 0, gain*comp2[2]], "r")
legend(["principle components", "data"])
title("2D Data with detected principle components");



In [3]:

    
# Generate a single "column" of data
function generate_data(N)
    return sin(.005*pi*[1:N]) + .1*randn(N)
end

# Make the data 1000 points long
N = 1000
plot(generate_data(N))
title("Single instance of N-dimensional data (N = $(N))");



In [4]:

    
# Now, let's generate a 1000x1000 matrix where each column
# is a single instance of data, generated as above
N = 1000
x_sin = zeros(N,N)
for idx in 1:N
    x_sin[:,idx] = generate_data(N)
end

imshow(x_sin)
colorbar()
title("X matrix, each column is sinusoidal");



In [5]:

    
# Let's do PCA on that data, get the principle components!
(U, S, V) = svd(x_sin)
S ./= norm(S)
T = U*diagm(S)

# Then we'll plot the first principle component:
plot( T[:,1] )
title( "First principle component" )

figure()
plot( T[:,2] )
title( "Second principle component" );



In [6]:

    
# Maybe this is just doing sinusoidal decomposition.  Let's try putting two sinusoids in, and see if they come out separately!
x_2sin = zeros(N,N)
for idx = 1:N
    x_2sin[:,idx] = sin(.005*pi*[1:N]) + sin(0.011*pi*[1:N])  + .1*randn(N)
end

# First, plot just a single column
plot( x_2sin[:,1] )
title("Single column of data")
figure()

imshow(x_2sin)
title("X matrix")
colorbar()









    












    












    Out[6]:





PyObject <matplotlib.colorbar.Colorbar instance at 0x10e0c71b8>



In [7]:

    
# Do actual SVD to get the first two principle components
(U, S, V) = svd(x_2sin)
S ./= norm(S)
T = U*diagm(S)

plot( T[:,1] )
title( "First principle component" )

figure()
plot( T[:,2] )
title( "Second principle component" );



In [8]:

    
# SVD didn't separate out the two sinusoids.  This is because they were not linearly independent.
# SVD saw them as the same component.  In order to separate them out, we need to have them vary with respect to eachother
# We're going to do this by randomizing the amplitude of each sinusoid across instances:

x_3sin = zeros(N,N)
for idx = 1:N
    a1 = 1.0*randn()
    a2 = 0.3*randn()
    a3 = 0.1*randn()
    
    sin1 = sin(0.005*pi*[1:N])
    sin2 = sin(0.011*pi*[1:N])
    sin3 = sin(0.007*pi*[1:N])
    noise = 0.1*randn(N)
    
    x_3sin[:,idx] = a1*sin1 + a2*sin2 + a3*sin3 + noise
end

imshow(x_3sin)
colorbar()
title("X matrix with random amplitude sinusoidal components");



In [9]:

    
# Do actual SVD to get the first two principle components
(U, S, V) = svd(x_3sin)
S ./= norm(S)
T = U*diagm(S)

plot( T[:,1] )
title( "First principle component" )

figure()
plot( T[:,2] )
title( "Second principle component" );



In [10]:

    
plot( T[:,3] )
title( "Third principle component" );

figure()
plot( T[:,4] )
title( "Fourth principle component" );



In [11]:

    
# We can try to figure out what principle components are interesting by looking at the value of that "S" vector:
# We're only going to plot the first 40 since we only really care about the first 3
stem(1:40, S[1:40])
xlim([0, 40])
title("Singular values of X");