PCA

Run in on Jupyter Notebook.


In [1]:
#r "nuget:MathNet.Numerics"
#r "nuget:MathNet.Numerics.FSharp"
#r "nuget:XPlot.Plotly"

In [2]:
open MathNet.Numerics
open MathNet.Numerics.LinearAlgebra
open MathNet.Numerics.LinearAlgebra.Factorization
open MathNet.Numerics.Statistics
open XPlot.Plotly


Installing package MathNet.Numerics.done!
Installing package MathNet.Numerics.FSharp.done!
Installing package XPlot.Plotly.done!

In [3]:
// Some helper function related to building charts.
let line   nm clr (xs, ys) = Scatter (x = xs, y = ys, name = nm, line = Line(color = clr))
let points nm clr (xs, ys) = Scatter (x = xs, y = ys, mode = "markers", name = nm, marker = Marker (color=clr, size = 8))

In [4]:
// The data set
let xs = vector [2.5; 0.5; 2.2; 1.9; 3.1; 2.3; 2.0; 1.0; 1.5; 1.1]
let ys = vector [2.4; 0.7; 2.9; 2.2; 3.0; 2.7; 1.6; 1.1; 1.6; 0.9]

points "XY" "red" (xs, ys) 
|> Chart.Plot 
|> Chart.WithWidth 700 
|> Chart.WithHeight 500 
|> Chart.WithTitle "XY"


Out[4]:

In [5]:
// Compute the mean and normalize the data
let mx = xs |> Statistics.Mean
let my = ys |> Statistics.Mean

let nxs = xs |> Vector.map (fun x -> x - mx)
let nys = ys |> Vector.map (fun y -> y - my)

[points "NXY" "red" (nxs, nys)] 
|> Chart.Plot 
|> Chart.WithWidth 700 
|> Chart.WithHeight 500 
|> Chart.WithTitle "Normalized"


Out[5]:

In [6]:
// Compute the covariance matrix

let cxx = Statistics.Covariance(nxs, nxs)
let cyy = Statistics.Covariance(nys, nys)
let cxy = Statistics.Covariance(nxs, nys)
let cyx = Statistics.Covariance(nys, nxs)
let cov = matrix [[cxx; cxy]; [cyx; cyy]]

printfn "cov=%O" cov


cov=DenseMatrix 2x2-Double
0.616556  0.615444
0.615444  0.716556

Out[6]:

In [7]:
// Compute the Eigen values and vectors
let evd = cov.Evd()

let evals = evd.EigenValues |> Vector.map (fun x -> x.Real)
let evecs = evd.EigenVectors

printfn "evals=%O" evals
printfn "evecs=%O" evecs


evals=DenseVector 2-Double
0.0490834
  1.28403

evecs=DenseMatrix 2x2-Double
-0.735179  0.677873
 0.677873  0.735179

Out[7]:

In [8]:
// Compute the principal components (the ones with lasger eigen values)
let idx = if evals.[0] < evals.[1] then 1 else 0

printfn "idx=%d" idx


idx=1
Out[8]:

In [9]:
// Compute the weight.
let fvec = evecs.Row(idx)
    
printfn "X=%.4f" fvec.[0]
printfn "Y=%.4f" fvec.[1]

let w = fvec.[1] / fvec.[0]
printfn "w=%.4f" w


X=0.6779
Y=0.7352
w=1.0845
Out[9]:

In [10]:
// Plot the principal line
let pxs = [-2.; -1.; 0.; 1.; 2.]
let pys = pxs |> List.map (fun x -> x * w)

let trace1 = line "pca" "blue" (pxs, pys)
let trace2 = points "nxy" "red" (nxs, nys)

[trace1; trace2]
|> Chart.Plot
|> Chart.WithWidth 700
|> Chart.WithHeight 500
|> Chart.WithTitle "PCA"


Out[10]:

In [11]:
// Project the points to the PCA
let ndt = matrix [nxs; nys]
let res = evecs * ndt 

printfn "xs\n%O" res

let rxs = res.Row(idx)
let rys = res.Row(0)

let trace2 = points "rxy" "red" (rxs, rys)

[trace2]
|> Chart.Plot
|> Chart.WithWidth 700
|> Chart.WithHeight 500
|> Chart.WithTitle "PCA"


xs
DenseMatrix 2x10-Double
-0.175115  0.142857  0.384375  0.130417  -0.209498  ..  0.0177646  -0.162675
  0.82797  -1.77758  0.992197   0.27421     1.6758  ..  -0.438046   -1.22382

Out[11]:

In [12]:
let fxs = rxs |> Vector.toList
let fys = fxs |> List.map (fun _ -> 0.)

let trace2 = points "fxy" "red" (fxs, fys)

[trace2]
|> Chart.Plot
|> Chart.WithWidth 700
|> Chart.WithHeight 500
|> Chart.WithTitle "PCA"


Out[12]:

In [ ]: