``````

In [1]:

from __future__ import division
import numpy as np

``````
``````

In [2]:

# You know we'll want to plot this stuff
%matplotlib inline
import matplotlib.pyplot as plt
import prettyplotlib as ppl

``````

The following is "what's important" for the gaussians (we don't actually care if things sum to one...)

``````

In [3]:

# pdf == "Probability Density Function"
def lazy_normal_pdf(x, means, covs):
# This only does a conversion if it's not already an array
x = np.asarray(x)
means = np.asarray(means)
covs = np.asarray(covs)

inv_covs = np.linalg.inv(covs)
# Note - no transposition is necessary in numpy
# And I don't care about all them normalizing constants for now...
return np.exp(-(x - means).dot(inv_covs).dot(x - means))

``````
``````

In [ ]:

means = [1, 2]
covs = [[1, 0], [0, .75]]
lazy_normal_pdf([1, 2], means, covs)

``````
``````

In [ ]:

lazy_normal_pdf([2,2], means, covs)

``````
``````

In [ ]:

lazy_normal_pdf([1,1], means, covs)

``````
``````

In [ ]:

# You can have a full-rank matrix
covs = [[1, 0.5], [0.5, .75]]
lazy_normal_pdf([1,1], means, covs)

``````

Maybe you'd prefer a class?

``````

In [ ]:

class LazyNormal:
def __init__(self, means, covs):
self.means = np.asarray(means)
covs = np.asarray(covs)
# This means we'll be a little more efficient
self.inv_covs = np.linalg.inv(covs)

def pdf(self, x):
x = np.asarray(x)
return np.exp(-(x - means).dot(self.inv_covs).dot(x - means))

``````

Here we create a specific, parameterized distribution

``````

In [ ]:

ln = LazyNormal(means, covs)

``````
``````

In [ ]:

ln.pdf([1, 2])

``````
``````

In [ ]:

ln.pdf([1, 1])

``````
``````

In [ ]:

ln.pdf([2, 2])

``````
``````

In [ ]:

ln.inv_covs

``````

so, what do these multivariate gaussians look like?

``````

In [ ]:

# This is one way to make some clusters...
from sklearn import cluster, datasets
coords, labels = datasets.make_blobs()

``````
``````

In [ ]:

ppl.scatter(coords[:,0], coords[:,1], c=labels)

``````

How would we make distributions with these covariance matrices?

## Built in facilities for working with multivariate normal

First - the pdf. What's the probability of finding this point given certain parameters?

``````

In [4]:

from scipy.stats import multivariate_normal as mn

``````
``````

In [5]:

means = [1, 2]
covs = [[1, 0],
[0, .75]]
lazy_normal_pdf([1, 2], means, covs)

``````
``````

Out[5]:

1.0

``````
``````

In [6]:

mn.pdf([1,2], means, covs)

``````
``````

Out[6]:

0.1837762984739307

``````
``````

In [7]:

mn.pdf([2,2], means, covs)

``````
``````

Out[7]:

0.11146595955293902

``````
``````

In [8]:

mn.pdf([1,3], means, covs)

``````
``````

Out[8]:

0.094353897708959245

``````

How is this implemented?

``````

In [ ]:

mn.pdf??

``````
``````

In [ ]:

mn._logpdf??

``````

Now, generating random values

``````

In [9]:

examples = mn.rvs(means, covs, 1000)
ppl.scatter(examples[:,0], examples[:,1])
plt.axis('equal')

``````
``````

Out[9]:

(-3.0, 5.0, -1.0, 6.0)

``````
``````

In [11]:

covs = [[1,  .5],
[.5, 0.5]]
examples = mn.rvs(means, covs, 1000)
ppl.scatter(examples[:,0], examples[:,1])
plt.axis('equal')

``````
``````

Out[11]:

(-3.0, 5.0, -1.0, 5.0)

``````
``````

In [18]:

# what about truly one-dimensional data?
means = [1, 2]
covs = [[1, .5], [.5, 0]]
examples = mn.rvs(means, covs, 1000)
ppl.scatter(examples[:,0], examples[:,1])

``````
``````

/opt/anaconda/envs/ML/lib/python2.7/site-packages/scipy/stats/_multivariate.py:406: RuntimeWarning: covariance is not positive-semidefinite.
out = np.random.multivariate_normal(mean, cov, size)

Out[18]:

<matplotlib.axes.AxesSubplot at 0x1098c29d0>

``````
``````

In [19]:

basic_pca = PCA()
transformed = basic_pca.fit_transform(examples)
transformed[:,1]

``````
``````

Out[19]:

array([ -4.66914867e-15,  -2.86945052e-15,  -3.06693639e-15,
-3.23297172e-15,  -3.00618797e-15,  -3.22431901e-15,
-3.00818550e-15,  -3.06000705e-15,  -3.14091949e-15,
-2.84861149e-15,  -3.03080174e-15,  -3.06377876e-15,
-2.91783092e-15,  -3.14548653e-15,  -3.13335746e-15,
-2.98797243e-15,  -3.02208435e-15,  -3.21387165e-15,
-3.05456627e-15,  -3.07407459e-15,  -3.23505808e-15,
-3.02410984e-15,  -3.24379384e-15,  -3.22142196e-15,
-3.19493870e-15,  -3.20321211e-15,  -2.92812755e-15,
-3.28104813e-15,  -3.02578112e-15,  -2.90462183e-15,
-3.04882740e-15,  -3.21336251e-15,  -3.27722685e-15,
-2.98988397e-15,  -3.15259881e-15,  -3.01791402e-15,
-3.04133122e-15,  -2.83162997e-15,  -3.00141807e-15,
-3.11736352e-15,  -3.03327681e-15,  -3.16616592e-15,
-3.15357799e-15,  -2.98582672e-15,  -3.23546984e-15,
-3.03632026e-15,  -3.20259833e-15,  -3.11044394e-15,
-3.06869399e-15,  -3.14275149e-15,  -3.12865734e-15,
-3.03502681e-15,  -2.98561992e-15,  -3.21108845e-15,
-3.05884887e-15,  -3.15288367e-15,  -3.01282162e-15,
-3.03056688e-15,  -3.14527007e-15,  -3.06967559e-15,
-2.97779048e-15,  -2.90777298e-15,  -3.16748775e-15,
-2.94315307e-15,  -3.07463691e-15,  -2.91948485e-15,
-3.26619507e-15,  -2.95124199e-15,  -3.12201566e-15,
-3.10420313e-15,  -3.21190023e-15,  -2.94912122e-15,
-3.06413681e-15,  -3.08535702e-15,  -2.83975837e-15,
-3.06475234e-15,  -3.11991935e-15,  -3.07510631e-15,
-3.19010509e-15,  -3.08539682e-15,  -3.18693644e-15,
-3.25762507e-15,  -3.29085847e-15,  -2.95282277e-15,
-3.08663691e-15,  -2.89025626e-15,  -2.87974093e-15,
-3.15161478e-15,  -2.95139634e-15,  -3.05483417e-15,
-2.97701280e-15,  -3.08947193e-15,  -2.99184891e-15,
-3.00856459e-15,  -3.07632176e-15,  -3.12684479e-15,
-3.28119580e-15,  -2.89070597e-15,  -3.03175668e-15,
-3.09719572e-15,  -3.23539013e-15,  -3.00857357e-15,
-3.13054441e-15,  -3.06165919e-15,  -3.17327197e-15,
-3.12509435e-15,  -3.10319153e-15,  -3.00772493e-15,
-3.22275429e-15,  -3.20242807e-15,  -3.08116758e-15,
-3.11566399e-15,  -2.89296797e-15,  -3.16825874e-15,
-3.13252146e-15,  -3.14641485e-15,  -3.06194411e-15,
-3.17938177e-15,  -3.28733911e-15,  -3.19801360e-15,
-3.18556956e-15,  -3.17592883e-15,  -3.07211613e-15,
-3.11532352e-15,  -2.85593893e-15,  -3.00491611e-15,
-3.05685075e-15,  -2.92611216e-15,  -2.94845104e-15,
-3.08721635e-15,  -2.94174282e-15,  -3.00202167e-15,
-3.08871043e-15,  -3.06254272e-15,  -3.31344259e-15,
-3.11514231e-15,  -3.07435466e-15,  -3.29631218e-15,
-3.12971598e-15,  -3.00939767e-15,  -3.13677165e-15,
-3.00287024e-15,  -3.09300721e-15,  -3.00163024e-15,
-3.09635917e-15,  -3.11454744e-15,  -3.16943549e-15,
-3.05897652e-15,  -3.16754665e-15,  -3.18944008e-15,
-2.86810570e-15,  -2.89420197e-15,  -3.06765438e-15,
-3.10107574e-15,  -3.29399262e-15,  -3.15338577e-15,
-3.26917351e-15,  -3.12152044e-15,  -3.13546639e-15,
-3.15745706e-15,  -2.97292625e-15,  -3.14181966e-15,
-3.09756557e-15,  -3.28431601e-15,  -3.08856170e-15,
-3.20077667e-15,  -3.14108934e-15,  -3.07719068e-15,
-3.03872734e-15,  -3.08115410e-15,  -3.18531806e-15,
-3.16725376e-15,  -3.07096622e-15,  -3.14181399e-15,
-2.86071061e-15,  -3.14498433e-15,  -3.08634206e-15,
-3.12359615e-15,  -3.03551703e-15,  -2.82903003e-15,
-3.06299451e-15,  -3.17531691e-15,  -3.04892073e-15,
-2.88318668e-15,  -3.14955733e-15,  -3.17648223e-15,
-2.97148979e-15,  -3.10776065e-15,  -3.37470508e-15,
-3.15009233e-15,  -3.10465425e-15,  -2.99479887e-15,
-3.17660432e-15,  -3.16115926e-15,  -3.04968615e-15,
-3.38319310e-15,  -3.18302037e-15,  -3.18062029e-15,
-3.18831685e-15,  -3.12350443e-15,  -3.16835523e-15,
-2.80098169e-15,  -2.85030047e-15,  -3.07661182e-15,
-3.09309520e-15,  -3.29930123e-15,  -3.13115225e-15,
-3.01458072e-15,  -2.92094313e-15,  -2.91657654e-15,
-2.97417829e-15,  -2.92158194e-15,  -3.04309553e-15,
-3.01286008e-15,  -3.02981615e-15,  -3.13218388e-15,
-3.16141166e-15,  -3.05747067e-15,  -3.23452183e-15,
-3.05660116e-15,  -3.15711049e-15,  -2.93928604e-15,
-2.94777392e-15,  -3.26828786e-15,  -3.09652537e-15,
-2.99453553e-15,  -2.99532995e-15,  -2.99076965e-15,
-3.06269942e-15,  -3.00545003e-15,  -3.09973296e-15,
-2.98693099e-15,  -3.15632062e-15,  -3.28894864e-15,
-3.23448055e-15,  -3.06479522e-15,  -3.04674573e-15,
-2.99784913e-15,  -3.02075530e-15,  -3.19097625e-15,
-3.24998869e-15,  -3.15723989e-15,  -3.10601341e-15,
-2.83499047e-15,  -3.25771394e-15,  -3.01814145e-15,
-3.01391239e-15,  -3.28937871e-15,  -3.01340995e-15,
-3.01166975e-15,  -3.00694567e-15,  -2.99110421e-15,
-3.01350487e-15,  -3.05381224e-15,  -3.21401462e-15,
-2.94270034e-15,  -3.07388562e-15,  -3.16461806e-15,
-3.12956727e-15,  -3.02950768e-15,  -3.30215280e-15,
-3.02279985e-15,  -3.24506830e-15,  -2.97611828e-15,
-2.95690239e-15,  -3.18495047e-15,  -3.02752042e-15,
-3.14199779e-15,  -2.99138672e-15,  -3.12216869e-15,
-3.19349652e-15,  -3.10440644e-15,  -3.19500936e-15,
-2.98357711e-15,  -3.09209554e-15,  -3.16700759e-15,
-3.07490242e-15,  -3.10650885e-15,  -3.05554552e-15,
-3.03459121e-15,  -3.20921495e-15,  -3.01616949e-15,
-3.15827213e-15,  -3.04213424e-15,  -3.06759437e-15,
-3.00860787e-15,  -3.06702226e-15,  -2.98604808e-15,
-2.93924578e-15,  -3.14588127e-15,  -3.05750691e-15,
-3.13853451e-15,  -3.01409038e-15,  -3.12417176e-15,
-3.14513375e-15,  -3.09173003e-15,  -3.04756469e-15,
-3.04445504e-15,  -3.30450496e-15,  -3.01684920e-15,
-2.87403737e-15,  -3.09791948e-15,  -3.20664206e-15,
-3.25797328e-15,  -2.99308476e-15,  -3.07308720e-15,
-3.11681844e-15,  -2.92038747e-15,  -3.05702521e-15,
-3.10981156e-15,  -3.18899495e-15,  -3.08799660e-15,
-3.13212319e-15,  -2.91458385e-15,  -3.01489710e-15,
-3.06713972e-15,  -3.09830045e-15,  -3.15932985e-15,
-3.10664150e-15,  -3.20915108e-15,  -3.15845348e-15,
-3.34297966e-15,  -3.04735246e-15,  -3.22208577e-15,
-2.96142925e-15,  -3.16074191e-15,  -2.91707430e-15,
-3.08456209e-15,  -2.98240370e-15,  -3.28837382e-15,
-3.09860612e-15,  -3.15503836e-15,  -3.22703227e-15,
-3.33285741e-15,  -3.16300386e-15,  -3.02621263e-15,
-2.87765993e-15,  -3.48000889e-15,  -3.05268800e-15,
-3.07098043e-15,  -3.13881176e-15,  -3.14738400e-15,
-3.00369418e-15,  -3.12841700e-15,  -2.99044030e-15,
-3.16323830e-15,  -3.14765149e-15,  -3.04469847e-15,
-3.17179807e-15,  -2.91738249e-15,  -3.39936947e-15,
-3.13103759e-15,  -3.16770964e-15,  -3.05662115e-15,
-3.18150406e-15,  -3.05633199e-15,  -3.08259863e-15,
-2.94256087e-15,  -3.29480346e-15,  -2.96831759e-15,
-3.04746608e-15,  -3.03959837e-15,  -3.16991294e-15,
-3.11445039e-15,  -3.20683296e-15,  -3.09983319e-15,
-2.99647520e-15,  -3.10099782e-15,  -3.07153306e-15,
-3.23529477e-15,  -3.00493164e-15,  -2.95130634e-15,
-3.08081916e-15,  -3.05938250e-15,  -3.07121647e-15,
-3.01570065e-15,  -3.05287174e-15,  -3.29511842e-15,
-3.15499331e-15,  -3.08686897e-15,  -3.13401664e-15,
-3.13673113e-15,  -2.95150442e-15,  -3.17019376e-15,
-3.06862875e-15,  -3.26068643e-15,  -3.07918499e-15,
-3.09881834e-15,  -3.06785487e-15,  -3.09152956e-15,
-3.22466590e-15,  -2.86329113e-15,  -3.02294613e-15,
-3.13267874e-15,  -3.07245379e-15,  -2.83568869e-15,
-3.16240925e-15,  -3.22921996e-15,  -3.14721420e-15,
-3.07642819e-15,  -3.00819137e-15,  -3.09606702e-15,
-3.08159428e-15,  -3.05519191e-15,  -3.26964266e-15,
-3.15177461e-15,  -3.11971644e-15,  -3.20396321e-15,
-3.02351332e-15,  -3.04632379e-15,  -3.13370044e-15,
-2.89628366e-15,  -3.13782269e-15,  -3.11056932e-15,
-2.89701954e-15,  -3.09339072e-15,  -3.13091307e-15,
-3.04831258e-15,  -3.21646898e-15,  -3.10904715e-15,
-2.97609244e-15,  -3.22164347e-15,  -3.09490747e-15,
-3.48635909e-15,  -3.05128679e-15,  -3.11231015e-15,
-3.05777908e-15,  -3.14789462e-15,  -3.09412738e-15,
-3.01911218e-15,  -3.12282811e-15,  -3.14828967e-15,
-2.91377822e-15,  -3.15740557e-15,  -3.11272263e-15,
-3.08355258e-15,  -3.07093583e-15,  -3.11866189e-15,
-3.00196143e-15,  -3.07165100e-15,  -3.19562771e-15,
-3.00258976e-15,  -3.04296774e-15,  -3.14255089e-15,
-3.03942040e-15,  -3.06623320e-15,  -3.16115609e-15,
-3.08681974e-15,  -3.13869866e-15,  -2.97293263e-15,
-2.94912630e-15,  -3.12947165e-15,  -3.40496156e-15,
-2.98638219e-15,  -3.26014944e-15,  -2.91915165e-15,
-3.14109130e-15,  -3.06833180e-15,  -3.06044590e-15,
-3.27653861e-15,  -2.93447016e-15,  -3.02057569e-15,
-3.16990866e-15,  -3.11435910e-15,  -3.12422212e-15,
-3.02477237e-15,  -3.04498530e-15,  -3.24900294e-15,
-3.14626438e-15,  -3.13128223e-15,  -3.14559436e-15,
-3.00149060e-15,  -2.98508917e-15,  -3.05293200e-15,
-3.06474250e-15,  -2.99690786e-15,  -3.01215600e-15,
-2.95086721e-15,  -3.10554632e-15,  -3.07897550e-15,
-3.06093071e-15,  -3.13911043e-15,  -3.02179861e-15,
-3.11287183e-15,  -3.04132186e-15,  -3.13689525e-15,
-2.98898107e-15,  -3.11689892e-15,  -3.08903880e-15,
-3.14494732e-15,  -3.07735456e-15,  -3.03446047e-15,
-3.02292800e-15,  -2.85563413e-15,  -3.05025356e-15,
-2.98902121e-15,  -3.08563925e-15,  -3.11701576e-15,
-2.84436342e-15,  -2.91989400e-15,  -3.13362124e-15,
-3.13991108e-15,  -3.07420625e-15,  -3.17314209e-15,
-3.18812657e-15,  -3.11359591e-15,  -3.08825310e-15,
-3.11047414e-15,  -3.32665287e-15,  -3.04715443e-15,
-3.01300395e-15,  -2.97630714e-15,  -3.01603804e-15,
-2.98993500e-15,  -2.98232760e-15,  -3.04953327e-15,
-3.21356092e-15,  -3.11380924e-15,  -3.15686341e-15,
-3.23588917e-15,  -3.14169563e-15,  -3.06709575e-15,
-3.13679726e-15,  -3.06183666e-15,  -3.08235299e-15,
-2.97726579e-15,  -2.99791854e-15,  -3.02429520e-15,
-3.20319753e-15,  -3.21403809e-15,  -3.05332617e-15,
-3.14761198e-15,  -2.86102020e-15,  -2.91666366e-15,
-2.92207237e-15,  -3.26904504e-15,  -3.22167582e-15,
-3.02953264e-15,  -2.91109177e-15,  -2.99191849e-15,
-3.00725750e-15,  -3.16689129e-15,  -3.08798043e-15,
-3.01907568e-15,  -3.21635591e-15,  -3.17384185e-15,
-3.01866806e-15,  -3.17041045e-15,  -3.00106538e-15,
-2.97018782e-15,  -3.11360893e-15,  -3.12990469e-15,
-3.08610377e-15,  -3.32257094e-15,  -3.13775856e-15,
-3.06556641e-15,  -3.18580731e-15,  -3.13111310e-15,
-3.21902017e-15,  -2.88788062e-15,  -3.21858395e-15,
-3.06583155e-15,  -3.01331447e-15,  -2.93225234e-15,
-3.14630505e-15,  -2.93065375e-15,  -3.04942144e-15,
-3.13152187e-15,  -2.98121829e-15,  -3.18247200e-15,
-3.17843454e-15,  -3.03905757e-15,  -3.14580572e-15,
-3.16474452e-15,  -3.07712453e-15,  -3.25698295e-15,
-3.22581252e-15,  -3.15641123e-15,  -3.07861425e-15,
-3.08201825e-15,  -3.31444070e-15,  -2.94485156e-15,
-3.02875912e-15,  -3.06940716e-15,  -2.95327299e-15,
-3.15477708e-15,  -3.01919662e-15,  -3.19033309e-15,
-3.04391764e-15,  -3.12945179e-15,  -3.06797763e-15,
-3.07221902e-15,  -3.17825667e-15,  -2.97721928e-15,
-3.18837734e-15,  -3.08179956e-15,  -3.24607632e-15,
-2.93987881e-15,  -3.05882684e-15,  -3.18579108e-15,
-2.95671415e-15,  -3.20162199e-15,  -3.17402599e-15,
-3.16306907e-15,  -3.06880334e-15,  -2.96304329e-15,
-2.90827245e-15,  -3.11322643e-15,  -3.07274118e-15,
-3.18764641e-15,  -3.06710018e-15,  -3.11254101e-15,
-2.96995932e-15,  -3.10585106e-15,  -2.93237760e-15,
-3.05763925e-15,  -3.26809863e-15,  -3.17765712e-15,
-2.86361226e-15,  -3.03225971e-15,  -3.00091479e-15,
-3.10606495e-15,  -3.09882265e-15,  -3.15678692e-15,
-3.09762026e-15,  -2.97686498e-15,  -2.96486699e-15,
-2.95249463e-15,  -3.11791444e-15,  -3.26345090e-15,
-3.08795919e-15,  -3.12829105e-15,  -3.18458394e-15,
-3.15722041e-15,  -2.89384365e-15,  -3.03330051e-15,
-3.22932379e-15,  -3.01477788e-15,  -3.21274453e-15,
-3.37022997e-15,  -3.18282373e-15,  -3.27404825e-15,
-3.05091732e-15,  -2.79300279e-15,  -3.24426165e-15,
-3.07108512e-15,  -2.94338117e-15,  -3.15999075e-15,
-2.89827902e-15,  -3.10309659e-15,  -2.88216500e-15,
-3.17141883e-15,  -3.09982128e-15,  -3.02096389e-15,
-3.08854749e-15,  -3.20566115e-15,  -3.33018320e-15,
-3.23118446e-15,  -3.09739879e-15,  -2.96575689e-15,
-3.02906540e-15,  -3.12416931e-15,  -2.99431971e-15,
-3.09773056e-15,  -3.27903659e-15,  -3.02563052e-15,
-3.31398915e-15,  -3.09034014e-15,  -3.17237082e-15,
-3.03520852e-15,  -3.08365956e-15,  -3.07735276e-15,
-3.06588114e-15,  -3.12286176e-15,  -3.00105998e-15,
-3.04551023e-15,  -3.14697237e-15,  -3.30654186e-15,
-3.05367626e-15,  -3.15235455e-15,  -3.09652062e-15,
-3.17651855e-15,  -2.97930560e-15,  -3.11957630e-15,
-3.07104573e-15,  -3.07436170e-15,  -3.05919393e-15,
-2.96454302e-15,  -3.00549225e-15,  -3.02097708e-15,
-3.21392382e-15,  -3.06147001e-15,  -3.05795356e-15,
-3.20331894e-15,  -3.10589263e-15,  -2.90625388e-15,
-3.10455579e-15,  -2.86905610e-15,  -3.25326235e-15,
-3.11756795e-15,  -3.25737214e-15,  -3.14049234e-15,
-3.07174757e-15,  -2.99486012e-15,  -2.86116476e-15,
-3.05067222e-15,  -3.16760490e-15,  -3.04113382e-15,
-3.22870406e-15,  -3.08781733e-15,  -3.01015037e-15,
-3.18281879e-15,  -3.16719523e-15,  -2.92453756e-15,
-3.15376639e-15,  -3.09990655e-15,  -3.15187798e-15,
-3.15262604e-15,  -3.24771348e-15,  -3.28566315e-15,
-3.15683014e-15,  -3.14947162e-15,  -3.19947714e-15,
-3.17629317e-15,  -3.18270561e-15,  -3.19907287e-15,
-3.15919614e-15,  -3.02125109e-15,  -3.07979814e-15,
-3.03523488e-15,  -2.95143319e-15,  -3.04651829e-15,
-2.98604247e-15,  -3.12025491e-15,  -2.84576061e-15,
-3.13569426e-15,  -3.34318683e-15,  -3.08022350e-15,
-2.86914810e-15,  -3.15531145e-15,  -3.11326486e-15,
-3.06914972e-15,  -3.06375249e-15,  -2.93009266e-15,
-2.89203331e-15,  -2.96379726e-15,  -3.17252528e-15,
-3.12716274e-15,  -3.13821012e-15,  -3.05451609e-15,
-3.04040489e-15,  -3.08221559e-15,  -2.88751950e-15,
-3.00719001e-15,  -2.88558820e-15,  -2.96864904e-15,
-3.05184558e-15,  -2.99811734e-15,  -2.92872856e-15,
-3.22903125e-15,  -3.11339951e-15,  -3.22449980e-15,
-3.34735341e-15,  -3.03973492e-15,  -2.94942590e-15,
-3.05679977e-15,  -3.00086733e-15,  -3.14066375e-15,
-3.20988847e-15,  -3.09118657e-15,  -3.00611857e-15,
-2.96748822e-15,  -2.92212458e-15,  -3.24280209e-15,
-3.14481949e-15,  -3.10587143e-15,  -2.96580252e-15,
-3.00184644e-15,  -3.02205139e-15,  -3.04209866e-15,
-3.17682266e-15,  -3.19970706e-15,  -3.14141950e-15,
-3.27896936e-15,  -3.03008423e-15,  -3.12215859e-15,
-3.23222598e-15,  -3.17102100e-15,  -3.09398678e-15,
-2.82910577e-15,  -3.29274263e-15,  -2.97647588e-15,
-3.04539073e-15,  -3.16800900e-15,  -3.15398542e-15,
-3.03363930e-15,  -3.21641022e-15,  -3.20047084e-15,
-3.12869594e-15,  -3.01823651e-15,  -2.96690131e-15,
-3.05426933e-15,  -3.11940490e-15,  -2.94540496e-15,
-3.19187907e-15,  -3.18676988e-15,  -3.17780975e-15,
-2.97435437e-15,  -2.87090222e-15,  -3.07938900e-15,
-3.21643493e-15,  -3.12799855e-15,  -2.99081599e-15,
-3.40431243e-15,  -3.09438332e-15,  -3.00133121e-15,
-3.22541202e-15,  -3.12285902e-15,  -3.09491736e-15,
-3.14172999e-15,  -3.04146648e-15,  -2.98455121e-15,
-3.11852052e-15,  -2.98138111e-15,  -3.22790170e-15,
-3.09158955e-15,  -3.00765412e-15,  -3.01363603e-15,
-3.23649586e-15,  -3.04874280e-15,  -2.96326761e-15,
-3.10363763e-15,  -3.27121357e-15,  -3.05046621e-15,
-3.15021815e-15,  -3.31612281e-15,  -3.19997058e-15,
-3.28221699e-15,  -3.15858034e-15,  -3.05530193e-15,
-3.13218244e-15,  -3.04847053e-15,  -3.04736499e-15,
-3.02517380e-15,  -3.03929168e-15,  -3.09761736e-15,
-3.11078663e-15,  -3.09408052e-15,  -3.10944939e-15,
-3.14508499e-15,  -3.00168453e-15,  -2.99894704e-15,
-3.16797341e-15,  -3.12890046e-15,  -3.14879379e-15,
-3.02608926e-15,  -3.04493359e-15,  -2.81890271e-15,
-3.22478061e-15,  -2.98906826e-15,  -2.90395459e-15,
-3.06478996e-15,  -2.99974975e-15,  -3.03271133e-15,
-2.98675847e-15,  -3.31188082e-15,  -3.35152951e-15,
-3.10881772e-15,  -3.14049458e-15,  -3.22502470e-15,
-3.19152899e-15,  -3.10680470e-15,  -2.94541738e-15,
-2.93021258e-15,  -3.25246951e-15,  -2.99130599e-15,
-3.16680177e-15,  -2.95567337e-15,  -3.17710900e-15,
-3.03461729e-15,  -2.93682320e-15,  -3.14108561e-15,
-3.23426607e-15,  -3.10995862e-15,  -3.03164403e-15,
-2.91328691e-15,  -2.92198067e-15,  -3.11786321e-15,
-3.10652264e-15,  -3.28234045e-15,  -3.15126143e-15,
-3.11958887e-15,  -3.08322359e-15,  -3.28274742e-15,
-2.88735846e-15,  -3.09030785e-15,  -2.96413909e-15,
-3.19089568e-15,  -3.13597752e-15,  -2.87163097e-15,
-3.18900539e-15,  -3.12385159e-15,  -3.00907129e-15,
-3.03427099e-15,  -3.11621215e-15,  -3.18953051e-15,
-3.12245765e-15,  -3.13245121e-15,  -3.30754366e-15,
-3.22175638e-15,  -3.00734873e-15,  -2.90595519e-15,
-3.24180649e-15,  -3.07649638e-15,  -3.02882168e-15,
-3.17965710e-15,  -3.02491181e-15,  -3.06035283e-15,
-3.08184216e-15,  -3.13974860e-15,  -2.95592732e-15,
-3.19289449e-15,  -3.01864437e-15,  -3.27824040e-15,
-3.17588276e-15,  -3.01967635e-15,  -3.00016924e-15,
-3.05279213e-15,  -2.98398852e-15,  -3.21212825e-15,
-2.87948825e-15,  -3.06549326e-15,  -3.06517632e-15,
-3.16757786e-15,  -3.17419557e-15,  -3.10551440e-15,
-3.09957943e-15,  -3.06217454e-15,  -3.08624267e-15,
-3.05507874e-15,  -3.06430561e-15,  -3.21416208e-15,
-2.97723491e-15,  -2.94508887e-15,  -2.99988946e-15,
-2.96651729e-15,  -3.08798815e-15,  -3.17197965e-15,
-3.10473305e-15,  -3.31133821e-15,  -3.09629689e-15,
-3.28739554e-15,  -3.03890738e-15,  -3.29411560e-15,
-3.25808893e-15,  -3.01262428e-15,  -3.08657938e-15,
-3.06715920e-15,  -3.12601369e-15,  -3.02643418e-15,
-3.26229046e-15,  -3.07422003e-15,  -3.05312986e-15,
-3.17495682e-15,  -3.20440216e-15,  -3.17552893e-15,
-3.04130964e-15,  -2.93569540e-15,  -3.04914670e-15,
-3.07767961e-15,  -3.14510150e-15,  -3.09349422e-15,
-3.25941813e-15,  -3.18590891e-15,  -3.11553625e-15,
-3.24229958e-15,  -3.26827295e-15,  -2.96902738e-15,
-3.15400097e-15,  -3.13847921e-15,  -3.03834017e-15,
-3.10236631e-15,  -2.88903472e-15,  -3.06622225e-15,
-2.92935184e-15,  -3.12110572e-15,  -3.05539234e-15,
-3.24691834e-15,  -3.15187200e-15,  -2.94430679e-15,
-3.14623973e-15,  -3.11710904e-15,  -3.12711077e-15,
-3.21706430e-15,  -2.98278572e-15,  -3.07374042e-15,
-2.95233197e-15,  -3.04262903e-15,  -3.00055851e-15,
-3.16846765e-15])

``````
``````

In [20]:

basic_pca.explained_variance_ratio_

``````
``````

Out[20]:

array([  1.00000000e+00,   7.33245551e-30])

``````

## What does PCA do to these multivariate normal distributions?

``````

In [12]:

from sklearn.decomposition import PCA
PCA??

``````
``````

In [13]:

basic_pca = PCA()
transformed = basic_pca.fit_transform(examples)

``````
``````

In [14]:

ppl.scatter(transformed[:,0], transformed[:,1])
plt.axis('equal')

``````
``````

Out[14]:

(-4.0, 5.0, -2.0, 2.0)

``````
``````

In [15]:

np.cov(transformed, rowvar=False)

``````
``````

Out[15]:

array([[  1.33872176e+00,  -8.95735493e-17],
[ -8.95735493e-17,   2.03924160e-01]])

``````

What if we whiten the PCA components?

``````

In [16]:

whitened_pca = PCA(whiten=True)
transformed = whitened_pca.fit_transform(examples)
ppl.scatter(transformed[:,0], transformed[:,1])
plt.axis('equal')

``````
``````

Out[16]:

(-4.0, 4.0, -4.0, 4.0)

``````
``````

In [17]:

np.cov(transformed.T)

``````
``````

Out[17]:

array([[  1.00100100e+00,  -1.50252405e-16],
[ -1.50252405e-16,   1.00100100e+00]])

``````

What if we wanted to work with a variety of multivariate normal clusters?

``````

In [21]:

# But maybe we'd like some variety covariance matrices?
means = [np.array([-2, -5]), np.array([0, 1]), np.array([-3,1])]
gen_covs = [np.array([[1, .5], [.5, .75]]),
np.array([[1, -.5], [-.5, .75]]),
np.array([[1, 0], [0, .75]])]

``````
``````

In [22]:

examples = []
for m, c in zip(means, gen_covs):
examples.append(mn.rvs(m, c, 1000))

``````
``````

In [23]:

for e in examples:
ppl.scatter(e[:,0], e[:,1])
plt.axis('equal')

``````
``````

Out[23]:

(-8.0, 4.0, -10.0, 6.0)

``````
``````

In [24]:

combined = np.concatenate(examples)

``````
``````

In [26]:

pca = PCA()
transformed = pca.fit_transform(combined)

``````
``````

In [27]:

ppl.scatter(transformed[:,0], transformed[:,1])
plt.axis('equal')

``````
``````

Out[27]:

(-8.0, 6.0, -6.0, 6.0)

``````
``````

In [28]:

pca1 = PCA(n_components=1)
transformed = pca1.fit_transform(combined)

``````
``````

In [29]:

transformed.shape

``````
``````

Out[29]:

(3000, 1)

``````
``````

In [31]:

ppl.scatter(transformed, np.zeros_like(transformed))

``````
``````

Out[31]:

<matplotlib.axes.AxesSubplot at 0x10a23b210>

``````
``````

In [35]:

pca.components_[0].dot(<something>)

``````
``````

Out[35]:

array([ 0.09794438,  0.99519189])

``````

# Here is a place to play with GMM

``````

In [ ]:

``````