In [1]:
from __future__ import division
import numpy as np

In [2]:
# You know we'll want to plot this stuff
%matplotlib inline
import matplotlib.pyplot as plt
import prettyplotlib as ppl

The following is "what's important" for the gaussians (we don't actually care if things sum to one...)


In [3]:
# pdf == "Probability Density Function"
def lazy_normal_pdf(x, means, covs):
    # This only does a conversion if it's not already an array
    x = np.asarray(x)
    means = np.asarray(means)
    covs = np.asarray(covs)
    
    inv_covs = np.linalg.inv(covs)
    # Note - no transposition is necessary in numpy
    # And I don't care about all them normalizing constants for now...
    return np.exp(-(x - means).dot(inv_covs).dot(x - means))

In [ ]:
means = [1, 2]
covs = [[1, 0], [0, .75]]
lazy_normal_pdf([1, 2], means, covs)

In [ ]:
lazy_normal_pdf([2,2], means, covs)

In [ ]:
lazy_normal_pdf([1,1], means, covs)

In [ ]:
# You can have a full-rank matrix
covs = [[1, 0.5], [0.5, .75]]
lazy_normal_pdf([1,1], means, covs)

Maybe you'd prefer a class?


In [ ]:
class LazyNormal:
    def __init__(self, means, covs):
        self.means = np.asarray(means)
        covs = np.asarray(covs)
        # This means we'll be a little more efficient
        self.inv_covs = np.linalg.inv(covs)
        
    def pdf(self, x):
        x = np.asarray(x)
        return np.exp(-(x - means).dot(self.inv_covs).dot(x - means))

Here we create a specific, parameterized distribution


In [ ]:
ln = LazyNormal(means, covs)

In [ ]:
ln.pdf([1, 2])

In [ ]:
ln.pdf([1, 1])

In [ ]:
ln.pdf([2, 2])

In [ ]:
ln.inv_covs

so, what do these multivariate gaussians look like?


In [ ]:
# This is one way to make some clusters...
from sklearn import cluster, datasets
coords, labels = datasets.make_blobs()

In [ ]:
ppl.scatter(coords[:,0], coords[:,1], c=labels)

How would we make distributions with these covariance matrices?

Built in facilities for working with multivariate normal

First - the pdf. What's the probability of finding this point given certain parameters?


In [4]:
from scipy.stats import multivariate_normal as mn

In [5]:
means = [1, 2]
covs = [[1, 0], 
        [0, .75]]
lazy_normal_pdf([1, 2], means, covs)


Out[5]:
1.0

In [6]:
mn.pdf([1,2], means, covs)


Out[6]:
0.1837762984739307

In [7]:
mn.pdf([2,2], means, covs)


Out[7]:
0.11146595955293902

In [8]:
mn.pdf([1,3], means, covs)


Out[8]:
0.094353897708959245

How is this implemented?


In [ ]:
mn.pdf??

In [ ]:
mn._logpdf??

Now, generating random values


In [9]:
examples = mn.rvs(means, covs, 1000)
ppl.scatter(examples[:,0], examples[:,1])
plt.axis('equal')


Out[9]:
(-3.0, 5.0, -1.0, 6.0)

In [11]:
# what about off-diagonal covariance?
covs = [[1,  .5], 
        [.5, 0.5]]
examples = mn.rvs(means, covs, 1000)
ppl.scatter(examples[:,0], examples[:,1])
plt.axis('equal')


Out[11]:
(-3.0, 5.0, -1.0, 5.0)

In [18]:
# what about truly one-dimensional data?
means = [1, 2]
covs = [[1, .5], [.5, 0]]
examples = mn.rvs(means, covs, 1000)
ppl.scatter(examples[:,0], examples[:,1])


/opt/anaconda/envs/ML/lib/python2.7/site-packages/scipy/stats/_multivariate.py:406: RuntimeWarning: covariance is not positive-semidefinite.
  out = np.random.multivariate_normal(mean, cov, size)
Out[18]:
<matplotlib.axes.AxesSubplot at 0x1098c29d0>

In [19]:
basic_pca = PCA()
transformed = basic_pca.fit_transform(examples)
transformed[:,1]


Out[19]:
array([ -4.66914867e-15,  -2.86945052e-15,  -3.06693639e-15,
        -3.23297172e-15,  -3.00618797e-15,  -3.22431901e-15,
        -3.00818550e-15,  -3.06000705e-15,  -3.14091949e-15,
        -2.84861149e-15,  -3.03080174e-15,  -3.06377876e-15,
        -2.91783092e-15,  -3.14548653e-15,  -3.13335746e-15,
        -2.98797243e-15,  -3.02208435e-15,  -3.21387165e-15,
        -3.05456627e-15,  -3.07407459e-15,  -3.23505808e-15,
        -3.02410984e-15,  -3.24379384e-15,  -3.22142196e-15,
        -3.19493870e-15,  -3.20321211e-15,  -2.92812755e-15,
        -3.28104813e-15,  -3.02578112e-15,  -2.90462183e-15,
        -3.04882740e-15,  -3.21336251e-15,  -3.27722685e-15,
        -2.98988397e-15,  -3.15259881e-15,  -3.01791402e-15,
        -3.04133122e-15,  -2.83162997e-15,  -3.00141807e-15,
        -3.11736352e-15,  -3.03327681e-15,  -3.16616592e-15,
        -3.15357799e-15,  -2.98582672e-15,  -3.23546984e-15,
        -3.03632026e-15,  -3.20259833e-15,  -3.11044394e-15,
        -3.06869399e-15,  -3.14275149e-15,  -3.12865734e-15,
        -3.03502681e-15,  -2.98561992e-15,  -3.21108845e-15,
        -3.05884887e-15,  -3.15288367e-15,  -3.01282162e-15,
        -3.03056688e-15,  -3.14527007e-15,  -3.06967559e-15,
        -2.97779048e-15,  -2.90777298e-15,  -3.16748775e-15,
        -2.94315307e-15,  -3.07463691e-15,  -2.91948485e-15,
        -3.26619507e-15,  -2.95124199e-15,  -3.12201566e-15,
        -3.10420313e-15,  -3.21190023e-15,  -2.94912122e-15,
        -3.06413681e-15,  -3.08535702e-15,  -2.83975837e-15,
        -3.06475234e-15,  -3.11991935e-15,  -3.07510631e-15,
        -3.19010509e-15,  -3.08539682e-15,  -3.18693644e-15,
        -3.25762507e-15,  -3.29085847e-15,  -2.95282277e-15,
        -3.08663691e-15,  -2.89025626e-15,  -2.87974093e-15,
        -3.15161478e-15,  -2.95139634e-15,  -3.05483417e-15,
        -2.97701280e-15,  -3.08947193e-15,  -2.99184891e-15,
        -3.00856459e-15,  -3.07632176e-15,  -3.12684479e-15,
        -3.28119580e-15,  -2.89070597e-15,  -3.03175668e-15,
        -3.09719572e-15,  -3.23539013e-15,  -3.00857357e-15,
        -3.13054441e-15,  -3.06165919e-15,  -3.17327197e-15,
        -3.12509435e-15,  -3.10319153e-15,  -3.00772493e-15,
        -3.22275429e-15,  -3.20242807e-15,  -3.08116758e-15,
        -3.11566399e-15,  -2.89296797e-15,  -3.16825874e-15,
        -3.13252146e-15,  -3.14641485e-15,  -3.06194411e-15,
        -3.17938177e-15,  -3.28733911e-15,  -3.19801360e-15,
        -3.18556956e-15,  -3.17592883e-15,  -3.07211613e-15,
        -3.11532352e-15,  -2.85593893e-15,  -3.00491611e-15,
        -3.05685075e-15,  -2.92611216e-15,  -2.94845104e-15,
        -3.08721635e-15,  -2.94174282e-15,  -3.00202167e-15,
        -3.08871043e-15,  -3.06254272e-15,  -3.31344259e-15,
        -3.11514231e-15,  -3.07435466e-15,  -3.29631218e-15,
        -3.12971598e-15,  -3.00939767e-15,  -3.13677165e-15,
        -3.00287024e-15,  -3.09300721e-15,  -3.00163024e-15,
        -3.09635917e-15,  -3.11454744e-15,  -3.16943549e-15,
        -3.05897652e-15,  -3.16754665e-15,  -3.18944008e-15,
        -2.86810570e-15,  -2.89420197e-15,  -3.06765438e-15,
        -3.10107574e-15,  -3.29399262e-15,  -3.15338577e-15,
        -3.26917351e-15,  -3.12152044e-15,  -3.13546639e-15,
        -3.15745706e-15,  -2.97292625e-15,  -3.14181966e-15,
        -3.09756557e-15,  -3.28431601e-15,  -3.08856170e-15,
        -3.20077667e-15,  -3.14108934e-15,  -3.07719068e-15,
        -3.03872734e-15,  -3.08115410e-15,  -3.18531806e-15,
        -3.16725376e-15,  -3.07096622e-15,  -3.14181399e-15,
        -2.86071061e-15,  -3.14498433e-15,  -3.08634206e-15,
        -3.12359615e-15,  -3.03551703e-15,  -2.82903003e-15,
        -3.06299451e-15,  -3.17531691e-15,  -3.04892073e-15,
        -2.88318668e-15,  -3.14955733e-15,  -3.17648223e-15,
        -2.97148979e-15,  -3.10776065e-15,  -3.37470508e-15,
        -3.15009233e-15,  -3.10465425e-15,  -2.99479887e-15,
        -3.17660432e-15,  -3.16115926e-15,  -3.04968615e-15,
        -3.38319310e-15,  -3.18302037e-15,  -3.18062029e-15,
        -3.18831685e-15,  -3.12350443e-15,  -3.16835523e-15,
        -2.80098169e-15,  -2.85030047e-15,  -3.07661182e-15,
        -3.09309520e-15,  -3.29930123e-15,  -3.13115225e-15,
        -3.01458072e-15,  -2.92094313e-15,  -2.91657654e-15,
        -2.97417829e-15,  -2.92158194e-15,  -3.04309553e-15,
        -3.01286008e-15,  -3.02981615e-15,  -3.13218388e-15,
        -3.16141166e-15,  -3.05747067e-15,  -3.23452183e-15,
        -3.05660116e-15,  -3.15711049e-15,  -2.93928604e-15,
        -2.94777392e-15,  -3.26828786e-15,  -3.09652537e-15,
        -2.99453553e-15,  -2.99532995e-15,  -2.99076965e-15,
        -3.06269942e-15,  -3.00545003e-15,  -3.09973296e-15,
        -2.98693099e-15,  -3.15632062e-15,  -3.28894864e-15,
        -3.23448055e-15,  -3.06479522e-15,  -3.04674573e-15,
        -2.99784913e-15,  -3.02075530e-15,  -3.19097625e-15,
        -3.24998869e-15,  -3.15723989e-15,  -3.10601341e-15,
        -2.83499047e-15,  -3.25771394e-15,  -3.01814145e-15,
        -3.01391239e-15,  -3.28937871e-15,  -3.01340995e-15,
        -3.01166975e-15,  -3.00694567e-15,  -2.99110421e-15,
        -3.01350487e-15,  -3.05381224e-15,  -3.21401462e-15,
        -2.94270034e-15,  -3.07388562e-15,  -3.16461806e-15,
        -3.12956727e-15,  -3.02950768e-15,  -3.30215280e-15,
        -3.02279985e-15,  -3.24506830e-15,  -2.97611828e-15,
        -2.95690239e-15,  -3.18495047e-15,  -3.02752042e-15,
        -3.14199779e-15,  -2.99138672e-15,  -3.12216869e-15,
        -3.19349652e-15,  -3.10440644e-15,  -3.19500936e-15,
        -2.98357711e-15,  -3.09209554e-15,  -3.16700759e-15,
        -3.07490242e-15,  -3.10650885e-15,  -3.05554552e-15,
        -3.03459121e-15,  -3.20921495e-15,  -3.01616949e-15,
        -3.15827213e-15,  -3.04213424e-15,  -3.06759437e-15,
        -3.00860787e-15,  -3.06702226e-15,  -2.98604808e-15,
        -2.93924578e-15,  -3.14588127e-15,  -3.05750691e-15,
        -3.13853451e-15,  -3.01409038e-15,  -3.12417176e-15,
        -3.14513375e-15,  -3.09173003e-15,  -3.04756469e-15,
        -3.04445504e-15,  -3.30450496e-15,  -3.01684920e-15,
        -2.87403737e-15,  -3.09791948e-15,  -3.20664206e-15,
        -3.25797328e-15,  -2.99308476e-15,  -3.07308720e-15,
        -3.11681844e-15,  -2.92038747e-15,  -3.05702521e-15,
        -3.10981156e-15,  -3.18899495e-15,  -3.08799660e-15,
        -3.13212319e-15,  -2.91458385e-15,  -3.01489710e-15,
        -3.06713972e-15,  -3.09830045e-15,  -3.15932985e-15,
        -3.10664150e-15,  -3.20915108e-15,  -3.15845348e-15,
        -3.34297966e-15,  -3.04735246e-15,  -3.22208577e-15,
        -2.96142925e-15,  -3.16074191e-15,  -2.91707430e-15,
        -3.08456209e-15,  -2.98240370e-15,  -3.28837382e-15,
        -3.09860612e-15,  -3.15503836e-15,  -3.22703227e-15,
        -3.33285741e-15,  -3.16300386e-15,  -3.02621263e-15,
        -2.87765993e-15,  -3.48000889e-15,  -3.05268800e-15,
        -3.07098043e-15,  -3.13881176e-15,  -3.14738400e-15,
        -3.00369418e-15,  -3.12841700e-15,  -2.99044030e-15,
        -3.16323830e-15,  -3.14765149e-15,  -3.04469847e-15,
        -3.17179807e-15,  -2.91738249e-15,  -3.39936947e-15,
        -3.13103759e-15,  -3.16770964e-15,  -3.05662115e-15,
        -3.18150406e-15,  -3.05633199e-15,  -3.08259863e-15,
        -2.94256087e-15,  -3.29480346e-15,  -2.96831759e-15,
        -3.04746608e-15,  -3.03959837e-15,  -3.16991294e-15,
        -3.11445039e-15,  -3.20683296e-15,  -3.09983319e-15,
        -2.99647520e-15,  -3.10099782e-15,  -3.07153306e-15,
        -3.23529477e-15,  -3.00493164e-15,  -2.95130634e-15,
        -3.08081916e-15,  -3.05938250e-15,  -3.07121647e-15,
        -3.01570065e-15,  -3.05287174e-15,  -3.29511842e-15,
        -3.15499331e-15,  -3.08686897e-15,  -3.13401664e-15,
        -3.13673113e-15,  -2.95150442e-15,  -3.17019376e-15,
        -3.06862875e-15,  -3.26068643e-15,  -3.07918499e-15,
        -3.09881834e-15,  -3.06785487e-15,  -3.09152956e-15,
        -3.22466590e-15,  -2.86329113e-15,  -3.02294613e-15,
        -3.13267874e-15,  -3.07245379e-15,  -2.83568869e-15,
        -3.16240925e-15,  -3.22921996e-15,  -3.14721420e-15,
        -3.07642819e-15,  -3.00819137e-15,  -3.09606702e-15,
        -3.08159428e-15,  -3.05519191e-15,  -3.26964266e-15,
        -3.15177461e-15,  -3.11971644e-15,  -3.20396321e-15,
        -3.02351332e-15,  -3.04632379e-15,  -3.13370044e-15,
        -2.89628366e-15,  -3.13782269e-15,  -3.11056932e-15,
        -2.89701954e-15,  -3.09339072e-15,  -3.13091307e-15,
        -3.04831258e-15,  -3.21646898e-15,  -3.10904715e-15,
        -2.97609244e-15,  -3.22164347e-15,  -3.09490747e-15,
        -3.48635909e-15,  -3.05128679e-15,  -3.11231015e-15,
        -3.05777908e-15,  -3.14789462e-15,  -3.09412738e-15,
        -3.01911218e-15,  -3.12282811e-15,  -3.14828967e-15,
        -2.91377822e-15,  -3.15740557e-15,  -3.11272263e-15,
        -3.08355258e-15,  -3.07093583e-15,  -3.11866189e-15,
        -3.00196143e-15,  -3.07165100e-15,  -3.19562771e-15,
        -3.00258976e-15,  -3.04296774e-15,  -3.14255089e-15,
        -3.03942040e-15,  -3.06623320e-15,  -3.16115609e-15,
        -3.08681974e-15,  -3.13869866e-15,  -2.97293263e-15,
        -2.94912630e-15,  -3.12947165e-15,  -3.40496156e-15,
        -2.98638219e-15,  -3.26014944e-15,  -2.91915165e-15,
        -3.14109130e-15,  -3.06833180e-15,  -3.06044590e-15,
        -3.27653861e-15,  -2.93447016e-15,  -3.02057569e-15,
        -3.16990866e-15,  -3.11435910e-15,  -3.12422212e-15,
        -3.02477237e-15,  -3.04498530e-15,  -3.24900294e-15,
        -3.14626438e-15,  -3.13128223e-15,  -3.14559436e-15,
        -3.00149060e-15,  -2.98508917e-15,  -3.05293200e-15,
        -3.06474250e-15,  -2.99690786e-15,  -3.01215600e-15,
        -2.95086721e-15,  -3.10554632e-15,  -3.07897550e-15,
        -3.06093071e-15,  -3.13911043e-15,  -3.02179861e-15,
        -3.11287183e-15,  -3.04132186e-15,  -3.13689525e-15,
        -2.98898107e-15,  -3.11689892e-15,  -3.08903880e-15,
        -3.14494732e-15,  -3.07735456e-15,  -3.03446047e-15,
        -3.02292800e-15,  -2.85563413e-15,  -3.05025356e-15,
        -2.98902121e-15,  -3.08563925e-15,  -3.11701576e-15,
        -2.84436342e-15,  -2.91989400e-15,  -3.13362124e-15,
        -3.13991108e-15,  -3.07420625e-15,  -3.17314209e-15,
        -3.18812657e-15,  -3.11359591e-15,  -3.08825310e-15,
        -3.11047414e-15,  -3.32665287e-15,  -3.04715443e-15,
        -3.01300395e-15,  -2.97630714e-15,  -3.01603804e-15,
        -2.98993500e-15,  -2.98232760e-15,  -3.04953327e-15,
        -3.21356092e-15,  -3.11380924e-15,  -3.15686341e-15,
        -3.23588917e-15,  -3.14169563e-15,  -3.06709575e-15,
        -3.13679726e-15,  -3.06183666e-15,  -3.08235299e-15,
        -2.97726579e-15,  -2.99791854e-15,  -3.02429520e-15,
        -3.20319753e-15,  -3.21403809e-15,  -3.05332617e-15,
        -3.14761198e-15,  -2.86102020e-15,  -2.91666366e-15,
        -2.92207237e-15,  -3.26904504e-15,  -3.22167582e-15,
        -3.02953264e-15,  -2.91109177e-15,  -2.99191849e-15,
        -3.00725750e-15,  -3.16689129e-15,  -3.08798043e-15,
        -3.01907568e-15,  -3.21635591e-15,  -3.17384185e-15,
        -3.01866806e-15,  -3.17041045e-15,  -3.00106538e-15,
        -2.97018782e-15,  -3.11360893e-15,  -3.12990469e-15,
        -3.08610377e-15,  -3.32257094e-15,  -3.13775856e-15,
        -3.06556641e-15,  -3.18580731e-15,  -3.13111310e-15,
        -3.21902017e-15,  -2.88788062e-15,  -3.21858395e-15,
        -3.06583155e-15,  -3.01331447e-15,  -2.93225234e-15,
        -3.14630505e-15,  -2.93065375e-15,  -3.04942144e-15,
        -3.13152187e-15,  -2.98121829e-15,  -3.18247200e-15,
        -3.17843454e-15,  -3.03905757e-15,  -3.14580572e-15,
        -3.16474452e-15,  -3.07712453e-15,  -3.25698295e-15,
        -3.22581252e-15,  -3.15641123e-15,  -3.07861425e-15,
        -3.08201825e-15,  -3.31444070e-15,  -2.94485156e-15,
        -3.02875912e-15,  -3.06940716e-15,  -2.95327299e-15,
        -3.15477708e-15,  -3.01919662e-15,  -3.19033309e-15,
        -3.04391764e-15,  -3.12945179e-15,  -3.06797763e-15,
        -3.07221902e-15,  -3.17825667e-15,  -2.97721928e-15,
        -3.18837734e-15,  -3.08179956e-15,  -3.24607632e-15,
        -2.93987881e-15,  -3.05882684e-15,  -3.18579108e-15,
        -2.95671415e-15,  -3.20162199e-15,  -3.17402599e-15,
        -3.16306907e-15,  -3.06880334e-15,  -2.96304329e-15,
        -2.90827245e-15,  -3.11322643e-15,  -3.07274118e-15,
        -3.18764641e-15,  -3.06710018e-15,  -3.11254101e-15,
        -2.96995932e-15,  -3.10585106e-15,  -2.93237760e-15,
        -3.05763925e-15,  -3.26809863e-15,  -3.17765712e-15,
        -2.86361226e-15,  -3.03225971e-15,  -3.00091479e-15,
        -3.10606495e-15,  -3.09882265e-15,  -3.15678692e-15,
        -3.09762026e-15,  -2.97686498e-15,  -2.96486699e-15,
        -2.95249463e-15,  -3.11791444e-15,  -3.26345090e-15,
        -3.08795919e-15,  -3.12829105e-15,  -3.18458394e-15,
        -3.15722041e-15,  -2.89384365e-15,  -3.03330051e-15,
        -3.22932379e-15,  -3.01477788e-15,  -3.21274453e-15,
        -3.37022997e-15,  -3.18282373e-15,  -3.27404825e-15,
        -3.05091732e-15,  -2.79300279e-15,  -3.24426165e-15,
        -3.07108512e-15,  -2.94338117e-15,  -3.15999075e-15,
        -2.89827902e-15,  -3.10309659e-15,  -2.88216500e-15,
        -3.17141883e-15,  -3.09982128e-15,  -3.02096389e-15,
        -3.08854749e-15,  -3.20566115e-15,  -3.33018320e-15,
        -3.23118446e-15,  -3.09739879e-15,  -2.96575689e-15,
        -3.02906540e-15,  -3.12416931e-15,  -2.99431971e-15,
        -3.09773056e-15,  -3.27903659e-15,  -3.02563052e-15,
        -3.31398915e-15,  -3.09034014e-15,  -3.17237082e-15,
        -3.03520852e-15,  -3.08365956e-15,  -3.07735276e-15,
        -3.06588114e-15,  -3.12286176e-15,  -3.00105998e-15,
        -3.04551023e-15,  -3.14697237e-15,  -3.30654186e-15,
        -3.05367626e-15,  -3.15235455e-15,  -3.09652062e-15,
        -3.17651855e-15,  -2.97930560e-15,  -3.11957630e-15,
        -3.07104573e-15,  -3.07436170e-15,  -3.05919393e-15,
        -2.96454302e-15,  -3.00549225e-15,  -3.02097708e-15,
        -3.21392382e-15,  -3.06147001e-15,  -3.05795356e-15,
        -3.20331894e-15,  -3.10589263e-15,  -2.90625388e-15,
        -3.10455579e-15,  -2.86905610e-15,  -3.25326235e-15,
        -3.11756795e-15,  -3.25737214e-15,  -3.14049234e-15,
        -3.07174757e-15,  -2.99486012e-15,  -2.86116476e-15,
        -3.05067222e-15,  -3.16760490e-15,  -3.04113382e-15,
        -3.22870406e-15,  -3.08781733e-15,  -3.01015037e-15,
        -3.18281879e-15,  -3.16719523e-15,  -2.92453756e-15,
        -3.15376639e-15,  -3.09990655e-15,  -3.15187798e-15,
        -3.15262604e-15,  -3.24771348e-15,  -3.28566315e-15,
        -3.15683014e-15,  -3.14947162e-15,  -3.19947714e-15,
        -3.17629317e-15,  -3.18270561e-15,  -3.19907287e-15,
        -3.15919614e-15,  -3.02125109e-15,  -3.07979814e-15,
        -3.03523488e-15,  -2.95143319e-15,  -3.04651829e-15,
        -2.98604247e-15,  -3.12025491e-15,  -2.84576061e-15,
        -3.13569426e-15,  -3.34318683e-15,  -3.08022350e-15,
        -2.86914810e-15,  -3.15531145e-15,  -3.11326486e-15,
        -3.06914972e-15,  -3.06375249e-15,  -2.93009266e-15,
        -2.89203331e-15,  -2.96379726e-15,  -3.17252528e-15,
        -3.12716274e-15,  -3.13821012e-15,  -3.05451609e-15,
        -3.04040489e-15,  -3.08221559e-15,  -2.88751950e-15,
        -3.00719001e-15,  -2.88558820e-15,  -2.96864904e-15,
        -3.05184558e-15,  -2.99811734e-15,  -2.92872856e-15,
        -3.22903125e-15,  -3.11339951e-15,  -3.22449980e-15,
        -3.34735341e-15,  -3.03973492e-15,  -2.94942590e-15,
        -3.05679977e-15,  -3.00086733e-15,  -3.14066375e-15,
        -3.20988847e-15,  -3.09118657e-15,  -3.00611857e-15,
        -2.96748822e-15,  -2.92212458e-15,  -3.24280209e-15,
        -3.14481949e-15,  -3.10587143e-15,  -2.96580252e-15,
        -3.00184644e-15,  -3.02205139e-15,  -3.04209866e-15,
        -3.17682266e-15,  -3.19970706e-15,  -3.14141950e-15,
        -3.27896936e-15,  -3.03008423e-15,  -3.12215859e-15,
        -3.23222598e-15,  -3.17102100e-15,  -3.09398678e-15,
        -2.82910577e-15,  -3.29274263e-15,  -2.97647588e-15,
        -3.04539073e-15,  -3.16800900e-15,  -3.15398542e-15,
        -3.03363930e-15,  -3.21641022e-15,  -3.20047084e-15,
        -3.12869594e-15,  -3.01823651e-15,  -2.96690131e-15,
        -3.05426933e-15,  -3.11940490e-15,  -2.94540496e-15,
        -3.19187907e-15,  -3.18676988e-15,  -3.17780975e-15,
        -2.97435437e-15,  -2.87090222e-15,  -3.07938900e-15,
        -3.21643493e-15,  -3.12799855e-15,  -2.99081599e-15,
        -3.40431243e-15,  -3.09438332e-15,  -3.00133121e-15,
        -3.22541202e-15,  -3.12285902e-15,  -3.09491736e-15,
        -3.14172999e-15,  -3.04146648e-15,  -2.98455121e-15,
        -3.11852052e-15,  -2.98138111e-15,  -3.22790170e-15,
        -3.09158955e-15,  -3.00765412e-15,  -3.01363603e-15,
        -3.23649586e-15,  -3.04874280e-15,  -2.96326761e-15,
        -3.10363763e-15,  -3.27121357e-15,  -3.05046621e-15,
        -3.15021815e-15,  -3.31612281e-15,  -3.19997058e-15,
        -3.28221699e-15,  -3.15858034e-15,  -3.05530193e-15,
        -3.13218244e-15,  -3.04847053e-15,  -3.04736499e-15,
        -3.02517380e-15,  -3.03929168e-15,  -3.09761736e-15,
        -3.11078663e-15,  -3.09408052e-15,  -3.10944939e-15,
        -3.14508499e-15,  -3.00168453e-15,  -2.99894704e-15,
        -3.16797341e-15,  -3.12890046e-15,  -3.14879379e-15,
        -3.02608926e-15,  -3.04493359e-15,  -2.81890271e-15,
        -3.22478061e-15,  -2.98906826e-15,  -2.90395459e-15,
        -3.06478996e-15,  -2.99974975e-15,  -3.03271133e-15,
        -2.98675847e-15,  -3.31188082e-15,  -3.35152951e-15,
        -3.10881772e-15,  -3.14049458e-15,  -3.22502470e-15,
        -3.19152899e-15,  -3.10680470e-15,  -2.94541738e-15,
        -2.93021258e-15,  -3.25246951e-15,  -2.99130599e-15,
        -3.16680177e-15,  -2.95567337e-15,  -3.17710900e-15,
        -3.03461729e-15,  -2.93682320e-15,  -3.14108561e-15,
        -3.23426607e-15,  -3.10995862e-15,  -3.03164403e-15,
        -2.91328691e-15,  -2.92198067e-15,  -3.11786321e-15,
        -3.10652264e-15,  -3.28234045e-15,  -3.15126143e-15,
        -3.11958887e-15,  -3.08322359e-15,  -3.28274742e-15,
        -2.88735846e-15,  -3.09030785e-15,  -2.96413909e-15,
        -3.19089568e-15,  -3.13597752e-15,  -2.87163097e-15,
        -3.18900539e-15,  -3.12385159e-15,  -3.00907129e-15,
        -3.03427099e-15,  -3.11621215e-15,  -3.18953051e-15,
        -3.12245765e-15,  -3.13245121e-15,  -3.30754366e-15,
        -3.22175638e-15,  -3.00734873e-15,  -2.90595519e-15,
        -3.24180649e-15,  -3.07649638e-15,  -3.02882168e-15,
        -3.17965710e-15,  -3.02491181e-15,  -3.06035283e-15,
        -3.08184216e-15,  -3.13974860e-15,  -2.95592732e-15,
        -3.19289449e-15,  -3.01864437e-15,  -3.27824040e-15,
        -3.17588276e-15,  -3.01967635e-15,  -3.00016924e-15,
        -3.05279213e-15,  -2.98398852e-15,  -3.21212825e-15,
        -2.87948825e-15,  -3.06549326e-15,  -3.06517632e-15,
        -3.16757786e-15,  -3.17419557e-15,  -3.10551440e-15,
        -3.09957943e-15,  -3.06217454e-15,  -3.08624267e-15,
        -3.05507874e-15,  -3.06430561e-15,  -3.21416208e-15,
        -2.97723491e-15,  -2.94508887e-15,  -2.99988946e-15,
        -2.96651729e-15,  -3.08798815e-15,  -3.17197965e-15,
        -3.10473305e-15,  -3.31133821e-15,  -3.09629689e-15,
        -3.28739554e-15,  -3.03890738e-15,  -3.29411560e-15,
        -3.25808893e-15,  -3.01262428e-15,  -3.08657938e-15,
        -3.06715920e-15,  -3.12601369e-15,  -3.02643418e-15,
        -3.26229046e-15,  -3.07422003e-15,  -3.05312986e-15,
        -3.17495682e-15,  -3.20440216e-15,  -3.17552893e-15,
        -3.04130964e-15,  -2.93569540e-15,  -3.04914670e-15,
        -3.07767961e-15,  -3.14510150e-15,  -3.09349422e-15,
        -3.25941813e-15,  -3.18590891e-15,  -3.11553625e-15,
        -3.24229958e-15,  -3.26827295e-15,  -2.96902738e-15,
        -3.15400097e-15,  -3.13847921e-15,  -3.03834017e-15,
        -3.10236631e-15,  -2.88903472e-15,  -3.06622225e-15,
        -2.92935184e-15,  -3.12110572e-15,  -3.05539234e-15,
        -3.24691834e-15,  -3.15187200e-15,  -2.94430679e-15,
        -3.14623973e-15,  -3.11710904e-15,  -3.12711077e-15,
        -3.21706430e-15,  -2.98278572e-15,  -3.07374042e-15,
        -2.95233197e-15,  -3.04262903e-15,  -3.00055851e-15,
        -3.16846765e-15])

In [20]:
basic_pca.explained_variance_ratio_


Out[20]:
array([  1.00000000e+00,   7.33245551e-30])

What does PCA do to these multivariate normal distributions?


In [12]:
from sklearn.decomposition import PCA
PCA??

In [13]:
basic_pca = PCA()
transformed = basic_pca.fit_transform(examples)

In [14]:
ppl.scatter(transformed[:,0], transformed[:,1])
plt.axis('equal')


Out[14]:
(-4.0, 5.0, -2.0, 2.0)

In [15]:
np.cov(transformed, rowvar=False)


Out[15]:
array([[  1.33872176e+00,  -8.95735493e-17],
       [ -8.95735493e-17,   2.03924160e-01]])

What if we whiten the PCA components?


In [16]:
whitened_pca = PCA(whiten=True)
transformed = whitened_pca.fit_transform(examples)
ppl.scatter(transformed[:,0], transformed[:,1])
plt.axis('equal')


Out[16]:
(-4.0, 4.0, -4.0, 4.0)

In [17]:
np.cov(transformed.T)


Out[17]:
array([[  1.00100100e+00,  -1.50252405e-16],
       [ -1.50252405e-16,   1.00100100e+00]])

What if we wanted to work with a variety of multivariate normal clusters?


In [21]:
# But maybe we'd like some variety covariance matrices?
means = [np.array([-2, -5]), np.array([0, 1]), np.array([-3,1])]
gen_covs = [np.array([[1, .5], [.5, .75]]),
            np.array([[1, -.5], [-.5, .75]]),
            np.array([[1, 0], [0, .75]])]

In [22]:
examples = []
for m, c in zip(means, gen_covs):
    examples.append(mn.rvs(m, c, 1000))

In [23]:
for e in examples:
        ppl.scatter(e[:,0], e[:,1])
plt.axis('equal')


Out[23]:
(-8.0, 4.0, -10.0, 6.0)

In [24]:
combined = np.concatenate(examples)

In [26]:
pca = PCA()
transformed = pca.fit_transform(combined)

In [27]:
ppl.scatter(transformed[:,0], transformed[:,1])
plt.axis('equal')


Out[27]:
(-8.0, 6.0, -6.0, 6.0)

In [28]:
pca1 = PCA(n_components=1)
transformed = pca1.fit_transform(combined)

In [29]:
transformed.shape


Out[29]:
(3000, 1)

In [31]:
ppl.scatter(transformed, np.zeros_like(transformed))


Out[31]:
<matplotlib.axes.AxesSubplot at 0x10a23b210>

In [35]:
pca.components_[0].dot(<something>)


Out[35]:
array([ 0.09794438,  0.99519189])

Here is a place to play with GMM


In [ ]: