In [1]:
# Imports
import os
import sys
import pandas as pd
import seaborn as sb

# Custom Imports
# Put the directory two levels above the working directory at the front of
# sys.path so the local stats_toolbox package is importable
# (presumably the repository root; confirm against the project layout).
sys.path.insert(0, '../../')
import stats_toolbox as st
from stats_toolbox.utils.data_loaders import load_fem_preg_2002

# Graphics setup: enable inline matplotlib output for the notebook.
# --no-import-all skips the `from pylab import *` namespace dump.
# NOTE(review): IPython discourages %pylab; `%matplotlib inline` with explicit
# imports is the recommended form. Confirm nothing below relies on names that
# %pylab injects before switching.
%pylab inline --no-import-all


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Load the pregnancy data set from the ../data directory
df = load_fem_preg_2002('../data')

# Keep only the rows whose prglngth is at least 37 (the full-term cutoff used here)
full_term = df.loc[df['prglngth'] >= 37]

# birthwgt_kg column of the unfiltered frame, with missing entries removed
weights = df['birthwgt_kg'].dropna()

Constructing PMFs

As with histograms, any list-like object or pandas Series can be converted to a Pmf object. Hist objects can also be converted, either by passing them to the Pmf constructor or by calling their to_pmf() method.


In [4]:
# Build a Pmf and a Hist from the same totalwgt_lb column, sharing one label,
# so the two representations can be compared against each other.
pmf = st.Pmf(full_term['totalwgt_lb'], label='Total Birth Weight')
H = st.Hist(full_term['totalwgt_lb'], label='Total Birth Weight')

In [5]:
# Check that converting the Hist with to_pmf() gives a Pmf equal to the one
# built directly from the same data.
pmf == H.to_pmf()


Out[5]:
True

In [ ]:
Individual probabilities can be looked up by indexing the Pmf with a value:

In [7]:
# Look up the probability stored for the value 8.
pmf[8]
# same as pmf.prob(8)


Out[7]:
0.031344792719919114

Methods

Summary stats


In [ ]:
# A notebook cell only renders its final expression, so four bare calls would
# show just the last result. Collect every statistic in one labelled dict
# (calls are evaluated in the same order as before) so all of them are displayed.
{
    'mean': pmf.mean(),
    'var': pmf.var(),
    'std': pmf.std(),
    'maximum_likelihood': pmf.maximum_likelihood(),
}

Calculate Probabilities


In [ ]:
# A notebook cell only renders its final expression, so the prob_less result
# would be silently dropped. Return both values in one labelled dict instead.
{
    'prob_less(3)': pmf.prob_less(3),
    'prob_greater(4)': pmf.prob_greater(4),
}

In [9]:
# Arithmetic: build one Pmf of prglngth per birth-order group so the two can
# be combined with arithmetic operators afterwards.
# NOTE(review): `!= 1` is also True for rows where birthord is NaN, so any
# such rows land in the 'other' group; confirm that is intended.
pmf_first = st.Pmf(
    full_term.loc[full_term['birthord'] == 1, 'prglngth'],
    label='1st born',
)
pmf_other = st.Pmf(
    full_term.loc[full_term['birthord'] != 1, 'prglngth'],
    label='other',
)

In [10]:
# Plot the result of subtracting the 'other' Pmf from the '1st born' Pmf.
# The trailing semicolon stops the cell from echoing the list of Line2D
# objects returned by plot(), leaving only the figure as output.
(pmf_first - pmf_other).plot();


Out[10]:
[<matplotlib.lines.Line2D at 0x10bc17048>]

In [24]:
# Draw both Pmfs with st.multiplot. Binding the return value to `fig`
# (presumably the figure object; confirm) also keeps the cell from echoing it.
fig = st.multiplot((pmf_first, pmf_other))



In [20]:
# Display the repr of the 'other' Pmf (a mapping of value -> probability).
pmf_other


Out[20]:
Pmf({48: 0.00094161958568738226, 50: 0.00047080979284369113, 37: 0.058615819209039542, 38: 0.079331450094161954, 39: 0.61911487758945383, 40: 0.13747645951035781, 41: 0.054378531073446326, 42: 0.028954802259887006, 43: 0.014359698681732579, 44: 0.0054143126177024483, 45: 0.00094161958568738226})

In [21]:
# Display the repr of the '1st born' Pmf (a mapping of value -> probability).
pmf_first


Out[21]:
Pmf({37: 0.054507337526205457, 38: 0.071278825995807135, 39: 0.55398322851153048, 40: 0.14046121593291405, 41: 0.094339622641509441, 42: 0.053721174004192872, 43: 0.022798742138364782, 44: 0.0060272536687631028, 45: 0.0015723270440251573, 46: 0.00026205450733752622, 47: 0.00026205450733752622, 48: 0.00078616352201257866})

In [ ]: