In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats

np.random.seed(10)

# Sample data randomly at fixed probabilities
voter_race = np.random.choice(a= ["asian","black","hispanic","other","white"],
                              p = [0.05, 0.15 ,0.25, 0.05, 0.5],
                              size=1000)

# Sample data randomly at fixed probabilities
voter_party = np.random.choice(a= ["democrat","independent","republican"],
                              p = [0.4, 0.2, 0.4],
                              size=1000)

voters = pd.DataFrame({"race":voter_race, 
                       "party":voter_party})

voter_tab = pd.crosstab(voters.race, voters.party, margins = True)

voter_tab.columns = ["democrat","independent","republican","row_totals"]

voter_tab.index = ["asian","black","hispanic","other","white","col_totals"]

observed = voter_tab.ix[0:5,0:3]   # Get table without totals for later use
voter_tab


Out[1]:
democrat independent republican row_totals
asian 21 7 32 60
black 65 25 64 154
hispanic 107 50 94 251
other 15 8 15 38
white 189 96 212 497
col_totals 397 186 417 1000

In [2]:
stats.chi2_contingency(observed= observed)


Out[2]:
(7.1693212801620589,
 0.51847939294884204,
 8,
 array([[  23.82 ,   11.16 ,   25.02 ],
        [  61.138,   28.644,   64.218],
        [  99.647,   46.686,  104.667],
        [  15.086,    7.068,   15.846],
        [ 197.309,   92.442,  207.249]]))

In [4]:
observed


Out[4]:
democrat independent republican
asian 21 7 32
black 65 25 64
hispanic 107 50 94
other 15 8 15
white 189 96 212

In [ ]: