In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
np.random.seed(10)
# Sample data randomly at fixed probabilities
voter_race = np.random.choice(a= ["asian","black","hispanic","other","white"],
p = [0.05, 0.15 ,0.25, 0.05, 0.5],
size=1000)
# Sample data randomly at fixed probabilities
voter_party = np.random.choice(a= ["democrat","independent","republican"],
p = [0.4, 0.2, 0.4],
size=1000)
voters = pd.DataFrame({"race":voter_race,
"party":voter_party})
voter_tab = pd.crosstab(voters.race, voters.party, margins = True)
voter_tab.columns = ["democrat","independent","republican","row_totals"]
voter_tab.index = ["asian","black","hispanic","other","white","col_totals"]
observed = voter_tab.ix[0:5,0:3] # Get table without totals for later use
voter_tab
Out[1]:
In [2]:
stats.chi2_contingency(observed= observed)
Out[2]:
In [4]:
observed
Out[4]:
In [ ]: