In [16]:
import pandas
import scipy.stats
import json

In [22]:
import pywikibot
#Tranforming QIDs into English labels.
enwp = pywikibot.Site('en','wikipedia')
wikidata = enwp.data_repository()

retrieved = dict()

def english_label(qid):
    if type(qid) is float:
        if math.isnan(qid):
            return None
    #first see if we've done it
    try:
        return retrieved[qid]
    except KeyError:
        try:
            page = pywikibot.ItemPage(wikidata, qid)
            data = page.get()
            lab = data['labels']['en']
            retrieved[qid] = lab
            return lab
        except KeyError:
            retrieved[qid] = qid
            return qid


VERBOSE:pywiki:Starting 1 threads...

In [25]:
# gen_cult = pandas.read_csv('helpers/Chi_Squared_Test_Data.csv') was doing it this way but no longer.

In [17]:
allrecs = pandas.DataFrame.from_dict(json.load(open('helpers/world_cultures_shortcut.json','r')))

In [19]:
obs = pandas.crosstab(allrecs['culture'], allrecs['gender'])

In [23]:
obs.columns = map(english_label, obs.columns)


VERBOSE:pywiki:Found 1 wikidata:wikidata processes running, including this one.

In [24]:
obs


Out[24]:
transgender female intersex fa'afafine transgender male male animal woman genderqueer female male kathoey
culture
africa 0 0 0 0 0 0 0 2256 13915 0
catholic european 7 2 0 1 4 0 0 38267 262253 0
confucian 8 0 0 0 0 0 0 5618 14534 0
english-speaking 38 7 1 9 0 0 4 60753 223374 0
islamic 3 0 0 0 0 0 0 4119 22693 0
latin america 7 0 0 0 0 0 0 11555 64539 0
orthodox 1 0 0 0 1 1 0 14178 81513 0
protestant european 3 1 0 2 0 0 0 49801 244301 0
south asia 5 0 0 0 0 0 0 5531 23594 1

In [29]:
chi2, p, dof, expected = scipy.stats.chi2_contingency(obs)

In [30]:
chi2, p, dof


Out[30]:
(10430.455963736977, 0.0, 72)

In [28]:
pandas.DataFrame(expected)


Out[28]:
0 1 2 3 4 5 6 7 8 9
0 1.018735 0.141491 0.014149 0.169789 0.070745 0.014149 0.056596 2717.729756 13451.770440 0.014149
1 18.932932 2.629574 0.262957 3.155489 1.314787 0.262957 1.051830 50508.329383 249997.797134 0.262957
2 1.270032 0.176393 0.017639 0.211672 0.088197 0.017639 0.070557 3388.128865 16770.001365 0.017639
3 17.903047 2.486534 0.248653 2.983841 1.243267 0.248653 0.994614 47760.852663 236398.790074 0.248653
4 1.689282 0.234622 0.023462 0.281547 0.117311 0.023462 0.093849 4506.581127 22305.931875 0.023462
5 4.794183 0.665859 0.066586 0.799031 0.332929 0.066586 0.266344 12789.682280 63304.259617 0.066586
6 6.028496 0.837291 0.083729 1.004749 0.418646 0.083729 0.334916 16082.520021 79602.604693 0.083729
7 18.528109 2.573348 0.257335 3.088018 1.286674 0.257335 1.029339 49428.363307 244652.359199 0.257335
8 1.835184 0.254887 0.025489 0.305864 0.127443 0.025489 0.101955 4895.812598 24232.485603 0.025489

In [ ]:
np.ndarr