In [16]:
import json
import math

import pandas
import scipy.stats
In [22]:
import pywikibot
# Transforming QIDs into English labels.
# Site handle for English Wikipedia and the data repository attached to it.
enwp = pywikibot.Site('en', 'wikipedia')
wikidata = enwp.data_repository()
# Cache of labels already looked up, keyed by QID.
retrieved = {}
def english_label(qid):
    """Return the English label for ``qid``, caching results in ``retrieved``.

    Parameters
    ----------
    qid : str or float
        Item identifier to look up in the ``wikidata`` repository. A float
        NaN is treated as a missing value.

    Returns
    -------
    str or None
        ``None`` when ``qid`` is NaN; the English label when the lookup
        succeeds; otherwise ``qid`` itself as a fallback.

    Notes
    -----
    Only ``KeyError`` is handled during the lookup (e.g. no ``'en'`` entry
    under ``'labels'``); any other exception raised by pywikibot propagates.
    The fallback value is cached too, so a failed lookup is not retried.
    """
    # isinstance (rather than an exact type check) also catches float
    # subclasses such as numpy.float64, which is how NaN often arrives
    # from pandas.
    if isinstance(qid, float) and math.isnan(qid):
        return None

    # First see if we've already resolved this identifier.
    if qid in retrieved:
        return retrieved[qid]

    try:
        page = pywikibot.ItemPage(wikidata, qid)
        label = page.get()['labels']['en']
    except KeyError:
        # No usable English label: fall back to the raw identifier.
        label = qid

    retrieved[qid] = label
    return label
In [25]:
# Previously loaded with: gen_cult = pandas.read_csv('helpers/Chi_Squared_Test_Data.csv') -- no longer used.
In [17]:
# Load the records from the JSON file into a DataFrame. The context manager
# closes the file handle as soon as parsing finishes.
with open('helpers/world_cultures_shortcut.json', 'r') as records_file:
    allrecs = pandas.DataFrame.from_dict(json.load(records_file))
In [19]:
# Observed counts: one row per culture, one column per gender value.
obs = pandas.crosstab(index=allrecs['culture'], columns=allrecs['gender'])
In [23]:
# Replace each column identifier with the value returned by english_label.
obs.columns = [english_label(column_id) for column_id in obs.columns]
In [24]:
# Display the observed contingency table.
obs
Out[24]:
In [29]:
# Chi-squared test of independence on the observed contingency table.
chi2, p, dof, expected = scipy.stats.chi2_contingency(observed=obs)
In [30]:
# Show the test statistic, p-value and degrees of freedom from the test above.
chi2, p, dof
Out[30]:
In [28]:
# Expected frequencies under independence. `expected` has the same shape as
# `obs`, so reuse its row/column labels to make the two tables comparable.
pandas.DataFrame(expected, index=obs.index, columns=obs.columns)
Out[28]:
In [ ]:
# NOTE(review): leftover scratch expression, disabled. `np` is not imported in
# any visible cell, so running this cell would raise NameError.
# np.ndarr