In [1]:
from __future__ import division
from scipy import stats
import random, pymongo
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects.functions import SignatureTranslatedFunction
%matplotlib inline
In [2]:
# Entry point to the embedded R session provided by rpy2.
r = robjects.r

# Wrap R's `wilcox.test` so it can be called with the Python-friendly keyword
# `conf_int`; a dotted name such as `conf.int` is not a valid Python keyword
# argument, so the wrapper translates it on the way into R.
wilr = SignatureTranslatedFunction(
    r['wilcox.test'],
    init_prm_translate={'conf_int': 'conf.int'},
)
In [3]:
# Connect to the MongoDB server on localhost and select the `question_2`
# collection of the `results` database (despite its name, `results_db` is a
# collection handle).
connection = pymongo.MongoClient('localhost', 27017)
results_db = connection['results']['question_2']

# Communities left out of this analysis.
excluded_communities = ["ham", "startups", "poker"]
community_filter = {'community': {'$nin': excluded_communities}}

# Only the community name and the three p-value fields are fetched; Mongo's
# `_id` is explicitly suppressed.
projected_fields = {
    u'_id': False,
    u'community': True,
    u'mean_utility_pvalue': True,
    u'acc_rate_pvalue': True,
    u'questions_avg_pvalue': True,
}

cursor = results_db.find(community_filter, projected_fields)
# Materialise the cursor so the frame holds one row per matching community.
stats_df = pd.DataFrame(list(cursor))
In [4]:
def location_shift_estimate(female_values, male_values):
    """Return the `estimate` component of a two-sample Wilcoxon test.

    Runs R's `wilcox.test` (through the `wilr` wrapper) on the two samples,
    two-sided, with continuity correction and without exact p-values, and
    with `conf.int=TRUE` so that R adds the `estimate` component to the
    result.

    Parameters
    ----------
    female_values, male_values : iterable of numbers
        The two samples to compare; each is copied into an R numeric vector.

    Returns
    -------
    The first element of the test's `estimate` component. Presumably this is
    the Hodges-Lehmann estimate, given the `*_HL` keys it is stored under --
    confirm against the R documentation for `wilcox.test`.

    Raises
    ------
    Whatever rpy2 raises when R rejects the input (for example an empty
    sample); nothing is caught here.
    """
    test_result = wilr(robjects.FloatVector(list(female_values)),
                       robjects.FloatVector(list(male_values)),
                       conf_int=True,
                       # Spelled out instead of relying on R's partial
                       # matching of "t".
                       alternative="two.sided", correct=True, exact=False)
    # Look the component up by name: its position (index 8) only holds when
    # conf.int=TRUE appends `conf.int` and `estimate` to the result list.
    return test_result.rx2('estimate')[0]


# For every community selected above, compare female and male users on three
# per-user statistics and store the resulting estimates next to the existing
# results document of that community.
for index, row in stats_df.iterrows():
    community = row['community']
    # Each community has its own database; per-user figures live in its
    # `statistics` collection.
    community_db = connection[community]['statistics']
    # Only users with at least one contribution are considered.
    cursor = community_db.find({'contributions_total': {'$gt': 0}},
                               {u'_id': False, u'accepted_rate': True, u'gender': True,
                                u'mean_utility': True, u'questions_avg': True})
    df = pd.DataFrame(list(cursor))
    females = df.query("gender == 'Female'")
    males = df.query("gender == 'Male'")

    accepted_rate = location_shift_estimate(females['accepted_rate'],
                                            males['accepted_rate'])
    mean_utility = location_shift_estimate(females['mean_utility'],
                                           males['mean_utility'])
    questions_avg = location_shift_estimate(females['questions_avg'],
                                            males['questions_avg'])

    # NOTE(review): Collection.update is deprecated in PyMongo 3 and removed
    # in PyMongo 4, where update_one is the single-document equivalent. It is
    # kept here because the installed driver version is not visible from this
    # notebook -- confirm and switch if running on PyMongo >= 3.
    results_db.update({'community': community},
                      {'$set': {'acc_rate_HL': accepted_rate,
                                'mean_utility_HL': mean_utility,
                                'questions_avg_HL': questions_avg}})
In [ ]: