In [1]:
from __future__ import division
from scipy import stats
import random, pymongo
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects.functions import SignatureTranslatedFunction

%matplotlib inline

In [2]:
# Handle to the embedded R interpreter exposed by rpy2.
r = robjects.r

# Wrap R's `wilcox.test` so that the Python keyword `conf_int` is passed to R
# as the `conf.int` argument (dots are not valid in Python identifiers).
wilr = SignatureTranslatedFunction(
    r['wilcox.test'],
    init_prm_translate={'conf_int': 'conf.int'}
)

In [3]:
# Client for the MongoDB server on this machine (host 'localhost', port 27017).
connection = pymongo.MongoClient('localhost', 27017)

# Collection 'question_3' in database 'results'; the per-community loop
# further down in this notebook writes its estimate back into it.
results_db = connection['results']['question_3']

# Fetch only the community name and the stored frequency p-value,
# leaving out Mongo's internal `_id`.
results_projection = {
    u'_id': False,
    u'community': True,
    u'frequency_pvalue': True,
}
cursor = results_db.find({}, results_projection)

# Materialise the cursor: one DataFrame row per result document.
stats_df = pd.DataFrame(list(cursor))

In [4]:
# For every community listed in `stats_df`, compare the `activity_freq` of
# female and male contributors with R's two-sided Wilcoxon rank-sum test and
# store the location-shift estimate reported by R (the `estimate` component,
# returned because conf_int=True) as `frequency_HL` on that community's
# result document.
#
# `.get` keeps the loop a no-op when `stats_df` is empty and therefore has no
# 'community' column (the original `iterrows()` behaved the same way).
for community in stats_df.get('community', ()):
    community_db = connection[community]['statistics']
    # Only members with at least one contribution take part in the comparison.
    cursor = community_db.find({'contributions_total': {'$gt': 0}},
                               {u'_id': False, u'activity_freq': True, u'gender': True})

    df = pd.DataFrame(list(cursor))
    # A query without matches gives a frame with no columns at all; guard
    # against that before selecting by column.
    if 'gender' not in df.columns or 'activity_freq' not in df.columns:
        print('Skipping %s: no usable contributor records' % community)
        continue

    # Missing values are dropped up front so the emptiness check below
    # reflects what R would actually work with (wilcox.test discards
    # non-finite observations itself).
    females = df.loc[df['gender'] == 'Female', 'activity_freq'].dropna()
    males = df.loc[df['gender'] == 'Male', 'activity_freq'].dropna()
    if females.empty or males.empty:
        # wilcox.test raises on an empty sample, which previously aborted the
        # whole loop; skip this community and carry on with the rest.
        print('Skipping %s: need both Female and Male observations' % community)
        continue

    test_result = wilr(robjects.FloatVector(list(females)),
                       robjects.FloatVector(list(males)),
                       alternative="two.sided", conf_int=True,
                       correct=True, exact=False)
    # Look the estimate up by name rather than by position (formerly [8][0]),
    # so the code does not depend on the ordering of R's result list.
    frequency = test_result.rx2('estimate')[0]

    # `update_one` replaces the deprecated `Collection.update`; like the old
    # call with its defaults, it modifies at most one matching document.
    results_db.update_one({'community': community},
                          {'$set': {'frequency_HL': frequency}})

In [ ]: