In [1]:
from __future__ import division
from scipy import stats
import random, pymongo
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as robjects

%matplotlib inline


During startup - Warning messages:
1: Setting LC_COLLATE failed, using "C" 
2: Setting LC_TIME failed, using "C" 
3: Setting LC_MESSAGES failed, using "C" 
4: Setting LC_MONETARY failed, using "C" 

In [2]:
# Handle to the embedded R session; R functions are looked up through it by
# name (e.g. r['wilcox.test'] in the analysis loop of this notebook).
r = robjects.r

In [22]:
# Open a client to the MongoDB server on localhost and grab the collection
# that holds the stored results for this question.
connection = pymongo.MongoClient('localhost', 27017)
results_db = connection['results']['question_2']

# Query: every result document whose community is not one of the three listed
# below, returning only the community name and the three stored p-value fields.
excluded_communities = ["ham", "startups", "poker"]
projection = {
    u'_id': False,
    u'community': True,
    u'mean_utility_pvalue': True,
    u'acc_rate_pvalue': True,
    u'questions_avg_pvalue': True,
}
cursor = results_db.find({'community': {'$nin': excluded_communities}}, projection)

# Materialise the cursor: one DataFrame row per returned document.
stats_df = pd.DataFrame(list(cursor))

In [23]:
# Pairs each per-user metric column with the field of the results document
# that receives the p-value computed for that metric.
PVALUE_FIELDS = [
    ('accepted_rate', 'acc_rate_pvalue_greater'),
    ('mean_utility', 'mean_utility_pvalue_greater'),
    ('questions_avg', 'questions_avg_pvalue_greater'),
]


def _greater_pvalue(females, males, column):
    """Return the p-value of a one-sided Wilcoxon rank-sum test on ``column``.

    Runs R's ``wilcox.test`` with the female values as the first sample and
    the male values as the second, using ``alternative="greater"``, the
    continuity correction enabled and the exact computation disabled.

    Parameters
    ----------
    females, males : pandas.DataFrame
        The two groups to compare; both must contain ``column``.
    column : str
        Name of the numeric column to test.

    Returns
    -------
    float
        The ``p.value`` element of the R test result.

    Notes
    -----
    NOTE(review): R is expected to raise if either sample has no usable
    observations, which would abort the surrounding loop - confirm that
    every processed community has users of both genders.
    """
    test_result = r['wilcox.test'](robjects.FloatVector(list(females[column])),
                                   robjects.FloatVector(list(males[column])),
                                   alternative="greater", correct=True, exact=False)
    # Read the p-value by name instead of by its position in the result list.
    return test_result.rx2('p.value')[0]


for community in stats_df['community']:
    # Each community has its own database; per-user figures live in 'statistics'.
    community_db = connection[community]['statistics']
    # Only users with at least one contribution take part in the comparison.
    cursor = community_db.find({'contributions_total': {'$gt': 0}},
                               {u'_id': False, u'accepted_rate': True, u'gender': True,
                                u'mean_utility': True, u'questions_avg': True})

    df = pd.DataFrame(list(cursor))
    females = df.query("gender == 'Female'")
    males = df.query("gender == 'Male'")

    # One p-value per metric, keyed by the result field it is stored under.
    pvalues = dict((field, _greater_pvalue(females, males, column))
                   for column, field in PVALUE_FIELDS)

    # Store the p-values on this community's existing results document.
    # NOTE: Collection.update is deprecated since PyMongo 3.0 and removed in
    # 4.0; switch to update_one when the environment runs PyMongo >= 3.0.
    results_db.update({'community': community}, {'$set': pvalues})

In [ ]: