In [40]:
from __future__ import division
from scipy import stats
import random, pymongo
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as robjects

%matplotlib inline

In [41]:
# Short alias for rpy2's `robjects.r`; a later cell looks up R functions through it
# by name (e.g. r['wilcox.test']).
r = robjects.r

In [42]:
# Client for the MongoDB server on localhost:27017.
connection = pymongo.MongoClient('localhost', 27017)

# Collection 'question_1' inside the 'results' database.
results_db = connection['results']['question_1']

# Projection: exclude Mongo's _id and keep only the community name plus the
# four stored p-value fields.
wanted_fields = (u'community', u'comments_pvalue', u'questions_pvalue',
                 u'answers_pvalue', u'contributions_pvalue')
projection = {u'_id': False}
projection.update((field, True) for field in wanted_fields)

cursor = results_db.find({}, projection)

# One row per returned document; `communities` is the 'community' column as a list.
stats_df = pd.DataFrame(list(cursor))
communities = list(stats_df['community'])

In [46]:
# Contribution kinds compared between genders. For each kind, the per-community
# column '<kind>_total' is tested and the result is stored in the results
# collection under '<kind>_pvalue_greater'.
CONTRIBUTION_KINDS = ('questions', 'answers', 'comments', 'contributions')


def _greater_pvalue(female_counts, male_counts):
    """Return the p-value of R's one-sided two-sample ``wilcox.test``.

    Runs ``wilcox.test(x, y, alternative="greater", correct=TRUE, exact=FALSE)``
    with ``x = female_counts`` and ``y = male_counts``, i.e. the alternative
    hypothesis is that the female counts tend to be greater than the male ones.

    Parameters
    ----------
    female_counts, male_counts : iterable of int
        Per-document totals for each group. Both must be non-empty, otherwise
        R raises an error.

    Returns
    -------
    float
        The ``p.value`` element of the R ``htest`` result.
    """
    htest = r['wilcox.test'](robjects.IntVector(list(female_counts)),
                             robjects.IntVector(list(male_counts)),
                             alternative="greater", correct=True, exact=False)
    # Read the p-value by name rather than by position in the htest list.
    return htest.rx2('p.value')[0]


for community in stats_df['community']:
    # Each community has its own database; the counts live in its 'statistics' collection.
    community_db = connection[community]['statistics']
    # Only documents with contributions_total > 0 are considered.
    cursor = community_db.find({'contributions_total': {'$gt': 0}},
                               {u'_id': False, u'gender': True,
                                u'questions_total': True, u'answers_total': True,
                                u'comments_total': True, u'contributions_total': True})

    df = pd.DataFrame(list(cursor))
    if df.empty:
        # An empty frame has no 'gender' column, so the queries below would raise.
        print('Skipping %s: no documents with contributions_total > 0' % community)
        continue

    females = df.query("gender == 'Female'")
    males = df.query("gender == 'Male'")
    if females.empty or males.empty:
        # wilcox.test needs observations in both samples; skip instead of
        # aborting the remaining communities.
        print('Skipping %s: %d female / %d male rows, test not possible'
              % (community, len(females), len(males)))
        continue

    pvalues = {}
    for kind in CONTRIBUTION_KINDS:
        column = kind + '_total'
        pvalues[kind + '_pvalue_greater'] = _greater_pvalue(females[column], males[column])

    # update_one replaces the deprecated Collection.update (removed in PyMongo 4);
    # like the old default it modifies a single matching document and never upserts.
    results_db.update_one({'community': community}, {'$set': pvalues})

In [ ]: