In [ ]:
# Scratch lookup: display SciPy's built-in documentation for mannwhitneyu.
# NOTE(review): no visible cell in this notebook calls stats.mannwhitneyu; the
# comparison further down is run through R's wilcox.test instead — confirm
# whether this cell is still needed.
from scipy import stats

help(stats.mannwhitneyu)

In [4]:
from __future__ import division
from scipy import stats
import random, pymongo
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as robjects

%matplotlib inline

In [98]:
# The database is named after the community being analysed.
community = 'poker'

# MongoDB server on this machine, port 27017.
client = pymongo.MongoClient('localhost', 27017)

# Collection that the query in the next cell reads from.
stats_db = client[community]['statistics']

In [99]:
# Filter: keep only documents whose contributions_total is greater than zero.
has_contributions = {'contributions_total': {'$gt': 0}}

# Projection: return the four activity counters and the gender field, and
# explicitly exclude Mongo's _id.
projection = {
    u'_id': False,
    u'questions_total': True,
    u'contributions_total': True,
    u'comments_total': True,
    u'answers_total': True,
    u'gender': True,
}

cursor = stats_db.find(has_contributions, projection)

# Drain the cursor into a list so pandas builds one row per returned document.
df = pd.DataFrame(list(cursor))

In [100]:
# Split the frame into the rows whose gender is exactly 'Female' and the rows
# whose gender is exactly 'Male'; rows with any other value land in neither.
females, males = (
    df.query(expression)
    for expression in ("gender == 'Female' ", "gender == 'Male' ")
)

In [101]:
# Copy each group's questions_total column into an R integer vector so it can
# be handed to R functions through rpy2.
# NOTE(review): assumes the column holds integers with no missing values —
# confirm against the stored documents.
females_r, males_r = (
    robjects.IntVector(list(group['questions_total']))
    for group in (females, males)
)

In [102]:
# Fetch R's wilcox.test; with two unpaired samples it performs the Wilcoxon
# rank-sum (Mann-Whitney) test.
wilr = robjects.r['wilcox.test']

# alternative="t" is R's accepted initial-letter abbreviation of "two.sided".
# correct=True applies the continuity correction and exact=False requests the
# normal approximation rather than an exact p-value.
test_options = dict(alternative="t", correct=True, exact=False)

result = wilr(females_r, males_r, **test_options)

In [103]:
# Look the p-value up by its R element name rather than by position, so this
# cell does not depend on the ordering of wilcox.test's result list. The
# element is a length-one R vector, hence the [0].
p_value = result.rx2('p.value')[0]

# Single-argument call form prints the same text on Python 2 and Python 3.
print("p-value: " + str(p_value))


p-value: 0.0331963503815

In [ ]:
# Show R's full printed summary of the test result. The single-argument call
# form behaves the same on Python 2 and Python 3.
print(result)

In [ ]:
# Short alias for rpy2's embedded R interface (the same object used earlier to
# fetch wilcox.test).
# NOTE(review): no use of `r` is visible in this part of the notebook — confirm
# that a later cell needs it.
r = robjects.r