In [1]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline

# Open a client for the MongoDB server on this machine (host 'localhost', port 27017).
client = pymongo.MongoClient(host='localhost', port=27017)

# Handle to the 'question_4' collection inside the 'results' database.
# Note: despite its name, `results_db` refers to a collection, not a database.
results_db = client.results.question_4


In [2]:
# Threshold for `women_pvalue`: only result documents strictly below it are kept.
SIGNIFICANCE_LEVEL = 0.05

# Fields requested from each result document. The same list is used as the
# DataFrame's column list so the frame has one fixed, explicit schema.
RESULT_FIELDS = ['category', 'community', 'women_coef', 'women_pvalue']

# Projection: return only RESULT_FIELDS and suppress Mongo's `_id`.
projection = {field: True for field in RESULT_FIELDS}
projection['_id'] = False

cursor = results_db.find({'women_pvalue': {'$lt': SIGNIFICANCE_LEVEL}}, projection)

# Passing `columns` keeps the column set and order stable even when no
# document matches; without it an empty cursor yields a frame with no
# columns at all, and the order depends on the Python/pandas version.
df = pandas.DataFrame(list(cursor), columns=RESULT_FIELDS)

In [3]:
# Display the frame built from the `women_pvalue` < 0.05 query as this cell's output.
df


Out[3]:
category community women_coef women_pvalue
0 technology android 0.004155 0.000197
1 culture-recreation anime -0.068038 0.015031
2 technology askubuntu 0.006320 0.000602
3 technology codereview 0.004388 0.000045
4 technology crypto 0.007850 0.026864
5 technology drupal 0.006923 0.006701
6 culture-recreation french 0.082987 0.015788
7 technology gamedev 0.005835 0.009464
8 technology gis 0.008854 0.000360
9 culture-recreation hermeneutics 0.032043 0.005428
10 culture-recreation homebrew -0.000530 0.027490
11 culture-recreation judaism 0.009322 0.002570
12 science math 0.002990 0.002185
13 science mathoverflow 0.002571 0.006348
14 science physics 0.010334 0.019220
15 business pm -0.005089 0.042308
16 life-arts productivity 0.018492 0.005075
17 technology programmers -0.002387 0.041985
18 technology serverfault 0.003132 0.000029
19 technology sound -0.018388 0.000122
20 technology stackoverflow 0.002440 0.000015
21 technology tex 0.003648 0.010414
22 technology unix 0.002466 0.007646
23 technology ux 0.009817 0.001215
24 technology webapps -0.004390 0.045594
25 technology webmasters 0.005324 0.000662
26 technology windowsphone -0.008368 0.011275

In [ ]: