In [1]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline

# Open a connection to the MongoDB server running on this machine.
client = pymongo.MongoClient(host='localhost', port=27017)

# NOTE: despite its name, `results_db` is the `question_2` *collection*
# inside the `results` database; the query cells below call `.find()` on it.
results_database = client['results']
results_db = results_database['question_2']


In [2]:
# Keep only the documents whose stored `acc_rate_pvalue` is below 0.05.
acc_rate_filter = {'acc_rate_pvalue': {'$lt': 0.05}}

# Projection: drop Mongo's `_id` and fetch just the acceptance-rate fields
# plus the community/category labels.
acc_rate_fields = {
    '_id': False,
    'community': True,
    'acc_rate_pvalue': True,
    'acc_rate_difference_median': True,
    'category': True,
    'acc_rate_difference_mean': True,
    'acc_rate_pvalue_greater': True,
}

cursor = results_db.find(acc_rate_filter, acc_rate_fields)

# Drain the cursor into a list so every matching document becomes one row.
acc_rate_records = list(cursor)
df = pandas.DataFrame(acc_rate_records)

In [3]:
# Display the acceptance-rate results frame built from the query in the previous cell.
df


Out[3]:
acc_rate_difference_mean acc_rate_difference_median acc_rate_pvalue acc_rate_pvalue_greater category community
0 0.206765 0.500000 0.018411 7.284618e-04 culture-recreation anime
1 -0.213494 0.000000 0.046674 9.870260e-01 culture-recreation chess
2 0.032688 0.000000 0.017664 3.154929e-09 life-arts cooking
3 -0.402076 -0.400000 0.041956 8.943073e-01 science earthscience
4 -0.098174 -0.097561 0.047993 9.400627e-01 technology electronics
5 -0.266065 -0.300000 0.042949 8.783293e-01 technology joomla
6 0.316499 0.356250 0.047161 7.107929e-03 professional moderators
7 0.033556 0.000000 0.040443 1.479494e-04 life-arts parenting
8 0.230389 0.500000 0.001159 1.036485e-04 culture-recreation skeptics
9 -0.106583 -0.032292 0.011704 9.946228e-01 technology sound
10 0.035202 0.000000 0.008958 7.312733e-05 technology ux
11 0.084079 0.122432 0.008246 2.951449e-03 technology wordpress
12 0.123577 0.133929 0.001630 1.572057e-06 life-arts writers


In [4]:
# Keep only the documents whose stored `mean_utility_pvalue` is below 0.05.
# NOTE(review): the recorded output of this query includes a row (community
# 'ham') with a p-value of exactly 0 and NaN differences -- presumably a
# degenerate test rather than a real effect; confirm whether such rows
# should be excluded before interpreting the table.
mean_utility_filter = {'mean_utility_pvalue': {'$lt': 0.05}}

# Projection: drop Mongo's `_id` and fetch just the mean-utility fields
# plus the community/category labels.
mean_utility_fields = {
    '_id': False,
    'community': True,
    'mean_utility_pvalue': True,
    'mean_utility_difference_median': True,
    'category': True,
    'mean_utility_difference_mean': True,
    'mean_utility_pvalue_greater': True,
}

cursor = results_db.find(mean_utility_filter, mean_utility_fields)

# Drain the cursor into a list so every matching document becomes one row.
mean_utility_records = list(cursor)
df = pandas.DataFrame(mean_utility_records)

In [5]:
# Display the mean-utility results frame built from the query in the previous cell.
df


Out[5]:
category community mean_utility_difference_mean mean_utility_difference_median mean_utility_pvalue mean_utility_pvalue_greater
0 life-arts cooking 0.144291 0.198843 0.027061 0.013531
1 technology dba -0.320409 -0.358196 0.040312 0.979872
2 culture-recreation english 0.105290 0.117866 0.037313 0.018656
3 culture-recreation gaming 0.167686 0.290507 0.044445 0.022223
4 culture-recreation ham NaN NaN 0.000000 NaN
5 culture-recreation judaism 0.523533 0.617710 0.015952 0.007976
6 professional moderators 0.852810 1.078653 0.038067 0.019033
7 life-arts movies 0.642013 1.022678 0.003465 0.001733
8 life-arts parenting 0.264191 0.328877 0.001775 0.000887
9 culture-recreation skeptics 0.410154 0.966998 0.021781 0.010890
10 technology stackoverflow -0.012649 -0.019334 0.036893 0.981568
11 technology ux 0.181439 0.106269 0.019347 0.009674
12 life-arts writers 0.315712 0.452699 0.007561 0.003781


In [6]:
# Keep only the documents whose stored `questions_avg_pvalue` is below 0.05.
# NOTE(review): the recorded output of this query includes rows (communities
# 'poker' and 'startups') with a p-value of exactly 0 and NaN differences --
# presumably degenerate tests rather than real effects; confirm whether such
# rows should be excluded before interpreting the table.
questions_avg_filter = {'questions_avg_pvalue': {'$lt': 0.05}}

# Projection: drop Mongo's `_id` and fetch just the questions-average fields
# plus the community/category labels.
questions_avg_fields = {
    '_id': False,
    'community': True,
    'questions_avg_pvalue': True,
    'questions_avg_difference_median': True,
    'category': True,
    'questions_avg_difference_mean': True,
    'questions_avg_pvalue_greater': True,
}

cursor = results_db.find(questions_avg_filter, questions_avg_fields)

# Drain the cursor into a list so every matching document becomes one row.
questions_avg_records = list(cursor)
df = pandas.DataFrame(questions_avg_records)

In [7]:
# Display the questions-average results frame built from the query in the previous cell.
df


Out[7]:
category community questions_avg_difference_mean questions_avg_difference_median questions_avg_pvalue questions_avg_pvalue_greater
0 culture-recreation bicycles 3.268229 2.750000 1.342875e-02 0.009781
1 business bitcoin -2.968251 -1.000000 6.998673e-04 0.943135
2 science cogsci 3.261014 1.983333 1.955260e-02 0.216194
3 life-arts fitness 1.675207 1.666667 2.882686e-02 0.181205
4 culture-recreation gaming 1.195696 1.000000 3.109324e-03 0.039853
5 culture-recreation history 1.310390 2.263158 4.076597e-02 0.370599
6 life-arts money 1.773403 0.958333 3.812227e-03 0.563161
7 life-arts photo 0.959478 1.000000 2.763784e-02 0.025896
8 culture-recreation poker NaN NaN 0.000000e+00 NaN
9 technology pt 1.432900 1.304848 1.679421e-02 0.007495
10 technology salesforce -0.774751 -0.831696 8.102885e-03 0.992664
11 technology serverfault -0.553531 0.000000 2.379587e-05 0.999955
12 culture-recreation sports 2.410160 4.000000 4.316062e-02 0.009992
13 technology stackoverflow -0.452880 -0.266981 1.830813e-37 0.955503
14 business startups NaN NaN 0.000000e+00 NaN
15 technology tex -0.771037 -0.354167 4.875920e-02 0.520657
16 technology unix -0.920495 -0.425969 8.230345e-04 0.425506
17 technology wordpress 0.204323 0.428571 1.551014e-02 0.003743

In [ ]: