In [1]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline

# Connect to the MongoDB server running on this machine.
client = pymongo.MongoClient(host='localhost', port=27017)

# NOTE: despite the `_db` suffix this is a *collection* handle:
# collection `question_1` inside database `results`.
results_db = client.results.question_1


In [2]:
# Select the documents whose `questions_pvalue` is below 0.05.
# The projection keeps the identifying fields (community, category) plus the
# question statistics and drops Mongo's `_id`.
# The result is sorted by community name: without an explicit sort MongoDB does
# not guarantee the order of the returned documents, so the table rows (and the
# positional index pandas assigns) could change between runs.
cursor = results_db.find(
    {'questions_pvalue': {'$lt': 0.05}},
    {u'_id': False, u'community': True, 'category': True,
     'questions_difference_mean': True,
     'questions_difference_median': True,
     'questions_pvalue': True,
     'questions_pvalue_greater': True}
).sort('community', pymongo.ASCENDING)

# Materialise the cursor: one DataFrame row per matching document.
df = pandas.DataFrame(list(cursor))

In [3]:
# Display the table built by the `questions_pvalue` query in the previous cell.
# `df` is rebound by every query cell in this notebook, so this shows whichever
# query was executed most recently.
df


Out[3]:
category community questions_difference_mean questions_difference_median questions_pvalue questions_pvalue_greater
0 technology askubuntu 0.626210 0.0 8.982973e-04 2.695887e-04
1 technology blender 2.371069 2.5 7.611797e-03 3.805898e-03
2 life-arts cooking 0.925118 0.0 4.248761e-02 2.124380e-02
3 technology crypto 0.928236 0.0 2.156628e-02 1.078314e-02
4 science cs 0.294449 0.0 7.501965e-03 3.750983e-03
5 life-arts gardening -0.076477 0.0 1.207774e-03 9.993983e-01
6 culture-recreation german -1.133904 0.0 3.983413e-02 9.801365e-01
7 technology gis 1.296285 1.0 2.011292e-03 1.005646e-03
8 culture-recreation homebrew -2.786560 -1.0 2.871992e-02 9.857665e-01
9 science math 3.287745 2.0 2.744566e-30 2.206407e-31
10 science matheducators -0.125413 1.0 1.671627e-02 8.358137e-03
11 professional moderators 3.100000 3.0 5.831620e-03 2.915810e-03
12 science physics 0.630258 0.0 5.766123e-05 2.883061e-05
13 culture-recreation poker -1.017699 0.0 3.319635e-02 9.839200e-01
14 technology programmers 0.398746 1.0 1.273085e-03 1.502306e-04
15 business quant 1.772388 0.0 3.699186e-02 1.849593e-02
16 technology raspberrypi 0.635781 0.0 2.036093e-02 1.018046e-02
17 technology robotics 1.079961 0.5 2.270858e-02 1.135429e-02
18 life-arts scifi 1.029343 1.0 4.913154e-03 2.456577e-03
19 technology security 0.375476 0.0 4.607206e-02 1.600280e-02
20 technology sharepoint 1.261590 0.5 4.413821e-02 2.206910e-02
21 culture-recreation skeptics 0.983562 1.0 1.127363e-02 5.636814e-03
22 technology stackoverflow 2.129259 1.0 3.549702e-49 2.932479e-50
23 science stats 0.770764 1.0 2.962503e-05 1.481251e-05
24 technology tex 0.132985 0.0 7.428904e-04 3.714452e-04
25 technology unix -0.249885 0.0 1.553943e-02 5.654066e-03
26 technology webmasters 0.662820 0.0 5.885067e-03 2.942533e-03
27 technology wordpress 1.121892 1.0 7.313139e-03 3.656569e-03
28 professional workplace 0.407321 0.0 5.776984e-03 2.888492e-03


In [4]:
# Select the documents whose `answers_pvalue` is below 0.05.
# The projection keeps the identifying fields (community, category) plus the
# answer statistics and drops Mongo's `_id`.
# The result is sorted by community name: without an explicit sort MongoDB does
# not guarantee the order of the returned documents, so the table rows (and the
# positional index pandas assigns) could change between runs.
cursor = results_db.find(
    {'answers_pvalue': {'$lt': 0.05}},
    {u'_id': False, u'community': True, 'category': True,
     'answers_difference_mean': True,
     'answers_difference_median': True,
     'answers_pvalue': True,
     'answers_pvalue_greater': True}
).sort('community', pymongo.ASCENDING)

# Materialise the cursor: one DataFrame row per matching document.
df = pandas.DataFrame(list(cursor))

In [5]:
# Display the table built by the `answers_pvalue` query in the previous cell.
# `df` is rebound by every query cell in this notebook, so this shows whichever
# query was executed most recently.
df


Out[5]:
answers_difference_mean answers_difference_median answers_pvalue answers_pvalue_greater category community
0 3.504256 1.0 3.014854e-02 1.507427e-02 culture-recreation anime
1 -1.219070 0.0 3.529751e-02 9.879948e-01 technology apple
2 -0.391364 0.0 2.836536e-02 1.418268e-02 science biology
3 0.027273 4.0 6.788403e-03 3.394201e-03 culture-recreation buddhism
4 -5.131382 -1.0 3.434901e-02 9.830079e-01 culture-recreation chess
5 2.752951 1.0 3.656488e-14 1.828244e-14 life-arts cooking
6 -5.112772 0.0 3.657293e-04 9.998181e-01 technology dsp
7 -0.775314 0.0 4.194065e-02 2.097032e-02 culture-recreation ell
8 0.059429 1.0 1.705462e-07 9.328336e-09 culture-recreation english
9 1.087152 0.0 1.852242e-02 9.261208e-03 professional freelancing
10 0.383031 1.0 2.239911e-04 1.119956e-04 life-arts gardening
11 4.165744 1.0 4.142828e-03 2.071414e-03 life-arts graphicdesign
12 -1.435625 1.0 3.576606e-02 1.788303e-02 culture-recreation history
13 -15.081519 -1.0 2.617809e-06 9.999998e-01 science math
14 -7.987704 -1.0 9.024135e-07 9.999995e-01 science mathoverflow
15 -0.755857 0.5 1.056443e-02 5.282217e-03 life-arts music
16 6.556530 0.0 1.448761e-05 7.243807e-06 life-arts parenting
17 0.386200 1.0 5.970081e-04 2.985041e-04 life-arts pets
18 -4.389483 0.0 4.719489e-02 9.840207e-01 science physics
19 1.842711 1.0 8.868694e-03 4.434347e-03 life-arts productivity
20 -0.370759 0.0 1.527983e-02 9.943805e-01 technology programmers
21 -0.756880 0.0 3.723562e-02 9.857485e-01 technology serverfault
22 5.985407 2.0 2.673899e-02 1.336949e-02 technology sharepoint
23 6.600405 1.0 9.684467e-03 4.842233e-03 technology sqa
24 -12.522677 -2.0 1.256747e-133 1.000000e+00 technology stackoverflow
25 -2.996720 0.0 4.125210e-03 9.979381e-01 science stats
26 -1.104187 0.0 2.167494e-04 9.999453e-01 technology superuser
27 -6.383446 0.0 5.752456e-03 9.990749e-01 technology tex
28 -0.094765 1.0 4.332118e-05 2.166059e-05 culture-recreation travel
29 -16.825439 -2.0 7.774677e-04 9.996192e-01 technology tridion
30 -2.338062 -1.0 3.924694e-02 9.859503e-01 technology unix
31 2.177810 0.0 1.214651e-04 2.604481e-05 technology ux
32 4.307212 1.0 1.364241e-02 6.821205e-03 professional workplace
33 11.212673 0.0 3.536663e-04 1.768332e-04 life-arts writers


In [6]:
# Select the documents whose `comments_pvalue` is below 0.05.
# The projection keeps the identifying fields (community, category) plus the
# comment statistics and drops Mongo's `_id`.
# The result is sorted by community name: without an explicit sort MongoDB does
# not guarantee the order of the returned documents, so the table rows (and the
# positional index pandas assigns) could change between runs.
cursor = results_db.find(
    {'comments_pvalue': {'$lt': 0.05}},
    {u'_id': False, u'community': True, 'category': True,
     'comments_difference_mean': True,
     'comments_difference_median': True,
     'comments_pvalue': True,
     'comments_pvalue_greater': True}
).sort('community', pymongo.ASCENDING)

# Materialise the cursor: one DataFrame row per matching document.
df = pandas.DataFrame(list(cursor))

In [7]:
# Display the table built by the `comments_pvalue` query in the previous cell.
# `df` is rebound by every query cell in this notebook, so this shows whichever
# query was executed most recently.
df


Out[7]:
category comments_difference_mean comments_difference_median comments_pvalue comments_pvalue_greater community
0 technology 0.833333 5.5 0.030370 0.015185 blender
1 culture-recreation 1.552167 1.0 0.033731 0.016865 bricks
2 culture-recreation -10.028591 -1.0 0.044628 0.977908 chess
3 culture-recreation -5.473852 -1.0 0.043435 0.979450 english
4 technology -4.845635 1.0 0.018499 0.009249 gis
5 culture-recreation -21.597786 -1.0 0.027246 0.986452 japanese
6 science -34.131884 2.0 0.000010 0.000005 math
7 science -26.426509 -1.0 0.000379 0.999811 mathoverflow
8 professional 8.700000 5.0 0.027706 0.013853 moderators
9 culture-recreation -6.418458 -2.0 0.039444 0.980816 poker
10 technology 18.040000 9.0 0.015857 0.007929 reverseengineering
11 life-arts -4.068216 0.0 0.000001 1.000000 scifi
12 technology 3.705531 4.5 0.005943 0.002971 sharepoint
13 science -3.137213 1.0 0.022231 0.011115 stats
14 culture-recreation -4.847878 0.0 0.001836 0.999083 travel
15 technology -1.269860 0.0 0.034591 0.982708 webapps
16 technology 1.446039 1.0 0.009753 0.004877 wordpress


In [8]:
# Select the documents whose `contributions_pvalue` is below 0.05.
# The projection keeps the identifying fields (community, category) plus the
# contribution statistics and drops Mongo's `_id`.
# The result is sorted by community name: without an explicit sort MongoDB does
# not guarantee the order of the returned documents, so the table rows (and the
# positional index pandas assigns) could change between runs.
cursor = results_db.find(
    {'contributions_pvalue': {'$lt': 0.05}},
    {u'_id': False, u'community': True, 'category': True,
     'contributions_difference_mean': True,
     'contributions_difference_median': True,
     'contributions_pvalue': True,
     'contributions_pvalue_greater': True}
).sort('community', pymongo.ASCENDING)

# Materialise the cursor: one DataFrame row per matching document.
df = pandas.DataFrame(list(cursor))

In [9]:
# Display the table built by the `contributions_pvalue` query in the previous
# cell. `df` is rebound by every query cell in this notebook, so this shows
# whichever query was executed most recently.
df


Out[9]:
category community contributions_difference_mean contributions_difference_median contributions_pvalue contributions_pvalue_greater
0 technology blender 1.728512 9.5 2.186822e-02 1.093411e-02
1 culture-recreation chess -15.861811 -2.0 3.850832e-02 9.809424e-01
2 life-arts cooking 7.768594 1.5 7.940445e-06 3.970223e-06
3 culture-recreation ell -1.449839 2.0 4.223044e-02 2.111522e-02
4 technology gis -8.777580 2.0 4.489468e-02 2.244734e-02
5 culture-recreation homebrew -22.685933 -2.5 1.651944e-02 9.918165e-01
6 science math -45.926372 6.0 9.884043e-10 4.589688e-10
7 science mathoverflow -35.747597 -3.0 3.204355e-04 9.998398e-01
8 culture-recreation mechanics -10.246957 -1.0 2.934550e-02 9.853715e-01
9 professional moderators 15.960000 11.0 4.712101e-02 2.356051e-02
10 life-arts music -3.186743 1.5 3.846174e-02 1.923087e-02
11 life-arts parenting 11.867971 0.0 3.271796e-02 1.635898e-02
12 technology reverseengineering 27.166667 9.0 2.938934e-02 1.469467e-02
13 technology sharepoint 10.952528 7.0 4.029844e-03 2.014922e-03
14 science stats -5.361975 2.5 1.092445e-02 5.462227e-03
15 technology tor 25.972018 5.0 2.250148e-02 1.125074e-02
16 technology tridion -48.532895 -6.0 3.876057e-02 9.808822e-01
17 technology ux 7.893298 1.0 1.677968e-02 8.389840e-03
18 technology wordpress 1.599581 3.0 5.254200e-03 2.627100e-03

In [ ]: