In [1]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline

# Open a connection to the MongoDB server on localhost, port 27017.
client = pymongo.MongoClient(host='localhost', port=27017)

# NOTE: despite the "_db" suffix, `results_db` is the 'question_3' collection
# inside the 'results' database; the query cells call .find() on it.
results_database = client['results']
results_db = results_database['question_3']


In [2]:
# Keep only the documents whose lifetime_pvalue is below 0.05, and fetch just
# the fields listed in the projection (Mongo's _id is explicitly excluded).
lifetime_filter = {'lifetime_pvalue': {'$lt': 0.05}}
lifetime_fields = {
    u'_id': False,
    u'community': True,
    'lifetime_pvalue': True,
    'lifetime_difference_median': True,
    'category': True,
}
cursor = results_db.find(lifetime_filter, lifetime_fields)

# Drain the cursor into a list so pandas builds one row per returned document.
matching_documents = list(cursor)
df = pandas.DataFrame(matching_documents)

In [3]:
# Display the frame built by the preceding cell
# (documents with lifetime_pvalue < 0.05).
df


Out[3]:
category community lifetime_difference_median lifetime_pvalue
0 technology apple -14.504535 3.285098e-04
1 professional aviation -6.876311 1.824813e-02
2 life-arts diy -12.619026 9.250952e-03
3 technology dsp -17.150387 1.705494e-03
4 culture-recreation english -9.046545 3.144978e-03
5 culture-recreation homebrew -22.549983 3.586600e-03
6 science mathoverflow -327.759879 9.222933e-07
7 culture-recreation mechanics -6.417729 1.797765e-02
8 technology networkengineering -7.260233 3.092324e-02
9 science philosophy -1.663282 2.130040e-02
10 life-arts photo -4.983799 3.948523e-03
11 culture-recreation poker -3.829913 6.463297e-03
12 life-arts scifi -8.804869 1.758364e-05
13 technology serverfault -35.290989 4.191207e-02
14 technology stackoverflow -346.874280 2.501768e-69
15 technology superuser -70.928687 2.551841e-03
16 culture-recreation travel -1.919741 5.696090e-03
17 technology tridion -215.047721 1.714240e-03
18 technology wordpress 74.715491 1.650433e-02


In [4]:
# Keep only the documents whose frequency_pvalue is below 0.05, and fetch just
# the fields listed in the projection (Mongo's _id is explicitly excluded).
frequency_filter = {'frequency_pvalue': {'$lt': 0.05}}
frequency_fields = {
    u'_id': False,
    u'community': True,
    'frequency_pvalue': True,
    'frequency_difference_median': True,
    'category': True,
    'frequency_difference_mean': True,
    'frequency_pvalue_greater': True,
}
cursor = results_db.find(frequency_filter, frequency_fields)

# Drain the cursor into a list so pandas builds one row per returned document.
# This rebinds `df`, replacing whatever frame it held before.
matching_documents = list(cursor)
df = pandas.DataFrame(matching_documents)

In [5]:
# Display the frame rebuilt by the preceding cell
# (documents with frequency_pvalue < 0.05); `df` no longer holds the
# lifetime results at this point.
df


Out[5]:
category community frequency_difference_mean frequency_difference_median frequency_pvalue frequency_pvalue_greater
0 technology askubuntu 0.109569 0.233333 1.148150e-03 3.318362e-04
1 technology blender 0.578160 0.833333 5.225765e-03 2.612883e-03
2 culture-recreation boardgames 0.241009 0.443478 4.388575e-02 2.194288e-02
3 culture-recreation chess -0.396123 -0.333333 1.635500e-02 9.919184e-01
4 life-arts cooking 0.110438 0.300000 2.232337e-03 1.116169e-03
5 culture-recreation english 0.293234 0.038462 5.897432e-03 2.185860e-03
6 technology gis 0.097155 0.166667 4.886036e-02 2.443018e-02
7 science math 0.189950 0.100000 1.680208e-07 7.121923e-08
8 life-arts movies -0.268306 -0.078788 3.227311e-02 9.838916e-01
9 life-arts parenting 0.277329 0.352941 9.921431e-03 4.960716e-03
10 technology sharepoint 0.305315 0.173333 8.171926e-03 4.085963e-03
11 technology stackoverflow 0.143532 0.145274 7.142909e-35 2.872172e-35
12 science stats 0.151898 0.172840 1.712220e-02 8.561101e-03
13 technology wordpress 0.131091 0.229710 1.671671e-02 8.358355e-03

In [ ]: