In [13]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline
# Show floats in displayed frames with a thousands separator and two decimals.
pandas.set_option('display.float_format', '{:,.2f}'.format)

# Client for the MongoDB server listening on localhost:27017; the cells below
# read every result collection through it.
client = pymongo.MongoClient(host='localhost', port=27017)


In [14]:
# Despite its name, `results_db` ends up holding a *collection* handle
# (`question_1` inside the `results` database), which the next cell queries.
results_database = client['results']
results_db = results_database['question_1']

In [15]:
# Community names used to filter the result documents.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory", "dba",
    "diy", "drupal", "dsp", "electronics", "ell", "english",
    "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Fields requested from each document, grouped by the metric they belong to.
wanted_fields = [
    "community",
    "category",
    "questions_difference_mean",
    "questions_difference_median",
    "questions_pvalue",
    "answers_difference_mean",
    "answers_difference_median",
    "answers_pvalue",
    "comments_difference_mean",
    "comments_difference_median",
    "comments_pvalue",
    "contributions_difference_mean",
    "contributions_difference_median",
    "contributions_pvalue",
]

# Projection: drop the `_id` field and include only the wanted fields.
projection = {u'_id': False}
projection.update((field, True) for field in wanted_fields)

cursor = results_db.find({'community': {'$in': communities}}, projection)

# Drain the cursor into a frame with one row per returned document.
df = pandas.DataFrame([document for document in cursor])

In [23]:
# Export the current `df` as CSV (pandas writes the row index by default).
# NOTE(review): absolute, user-specific path -- this cell fails on any machine
# where the directory does not exist.
csv_path = "/Users/milena/Desktop/tabelas/q1.csv"
df.to_csv(csv_path)


In [24]:
# Despite its name, `results_db` ends up holding a *collection* handle
# (`question_2` inside the `results` database), which the next cell queries.
results_database = client['results']
results_db = results_database['question_2']

In [25]:
# Community names used to filter the result documents.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory", "dba",
    "diy", "drupal", "dsp", "electronics", "ell", "english",
    "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Fields requested from each document, grouped by the metric they belong to.
wanted_fields = [
    "community",
    "category",
    "acc_rate_difference_mean",
    "acc_rate_difference_median",
    "acc_rate_pvalue",
    "questions_avg_difference_mean",
    "questions_avg_difference_median",
    "questions_avg_pvalue",
    "mean_utility_difference_mean",
    "mean_utility_difference_median",
    "mean_utility_pvalue",
]

# Projection: drop the `_id` field and include only the wanted fields.
projection = {u'_id': False}
projection.update((field, True) for field in wanted_fields)

cursor = results_db.find({'community': {'$in': communities}}, projection)

# Drain the cursor into a frame with one row per returned document.
df = pandas.DataFrame([document for document in cursor])

In [26]:
# Export the current `df` as CSV (pandas writes the row index by default).
# NOTE(review): absolute, user-specific path -- this cell fails on any machine
# where the directory does not exist.
csv_path = "/Users/milena/Desktop/tabelas/q2.csv"
df.to_csv(csv_path)


In [39]:
# Despite its name, `results_db` ends up holding a *collection* handle
# (`question_3` inside the `results` database), which the next cell queries.
results_database = client['results']
results_db = results_database['question_3']

In [40]:
# Community names used to filter the result documents.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory", "dba",
    "diy", "drupal", "dsp", "electronics", "ell", "english",
    "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Fields requested from each document, grouped by the metric they belong to.
# NOTE(review): `frequency_difference_mean` is requested but no
# `lifetime_difference_mean` is -- confirm the omission is intentional.
wanted_fields = [
    "community",
    "category",
    "frequency_difference_mean",
    "frequency_difference_median",
    "frequency_pvalue",
    "lifetime_difference_median",
    "lifetime_pvalue",
]

# Projection: drop the `_id` field and include only the wanted fields.
projection = {u'_id': False}
projection.update((field, True) for field in wanted_fields)

cursor = results_db.find({'community': {'$in': communities}}, projection)

# Drain the cursor into a frame with one row per returned document.
df = pandas.DataFrame([document for document in cursor])

In [41]:
# Export the current `df` as CSV (pandas writes the row index by default).
# NOTE(review): absolute, user-specific path -- this cell fails on any machine
# where the directory does not exist.
csv_path = "/Users/milena/Desktop/tabelas/q3.csv"
df.to_csv(csv_path)


In [36]:
# Despite its name, `results_db` ends up holding a *collection* handle
# (`question_4` inside the `results` database), which the next cell queries.
results_database = client['results']
results_db = results_database['question_4']

In [37]:
# Community names used to filter the result documents.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory", "dba",
    "diy", "drupal", "dsp", "electronics", "ell", "english",
    "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Fields requested from each document.
wanted_fields = [
    "community",
    "category",
    "women_coef",
    "women_pvalue",
    "women_rsquare",
    "women_rsquare_adj",
]

# Projection: drop the `_id` field and include only the wanted fields.
projection = {u'_id': False}
projection.update((field, True) for field in wanted_fields)

cursor = results_db.find({'community': {'$in': communities}}, projection)

# Drain the cursor into a frame with one row per returned document.
df = pandas.DataFrame([document for document in cursor])

In [38]:
# Export the current `df` as CSV (pandas writes the row index by default).
# NOTE(review): absolute, user-specific path -- this cell fails on any machine
# where the directory does not exist.
csv_path = "/Users/milena/Desktop/tabelas/q4.csv"
df.to_csv(csv_path)


In [33]:
# Despite its name, `results_db` ends up holding a *collection* handle
# (`question_5` inside the `results` database), which the next cell queries.
results_database = client['results']
results_db = results_database['question_5']

In [34]:
# Community names used to filter the result documents.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory", "dba",
    "diy", "drupal", "dsp", "electronics", "ell", "english",
    "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Fields requested from each document.
wanted_fields = [
    "community",
    "category",
    "women_coef",
    "women_pvalue",
    "women_rsquare",
    "women_rsquare_adj",
]

# Projection: drop the `_id` field and include only the wanted fields.
projection = {u'_id': False}
projection.update((field, True) for field in wanted_fields)

cursor = results_db.find({'community': {'$in': communities}}, projection)

# Drain the cursor into a frame with one row per returned document.
df = pandas.DataFrame([document for document in cursor])

In [35]:
# Export the current `df` as CSV (pandas writes the row index by default).
# NOTE(review): absolute, user-specific path -- this cell fails on any machine
# where the directory does not exist.
csv_path = "/Users/milena/Desktop/tabelas/q5.csv"
df.to_csv(csv_path)