In [13]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
# Render floats in DataFrame output with thousands separators and two decimals.
two_decimal_format = '{:,.2f}'.format
pandas.set_option('display.float_format', two_decimal_format)
# Connection handle to the MongoDB server on this machine, reused by every
# cell below that selects a results collection.
client = pymongo.MongoClient(host='localhost', port=27017)
In [14]:
# Select the 'question_1' collection inside the 'results' database.
results_database = client['results']
results_db = results_database['question_1']
In [15]:
# Only documents whose 'community' is one of these names are fetched.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory",
    "dba", "diy", "drupal", "dsp", "electronics", "ell",
    "english", "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Projection: leave out Mongo's '_id' and request only the fields exported
# for question 1.
projection = {
    u'_id': False,
    "community": True,
    "comments_difference_median": True,
    "answers_difference_mean": True,
    "contributions_difference_mean": True,
    "contributions_difference_median": True,
    "questions_pvalue": True,
    "contributions_pvalue": True,
    "comments_difference_mean": True,
    "answers_difference_median": True,
    "answers_pvalue": True,
    "comments_pvalue": True,
    "questions_difference_mean": True,
    "questions_difference_median": True,
    "category": True,
}

community_filter = {'community': {'$in': communities}}
cursor = results_db.find(community_filter, projection)

# Drain the cursor into a list so pandas builds the frame from plain dicts.
documents = list(cursor)
df = pandas.DataFrame(documents)
In [23]:
# Write the question 1 table to disk as CSV using pandas' default settings.
csv_path = "/Users/milena/Desktop/tabelas/q1.csv"
df.to_csv(csv_path)
In [24]:
# Select the 'question_2' collection inside the 'results' database.
results_database = client['results']
results_db = results_database['question_2']
In [25]:
# Only documents whose 'community' is one of these names are fetched.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory",
    "dba", "diy", "drupal", "dsp", "electronics", "ell",
    "english", "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Projection: leave out Mongo's '_id' and request only the fields exported
# for question 2.
projection = {
    u'_id': False,
    "acc_rate_difference_median": True,
    "acc_rate_pvalue": True,
    "community": True,
    "questions_avg_difference_mean": True,
    "questions_avg_difference_median": True,
    "mean_utility_difference_mean": True,
    "mean_utility_difference_median": True,
    "acc_rate_difference_mean": True,
    "mean_utility_pvalue": True,
    "questions_avg_pvalue": True,
    "category": True,
}

community_filter = {'community': {'$in': communities}}
cursor = results_db.find(community_filter, projection)

# Drain the cursor into a list so pandas builds the frame from plain dicts.
documents = list(cursor)
df = pandas.DataFrame(documents)
In [26]:
# Write the question 2 table to disk as CSV using pandas' default settings.
csv_path = "/Users/milena/Desktop/tabelas/q2.csv"
df.to_csv(csv_path)
In [39]:
# Select the 'question_3' collection inside the 'results' database.
results_database = client['results']
results_db = results_database['question_3']
In [40]:
# Only documents whose 'community' is one of these names are fetched.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory",
    "dba", "diy", "drupal", "dsp", "electronics", "ell",
    "english", "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Projection: leave out Mongo's '_id' and request only the fields exported
# for question 3.
# NOTE(review): "frequency" has mean, median and pvalue fields, but no
# "lifetime_difference_mean" is requested -- confirm the omission is intended.
projection = {
    u'_id': False,
    "frequency_difference_mean": True,
    "community": True,
    "frequency_difference_median": True,
    "frequency_pvalue": True,
    "lifetime_difference_median": True,
    "lifetime_pvalue": True,
    "category": True,
}

community_filter = {'community': {'$in': communities}}
cursor = results_db.find(community_filter, projection)

# Drain the cursor into a list so pandas builds the frame from plain dicts.
documents = list(cursor)
df = pandas.DataFrame(documents)
In [41]:
# Write the question 3 table to disk as CSV using pandas' default settings.
csv_path = "/Users/milena/Desktop/tabelas/q3.csv"
df.to_csv(csv_path)
In [36]:
# Select the 'question_4' collection inside the 'results' database.
results_database = client['results']
results_db = results_database['question_4']
In [37]:
# Only documents whose 'community' is one of these names are fetched.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory",
    "dba", "diy", "drupal", "dsp", "electronics", "ell",
    "english", "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Projection: leave out Mongo's '_id' and request only the fields exported
# for question 4.
projection = {
    u'_id': False,
    "women_coef": True,
    "women_pvalue": True,
    "women_rsquare": True,
    "women_rsquare_adj": True,
    "community": True,
    "category": True,
}

community_filter = {'community': {'$in': communities}}
cursor = results_db.find(community_filter, projection)

# Drain the cursor into a list so pandas builds the frame from plain dicts.
documents = list(cursor)
df = pandas.DataFrame(documents)
In [38]:
# Write the question 4 table to disk as CSV using pandas' default settings.
csv_path = "/Users/milena/Desktop/tabelas/q4.csv"
df.to_csv(csv_path)
In [33]:
# Select the 'question_5' collection inside the 'results' database.
results_database = client['results']
results_db = results_database['question_5']
In [34]:
# Only documents whose 'community' is one of these names are fetched.
communities = [
    "academia", "android", "anime", "apple", "askubuntu", "bicycles",
    "biology", "bitcoin", "chemistry", "chinese", "christianity", "codegolf",
    "codereview", "cogsci", "cooking", "crypto", "cs", "cstheory",
    "dba", "diy", "drupal", "dsp", "electronics", "ell",
    "english", "expressionengine", "fitness", "freelancing", "french", "gamedev",
    "gaming", "gardening", "genealogy", "german", "gis", "graphicdesign",
    "hermeneutics", "history", "islam", "japanese", "judaism", "linguistics",
    "magento", "math", "mathematica", "mathoverflow", "mechanics", "money",
    "movies", "music", "outdoors", "parenting", "philosophy", "photo",
    "physics", "pm", "productivity", "programmers", "quant", "raspberrypi",
    "rpg", "russian", "salesforce", "scicomp", "scifi", "security",
    "serverfault", "sharepoint", "skeptics", "sound", "spanish", "sqa",
    "stackapps", "stackoverflow", "stats", "superuser", "tex", "travel",
    "unix", "ux", "webapps", "webmasters", "wordpress", "workplace",
    "writers",
]

# Projection: leave out Mongo's '_id' and request only the fields exported
# for question 5.
projection = {
    u'_id': False,
    "women_coef": True,
    "women_pvalue": True,
    "women_rsquare": True,
    "women_rsquare_adj": True,
    "community": True,
    "category": True,
}

community_filter = {'community': {'$in': communities}}
cursor = results_db.find(community_filter, projection)

# Drain the cursor into a list so pandas builds the frame from plain dicts.
documents = list(cursor)
df = pandas.DataFrame(documents)
In [35]:
# Write the question 5 table to disk as CSV using pandas' default settings.
csv_path = "/Users/milena/Desktop/tabelas/q5.csv"
df.to_csv(csv_path)