In [3]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcdefaults()
mpl.style.use('ggplot')

connection = pymongo.MongoClient('localhost', 27017)

results_db = connection['results']['question_1']
cursor = results_db.find({'questions_pvalue': {'$lt': 0.05}}, {u'_id': False, u'community':True})

communities = list(pandas.DataFrame(list(cursor))['community'])

In [26]:
def plot_histogram(female_sample, male_sample, place, name, bins=None):
    """Overlay normalised histograms of log(1 + x) for two samples on one axes.

    Parameters
    ----------
    female_sample, male_sample : array-like of numbers
        Raw (untransformed) values for each group. Either may be empty.
    place : matplotlib Axes (or any object with the same methods)
        Target on which ``hist``, ``set_ylim``, ``legend`` and ``set_title``
        are called.
    name : str
        Title given to the panel.
    bins : sequence or int, optional
        Forwarded to ``place.hist``. Defaults to ``range(10)``, i.e. unit-wide
        bins on the log-transformed scale.

    Returns
    -------
    None
    """
    if bins is None:
        bins = range(10)

    for sample, label in ((female_sample, "females"), (male_sample, "males")):
        # log1p(x) == log(x + 1), kept accurate for small x.
        values = np.log1p(np.asarray(sample, dtype=float))
        # `density=True` replaces the `normed=True` flag, which newer
        # matplotlib releases no longer accept.
        place.hist(values, bins, density=True, label=label, alpha=0.5)

    # Fixed y-range so panels drawn with this function share the same scale.
    place.set_ylim(ymax=1.2)
    place.legend(loc='upper right')
    place.set_title(name)

In [27]:
# Drop any figures left over from earlier runs of this cell.
plt.close('all')

# Two panels per row. Ceiling division avoids the blank extra row that
# `len(communities)//2 + 1` produced for even counts; at least one row is
# kept because plt.subplots needs a non-empty grid.
rows = max(1, (len(communities) + 1) // 2)

# squeeze=False keeps `axes` two-dimensional even when there is a single row,
# so `axes[row][col]` indexing works for any number of communities.
fig, axes = plt.subplots(nrows=rows, ncols=2, figsize=(10,60), dpi=200, squeeze=False)
fig.tight_layout()

In [28]:
# Draw one panel per community, filling the grid row by row.
# Flattening handles both a 2-D axes grid and the 1-D array that
# plt.subplots returns for a single row.
axes_flat = np.ravel(axes)

for idx, community in enumerate(communities):
    panel = axes_flat[idx]

    # Users with at least one question, answer or comment in this community.
    community_db = connection[community]['statistics']
    cursor = community_db.find({'$or':
                                [{'questions_total':{'$gt':0}},
                                {'answers_total':{'$gt':0}},
                                {'comments_total':{'$gt':0}}] },
                                {u'_id': False, u'comments_total':True, u'gender':True,
                                    u'questions_total':True,u'answers_total':True})

    df = pandas.DataFrame(list(cursor))

    # An empty result set yields a frame without columns; selecting on
    # `gender` would then raise, so fall back to empty samples.
    if 'gender' in df.columns and 'questions_total' in df.columns:
        females_ = df.loc[df['gender'] == 'Female', 'questions_total']
        males_ = df.loc[df['gender'] == 'Male', 'questions_total']
    else:
        females_ = males_ = pandas.Series([], dtype=float)

    if females_.empty and males_.empty:
        # Nothing to draw: keep the panel, labelled, so the grid stays aligned.
        panel.set_title(community)
        continue

    plot_histogram(females_, males_, panel, community)

# Hide grid cells that did not receive a community.
for unused_panel in axes_flat[len(communities):]:
    unused_panel.set_visible(False)

# Re-run the layout now that titles and legends exist.
fig.tight_layout()

# NOTE(review): with the inline backend, figures are usually rendered and
# closed at the end of the cell that created them. If nothing shows up here,
# create the figure in this cell or end the cell with `fig` - TODO confirm.
plt.show()



In [ ]: